In [None]:
import glob
import json
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import yaml
from captum.attr import GradientShap
from captum.attr import visualization as viz
from torch.autograd import Variable

from models.yolov3 import *
from utils.utils import *
from utils.parse_yolo_weights import parse_yolo_weights
from utils.vis_bbox import vis_bbox

## Visualization
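Set the config file, the model weights (or a checkpoint), the image path, and the output index `(layer, anchor, x, y)` in the cell below. The cell then computes GradientSHAP attributions for that output and renders them as heat maps.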

In [None]:
# --- User settings: fill these in before running the cell ---
cfg = ''  # config file path
gpu = 0  # CUDA device index; a negative value keeps everything on the CPU
image_path = ''  # image file path (this is the name the loading code reads)
image = image_path  # legacy alias: the setting was originally called `image`
baselines = None  # None -> an all-zero baseline is built from the input image
target_y = 'cls'  # 'obj' or 'cls'; translated to an output channel index further down
output_id = [0, 0, 0, 0]  # output index (layer, anchor, x, y)
weights_path = ''  # weights file handed to parse_yolo_weights
ckpt = ''  # checkpoint handed to torch.load; only used when weights_path is empty

# GradientShap settings read by the attribution step of this cell.
# The values below are Captum's documented defaults.
n_samples = 5  # forwarded to GradientShap.attribute
stdevs = 0.0  # forwarded to GradientShap.attribute
multiply_by_inputs = True  # forwarded to the GradientShap constructor

# Parse the YAML configuration; `cfg` is rebound from the file path to the
# parsed dict, which the lookups below index into.
with open(cfg, 'r') as f:
    # yaml.load() without an explicit Loader is rejected by PyYAML >= 6, and the
    # legacy default loader could construct arbitrary Python objects.
    cfg = yaml.safe_load(f)
imgsize = cfg['TEST']['IMGSIZE']
model = YOLOv3(cfg['MODEL'])
num_classes = cfg['MODEL']['N_CLASSES']

confthre = cfg['TEST']['CONFTHRE']
nmsthre = cfg['TEST']['NMSTHRE']
if gpu >= 0:
    model.cuda(gpu)


assert weights_path or ckpt, 'One of --weights_path and --ckpt must be specified'

# weights_path takes precedence over ckpt when both are set.
if weights_path:
    print("loading yolo weights %s" % (weights_path))
    parse_yolo_weights(model, weights_path)
elif ckpt:
    print("loading checkpoint %s" % (ckpt))
    # Deserialize onto the CPU so a checkpoint saved from a GPU also loads on a
    # CPU-only run; load_state_dict copies the values into the model's own
    # parameters wherever they live.
    state = torch.load(ckpt, map_location='cpu')
    # A checkpoint is either a dict wrapping the weights under
    # 'model_state_dict' or the bare state dict itself.
    if 'model_state_dict' in state.keys():
        model.load_state_dict(state['model_state_dict'])
    else:
        model.load_state_dict(state)

model.eval()

# cv2.imread reports an unreadable or missing file by returning None instead of
# raising, so fail here with a clear message rather than on the .copy() below.
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError("could not read image: %r" % (image_path,))
img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
img, info_img = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
# Scale to [0, 1], move channels first and add a batch axis -> (1, C, H, W).
img = np.transpose(img / 255., (2, 0, 1))
img = torch.from_numpy(img).float().unsqueeze(0)

# Put the input on the same device the model was moved to (model.cuda(gpu)),
# not merely the current default CUDA device.
device = torch.device('cuda', gpu) if gpu >= 0 else torch.device('cpu')
img = img.to(device)

if baselines is None:
    # Default baseline: an all-zero image with the input's shape and device.
    baselines = torch.zeros_like(img)
else:
    # Accept anything tensor-like and align its dtype/device with the input.
    baselines = torch.as_tensor(baselines, dtype=torch.float32, device=device)

# Translate the symbolic target into the index GradientShap selects from the
# wrapper's output; any other value (e.g. an explicit index) passes through.
# NOTE(review): 4 / 5 presumably address the objectness and first class-score
# channels of the YOLO output -- confirm against models/yolov3.
if target_y == 'obj':
    target_y = 4
elif target_y == 'cls':
    target_y = 5


# Forward function handed to GradientShap.
def yolo_wrapper(inp, output_id):
    """Run the model with shap=True and return its output at one location.

    `output_id` is unpacked as (layer, anchor, x, y). The selected layer output
    is indexed as [batch, anchor, y, x], keeping the batch axis and any
    trailing axes.
    """
    layer_idx, anchor_idx, col, row = output_id
    layer_outputs = model(inp, shap=True)
    selected_layer = layer_outputs[layer_idx]
    return selected_layer[:, anchor_idx, row, col]
                
# Build the explainer around the wrapper and attribute the chosen output
# (selected by output_id and target_y) to the input pixels.
with torch.no_grad():
    gs = GradientShap(yolo_wrapper, multiply_by_inputs=multiply_by_inputs)
    attr, delta = gs.attribute(
        img,
        additional_forward_args=output_id,
        n_samples=n_samples,
        stdevs=stdevs,
        baselines=baselines,
        target=target_y,
        return_convergence_delta=True,
    )

# postprocessing of attribution: (1, C, H, W) tensors -> (H, W, C) arrays
attr = np.transpose(attr.squeeze().cpu().detach().numpy(), (1, 2, 0))
original_image = np.transpose(img.squeeze().cpu().detach().numpy(), (1, 2, 0))

# visualization of attribution: one heat map per sign
pos_fig, pos_axis = viz.visualize_image_attr(attr,
                                             original_image,
                                             "heat_map",
                                             "positive",
                                             cmap="Reds",
                                             show_colorbar=True,
                                             fig_size=(8, 6))
# The second map must use the "negative" sign; with "positive" it merely
# repeated the first figure in a different colour map.
neg_fig, neg_axis = viz.visualize_image_attr(attr,
                                             original_image,
                                             "heat_map",
                                             "negative",
                                             cmap="Blues",
                                             show_colorbar=True,
                                             fig_size=(8, 6))


## Evaluation/Data Selection with SHAP

In [None]:
# --- User settings: fill these in before running the cell ---
cfg = ''  # config file path
gpu = 0  # CUDA device index; a negative value keeps everything on the CPU
image_path = ''  # image file path (this is the name the loading code reads)
image = image_path  # legacy alias: the setting was originally called `image`
baselines = None  # None -> an all-zero baseline is built from the input image
target_y = 'cls'  # 'obj' or 'cls'; translated to an output channel index further down
output_id = [0, 0, 0, 0]  # output index (layer, anchor, x, y)
weights_path = ''  # weights file handed to parse_yolo_weights
ckpt = ''  # checkpoint handed to torch.load; only used when weights_path is empty
bboxes = []  # bounding boxes, each unpacked as [x1, y1, x2, y2] by the scoring code

# GradientShap settings read by the attribution step of this cell.
# The values below are Captum's documented defaults.
n_samples = 5  # forwarded to GradientShap.attribute
stdevs = 0.0  # forwarded to GradientShap.attribute
multiply_by_inputs = True  # forwarded to the GradientShap constructor

# Parse the YAML configuration; `cfg` is rebound from the file path to the
# parsed dict, which the lookups below index into.
with open(cfg, 'r') as f:
    # yaml.load() without an explicit Loader is rejected by PyYAML >= 6, and the
    # legacy default loader could construct arbitrary Python objects.
    cfg = yaml.safe_load(f)
imgsize = cfg['TEST']['IMGSIZE']
model = YOLOv3(cfg['MODEL'])
num_classes = cfg['MODEL']['N_CLASSES']

confthre = cfg['TEST']['CONFTHRE']
nmsthre = cfg['TEST']['NMSTHRE']
if gpu >= 0:
    model.cuda(gpu)


assert weights_path or ckpt, 'One of --weights_path and --ckpt must be specified'

# weights_path takes precedence over ckpt when both are set.
if weights_path:
    print("loading yolo weights %s" % (weights_path))
    parse_yolo_weights(model, weights_path)
elif ckpt:
    print("loading checkpoint %s" % (ckpt))
    # Deserialize onto the CPU so a checkpoint saved from a GPU also loads on a
    # CPU-only run; load_state_dict copies the values into the model's own
    # parameters wherever they live.
    state = torch.load(ckpt, map_location='cpu')
    # A checkpoint is either a dict wrapping the weights under
    # 'model_state_dict' or the bare state dict itself.
    if 'model_state_dict' in state.keys():
        model.load_state_dict(state['model_state_dict'])
    else:
        model.load_state_dict(state)

model.eval()

# cv2.imread reports an unreadable or missing file by returning None instead of
# raising, so fail here with a clear message rather than on the .copy() below.
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError("could not read image: %r" % (image_path,))
img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
img, info_img = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
# Scale to [0, 1], move channels first and add a batch axis -> (1, C, H, W).
img = np.transpose(img / 255., (2, 0, 1))
img = torch.from_numpy(img).float().unsqueeze(0)

# Put the input on the same device the model was moved to (model.cuda(gpu)),
# not merely the current default CUDA device.
device = torch.device('cuda', gpu) if gpu >= 0 else torch.device('cpu')
img = img.to(device)

if baselines is None:
    # Default baseline: an all-zero image with the input's shape and device.
    baselines = torch.zeros_like(img)
else:
    # Accept anything tensor-like and align its dtype/device with the input.
    baselines = torch.as_tensor(baselines, dtype=torch.float32, device=device)

# Translate the symbolic target into the index GradientShap selects from the
# wrapper's output; any other value (e.g. an explicit index) passes through.
# NOTE(review): 4 / 5 presumably address the objectness and first class-score
# channels of the YOLO output -- confirm against models/yolov3.
if target_y == 'obj':
    target_y = 4
elif target_y == 'cls':
    target_y = 5


# Forward function handed to GradientShap.
def yolo_wrapper(inp, output_id):
    """Run the model with shap=True and return its output at one location.

    `output_id` is unpacked as (layer, anchor, x, y). The selected layer output
    is indexed as [batch, anchor, y, x], keeping the batch axis and any
    trailing axes.
    """
    layer_idx, anchor_idx, col, row = output_id
    layer_outputs = model(inp, shap=True)
    selected_layer = layer_outputs[layer_idx]
    return selected_layer[:, anchor_idx, row, col]
                
# Build the explainer around the wrapper, then attribute the output selected by
# output_id / target_y to the input; both steps share one no_grad context.
with torch.no_grad():
    gs = GradientShap(yolo_wrapper, multiply_by_inputs=multiply_by_inputs)
    attr, delta = gs.attribute(
        img,
        additional_forward_args=output_id,
        n_samples=n_samples,
        stdevs=stdevs,
        baselines=baselines,
        target=target_y,
        return_convergence_delta=True,
    )

# Take the spatial size from the network input itself, which was built as a
# (1, C, H, W) tensor, instead of hard-coding 416: the mask then stays correct
# for any IMGSIZE set in the config.
img_H, img_W = (int(dim) for dim in img.shape[-2:])

def zscore(x, axis=None):
    """Standardize `x` to zero mean and unit standard deviation.

    Args:
        x: array-like of numbers.
        axis: axis (or axes) along which mean and standard deviation are
            computed; None uses the whole array.

    Returns:
        An array shaped like `x` holding (x - mean) / std, where std is the
        population standard deviation (np.std default). Wherever std is 0 (a
        constant slice) the result is 0 rather than NaN/inf.
    """
    x = np.asarray(x)
    centered = x - x.mean(axis=axis, keepdims=True)
    spread = np.std(x, axis=axis, keepdims=True)
    # Pre-fill with zeros and divide only where the spread is non-zero, so a
    # constant input does not trigger a division by zero.
    scaled = np.zeros_like(centered, dtype=np.result_type(centered, spread))
    np.divide(centered, spread, out=scaled, where=spread != 0)
    return scaled

# GradientShap returns `attr` as a torch tensor shaped like the (1, C, H, W)
# input. Bring it to an (H, W, C) NumPy array first so that axis=2 below really
# is the channel axis; an `attr` that is already a NumPy array is used as-is.
attr_hwc = attr
if torch.is_tensor(attr_hwc):
    attr_hwc = np.transpose(attr_hwc.squeeze().cpu().detach().numpy(), (1, 2, 0))

# Per-pixel attribution: sum over channels, then standardize over the image.
pixel_attr = zscore(np.sum(attr_hwc, axis=2))
mask_h, mask_w = pixel_attr.shape

# Binary mask of the pixels covered by at least one bounding box. Coordinates
# are clipped to the image so negative values cannot wrap around and oversized
# boxes cannot index out of range.
# NOTE(review): boxes are assumed to be in pixel coordinates of the
# preprocessed image -- confirm against whatever produces `bboxes`.
pixel_mask = np.zeros((mask_h, mask_w))
for x1, y1, x2, y2 in bboxes:
    top, bottom = (min(max(int(v), 0), mask_h) for v in (y1, y2))
    left, right = (min(max(int(v), 0), mask_w) for v in (x1, x2))
    pixel_mask[top:bottom, left:right] = 1

# Count covered pixels from the mask, so overlapping boxes are counted once.
in_area = int(pixel_mask.sum())
out_area = mask_h * mask_w - in_area

inside = pixel_mask > 0
non_negative = pixel_attr >= 0

# Non-negative attributions found inside the boxes, in row-major order.
l_in = pixel_attr[inside & non_negative].tolist()

# Sums of non-negative / negative attribution inside and outside the boxes.
in_pos = float(pixel_attr[inside & non_negative].sum())
in_neg = float(pixel_attr[inside & ~non_negative].sum())
out_pos = float(pixel_attr[~inside & non_negative].sum())
out_neg = float(pixel_attr[~inside & ~non_negative].sum())

# Normalize by region area so the scores are comparable between regions of
# different size; an empty region keeps its zero sums.
if in_area > 0:
    in_pos = in_pos / in_area
    in_neg = in_neg / in_area
if out_area > 0:
    out_pos = out_pos / out_area
    out_neg = out_neg / out_area


## Training with SHAP-Regularization
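Training uses an annotation JSON file: a dict keyed by image file name, where each entry lists that image's annotated regions (class id and bounding box), for example: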
* {"image_file_name":
    * {"regions":
        * [ {"class_id": 0, "bb":[0,0,0,0]},...

In [None]:
# Launch SHAP-regularized training with train_vd_reg.py, passing the config, the
# initial weights, the checkpoint directory and the annotation JSON (--anno_file).
# NOTE(review): the units of the --*_interval options are not visible in this
# notebook -- confirm them in train_vd_reg.py.
!python train_vd_reg.py --cfg config/config.cfg --weights_path weights/darknet53.conv.74 --checkpoint_interval 100 --checkpoint_dir checkpoints --anno_file anno_data.json --shap_interval 10 --eval_interval 10