## Classification Test

In [None]:
import os
# Enumerate GPUs in PCI bus order and make only device 3 visible to this kernel.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": '3',
})

In [None]:
from tqdm import tqdm, tqdm_notebook

import torch
import torch.nn as nn
from torchvision.models import vgg19, vgg16_bn

import model
import dataloader

In [None]:
# Validation loader from the project's `dataloader` module, requested with batch size 32.
valid_loader = dataloader.imagenet_loader(bs=32)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# model = vgg16_bn(pretrained=True)
# The assignment below rebinds the global name `model` from the project module
# to the network instance. Looking the module up through importlib (instead of
# through the name `model`) keeps this cell re-runnable: on a second execution
# `model.Googlenet_for_CAM` would otherwise be looked up on the network object.
import importlib

model = importlib.import_module('model').Googlenet_for_CAM()
model = model.to(device)
# Classification loss used by the evaluation function.
criterion = nn.CrossEntropyLoss()

In [None]:
def test_accuracy(epoch):
    top1_accuracy = 0.
    top5_accuracy = 0.
    loss = 0.

    model.eval()
    for idx, (inputs, targets) in tqdm_notebook(enumerate(valid_loader)):
        inputs, targets = inputs.to(device), targets.to(device)
        
        outputs = model(inputs)
        loss += criterion(outputs, targets).detach().cpu().item()

        topk = outputs.topk(5,dim=1)[1]
        top1_accuracy += topk[:,0].eq(targets).sum().cpu().item()
        top5_accuracy += topk.eq(torch.stack([targets]*5,dim=1)).max(1)[0].sum().cpu().item()
    
    top1_accuracy /= len(valid_loader.dataset)
    top5_accuracy /= len(valid_loader.dataset)
    loss /= len(valid_loader.dataset)

    print('Classification')
    print(f'===> Test Loss: {loss:.4f}, Top1-Acc: {top1_accuracy*100:.4f}, Top5-Acc: {top5_accuracy*100:.4f}')

In [None]:
test_accuracy(0) 
# Output recorded from earlier runs of this cell, one line per backbone:
# vgg19 ===> Test Loss: 0.0349, Top1-Acc: 72.3760, Top5-Acc: 90.8760
# vgg16_bn ===> Test Loss: 0.0333, Top1-Acc: 73.3600, Top5-Acc: 91.5160

## Localization Test

In [None]:
import json

## Classification & Localization (top-1 Loc)

In [1]:
import os
# Enumerate GPUs in PCI bus order and make only device 0 visible to this kernel.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": '0',
})

In [2]:
from tqdm import tqdm, tqdm_notebook

import torch
import torch.nn as nn
from torchvision.models import vgg16, vgg16_bn

import dataloader

In [3]:
import util

In [4]:
import csv
from os.path import join,expanduser
import re
import numpy as np
from PIL import Image

In [5]:
from cam import CAM

In [6]:
# Validation loader from the project's `dataloader` module, requested with batch size 10.
valid_loader = dataloader.imagenet_loader(bs=10)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Disabled alternative: evaluate a torchvision vgg16_bn directly.
# model = vgg16_bn(pretrained=True)
# model = model.to(device)
# criterion = nn.CrossEntropyLoss()

In [7]:
# CAM helper for the 'googlenet' configuration.
# NOTE(review): the name `map` shadows Python's builtin `map` from here on.
map = CAM('googlenet')
# Shortcuts into the helper's validation dataset; later cells read
# data_dict[file][1] as the ground-truth boxes and join(img_dir, file) as the image path.
data_dict = map.valid_dataset.data_dict
input_files = map.valid_dataset.img_files
img_dir = map.valid_dataset.img_dir

In [8]:
# Evaluate the network held by the CAM helper.
model = map.model
criterion = nn.CrossEntropyLoss()

In [9]:
# Load the stored bounding boxes: every CSV row is `<key>,<python literal>`,
# and the keys are later looked up with the image file names from `input_files`.
import ast

bboxes_dict = {}
with open('runs/googlenet/0.2_0.4/bbox.csv', 'r', newline='') as cf:
    for row in csv.reader(cf):
        k, v = row
        # literal_eval accepts only Python literals, so a malformed or tampered
        # CSV cell raises instead of executing code as eval() would.
        bboxes_dict[k] = ast.literal_eval(v)

In [10]:
# Sanity check: number of entries loaded from the bbox CSV.
len(bboxes_dict)

50000

In [11]:
### def test_accuracy(epoch):
# Joint classification / localization evaluation of `model` over `valid_loader`.
# A sample is "GT-known" localized when the stored box for its file reaches
# IoU >= 0.5 with at least one ground-truth box; top-k localization additionally
# requires the label to be among the top-k predicted classes.
# NOTE(review): assumes the loader yields samples in the same order as
# `input_files` (no shuffling) -- the original indexing made the same assumption.

top1_acc_cls = 0.
top5_acc_cls = 0.
gtknown_acc_loc = 0.
top1_acc_loc = 0.
top5_acc_loc = 0.
loss = 0.

# Samples processed so far; also the dataset index of the next batch's first sample.
count = 0
model.eval()
for idx, (inputs, targets) in tqdm_notebook(enumerate(valid_loader), total=len(valid_loader)):
    inputs, targets = inputs.to(device), targets.to(device)

    outputs = model(inputs)
    # Accumulated but not reported below.
    loss += criterion(outputs, targets).item()

    # Per-sample correctness flags for top-1 and top-5 classification.
    topk = outputs.topk(5, dim=1)[1]
    top1_acc_clss = topk[:, 0].eq(targets).cpu().numpy()
    top5_acc_clss = topk.eq(targets.unsqueeze(1)).any(dim=1).cpu().numpy()

    # Use the real batch size instead of a hard-coded 10 so a different loader
    # batch size or a short final batch still maps to the right files.
    batch_size = targets.size(0)
    gtknown_acc_locs = []
    for data_idx in range(count, count + batch_size):
        # get true bbox
        input_file = input_files[data_idx]
        with Image.open(join(img_dir, input_file)) as img_file:
            img_origin = img_file.convert('RGB')
        bboxes_true = data_dict[input_file][1]
        bboxes_true = util.bboxes_resize(img_origin, bboxes_true, size=224)

        # get proposed bbox
        bbox_propose = bboxes_dict[input_file]

        # Localized if any ground-truth box overlaps the proposal with IoU >= 0.5.
        # (`np.int` no longer exists in current NumPy; a plain bool is enough here,
        # and any() is False for an image without ground-truth boxes.)
        gtknown_acc_locs.append(
            any(util.get_iou(bbox_true, bbox_propose) >= 0.5 for bbox_true in bboxes_true)
        )
    count += batch_size

    top1_acc_locs = np.logical_and(top1_acc_clss, gtknown_acc_locs)
    top5_acc_locs = np.logical_and(top5_acc_clss, gtknown_acc_locs)

    top1_acc_cls += top1_acc_clss.sum()
    top5_acc_cls += top5_acc_clss.sum()
    gtknown_acc_loc += np.array(gtknown_acc_locs).sum()
    top1_acc_loc += top1_acc_locs.sum()
    top5_acc_loc += top5_acc_locs.sum()

#     if idx == 999:
#         break

# Convert the running counts into fractions of the processed samples.
top1_acc_cls /= count
top5_acc_cls /= count
gtknown_acc_loc /= count
top1_acc_loc /= count
top5_acc_loc /= count
print('Classification & Localization')
print(f'===> Top1-Loc: {top1_acc_loc}, Top1-Acc: {top1_acc_cls}, GTKnown: {gtknown_acc_loc}')
print(f'===> Top5-Loc: {top5_acc_loc}, Top5-Acc: {top5_acc_cls}')


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Classification & Localization
===> Top1-Loc: 0.47602, Top1-Acc: 0.7232, GTKnown: 0.61674
===> Top5-Loc: 0.5805, Top5-Acc: 0.90908


#### vgg16_bn 
vanilla ===> Top1-Loc: 0.44906, Top1-Acc: 0.7336, GTKnown: 0.57684, Top5-Loc: 0.54378, Top5-Acc: 0.91516 <br>
ours ===> Top1-Loc: 0.47832, Top1-Acc: 0.7336, GTKnown: 0.61506, Top5-Loc: 0.5803, Top5-Acc: 0.91516

#### GoogLeNet
vanilla ===> Top1-Loc: 0.45264, Top1-Acc: 0.7232, GTKnown: 0.5849, Top5-Loc: 0.55128, Top5-Acc: 0.90908 <br>
ours ===> Top1-Loc: 0.47602, Top1-Acc: 0.7232, GTKnown: 0.61674, Top5-Loc: 0.5805, Top5-Acc: 0.90908

#### AlexNet

In [None]:
# Scratch check of how os.path.join treats an empty middle component
# (on POSIX this evaluates to 'aa/bb/cc').
join('aa','bb','','cc')

## Save the plot

In [3]:
import os
# Enumerate GPUs in PCI bus order (see issue #152) and make only device 2 visible.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": '2',
})

from os.path import join, expanduser
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
from tqdm import tqdm_notebook

from cam import CAM
import util
import dataloader

In [4]:
# CAM helper for the 'vgg' configuration.
# NOTE(review): the name `map` shadows Python's builtin `map` from here on.
map = CAM('vgg')

# First element returned by get_class_dict for ~/data/imagenet.
# NOTE(review): `class_dict` is not referenced again in the visible cells.
class_dict = dataloader.get_class_dict(join(expanduser('~'),'data','imagenet'))[0]
# Shortcuts into the helper's validation dataset; the plotting function reads
# data_dict[file][1] as the ground-truth boxes and join(img_dir, file) as the image path.
data_dict = map.valid_dataset.data_dict
input_files = map.valid_dataset.img_files
img_dir = map.valid_dataset.img_dir

In [10]:
def _add_bbox_patches(axis, bboxes_true, bbox_pred):
    """Draw every ground-truth box in green and the single predicted box in red on `axis`.

    Each box is indexed as (x, y, width, height) when handed to `patches.Rectangle`.
    """
    for bbox_true in bboxes_true:
        axis.add_patch(patches.Rectangle((bbox_true[0], bbox_true[1]), bbox_true[2], bbox_true[3],
                                         linewidth=3, edgecolor='#00d700', facecolor='none'))
    axis.add_patch(patches.Rectangle((bbox_pred[0], bbox_pred[1]), bbox_pred[2], bbox_pred[3],
                                     linewidth=3, edgecolor='r', facecolor='none'))


def save_result_plt(data_idx, th1=0.2, th2=10, mc=15):
    """Render the 2x5 comparison figure for one sample and save it under `imgs/`.

    Top row (values from `map.get_values(..., phase='test')`): input image,
    heat map, heat map overlaid on the input ('GradCAM'), boolean map, and the
    resulting box drawn next to the ground-truth boxes.
    Bottom row (values from `map.get_values(..., phase='train')`): mean and std
    heat maps, their overlay ('DropoutCAM'), boolean map, and the proposed box.

    Uses the notebook globals `map`, `input_files`, `img_dir` and `data_dict`.

    Args:
        data_idx: Index of the sample in `input_files` / the CAM dataset.
        th1: Threshold forwarded to both `get_values` calls.
        th2: Forwarded only to the phase='train' call.
        mc: Forwarded only to the phase='train' call.
            NOTE(review): th2/mc presumably are the std threshold and the
            number of stochastic passes -- confirm against CAM.get_values.

    The figure is written to `imgs/<data_idx zero-padded to 5 digits>.png`;
    the directory is created when missing. Returns None.
    """
    # get true bbox
    input_file = input_files[data_idx]
    with Image.open(join(img_dir, input_file)) as img_file:
        img_origin = img_file.convert('RGB')
    bboxes_true = util.bboxes_resize(img_origin, data_dict[input_file][1], size=224)

    # The heat maps are computed for the ground-truth class of the sample.
    # NOTE(review): the original also called map.topk(input) here but never used
    # the result; it is dropped -- restore it if CAM.topk has side effects that
    # get_values relies on.
    _, target = map.get_item(data_idx)
    att_idx = target.cpu().item()

    # origin
    img, heatmap_origin, _, boolmap_biggest, bbox_pred = \
        map.get_values(data_idx, att_idx, th1, phase='test')

    # propose
    _, heatmap_mean, heatmap_std, _, boolmap_biggest_propose, bbox_propose = \
        map.get_values(data_idx, att_idx, th1, th2, mc, phase='train')

    fig, ax = plt.subplots(2, 5, figsize=(20.5, 8))
    try:
        # ---- top row ----
        ax[0, 0].imshow(img)
        ax[0, 1].imshow(heatmap_origin, cmap='gray')
        ax[0, 2].imshow(img)
        ax[0, 2].imshow(heatmap_origin, alpha=0.5, cmap='jet')
        ax[0, 3].imshow(Image.fromarray((boolmap_biggest*255).astype(np.uint8)), cmap='gray')
        ax[0, 4].imshow(img)
        _add_bbox_patches(ax[0, 4], bboxes_true, bbox_pred)

        # ---- bottom row ----
        ax[1, 0].imshow(heatmap_mean, cmap='gray')
        ax[1, 1].imshow(heatmap_std, cmap='gray')
        im1 = ax[1, 2].imshow(heatmap_mean, cmap='Reds', label='mean')
        im2 = ax[1, 2].imshow(heatmap_std, cmap='Blues', label='std', alpha=0.5)
        # Images have no automatic legend entries, so build proxy patches
        # coloured from each colormap.
        patch = [patches.Patch(color=im1.cmap(150), label='mean'),
                 patches.Patch(color=im2.cmap(150), label='std')]
        ax[1, 2].legend(handles=patch, loc='best')
        ax[1, 3].imshow(Image.fromarray((boolmap_biggest_propose*255).astype(np.uint8)), cmap='gray')
        ax[1, 4].imshow(img)
        _add_bbox_patches(ax[1, 4], bboxes_true, bbox_propose)

        titles = (
            ('input', '$L^{grad}$', 'GradCAM', 'boolean map', 'bounding box'),
            ('$L^{mean}$', r'$L^{std}$', 'DropoutCAM', 'boolean map', 'bounding box'),
        )
        for row, row_titles in enumerate(titles):
            for col, title in enumerate(row_titles):
                ax[row, col].set_title(title, fontsize=15)
                ax[row, col].axis('off')

        fig.tight_layout()
        # savefig does not create missing directories.
        os.makedirs('imgs', exist_ok=True)
        fig.savefig(join('imgs', f'{data_idx:05d}.png'))
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

In [6]:
# for data_idx in tqdm_notebook(range(1000)):
#     save_result_plt(data_idx, th1=0.4, th2=0.4, mc=30)

In [11]:
# Settings that looked good so far; another candidate to try: th1=0.4, th2=0.4, mc=50.
save_result_plt(177,th1=0.2, th2=0.4, mc=20)
# Disabled batch run over the first 400 samples:
# for data_idx in np.arange(0,400):
#     save_result_plt(data_idx,th1=0.2, th2=0.4, mc=20)

Candidate samples (`data_idx`): 63, 74, 81, 96, 129, 155, 177, 328, 387

Samples where the map may really capture less discriminative parts: 327, 177, 169, 81, 74

## Does the deviation map capture less discriminative parts?

In [12]:
import os
# Enumerate GPUs in PCI bus order (see issue #152) and make only device 3 visible.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": '3',
})

from os.path import join, expanduser
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
from tqdm import tqdm_notebook

from cam import CAM
import util
import dataloader

In [13]:
# CAM helper for the 'vgg' configuration.
# NOTE(review): the name `map` shadows Python's builtin `map` from here on.
map = CAM('vgg')

# First element returned by get_class_dict for ~/data/imagenet.
# NOTE(review): `class_dict` is not referenced again in the visible cells.
class_dict = dataloader.get_class_dict(join(expanduser('~'),'data','imagenet'))[0]
# Shortcuts into the helper's validation dataset; the plotting function reads
# data_dict[file][1] as the ground-truth boxes and join(img_dir, file) as the image path.
data_dict = map.valid_dataset.data_dict
input_files = map.valid_dataset.img_files
img_dir = map.valid_dataset.img_dir

In [16]:
def save_result_plt(data_idxs, th1=0.2, th2=0.4, mc=30):
    """Save a grid with one row per sample: input, gradient heat map, std heat map.

    Every panel is overlaid with the sample's ground-truth boxes in green.
    Column titles are drawn on the first row only. The figure is written to
    'z_multi.png' in the working directory.

    NOTE: once this cell runs, this definition replaces the single-sample
    `save_result_plt(data_idx, ...)` defined earlier in the notebook.

    Uses the notebook globals `map`, `input_files`, `img_dir` and `data_dict`.

    Args:
        data_idxs: Sequence of sample indices; one figure row per entry.
        th1: Threshold forwarded to both `map.get_values` calls.
        th2: Forwarded only to the phase='train' call.
        mc: Forwarded only to the phase='train' call.
            NOTE(review): th2/mc presumably are the std threshold and the
            number of stochastic passes -- confirm against CAM.get_values.

    Returns None.
    """
    n_rows = len(data_idxs)
    # squeeze=False keeps `ax` two-dimensional even for a single sample;
    # with the default, one row yields a 1-D array and ax[idx, col] fails.
    fig, ax = plt.subplots(n_rows, 3, figsize=(12, 4*n_rows), squeeze=False)

    try:
        for idx, data_idx in enumerate(data_idxs):
            # get true bbox
            input_file = input_files[data_idx]
            with Image.open(join(img_dir, input_file)) as img_file:
                img_origin = img_file.convert('RGB')
            bboxes_true = util.bboxes_resize(img_origin, data_dict[input_file][1], size=224)

            # Heat maps are computed for the ground-truth class of the sample.
            # NOTE(review): the original also called map.topk(input) here but never
            # used the result; it is dropped -- restore it if CAM.topk has side
            # effects that get_values relies on.
            _, target = map.get_item(data_idx)
            att_idx = target.cpu().item()

            # origin
            img, heatmap_origin, _, _, _ = map.get_values(data_idx, att_idx, th1, phase='test')

            # propose
            _, _, heatmap_std, _, _, _ = map.get_values(data_idx, att_idx, th1, th2, mc, phase='train')

            # (image, imshow kwargs, title, title font size, title pad) per column.
            panels = (
                (img, {}, 'input', 26, 15),
                (heatmap_origin, {'cmap': 'gray'}, r'$L^{grad}$', 26, 13),
                (heatmap_std, {'cmap': 'gray'}, r'$L^{std}$', 25, 13),
            )
            for col, (image, imshow_kwargs, title, fontsize, pad) in enumerate(panels):
                axis = ax[idx, col]
                axis.imshow(image, **imshow_kwargs)
                for bbox_true in bboxes_true:
                    # Boxes are indexed as (x, y, width, height).
                    axis.add_patch(patches.Rectangle((bbox_true[0], bbox_true[1]),
                                                     bbox_true[2], bbox_true[3],
                                                     linewidth=3, edgecolor='#00d700',
                                                     facecolor='none'))
                if idx == 0:
                    axis.set_title(title, fontsize=fontsize, pad=pad)
                axis.axis('off')

        fig.tight_layout()
        fig.savefig('z_multi.png')
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

In [17]:
# Render the three samples picked from the "less discriminative" candidates noted above.
save_result_plt([169, 81, 74])

## Correct bboxes violinplot

In [None]:
import os
# Enumerate GPUs in PCI bus order and make only device 3 visible to this kernel.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": '3',
})

In [None]:
from tqdm import tqdm, tqdm_notebook

import torch
import torch.nn as nn
from torchvision.models import vgg16, vgg16_bn

import dataloader
import util
from cam import CAM

import csv
from os.path import join,expanduser
import re
import numpy as np
from PIL import Image
from copy import copy
import matplotlib.pyplot as plt

In [None]:
# Batch size 1: the collection loop below uses the loader's iteration index as the sample index.
bs = 1
valid_loader = dataloader.imagenet_loader(bs=bs)
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# CAM helper for the 'vgg' configuration; only its validation dataset is used in this section.
# NOTE(review): the name `map` shadows Python's builtin `map` from here on.
map = CAM('vgg')
# data_dict[file][1] is read as the ground-truth boxes, join(img_dir, file) as the image path.
data_dict = map.valid_dataset.data_dict
input_files = map.valid_dataset.img_files
img_dir = map.valid_dataset.img_dir

In [None]:
def get_bboxes_dict(path):
    """Read a two-column CSV of `key,literal` rows into a dict.

    Args:
        path: Path of the CSV file. Every row must have exactly two cells:
            a key and the text of a Python literal (e.g. a list or tuple).

    Returns:
        dict mapping each key to the parsed literal.

    Raises:
        ValueError / SyntaxError: if a value cell is not a plain Python
            literal, or (ValueError) if a row does not have two cells.
    """
    # Local import so the function does not depend on the notebook's import cell.
    import ast

    bboxes_dict = {}
    with open(path, 'r', newline='') as cf:
        for row in csv.reader(cf):
            k, v = row
            # literal_eval parses literals only; unlike eval() it cannot
            # execute code embedded in the file.
            bboxes_dict[k] = ast.literal_eval(v)
    return bboxes_dict

# Load the stored box tables for both backbones and both run directories,
# in the same order as the names on the left.
bboxes_vgg_2, bboxes_vgg_24, bboxes_googlenet_2, bboxes_googlenet_24 = (
    get_bboxes_dict(csv_path)
    for csv_path in (
        'runs/vgg/0.2/bbox.csv',
        'runs/vgg/0.2_0.4/bbox.csv',
        'runs/googlenet/0.2/bbox.csv',
        'runs/googlenet/0.2_0.4/bbox.csv',
    )
)

In [None]:
def get_iou_proposes(data_idx, bboxes_dict):
    """Return the best IoU of a sample's stored box if it counts as a hit, else None.

    Uses the notebook globals `input_files`, `img_dir`, `data_dict` and `util`.

    Args:
        data_idx: Index into `input_files`.
        bboxes_dict: Mapping from image file name to the stored (proposed) box.

    Returns:
        The maximum IoU between the stored box and the sample's ground-truth
        boxes when that maximum is at least 0.5; otherwise None. None is also
        returned when the sample has no ground-truth boxes.
    """
    # get true bbox
    input_file = input_files[data_idx]
    with Image.open(join(img_dir, input_file)) as img_file:
        img_origin = img_file.convert('RGB')
    bboxes_true = util.bboxes_resize(img_origin, data_dict[input_file][1], size=224)

    # get proposed bbox
    bbox_propose = bboxes_dict[input_file]

    # get iou against every ground-truth box
    ious = [util.get_iou(bbox_true, bbox_propose) for bbox_true in bboxes_true]
    if not ious:
        return None

    # Compare the maximum directly; the removed `np.int` alias is not needed.
    iou_max_val = np.max(ious)
    return iou_max_val if iou_max_val >= 0.5 else None

In [None]:
# IoUs of the correctly localized samples, one list per stored box table.
iou_proposes_vgg_2 = []
iou_proposes_vgg_24 = []
iou_proposes_googlenet_2 = []
iou_proposes_googlenet_24 = []

# Pair every box table with the list that collects its accepted IoUs.
_iou_runs = (
    (bboxes_vgg_2, iou_proposes_vgg_2),
    (bboxes_vgg_24, iou_proposes_vgg_24),
    (bboxes_googlenet_2, iou_proposes_googlenet_2),
    (bboxes_googlenet_24, iou_proposes_googlenet_24),
)

# Only the sample index is needed, so iterate over indices instead of pulling
# every batch through the loader; len(valid_loader) is the number of
# iterations the loader itself would produce.
for data_idx in tqdm_notebook(range(len(valid_loader))):
    for bboxes_table, accepted_ious in _iou_runs:
        val = get_iou_proposes(data_idx, bboxes_table)
        # None means the stored box did not reach the hit threshold.
        if val is not None:
            accepted_ious.append(val)

#     if data_idx == 3000:
#         break
        

In [None]:
import seaborn as sns
import pandas as pd

In [None]:
# Use seaborn's "whitegrid" style for the plot below.
sns.set(style="whitegrid")

In [None]:
# Long-format columns for the violin plot: the four IoU lists concatenated,
# plus a method label and a model label repeated once per IoU value.
ious = dict(
    value=[
        *iou_proposes_vgg_2,
        *iou_proposes_vgg_24,
        *iou_proposes_googlenet_2,
        *iou_proposes_googlenet_24,
    ],
    method=(
        len(iou_proposes_vgg_2) * ['GradCAM']
        + len(iou_proposes_vgg_24) * ['DropoutCAM']
        + len(iou_proposes_googlenet_2) * ['GradCAM']
        + len(iou_proposes_googlenet_24) * ['DropoutCAM']
    ),
    model=(
        (len(iou_proposes_vgg_2) + len(iou_proposes_vgg_24)) * ['VGG']
        + (len(iou_proposes_googlenet_2) + len(iou_proposes_googlenet_24)) * ['GoogLeNet-Drop']
    ),
)

In [None]:
# One row per accepted IoU, with columns 'value', 'method' and 'model'.
df = pd.DataFrame(ious)

In [None]:
# Split violins: one violin per model, with the two methods on opposite halves.
plt.figure(figsize=(8, 4))
sns.violinplot(
    data=df,
    x='model',
    y='value',
    hue='method',
    split=True,
    palette='Set2',
    scale='count',
    inner='quartile',
    cut=0,
    width=0.6,
)
plt.legend(loc='upper center')
plt.tight_layout()
# plt.savefig('iou_comparison.png')
# plt.close()