## ViT Qualitative

In [1]:
# !!DO NOT EDIT!!

import os

import torch
import pandas as pd
import numpy as np

import clip
import utils
import data_utils
import similarity
import PySimpleGUI as psg
from matplotlib import pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import matplotlib


## Settings

In [2]:
# Value passed as `clip_name` to utils.save_activations / utils.get_save_names.
clip_name = 'ViT-B/16'
# Probing dataset identifier; also handed to data_utils.get_data below.
d_probe = 'imagenet_val'
# Text file holding the candidate concepts; it is read below and split on newlines.
concept_set = 'data/10k.txt'
# Forwarded as `batch_size` to utils.save_activations.
batch_size = 30
# Forwarded to utils.save_activations and utils.get_similarity_from_activations.
device = 'cuda'
# Forwarded as `pool_mode` to utils.save_activations / utils.get_save_names.
pool_mode = 'avg'

# Forwarded as `save_dir` to utils.save_activations / utils.get_save_names.
save_dir = 'saved_activations'
# Similarity function handed to utils.get_similarity_from_activations.
similarity_fn = similarity.soft_wpmi

In [3]:
# Model and layer to dissect; both are forwarded to utils.save_activations
# and utils.get_save_names in the cells below.
target_name = 'resnet50'
target_layer = 'layer1'

## Run CLIP-Dissect

In [4]:
# Compute activations for the configured CLIP model / target layer on the
# probing dataset (the call receives `save_dir`, where results are kept).
utils.save_activations(
    clip_name=clip_name,
    target_name=target_name,
    target_layers=[target_layer],
    d_probe=d_probe,
    concept_set=concept_set,
    batch_size=batch_size,
    device=device,
    pool_mode=pool_mode,
    save_dir=save_dir,
)

# Concept vocabulary: one entry per newline-separated chunk of the file.
with open(concept_set, 'r') as concept_file:
    concept_text = concept_file.read()
words = concept_text.split('\n')

# Probing dataset; indexed later to fetch (image, label) pairs.
pil_data = data_utils.get_data(d_probe)

In [7]:
# Resolve the names under which the target, CLIP image and CLIP text
# activations were stored for this configuration.
save_names = utils.get_save_names(
    clip_name=clip_name,
    target_name=target_name,
    target_layer=target_layer,
    d_probe=d_probe,
    concept_set=concept_set,
    pool_mode=pool_mode,
    save_dir=save_dir,
)
target_save_name, clip_save_name, text_save_name = save_names

# Similarity between the target activations and the concept set, plus the
# target features themselves (used below to pick top-activating images).
similarities, target_feats = utils.get_similarity_from_activations(
    target_save_name,
    clip_save_name,
    text_save_name,
    similarity_fn,
    device=device,
)

100%|██████████| 256/256 [00:00<00:00, 512.76it/s]


torch.Size([256, 9894])


## Visualize

In [6]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import numpy as np

In [7]:
# Column names matching the four-element rows built in the labelling loop:
# [neuron index, verdict, predicted concept, free-text label].
fields = ['neuron', 'accurate', 'prediction', 'label']
# Accumulates one row per labelled neuron; re-running this cell resets it.
rows = []

In [8]:
# For every column of target_feats: the 10 largest values along dim 0 and
# their positions. The positions are used further down to index pil_data.
top_vals, top_ids = torch.topk(target_feats, k=10, dim=0)

# Indices of the four rows of `similarities` with the highest maximum value.
best_similarity = torch.max(similarities, dim=1).values
neurons_to_check = torch.sort(best_similarity, descending=True).indices[:4]

# Use the Tk backend so figures can be embedded in a Tk canvas.
matplotlib.use('TkAgg')

font_size = 14
font = dict(size=font_size)
matplotlib.rc('font', **font)


def draw_figure(canvas, figure):
    """Embed ``figure`` in ``canvas`` and return the new figure canvas.

    A ``FigureCanvasTkAgg`` is created for ``figure`` with ``canvas`` as its
    master, drawn once, and its Tk widget is packed at the bottom of the
    master, filling and expanding in both directions.
    """
    figure_canvas = FigureCanvasTkAgg(figure, canvas)
    figure_canvas.draw()
    widget = figure_canvas.get_tk_widget()
    widget.pack(side='bottom', fill='both', expand=1)
    return figure_canvas
psg.set_options(font=('Arial Bold', 16))

# Window rows, top to bottom: free-text label entry, verdict buttons, and the
# canvas that hosts the matplotlib figure.
label_row = [
    psg.Text('Label: '),
    psg.Input(key='-LABEL-', do_not_clear=False, focus=True),
]
button_row = [psg.Button("yes"), psg.Button("maybe"), psg.Button("no"), psg.Exit()]
canvas_row = [psg.Canvas(key='-CANVAS-')]
layout = [label_row, button_row, canvas_row]

window = psg.Window(
    'Labeller',
    layout,
    modal=True,
    size=(715, 300),
    resizable=True,
    finalize=True,
    element_justification='left',
)
fig = matplotlib.figure.Figure(figsize=[15, 2])
window['-LABEL-'].set_focus()

# Alt+q / Alt+w / Alt+e are reported as the events "ALT-1" / "ALT-2" / "ALT-3".
for tk_sequence, bound_event in (
    ("<Alt_L><q>", "ALT-1"),
    ("<Alt_L><w>", "ALT-2"),
    ("<Alt_L><e>", "ALT-3"),
):
    window.bind(tk_sequence, bound_event)

tkcanvas = draw_figure(window['-CANVAS-'].TKCanvas, fig)

# Interactive labelling loop: for each neuron, show its top-activating probe
# images next to the best-matching concept, wait for a verdict, and record
# [neuron, verdict, predicted concept, free-text label] in `rows`.

# Keyboard shortcuts arrive as "ALT-n" events; translate them to the verdict
# the matching button would have produced. An exact dict lookup is used
# because `event in ("ALT-1")` is a substring test against a plain string,
# not a tuple membership test.
shortcut_verdicts = {"ALT-1": 'yes', "ALT-2": 'maybe', "ALT-3": 'no'}

try:
    for orig_id in range(len(similarities)):
        fig.clear()
        # Destroy (rather than merely un-pack) the previous canvas widget so
        # hidden Tk widgets do not accumulate, one per labelled neuron.
        tkcanvas.get_tk_widget().destroy()

        vals, ids = torch.topk(similarities[orig_id], k=5, largest=True)
        prediction = words[int(ids[0])]

        fig.text(0.13, 0.9, "Neuron {}:".format(int(orig_id)), size=font_size)
        fig.text(0.3, 0.9, "CLIP-Dissect:", size=font_size)
        fig.text(0.5, 0.9, prediction, size=font_size)

        # 2 x 5 grid holding the ten top-activating images for this neuron.
        axs = fig.subplots(nrows=2, ncols=5)
        for i, top_id in enumerate(top_ids[:, orig_id].tolist()):
            im, label = pil_data[top_id]
            im = im.resize((375, 375))
            ax = axs[i // 5, i % 5]
            ax.imshow(im)
            ax.axis('off')

        tkcanvas = draw_figure(window['-CANVAS-'].TKCanvas, fig)
        window['-LABEL-'].set_focus(force=True)
        event, values = window.read()

        # Closing the window or pressing Exit ends the session without
        # recording a row for the neuron currently shown.
        if event == psg.WIN_CLOSED or event == 'Exit':
            break
        event = shortcut_verdicts.get(event, event)
        rows.append([orig_id, event, prediction, values['-LABEL-']])
finally:
    # Always release the window, even if rendering or indexing raised.
    window.close()
    fig.clear()


In [11]:
# Dump every collected [neuron, verdict, prediction, label] row for a quick check.
print(rows)

[[830, 'no', 'bookstore', ''], [831, 'yes', 'habitat', ''], [832, 'yes', 'grid', ''], [833, 'no', 'dogs', ''], [834, 'maybe', 'dress', ''], [835, 'maybe', 'puppy', ''], [836, 'no', 'florists', ''], [837, 'maybe', 'sink', ''], [838, 'maybe', 'gambling', ''], [839, 'yes', 'leaf', ''], [840, 'yes', 'cats', ''], [841, 'maybe', 'textile', ''], [842, 'yes', 'automotive', ''], [843, 'yes', 'grass', ''], [844, 'yes', 'stripes', ''], [845, 'yes', 'nursery', ''], [846, 'maybe', 'kitchen', ''], [847, 'no', 'juvenile', ''], [848, 'no', 'tile', ''], [849, 'no', 'furnished', ''], [850, 'yes', 'buildings', ''], [851, 'no', 'basename', ''], [852, 'yes', 'texture', ''], [853, 'yes', 'stripes', ''], [854, 'maybe', 'clothes', ''], [855, 'yes', 'steps', ''], [856, 'yes', 'web', ''], [857, 'yes', 'exterior', ''], [858, 'yes', 'furniture', ''], [859, 'yes', 'bedrooms', ''], [860, 'maybe', 'dogs', ''], [861, 'no', 'yeast', ''], [862, 'maybe', 'texture', ''], [863, 'yes', 'bird', ''], [864, 'yes', 'fur', ''],

In [11]:
import csv

In [19]:
# Append this session's labels to the results file of the layer that was
# actually dissected. The name is derived from `target_layer` so that labels
# for one layer can never end up in another layer's file.
accuracy_csv = 'imagenet_10k_{}_accuracy.csv'.format(target_layer)

# The metrics cells read these files with skiprows=1, i.e. they expect a
# header line; write it once, when the file is new or still empty.
needs_header = not os.path.exists(accuracy_csv) or os.path.getsize(accuracy_csv) == 0

with open(accuracy_csv, 'a', newline='') as f:
    write = csv.writer(f)
    if needs_header:
        write.writerow(fields)
    write.writerows(rows)

## Layer 1 Metrics

In [4]:
# Load the layer-1 label file as strings, skipping its first line.
layer1 = np.loadtxt('imagenet_10k_layer1_accuracy.csv', dtype='str', delimiter=',', skiprows=1)

# Column 1 holds the verdict; compute the share of rows for each verdict.
l1_verdicts, l1_total = layer1[:, 1], layer1.shape[0]
l1_acc, l1_maybe, l1_fail = (
    np.count_nonzero(l1_verdicts == verdict) / l1_total
    for verdict in ('yes', 'maybe', 'no')
)

In [5]:
# Share of layer-1 rows whose verdict is 'yes', 'maybe' and 'no', in that order.
l1_acc, l1_maybe, l1_fail

(0.30980392156862746, 0.06274509803921569, 0.6274509803921569)

## Layer 2 Metrics

In [6]:
# Load the layer-2 label file as strings, skipping its first line.
layer2 = np.loadtxt('imagenet_10k_layer2_accuracy.csv', dtype='str', delimiter=',', skiprows=1)

# Column 1 holds the verdict; compute the share of rows for each verdict.
l2_verdicts, l2_total = layer2[:, 1], layer2.shape[0]
l2_acc, l2_maybe, l2_fail = (
    np.count_nonzero(l2_verdicts == verdict) / l2_total
    for verdict in ('yes', 'maybe', 'no')
)

In [7]:
# Share of layer-2 rows whose verdict is 'yes', 'maybe' and 'no', in that order.
l2_acc, l2_maybe, l2_fail

(0.2837573385518591, 0.01761252446183953, 0.6986301369863014)

## Layer 3 Metrics

In [8]:
# Load the layer-3 label file as strings, skipping its first line.
layer3 = np.loadtxt('imagenet_10k_layer3_accuracy.csv', dtype='str', delimiter=',', skiprows=1)

# Column 1 holds the verdict; compute the share of rows for each verdict.
l3_verdicts, l3_total = layer3[:, 1], layer3.shape[0]
l3_acc, l3_maybe, l3_fail = (
    np.count_nonzero(l3_verdicts == verdict) / l3_total
    for verdict in ('yes', 'maybe', 'no')
)

In [9]:
# Share of layer-3 rows whose verdict is 'yes', 'maybe' and 'no', in that order.
l3_acc, l3_maybe, l3_fail

(0.36656891495601174, 0.013685239491691105, 0.6197458455522972)

## Layer 4 Metrics

In [10]:
# Load the layer-4 label file as strings, skipping its first line.
layer4 = np.loadtxt('imagenet_10k_layer4_accuracy.csv', dtype='str', delimiter=',', skiprows=1)

# Column 1 holds the verdict; compute the share of rows for each verdict.
l4_verdicts, l4_total = layer4[:, 1], layer4.shape[0]
l4_acc, l4_maybe, l4_fail = (
    np.count_nonzero(l4_verdicts == verdict) / l4_total
    for verdict in ('yes', 'maybe', 'no')
)

In [11]:
# Share of layer-4 rows whose verdict is 'yes', 'maybe' and 'no', in that order.
l4_acc, l4_maybe, l4_fail

(0.4890083048363459, 0.016609672691744015, 0.4943820224719101)