## Performance on describing final layer neurons of ResNet-50 (ImageNet)

In [1]:
import os
# Switch the working directory to the parent directory; the relative paths used below are resolved from there.
os.chdir("..")

import torch
import pandas as pd
from sentence_transformers import SentenceTransformer

import clip
import utils
import similarity

## Arguments for CLIP-Dissect

In [2]:
# --- Models under study ---
# Name handed to clip.load and to the utils activation helpers.
clip_name = 'ViT-B/16'
# Network and layer whose units get described; both are forwarded to utils.
target_name = 'resnet50'
target_layer = 'fc'

# --- Execution settings ---
device = 'cuda'
batch_size = 200
# Forwarded to utils.save_activations / utils.get_save_names.
# NOTE(review): presumably selects average pooling of activations -- confirm in utils.
pool_mode = 'avg'

# --- Outputs and scoring ---
# Passed as save_dir to the utils helpers that save and locate activation files.
save_dir = 'saved_activations'
# Function handed to utils.get_similarity_from_activations.
similarity_fn = similarity.soft_wpmi

In [3]:
# Sentence encoder ('all-mpnet-base-v2'); later passed to utils.get_cos_similarity
# alongside the CLIP model.
model = SentenceTransformer('all-mpnet-base-v2')

# clip.load returns two values; only the first (the model) is kept.
clip_model, _ = clip.load(clip_name, device=device)

# Read the whole label file, then split on '\n' to get one entry per line.
# A trailing newline in the file would leave an empty final entry.
with open('data/imagenet_labels.txt', 'r') as labels_file:
    raw_labels = labels_file.read()
imagenet_classnames = raw_labels.split('\n')

## Run CLIP-Dissect

In [4]:
# Each entry of `rows` is a (d_probe, concept_set) pair, unpacked in that order
# by the evaluation loop.

# Sweep 1: probing dataset fixed to "imagenet_val", concept-set file varies.
concept_set_sweep = [
    ("imagenet_val", concept_path)
    for concept_path in (
        "data/broden_labels_clean.txt",
        "data/3k.txt",
        "data/10k.txt",
        "data/20k.txt",
        "data/imagenet_labels.txt",
    )
]

# Sweep 2: concept-set file fixed to "data/20k.txt", probing dataset varies.
# ("imagenet_val", "data/20k.txt") therefore appears in both sweeps.
d_probe_sweep = [
    (probe_name, "data/20k.txt")
    for probe_name in (
        "cifar100_train",
        "broden",
        "imagenet_val",
        "imagenet_broden",
    )
]

rows = concept_set_sweep + d_probe_sweep

In [None]:
# Run the pipeline once per (d_probe, concept_set) pair and print both
# similarity scores for that configuration.
for d_probe, concept_set in rows:
    # Candidate descriptions: the concept-set file split on '\n', so that an
    # argmax index can be mapped back to its text.
    with open(concept_set, 'r') as concept_file:
        words = concept_file.read().split('\n')

    # Keyword arguments that both utils calls below receive unchanged.
    shared_kwargs = dict(clip_name=clip_name, target_name=target_name,
                         d_probe=d_probe, concept_set=concept_set,
                         pool_mode=pool_mode, save_dir=save_dir)

    # NOTE(review): presumably writes the activation files that
    # get_save_names points to -- confirm in utils.
    utils.save_activations(target_layers=[target_layer], batch_size=batch_size,
                           device=device, **shared_kwargs)

    save_names = utils.get_save_names(target_layer=target_layer, **shared_kwargs)
    target_save_name, clip_save_name, text_save_name = save_names

    similarities, target_feats = utils.get_similarity_from_activations(
        target_save_name, clip_save_name, text_save_name,
        similarity_fn, device=device)

    # Take the argmax along dim 1 of `similarities` and look each index up in `words`.
    clip_preds = torch.argmax(similarities, dim=1)
    clip_preds = [words[word_idx] for word_idx in clip_preds.tolist()]

    # Score the predicted descriptions against the ImageNet class names.
    clip_cos, mpnet_cos = utils.get_cos_similarity(
        clip_preds, imagenet_classnames, clip_model, model, device, batch_size)
    print("D_probe:{}, Concept set:{}".format(d_probe, concept_set))
    print("CLIP-Dissect - Clip similarity: {:.4f}, mpnet similarity: {:.4f}".format(clip_cos, mpnet_cos))

## Baselines

In [None]:
# Network Dissection baseline: read its results CSV and use the 'label'
# column as the predicted descriptions.
netdissect_res = pd.read_csv('data/NetDissect_results/resnet50_imagenet_fc.csv')
label_column = netdissect_res['label']
nd_preds = label_column.values

# Scored with the same utils.get_cos_similarity call as the CLIP-Dissect predictions.
clip_cos, mpnet_cos = utils.get_cos_similarity(
    nd_preds, imagenet_classnames, clip_model, model, device, batch_size)
print("Network Dissection - Clip similarity: {:.4f}, mpnet similarity: {:.4f}".format(clip_cos, mpnet_cos))

In [None]:
# MILAN baseline: read its results CSV, keep only the rows whose 'layer' is
# 'fc', order them by 'unit', and take the 'description' column as predictions.
milan_table = pd.read_csv('data/MILAN_results/m_base_resnet50_imagenet.csv')
is_fc_row = milan_table['layer'] == 'fc'
fc_rows_by_unit = milan_table[is_fc_row].sort_values(by=['unit'])
milan_preds = list(fc_rows_by_unit['description'])

# Scored with the same utils.get_cos_similarity call as the other methods.
clip_cos, mpnet_cos = utils.get_cos_similarity(
    milan_preds, imagenet_classnames, clip_model, model, device, batch_size)
print("MILAN - Clip similarity: {:.4f}, mpnet similarity: {:.4f}".format(clip_cos, mpnet_cos))