## Create 16x results

In [1]:
%%bash

# 1. Define the four similarity functions and four probe‐sets:
similarity_fns=(cos_similarity rank_reorder wpmi soft_wpmi)
# d_probes=(cifar100_train broden imagenet_val imagenet_broden)
d_probes=(cifar100_train)

# 2. Loop over each pair and invoke describe_neurons.py on the FC layer:
for sim_fn in "${similarity_fns[@]}"; do
  for dp in "${d_probes[@]}"; do
    echo "Running: similarity_fn=$sim_fn, d_probe=$dp"
    python describe_neurons.py \
      --clip_model    ViT-B/16 \
      --target_model  resnet50 \
      --target_layers fc \
      --d_probe       "$dp" \
      --concept_set   data/imagenet_labels.txt \
      --pool_mode     avg \
      --similarity_fn "$sim_fn" \
      --batch_size    200 \
      --device        cuda
    sleep 70
  done
done

Running: similarity_fn=cos_similarity, d_probe=cifar100_train


usage: describe_neurons.py [-h]
                           [--clip_model {RN50,RN101,RN50x4,RN50x16,RN50x64,ViT-B/32,ViT-B/16,ViT-L/14}]
                           [--target_model TARGET_MODEL]
                           [--target_layers TARGET_LAYERS]
                           [--d_probe {imagenet_broden,cifar100_val,imagenet_val,broden,imagenet_broden}]
                           [--concept_set CONCEPT_SET]
                           [--batch_size BATCH_SIZE] [--device DEVICE]
                           [--activation_dir ACTIVATION_DIR]
                           [--result_dir RESULT_DIR] [--pool_mode POOL_MODE]
                           [--similarity_fn {soft_wpmi,wpmi,rank_reorder,cos_similarity,cos_similarity_cubed}]
describe_neurons.py: error: argument --d_probe: invalid choice: 'cifar100_train' (choose from 'imagenet_broden', 'cifar100_val', 'imagenet_val', 'broden', 'imagenet_broden')


Running: similarity_fn=rank_reorder, d_probe=cifar100_train


usage: describe_neurons.py [-h]
                           [--clip_model {RN50,RN101,RN50x4,RN50x16,RN50x64,ViT-B/32,ViT-B/16,ViT-L/14}]
                           [--target_model TARGET_MODEL]
                           [--target_layers TARGET_LAYERS]
                           [--d_probe {imagenet_broden,cifar100_val,imagenet_val,broden,imagenet_broden}]
                           [--concept_set CONCEPT_SET]
                           [--batch_size BATCH_SIZE] [--device DEVICE]
                           [--activation_dir ACTIVATION_DIR]
                           [--result_dir RESULT_DIR] [--pool_mode POOL_MODE]
                           [--similarity_fn {soft_wpmi,wpmi,rank_reorder,cos_similarity,cos_similarity_cubed}]
describe_neurons.py: error: argument --d_probe: invalid choice: 'cifar100_train' (choose from 'imagenet_broden', 'cifar100_val', 'imagenet_val', 'broden', 'imagenet_broden')


Running: similarity_fn=wpmi, d_probe=cifar100_train


usage: describe_neurons.py [-h]
                           [--clip_model {RN50,RN101,RN50x4,RN50x16,RN50x64,ViT-B/32,ViT-B/16,ViT-L/14}]
                           [--target_model TARGET_MODEL]
                           [--target_layers TARGET_LAYERS]
                           [--d_probe {imagenet_broden,cifar100_val,imagenet_val,broden,imagenet_broden}]
                           [--concept_set CONCEPT_SET]
                           [--batch_size BATCH_SIZE] [--device DEVICE]
                           [--activation_dir ACTIVATION_DIR]
                           [--result_dir RESULT_DIR] [--pool_mode POOL_MODE]
                           [--similarity_fn {soft_wpmi,wpmi,rank_reorder,cos_similarity,cos_similarity_cubed}]
describe_neurons.py: error: argument --d_probe: invalid choice: 'cifar100_train' (choose from 'imagenet_broden', 'cifar100_val', 'imagenet_val', 'broden', 'imagenet_broden')


Running: similarity_fn=soft_wpmi, d_probe=cifar100_train


usage: describe_neurons.py [-h]
                           [--clip_model {RN50,RN101,RN50x4,RN50x16,RN50x64,ViT-B/32,ViT-B/16,ViT-L/14}]
                           [--target_model TARGET_MODEL]
                           [--target_layers TARGET_LAYERS]
                           [--d_probe {imagenet_broden,cifar100_val,imagenet_val,broden,imagenet_broden}]
                           [--concept_set CONCEPT_SET]
                           [--batch_size BATCH_SIZE] [--device DEVICE]
                           [--activation_dir ACTIVATION_DIR]
                           [--result_dir RESULT_DIR] [--pool_mode POOL_MODE]
                           [--similarity_fn {soft_wpmi,wpmi,rank_reorder,cos_similarity,cos_similarity_cubed}]
describe_neurons.py: error: argument --d_probe: invalid choice: 'cifar100_train' (choose from 'imagenet_broden', 'cifar100_val', 'imagenet_val', 'broden', 'imagenet_broden')


## Similarity function comparison

In [2]:
import os
os.chdir("/home/vpalaniappan/private/DSC291-CLIP-Dissect")

import torch
from sentence_transformers import SentenceTransformer
from sklearn import metrics

import clip
import utils
import similarity

2025-06-02 09:14:37.528455: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-02 09:14:37.568925: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-02 09:14:37.568944: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-02 09:14:37.570159: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-02 09:14:37.577162: I tensorflow/core/platform/cpu_feature_guar

## Settings

In [3]:
similarity_fns = ["cos_similarity", "rank_reorder", "wpmi", "soft_wpmi"]
# similarity_fns = ["rank_reorder"]
# d_probes = ['cifar100_train', 'broden', 'imagenet_val', 'imagenet_broden']
d_probes = ['cifar100_train']

clip_name = 'ViT-B/16'
target_name = 'resnet50'
target_layer = 'fc'
batch_size = 200
device = 'cuda'
pool_mode = 'avg'
save_dir = 'saved_activations'

In [4]:
model = SentenceTransformer('all-mpnet-base-v2')
clip_model, _ = clip.load(clip_name, device=device)

with open("data/imagenet_labels.txt", "r") as f:
    cls_id_to_name = f.read().split("\n")



# Cos similarities

In [5]:
concept_set = 'data/20k.txt'

with open(concept_set, 'r') as f:
    words = f.read().split('\n')

for similarity_fn in similarity_fns:
    for d_probe in d_probes:
        utils.save_activations(clip_name = clip_name, target_name = target_name, target_layers = [target_layer], 
                               d_probe = d_probe, concept_set = concept_set, batch_size = batch_size, 
                               device = device, pool_mode=pool_mode, save_dir = save_dir)

        save_names = utils.get_save_names(clip_name = clip_name, target_name = target_name,
                                          target_layer = target_layer, d_probe = d_probe,
                                          concept_set = concept_set, pool_mode=pool_mode,
                                          save_dir = save_dir)

        target_save_name, clip_save_name, text_save_name = save_names

        similarities, target_feats = utils.get_similarity_from_activations(target_save_name, clip_save_name, 
                                                                           text_save_name, 
                                                                           eval("similarity.{}".format(similarity_fn)),
                                                                           device=device)

        clip_preds = torch.argmax(similarities, dim=1)
        clip_preds = [words[int(pred)] for pred in clip_preds]

        clip_cos, mpnet_cos = utils.get_cos_similarity(clip_preds, cls_id_to_name, clip_model, model, device, batch_size)
        print("Similarity fn: {}, D_probe: {}".format(similarity_fn, d_probe))
        print("Clip similarity: {:.4f}, mpnet similarity: {:.4f}".format(clip_cos, mpnet_cos))

Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 250/250 [01:07<00:00,  3.68it/s]
100%|██████████| 250/250 [00:57<00:00,  4.32it/s]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]


Similarity fn: cos_similarity, D_probe: cifar100_train
Clip similarity: 0.6484, mpnet similarity: 0.2758
Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 1000/1000 [03:42<00:00,  4.50it/s]


Similarity fn: rank_reorder, D_probe: cifar100_train
Clip similarity: 0.7217, mpnet similarity: 0.3233
Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 1000/1000 [00:10<00:00, 94.54it/s]


Similarity fn: wpmi, D_probe: cifar100_train
Clip similarity: 0.7192, mpnet similarity: 0.3466
Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 1000/1000 [00:27<00:00, 35.99it/s]


torch.Size([1000, 20000])
Similarity fn: soft_wpmi, D_probe: cifar100_train
Clip similarity: 0.7300, mpnet similarity: 0.3655


# Accuracies

In [6]:
def get_topk_acc(sim, k=5):
    correct = 0
    for orig_id in range(1000):
        vals, ids = torch.topk(sim[orig_id], k=k)
        for idx in ids[:k]:
            correct += (int(idx)==orig_id)
    return (correct/1000)*100

def get_correct_rank_mean_median(sim):
    ranks = []
    for orig_id in range(1000):
        vals, ids = torch.sort(sim[orig_id], descending=True)
        
        ranks.append(list(ids).index(orig_id)+1)
        
    mean = sum(ranks)/len(ranks)
    median = sorted(ranks)[500]
    return mean, median

def get_auc(sim):
    max_sim, preds = torch.max(sim.cpu(), dim=1)
    gtruth = torch.arange(0, 1000)
    correct = (preds==gtruth)
    fpr, tpr, thresholds = metrics.roc_curve(correct, max_sim)
    auc = metrics.roc_auc_score(correct, max_sim)
    return auc

In [7]:
concept_set = 'data/imagenet_labels.txt'
with open(concept_set, 'r') as f: 
    words = (f.read()).split('\n')
    

for similarity_fn in similarity_fns:
    for d_probe in d_probes:
        utils.save_activations(clip_name = clip_name, target_name = target_name, target_layers = [target_layer], 
                               d_probe = d_probe, concept_set = concept_set, batch_size = batch_size, 
                               device = device, pool_mode=pool_mode, save_dir = save_dir)

        save_names = utils.get_save_names(clip_name = clip_name, target_name = target_name,
                                          target_layer = target_layer, d_probe = d_probe,
                                          concept_set = concept_set, pool_mode=pool_mode,
                  
                                          save_dir = save_dir)

        target_save_name, clip_save_name, text_save_name = save_names

        similarities, target_feats = utils.get_similarity_from_activations(target_save_name, clip_save_name, 
                                                                           text_save_name, 
                                                                           eval("similarity.{}".format(similarity_fn)),
                                                                           device=device)
        
        print("Similarity fn: {}, D_probe: {}".format(similarity_fn, d_probe))
        print("Top 1 acc: {:.2f}%, Top 5 acc: {:.2f}%".format(get_topk_acc(similarities, k=1),
                                                         get_topk_acc(similarities, k=5)))
        
        mean, median = get_correct_rank_mean_median(similarities)
        print("Mean rank of correct class: {:.2f}, Median rank of correct class: {}".format(mean, median))
        print("AUC: {:.4f}".format(get_auc(similarities)))



Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 5/5 [00:00<00:00, 16.78it/s]
100%|██████████| 1/1 [00:00<00:00, 11.08it/s]


Similarity fn: cos_similarity, D_probe: cifar100_train
Top 1 acc: 8.70%, Top 5 acc: 25.10%
Mean rank of correct class: 53.93, Median rank of correct class: 21
AUC: 0.5895
Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 1000/1000 [00:44<00:00, 22.48it/s]


Similarity fn: rank_reorder, D_probe: cifar100_train
Top 1 acc: 36.50%, Top 5 acc: 67.20%
Mean rank of correct class: 13.62, Median rank of correct class: 3
AUC: 0.6271
Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 1000/1000 [00:00<00:00, 3367.08it/s]


Similarity fn: wpmi, D_probe: cifar100_train
Top 1 acc: 23.70%, Top 5 acc: 55.00%
Mean rank of correct class: 20.46, Median rank of correct class: 4
AUC: 0.6345
Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 1000/1000 [00:05<00:00, 181.78it/s]


torch.Size([1000, 1000])
Similarity fn: soft_wpmi, D_probe: cifar100_train
Top 1 acc: 46.20%, Top 5 acc: 79.40%
Mean rank of correct class: 8.62, Median rank of correct class: 2
AUC: 0.6671
