In [1]:
import sys
sys.path.append('..')

import torch
import os
import scipy.io
import pandas as pd
import numpy as np
from tqdm import tqdm
from torchvision import transforms, datasets

from PIL import Image

import utils
import vision_transformer as vits

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Model configuration: ViT variant, patch size, and the dataset tag that
# selects which DINO checkpoint is evaluated.
arch = 'vit_small'
patches = 16
dat = 'face'
ckpt_pth = f'/om2/user/amarvi/dino/saved_models/{dat}400_dino/checkpoint.pth'

# Fail fast on a missing checkpoint instead of handing a bad path to the loader.
# NOTE(review): the reference DINO `utils.load_pretrained_weights` falls back to
# downloading the public pretrained weights when the file is absent, which would
# silently evaluate the wrong model -- confirm the local `utils` behaves the same.
if not os.path.isfile(ckpt_pth):
    raise FileNotFoundError(f"DINO checkpoint not found: {ckpt_pth}")

# Use the GPU when one is available; otherwise run on CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load in model
# num_classes=0 builds the backbone without a classification layer.
model = vits.__dict__[arch](patch_size=patches, num_classes=0)
model.to(device)
model.eval()  # inference mode for dropout / stochastic layers
# Load the 'student' entry of the checkpoint into the backbone.
utils.load_pretrained_weights(model, ckpt_pth, 'student', arch, patches)

print(f"Model {arch} built.")

Take key student in provided checkpoint dict
Pretrained weights found at /om2/user/amarvi/dino/saved_models/face400_dino/checkpoint.pth and loaded with msg: _IncompatibleKeys(missing_keys=[], unexpected_keys=['head.mlp.0.weight', 'head.mlp.0.bias', 'head.mlp.2.weight', 'head.mlp.2.bias', 'head.mlp.4.weight', 'head.mlp.4.bias', 'head.last_layer.weight_g', 'head.last_layer.weight_v'])
Model vit_small built.


In [7]:
# Deterministic evaluation preprocessing. The random crop / random grayscale
# augmentations belong to training; using them here made the extracted
# activations (and every downstream accuracy) change from run to run.
transform = transforms.Compose([
    transforms.Resize(256, interpolation=3),  # 3 is PIL's bicubic filter code
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Per-channel mean / std applied after ToTensor.
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

In [8]:
# Extract the token-0 (class token) activation of every transformer layer for
# each stimulus image and store one row per layer in `df`.
cols = ['model', 'dataset', 'size', 'image_idx', 'activation']

# `df` accumulates rows across runs of this cell (one run per `dat`), so it is
# created only when the kernel does not hold it yet. Previously its creation
# was commented out and the cell raised NameError on a fresh kernel.
if 'df' not in globals():
    df = pd.DataFrame(columns=cols)

# Drop rows left by an earlier run for this same dataset so that re-running
# the cell replaces them instead of duplicating them.
stale = (df['model'] == 'dino') & (df['dataset'] == dat) & (df['size'] == '400')
df = df[~stale].reset_index(drop=True)

img_folder = f'/om2/user/amarvi/FACE/data/behav_{dat}'
img_list = []

# os.listdir returns entries in arbitrary order, while later cells address
# images by their position in the batch, so the listing is sorted.
# NOTE(review): assumes lexicographic file-name order matches the stimulus
# numbering used by the triplet indices -- confirm against the file names.
for jpg_name in tqdm(sorted(os.listdir(img_folder))):
    img_pth = os.path.join(img_folder, jpg_name)
    # The context manager releases the file handle once the image is decoded.
    with Image.open(img_pth) as img:
        img_list.append(transform(img.convert('RGB')))

# Stack all images into a single batch tensor on whatever device the model is on.
model_device = next(model.parameters()).device
batch_tensor = torch.stack(img_list).to(model_device)

# Inference only: skip autograd bookkeeping for the whole batch.
with torch.no_grad():
    out = model.get_intermediate_layers(batch_tensor, n=12)

for idx, layer_activation in enumerate(out):
    # Token 0 of every sequence; the batch axis is kept even for a single image
    # so that rows can always be indexed by image position.
    clss_token = layer_activation[:, 0, :].detach().cpu().numpy()
    # NOTE: despite its name, 'image_idx' holds the 1-based position of the
    # layer within `out`; the key is kept so existing consumers still work.
    df.loc[len(df)] = {'model': 'dino', 'dataset': dat, 'size': '400', 'image_idx': idx+1, 'activation': clss_token}

# df.to_pickle('/om2/user/amarvi/dino/saved_models/mts_dino_activations.pkl')

100%|██████████| 200/200 [00:00<00:00, 200.80it/s]


In [11]:
# Read the behavioural data file and pull the first two entries out of the
# single `data_up` record: the trial triplets and the per-trial performance.
f = scipy.io.loadmat('/om2/user/amarvi/FACE/data/data_up.mat')

data_up = f['data_up'][0][0]
triplet, perf = data_up[0], data_up[1]

# Sanity check of the array dimensions.
print(triplet.shape, perf.shape)

(3, 1560) (1, 1560)


In [17]:
def _score_triplets(act, triplet, group_size=5):
    """Score every match-to-sample trial for one layer's activations.

    Parameters
    ----------
    act : array indexed by image position along its first axis; each row is
        that image's activation vector.
    triplet : integer array with three rows (target, option 1, option 2) of
        1-based image indices, one column per trial.
    group_size : images sharing a label occupy blocks of this many consecutive
        indices (label = zero-based index // group_size).
        NOTE(review): presumably five images per identity -- confirm.

    Returns
    -------
    1-D int array with 1 where the model's odd-one-out (the option farther from
    the target) is the option whose label differs from the target's, else 0.

    Raises
    ------
    ValueError if a trial does not have exactly one option with a label
    different from the target's.
    """
    trios = np.asarray(triplet).T.astype(int) - 1   # (n_trials, 3), zero-based
    label = trios // group_size
    opt1_differs = label[:, 1] != label[:, 0]
    opt2_differs = label[:, 2] != label[:, 0]
    if not np.all(opt1_differs ^ opt2_differs):
        raise ValueError("every trial needs exactly one option whose label "
                         "differs from the target's")
    true_odd = np.where(opt1_differs, 1, 2)

    targ, m1, m2 = trios.T
    dist1 = np.linalg.norm(act[targ] - act[m1], axis=1)
    dist2 = np.linalg.norm(act[targ] - act[m2], axis=1)
    # Same rule as argmax([dist1, dist2]) + 1: a tie picks option 1.
    model_odd = np.where(dist2 > dist1, 2, 1)
    return (model_odd == true_odd).astype(int)


def _bootstrap_ci(outcomes, n_bootstrap, rng):
    """Return the 2.5th and 97.5th percentiles of the bootstrap means of `outcomes`."""
    resampled = rng.choice(outcomes, size=(len(outcomes), n_bootstrap), replace=True)
    resampled_means = resampled.mean(axis=0)
    return np.percentile(resampled_means, 2.5), np.percentile(resampled_means, 97.5)


column_names = ['model', 'size', 'dataset', 'layer', 'results', 'accuracy', 'ci']
n_bootstrap = 10000
n_layers = 12                       # rows of `df` come in consecutive runs of this many layers
rng = np.random.default_rng(0)      # fixed seed so the confidence intervals are reproducible

records = []
for index, row in tqdm(df.iterrows(), total=len(df)):
    bstrap = _score_triplets(row['activation'], triplet)
    ci_lower, ci_upper = _bootstrap_ci(bstrap, n_bootstrap, rng)
    # The row's dataset is read directly so the global `dat` is not overwritten.
    records.append({'model': 'dino', 'size': '400', 'dataset': row['dataset'], 'layer': index % n_layers + 1, 'results': bstrap, 'accuracy': np.mean(bstrap), 'ci': (ci_lower, ci_upper)})

# Build the table once instead of growing it one row at a time.
res_df = pd.DataFrame(records, columns=column_names)

24it [00:06,  3.96it/s]


In [19]:
# Persist the per-layer results table for later analysis.
results_pkl_path = '/om2/user/amarvi/dino/saved_models/mts_dino_results.pkl'
res_df.to_pickle(results_pkl_path)