In [4]:
import clip
import scipy.io
import torch
import torchvision
import torchvision.models as models

# NOTE(review): this star import presumably supplies Image, plt, get_stimulis and
# display_img_with_ordered_labels, which later cells use without importing — confirm
# and import them by name.
from UTILS.utils import *

In [5]:
# Initialization: choose the compute device, load CLIP, and read the stimuli and labels.
path = ''
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model, preprocess = clip.load("ViT-B/32", device=device, jit=False)

# Read the first 156 entries of 'visual_stimuli156' from the .mat file and turn them
# into a batch with the project helper `get_stimulis` (no word overlay: word=None).
s_156 = scipy.io.loadmat(path + 'DATASETS/dataset3.mat')
images = [s_156['visual_stimuli156'][0][idx][0] for idx in range(156)]
images_torch = get_stimulis(
    0, 156, images, preprocess=preprocess, word=None
).to(device)

model_name = "clip"
contexts = ["a photo of a "]
context = contexts[0]

# NOTE(review): torch.load unpickles the file; only load label files from a trusted source.
labels = torch.load(path + "DATASETS/LABELS/labels.pt")
super_labels = labels["SUPERORDINATES"]
basic_labels = labels["BASICS"]
hierarchy = labels["CLUSTERING"]

## **REPRESENTATIONS FOR FEEDFORWARD MODELS**

In [6]:
# Pretrained torchvision classifiers used by the feed-forward analysis.
# The constructors are reached through `torchvision.models` instead of the `models`
# alias because this cell rebinds `models` to the dict below; going through the alias
# makes the cell raise AttributeError when it is executed a second time.
vgg19_bn = torchvision.models.vgg19_bn(pretrained=True)
# Load the architecture that the variable name and the "ResNet152" label announce
# (the previous call constructed resnet18 under that name).
resnet152 = torchvision.models.resnet152(pretrained=True)

# Display name -> network. The name `models` is kept so the cell that iterates over
# this dict keeps working, even though it shadows the `torchvision.models` alias.
models = {
    "VGG19 - Batch Normalization": vgg19_bn,
    "ResNet152": resnet152
}

In [7]:
# Whatever the most recent `hook_fn` call captured; stays None until a hook fires.
reps = None


def hook_fn(module, input, output):
    """Forward hook that stores the hooked module's first positional input in `reps`.

    Args:
        module: the module the hook is attached to (unused).
        input: sequence of positional inputs to the module; only element 0 is kept.
        output: the module's output (unused).
    """
    global reps
    first_input = input[0]
    reps = first_input


# Run every one of the 156 stimuli through `preprocess` and concatenate them into a
# single batch on `device`.
# NOTE(review): this rebinds `images` (a list of raw arrays after the initialization
# cell) to a tensor, so later cells see a different kind of object under the same name.
images = torch.cat([
    preprocess(Image.fromarray(raw)).unsqueeze(0)
    for raw in (s_156['visual_stimuli156'][0][idx][0] for idx in range(156))
]).to(device)


def get_RDM(model, images):
    """Return the pairwise cosine-similarity matrix of a model's pre-head activations.

    A forward hook (`hook_fn`) is attached to the last layer of `model.classifier`
    or, failing that, to `model.fc`; the hook stores that layer's input in the
    module-level `reps`. The model is moved to the global `device`, put in eval
    mode and run once on `images` without gradients.

    Note that, despite the name, the values returned are cosine *similarities*.

    Args:
        model: network exposing either a `classifier` sequence or an `fc` layer.
        images: batch handed to `model(...)` as-is. It is not moved to `device`
            here, so the caller presumably has to place it there — TODO confirm.

    Returns:
        Nested list of floats with shape n x n, where n is `reps.size(0)`;
        entry [i][j] is the cosine similarity between captured rows i and j.

    Raises:
        AssertionError: if `model` has neither `classifier` nor `fc`.
        RuntimeError: if the forward pass finished without the hook firing.
    """
    global reps

    if hasattr(model, 'classifier'):
        head = model.classifier[-1]
    elif hasattr(model, 'fc'):
        head = model.fc
    else:
        # Explicit raise keeps the original exception type but also works under -O.
        raise AssertionError("model has neither a 'classifier' nor an 'fc' attribute")

    # Forget activations from an earlier call so stale data can never be reused.
    reps = None
    hook = head.register_forward_hook(hook_fn)
    try:
        model.to(device)
        model.eval()
        with torch.no_grad():
            model(images)
    finally:
        # Always detach the hook; otherwise every call stacks another one on the model.
        hook.remove()

    if reps is None:
        raise RuntimeError("forward hook did not fire; no activations were captured")

    # One row per image; flatten is a no-op when the activations are already 2-D.
    activations = reps.flatten(start_dim=1).float()
    # Cosine similarity of all pairs at once: unit-normalise rows, then one matmul.
    unit_rows = torch.nn.functional.normalize(activations, dim=1, eps=1e-8)
    return (unit_rows @ unit_rows.T).tolist()

In [None]:
# Compute and plot one similarity matrix per feed-forward model.
for key, network in models.items():
    print(key)
    similarities = get_RDM(network, images_torch)
    # Category -> placeholder list; the sizes add up to the 156 stimuli. Rebuilt on
    # every pass so each plot call receives fresh lists.
    ordered_labels = {
        category: [None] * count
        for category, count in (
            ("animal", 28), ("plant", 14), ("food", 16), ("indoor", 22),
            ("outdoor", 20), ("human body", 24), ("human face", 32),
        )
    }
    display_img_with_ordered_labels(similarities, ordered_labels, None, True, (8, 8))
    plt.show()

## **REPRESENTATIONS FOR CLIP**

In [None]:
# Not read anywhere in this cell; kept in case a later cell relies on them.
dataset_size = 156
batch_size = 32

# Encode all stimuli with CLIP's image encoder; gradients are not needed.
with torch.no_grad():
    features = model.encode_image(images_torch)

# Pairwise cosine similarity for every image pair in one step: unit-normalise each
# feature row, then take a single matrix product. The cast to float32 avoids doing
# the arithmetic in whatever reduced precision the encoder may return.
features_unit = torch.nn.functional.normalize(features.float(), dim=1, eps=1e-8)
similarities = (features_unit @ features_unit.T).tolist()

# Category -> placeholder list; the sizes add up to the 156 stimuli.
ordered_labels = {"animal": [None] * 28, "plant": [None] * 14, "food": [None] * 16, "indoor": [None] * 22,
                  "outdoor": [None] * 20, "human body": [None] * 24, "human face": [None] * 32}

display_img_with_ordered_labels(similarities, ordered_labels, None, True, (8, 8))

## **REPRESENTATION OF WORD-SUPERIMPOSED IMAGES ON CLIP**

In [None]:
word = 'animal'

# Rebuild the raw stimuli from the .mat contents instead of reading `images`:
# an earlier cell rebinds that name to an already-preprocessed tensor.
raw_stimuli = [s_156['visual_stimuli156'][0][i][0] for i in range(156)]
# Same argument layout as the initialization-cell call. The previous call here left
# out the images argument, so `preprocess` landed in the images slot.
# TODO confirm against get_stimulis' signature in UTILS.utils.
images_words = get_stimulis(0, 156, raw_stimuli, preprocess=preprocess, word=word).to(device)

# Encode the word-overlaid stimuli with CLIP's image encoder; gradients are not needed.
with torch.no_grad():
    features = model.encode_image(images_words)

# Pairwise cosine similarity for every image pair in one step: unit-normalise each
# feature row, then take a single matrix product (computed in float32).
features_unit = torch.nn.functional.normalize(features.float(), dim=1, eps=1e-8)
similarities = (features_unit @ features_unit.T).tolist()

# Category -> placeholder list; the sizes add up to the 156 stimuli.
ordered_labels = {"animal": [None] * 28, "plant": [None] * 14, "food": [None] * 16, "indoor": [None] * 22,
                  "outdoor": [None] * 20, "human body": [None] * 24, "human face": [None] * 32}
display_img_with_ordered_labels(similarities, ordered_labels, None, True, (8, 8))