In [29]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import models
import torch
from collections import OrderedDict
import json
import os
import torchvision.transforms as transforms
from tokenizer import SimpleTokenizer
import datasets
import utils
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

In [30]:
model = getattr(models, 'ICLIP_VITB16')()
model.cuda()

	Creating MAE projection head
	MAE projection head created
	Creating IBOT projection head


  WeightNorm.apply(module, name, dim)


	keys have been loaded for ibot head with status: <All keys matched successfully>
	IBOT projection head created
	DetailCLIP model created


ICLIP(
  (visual): MaskVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (drop1): Dropout(p=0.0, inplace=False)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop2): Dropout

In [31]:
# Creating model
ckpt_path = 'checkpoint_best.pt'

ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
state_dict = OrderedDict()
for k, v in ckpt['state_dict'].items():
    state_dict[k.replace('module.', '')] = v

old_args = ckpt['args']
print("=> creating model: {}".format(old_args.model))
model = getattr(models, old_args.model)()
model.cuda()
model.load_state_dict(state_dict, strict=True)
print("=> loaded resume checkpoint '{}' (epoch {})".format(ckpt_path, ckpt['epoch']))

=> creating model: ICLIP_VITB16
	Creating MAE projection head
	MAE projection head created
	Creating IBOT projection head
	keys have been loaded for ibot head with status: <All keys matched successfully>
	IBOT projection head created
	DetailCLIP model created
=> loaded resume checkpoint 'checkpoint_best.pt' (epoch 48)


In [32]:
cwd = '/home/onyxia/work/DetailCLIP'
with open(os.path.join(cwd, 'dataset_catalog.json')) as f:
    catalog = json.load(f)

with open(os.path.join(cwd, 'templates.json')) as f:
    all_templates = json.load(f)

with open(os.path.join(cwd, 'labels.json')) as f:
    all_labels = json.load(f)

In [33]:
# Data loading code
print("=> creating dataset")
tokenizer = SimpleTokenizer()
val_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        lambda x: x.convert('RGB'),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
    ])


=> creating dataset


In [34]:

for d in catalog:
    print('Evaluating {}'.format(d))
    val_dataset = datasets.get_downstream_dataset(catalog, name=d, is_train=False, transform=val_transform)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=256, shuffle=False,
        num_workers=10, pin_memory=True, drop_last=False)

    templates = all_templates[d]
    labels = all_labels[d]

    model.eval()

Evaluating cub200


In [35]:

results = []
for d in catalog:
    print('Evaluating {}'.format(d))
    val_dataset = datasets.get_downstream_dataset(catalog, name=d, is_train=False, transform=val_transform)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=256, shuffle=False,
        num_workers=10, pin_memory=True, drop_last=False)

    templates = all_templates[d]
    labels = all_labels[d]

    model.eval()

    print('=> encoding captions')
    with torch.no_grad():
        text_features = []
        for label in labels:
            if isinstance(label, list):
                texts = [t.format(l) for t in templates for l in label]
            else:
                texts = [t.format(label) for t in templates]
            texts = tokenizer(texts).cuda(non_blocking=True)
            texts = texts.view(-1, 77).contiguous()
            class_embeddings = utils.get_model(model).encode_text(texts, ema=True)
            class_embeddings = class_embeddings / class_embeddings.norm(dim=-1, keepdim=True)
            class_embeddings = class_embeddings.mean(dim=0)
            class_embeddings = class_embeddings / class_embeddings.norm(dim=-1, keepdim=True)
            text_features.append(class_embeddings)

        text_features = torch.stack(text_features, dim=0)

Evaluating cub200
=> encoding captions


In [36]:
with torch.no_grad():
    image_features_tot = []
    for images, target in val_loader:
        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # encode images
        image_features = utils.get_model(model).encode_image(images, ema=True)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        image_features_tot.append(image_features)


In [37]:
print(image_features_tot)

[tensor([[-0.0066, -0.0634,  0.0728,  ...,  0.0365, -0.0034, -0.0179],
        [ 0.0038, -0.0198, -0.0476,  ...,  0.0205,  0.0356,  0.0236],
        [ 0.0129, -0.0154,  0.0238,  ...,  0.0551, -0.0033, -0.0015],
        ...,
        [ 0.0082, -0.0489,  0.0071,  ..., -0.0102, -0.0467, -0.0173],
        [-0.0299, -0.0024,  0.0376,  ..., -0.0193, -0.0307,  0.0027],
        [-0.0175, -0.0791,  0.0748,  ..., -0.0237, -0.0170,  0.0056]],
       device='cuda:0'), tensor([[-0.0261, -0.0602,  0.0404,  ..., -0.0162, -0.0122, -0.0057],
        [-0.0436, -0.0608,  0.0129,  ..., -0.0048, -0.0182,  0.0077],
        [ 0.0091,  0.0097,  0.0220,  ..., -0.0166, -0.0391,  0.0316],
        ...,
        [ 0.0579,  0.0024,  0.0385,  ..., -0.0365, -0.0212,  0.0228],
        [-0.0039, -0.0065,  0.0295,  ..., -0.0241, -0.0319,  0.0187],
        [-0.0215,  0.0091,  0.0095,  ..., -0.0375, -0.0553, -0.0103]],
       device='cuda:0'), tensor([[-0.0540, -0.0167,  0.0168,  ..., -0.0026, -0.0569,  0.0190],
        [ 0

In [38]:
# t-SNE

text_features = text_features.to('cpu').detach().numpy()

tsne = TSNE(n_components=2, verbose=0, perplexity=40, n_iter=300)
tsne_results = tsne.fit_transform(text_features)

tsne_results = pd.DataFrame(tsne_results)

plt.figure(figsize=(8, 6))
scatter = plt.scatter(tsne_results[:, 0], tsne_results[:, 1], c=labels, cmap='tab10')
plt.legend(*scatter.legend_elements(), title="Classes")
plt.title("Visualisation t-SNE des embeddings")
plt.show()



KeyError: (slice(None, None, None), 0)

<Figure size 800x600 with 0 Axes>

In [None]:
# t-SNE

image_feature_tot = np.array(image_feature_tot)
tsne = TSNE(n_components=2, verbose=0, perplexity=40, n_iter=300)
tsne_results = tsne.fit_transform(image_features_tot)

tsne_results = pd.DataFrame(tsne_results)

plt.figure(figsize=(16,4))
ax1 = plt.subplot(1, 3, 1)
sns.scatterplot(
    palette=sns.color_palette("hls", 10),
    data=tsne_results,
    legend="full",
    alpha=0.3,
    ax=ax1
)

NameError: name 'np' is not defined