In [2]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import models
import torch
from collections import OrderedDict
import json
import os
import torchvision.transforms as transforms
from tokenizer import SimpleTokenizer
import datasets
import utils
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.colors as pc
import plotly.io as pio
import random
import kaleido
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Look up the `ICLIP_VITB16` constructor on the project's `models` module,
# build the network and move it to the GPU.
model_factory = getattr(models, 'ICLIP_VITB16')
model = model_factory()
model.cuda()
# Emits an empty line; as the last statement it also means the cell's final
# expression is not the model object itself.
print()

	Creating MAE projection head
	MAE projection head created
	Creating IBOT projection head


  WeightNorm.apply(module, name, dim)


	keys have been loaded for ibot head with status: <All keys matched successfully>
	IBOT projection head created
	DetailCLIP model created



In [4]:
# Creating model: rebuild the architecture named in the checkpoint and restore
# its weights.
ckpt_path = 'checkpoint_best.pt'

# NOTE: weights_only=False lets torch.load unpickle arbitrary Python objects
# (needed here for ckpt['args']); only use it on checkpoints you trust.
ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)

# Remove every 'module.' occurrence from the parameter names so they line up
# with the keys of the freshly constructed model.
state_dict = OrderedDict(
    (param_name.replace('module.', ''), param_value)
    for param_name, param_value in ckpt['state_dict'].items()
)

old_args = ckpt['args']
print("=> creating model: {}".format(old_args.model))
model_cls = getattr(models, old_args.model)
model = model_cls()
model.cuda()
# strict=True: any missing or unexpected key raises instead of being ignored.
model.load_state_dict(state_dict, strict=True)
print("=> loaded resume checkpoint '{}' (epoch {})".format(ckpt_path, ckpt['epoch']))

=> creating model: ICLIP_VITB16
	Creating MAE projection head
	MAE projection head created
	Creating IBOT projection head
	keys have been loaded for ibot head with status: <All keys matched successfully>
	IBOT projection head created
	DetailCLIP model created
=> loaded resume checkpoint 'checkpoint_best.pt' (epoch 48)


In [5]:
cwd = '/home/onyxia/work/DetailCLIP'


def _read_json(filename):
    """Parse and return the JSON document `filename` located under `cwd`."""
    with open(os.path.join(cwd, filename)) as handle:
        return json.load(handle)


# Dataset catalogue, prompt templates and class labels, each read from its own file.
catalog = _read_json('dataset_catalog.json')
all_templates = _read_json('templates.json')
all_labels = _read_json('labels.json')

In [6]:
# Data loading code: tokenizer for the captions and preprocessing for the images.
print("=> creating dataset")
tokenizer = SimpleTokenizer()

# Per-channel statistics handed to Normalize (the values commonly used for ImageNet).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Applied in order: resize, centre-crop to 224, force three RGB channels,
# convert to a tensor, then normalise each channel.
val_steps = [
    transforms.Resize(224),
    transforms.CenterCrop(224),
    lambda x: x.convert('RGB'),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
val_transform = transforms.Compose(val_steps)


=> creating dataset


In [7]:
# Name of the downstream dataset; also the key into the templates/labels dicts.
d = 'cub200'
print('Evaluating {}'.format(d))

# Evaluation split (is_train=False) with the validation preprocessing attached.
val_dataset = datasets.get_downstream_dataset(
    catalog,
    name=d,
    is_train=False,
    transform=val_transform,
)

# shuffle=False keeps the samples in dataset order; drop_last=False keeps the
# final, possibly smaller, batch.
loader_options = dict(
    batch_size=256,
    shuffle=False,
    num_workers=10,
    pin_memory=True,
    drop_last=False,
)
val_loader = torch.utils.data.DataLoader(val_dataset, **loader_options)

# Prompt templates and class labels for this dataset.
templates, labels = all_templates[d], all_labels[d]

# Switch the model to evaluation mode before any encoding happens.
model.eval()
print()

Evaluating cub200



In [8]:
# Text embeddings: one unit-norm vector per class, obtained by averaging the
# embeddings of all prompts generated for that class.

results = []
print('=> encoding captions')
with torch.no_grad():
    text_features = []
    for label in labels:
        # A label is either one name or a list of names; every name is
        # substituted into every template (templates vary slowest).
        names = label if isinstance(label, list) else [label]
        prompts = [t.format(l) for t in templates for l in names]

        # Tokenise on the GPU and flatten to rows of 77 token ids.
        tokens = tokenizer(prompts).cuda(non_blocking=True)
        tokens = tokens.view(-1, 77).contiguous()

        prompt_embeddings = utils.get_model(model).encode_text(tokens, ema=True)
        # Normalise each prompt embedding, average them, then re-normalise the mean.
        prompt_embeddings = prompt_embeddings / prompt_embeddings.norm(dim=-1, keepdim=True)
        class_vector = prompt_embeddings.mean(dim=0)
        class_vector = class_vector / class_vector.norm(dim=-1, keepdim=True)
        text_features.append(class_vector)

    # Stack the per-class vectors into a single tensor, one row per label.
    text_features = torch.stack(text_features, dim=0)

=> encoding captions


In [9]:
# Image embeddings: encode every validation batch and keep the unit-norm
# features together with the matching targets.

image_features_tot, target_list = [], []
with torch.no_grad():
    for batch_images, batch_targets in val_loader:
        batch_images = batch_images.cuda(non_blocking=True)
        batch_targets = batch_targets.cuda(non_blocking=True)

        # encode images, then scale each feature vector to unit L2 norm
        batch_features = utils.get_model(model).encode_image(batch_images, ema=True)
        batch_features = batch_features / batch_features.norm(dim=-1, keepdim=True)

        image_features_tot.append(batch_features)
        target_list.append(batch_targets)

In [10]:
# Join the per-batch feature tensors along the first dimension (torch.cat's default).
image_features_cat = torch.cat(image_features_tot)

In [11]:
# Class id of every image, in the order the loader produced the batches.
color_list = torch.cat(target_list).tolist()

# Distinct class ids, sorted so the permutation below does not depend on the
# iteration order of a set.
color_uniques = sorted(set(color_list))

# Seeded permutation of the class ids: each class gets another class id as its
# colour value, so that consecutive ids do not end up with near-identical colours.
shuffled_classes = color_uniques.copy()
random.seed(4)
random.shuffle(shuffled_classes)

# Key the mapping by the class id itself. Keying by enumeration index is only
# correct when the ids happen to be exactly 0..n-1, and raises KeyError (or
# silently mis-colours) otherwise.
shuffling_dic = dict(zip(color_uniques, shuffled_classes))

# Colour value of every image, aligned with color_list.
shuffled_colors = [shuffling_dic[class_id] for class_id in color_list]

In [None]:
# fusion of text and image embeddings
#
# Each modality is standardised on its own: the same scaler object is re-fitted
# first on the text features and then on the image features, so it ends up
# holding the image statistics.
scaler = StandardScaler()

text_features_tsne, image_features_tsne = (
    torch.Tensor(scaler.fit_transform(features.cpu()))
    for features in (text_features, image_features_cat)
)

# Text rows first, image rows after them.
text_cat_image = torch.cat([text_features_tsne, image_features_tsne], dim=0)
print(text_cat_image.size())

torch.Size([6120, 512])


In [13]:
# t-SNE of the merged tensor
#
# Projects the stacked (text, image) embeddings to 2-D, draws both modalities
# on one scatter plot, saves the figure as a PNG and displays it.

# Number of text rows at the top of `text_cat_image`; every later row is an image.
n_text = text_features_tsne.shape[0]

# t-SNE is stochastic: a fixed seed makes the layout reproducible across runs.
tsne_seed = 4
tsne_kwargs = dict(n_components=2, verbose=0, perplexity=40, random_state=tsne_seed)
# scikit-learn 1.5 renamed TSNE's `n_iter` to `max_iter` and later removed the
# old name, so use whichever keyword the installed version exposes.
iter_keyword = 'max_iter' if 'max_iter' in TSNE().get_params() else 'n_iter'
tsne_kwargs[iter_keyword] = 300

tsne = TSNE(**tsne_kwargs)
image_text_tsne = tsne.fit_transform(text_cat_image.to('cpu'))

# Larger markers for the text embeddings so they stand out among the images.
sizes = np.where(np.arange(len(image_text_tsne)) < n_text, 15, 5)

fig = go.Figure()

# Text embeddings: entry i of shuffled_classes is the colour value of the i-th
# class id, the same value shuffled_colors gives to that class's images.
fig.add_trace(go.Scatter(
    x=image_text_tsne[:n_text, 0],
    y=image_text_tsne[:n_text, 1],
    mode='markers',
    name='Text embeddings',
    marker=dict(
        size=sizes[:n_text],
        color=shuffled_classes,
        opacity=0.7,
        line=dict(width=0)
    ),
    showlegend=True
))

# Image embeddings, coloured per image by their (permuted) class id.
fig.add_trace(go.Scatter(
    x=image_text_tsne[n_text:, 0],
    y=image_text_tsne[n_text:, 1],
    mode='markers',
    name='Image embeddings',
    marker=dict(
        size=sizes[n_text:],
        color=shuffled_colors,
        opacity=0.7,
        line=dict(width=0)
    ),
    showlegend=True
))

# Legend boxed in the top-left corner of the plot area.
fig.update_layout(
    legend=dict(
        title='Types de données',
        x=0.01,
        y=0.99,
        bgcolor='rgba(255,255,255,0.8)',
        bordercolor='black',
        borderwidth=1
    )
)

path = '/home/onyxia/work/DetailCLIP/images/text + image/image.png'
# write_image does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(path), exist_ok=True)
fig.write_image(path, format='png', engine='kaleido')
fig.show()





In [30]:
# Cluster visualisation
#
# For a few randomly drawn classes, plot the image t-SNE points with that class
# highlighted and every other point in a single background colour.

# Colour value given to every point that is not in the highlighted class.
bleu = 0

# random.randint includes both bounds, so the upper bound must be the last
# valid index of `labels`; a larger value would make labels[nb] raise IndexError.
nbs = [random.randint(40, len(labels) - 1) for _ in range(5)]

# The first len(labels) rows of the projection are the text embeddings (one per
# label); the image points come after them.
image_tsne = image_text_tsne[len(labels):]

for nb in nbs:
    # Compare against the true class ids in color_list. shuffled_colors holds
    # permuted ids, so matching those against nb would highlight a different
    # class than the one named in the title.
    colors = [nb if class_id == nb else bleu for class_id in color_list]

    path = f'/home/onyxia/work/DetailCLIP/cluster_vis/{nb}.png'
    print(path)

    title = f"class : {labels[nb]}"
    fig = px.scatter(x=image_tsne[:, 0], y=image_tsne[:, 1], color=colors)
    fig['data'][0]['showlegend'] = True
    fig['data'][0]['name'] = "image embeddings"

    # Empty trace whose only purpose is to add a legend entry naming the
    # highlighted class.
    fig.add_trace(go.Scatter(
        x=[None],
        y=[None],
        mode='markers',
        marker=dict(color='yellow'),
        name=title
    ))

    fig.update_layout(
        legend=dict(
            x=0.01,
            y=0.99,
            bgcolor='rgba(255,255,255,0.8)',
            bordercolor='black',
            borderwidth=1
        )
    )
    fig.show()
    #fig.write_image(path, format='png', engine='kaleido')



/home/onyxia/work/DetailCLIP/cluster_vis/74.png


/home/onyxia/work/DetailCLIP/cluster_vis/150.png


/home/onyxia/work/DetailCLIP/cluster_vis/61.png


/home/onyxia/work/DetailCLIP/cluster_vis/195.png


/home/onyxia/work/DetailCLIP/cluster_vis/76.png
