In [1]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import models
import torch
from collections import OrderedDict
import json
import os
import torchvision.transforms as transforms
from tokenizer import SimpleTokenizer
import datasets
import utils
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.colors as pc
import plotly.io as pio
import random
import kaleido
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = getattr(models, 'ICLIP_VITB16')()
model.cuda()

	Creating MAE projection head
	MAE projection head created
	Creating IBOT projection head


  WeightNorm.apply(module, name, dim)


	keys have been loaded for ibot head with status: <All keys matched successfully>
	IBOT projection head created
	DetailCLIP model created


ICLIP(
  (visual): MaskVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (drop1): Dropout(p=0.0, inplace=False)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop2): Dropout

In [3]:
# Creating model
ckpt_path = 'checkpoint_best.pt'

ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
state_dict = OrderedDict()
for k, v in ckpt['state_dict'].items():
    state_dict[k.replace('module.', '')] = v

old_args = ckpt['args']
print("=> creating model: {}".format(old_args.model))
model = getattr(models, old_args.model)()
model.cuda()
model.load_state_dict(state_dict, strict=True)
print("=> loaded resume checkpoint '{}' (epoch {})".format(ckpt_path, ckpt['epoch']))

=> creating model: ICLIP_VITB16
	Creating MAE projection head
	MAE projection head created
	Creating IBOT projection head
	keys have been loaded for ibot head with status: <All keys matched successfully>
	IBOT projection head created
	DetailCLIP model created
=> loaded resume checkpoint 'checkpoint_best.pt' (epoch 48)


In [4]:
cwd = '/home/onyxia/work/DetailCLIP'
with open(os.path.join(cwd, 'dataset_catalog.json')) as f:
    catalog = json.load(f)

with open(os.path.join(cwd, 'templates.json')) as f:
    all_templates = json.load(f)

with open(os.path.join(cwd, 'labels.json')) as f:
    all_labels = json.load(f)

In [5]:
# Data loading code
print("=> creating dataset")
tokenizer = SimpleTokenizer()
val_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        lambda x: x.convert('RGB'),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
    ])


=> creating dataset


In [6]:
d='cub200'
print('Evaluating {}'.format(d))
val_dataset = datasets.get_downstream_dataset(catalog, name=d, is_train=False, transform=val_transform)

val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=256, shuffle=False,
    num_workers=10, pin_memory=True, drop_last=False)

templates = all_templates[d]
labels = all_labels[d]

model.eval()

Evaluating cub200


ICLIP(
  (visual): MaskVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (drop1): Dropout(p=0.0, inplace=False)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop2): Dropout

In [7]:
#Text embeddings

results = []
print('=> encoding captions')
with torch.no_grad():
    text_features = []
    for label in labels:
        if isinstance(label, list):
            texts = [t.format(l) for t in templates for l in label]
        else:
            texts = [t.format(label) for t in templates]
        texts = tokenizer(texts).cuda(non_blocking=True)
        texts = texts.view(-1, 77).contiguous()
        class_embeddings = utils.get_model(model).encode_text(texts, ema=True)
        class_embeddings = class_embeddings / class_embeddings.norm(dim=-1, keepdim=True)
        class_embeddings = class_embeddings.mean(dim=0)
        class_embeddings = class_embeddings / class_embeddings.norm(dim=-1, keepdim=True)
        text_features.append(class_embeddings)

    text_features = torch.stack(text_features, dim=0)

=> encoding captions


In [8]:

# Image embeddings

with torch.no_grad():
    image_features_tot = []
    target_list = []
    for images, target in val_loader:
        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # encode images
        image_features = utils.get_model(model).encode_image(images, ema=True)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        image_features_tot.append(image_features)
        target_list.append(target)

In [9]:
image_features_cat = torch.cat(image_features_tot, dim=0)

In [10]:
color_list = torch.cat(target_list).tolist() # one color per image, sorted

colors_uniques = set(color_list)
nb_of_colors = len(colors_uniques)
colors = list(range(nb_of_colors))
shuffled_classes = colors.copy() # for text embeddigns

random.seed(4)
random.shuffle(shuffled_classes)
remap = {old: new for old, new in zip(colors, shuffled_classes)}

shuffled_colors = [remap[color] for color in color_list] #one color per image, mixed

txt_img_color = shuffled_classes + shuffled_colors

In [11]:
# fusion of text and image embeddings
scaler = StandardScaler()

text_features_tsne0 = scaler.fit_transform(text_features.cpu())
image_features_tsne0 = scaler.fit_transform(image_features_cat.cpu())

text_features_tsne0 = torch.Tensor(text_features_tsne0)
image_features_tsne0 = torch.Tensor(image_features_tsne0)

text_cat_image = torch.cat((text_features_tsne0, image_features_tsne0), dim=0)
print(text_cat_image.size())

torch.Size([6120, 512])


In [23]:
# t-SNE of the merged tensor



tsne = TSNE(n_components=2, verbose=0, perplexity=40, n_iter=300)
image_text_tsne = tsne.fit_transform(text_cat_image.to('cpu'))

sizes = np.array([1 for i in range(len(image_text_tsne[:200]))])

fig = px.scatter(x=image_text_tsne[:200, 0], y=image_text_tsne[:200, 1], color=shuffled_classes, opacity=1, size_max=9, size=sizes)              #texte

fig.add_scatter(x=image_text_tsne[200:, 0], y=image_text_tsne[200:, 1], opacity = 0.7, mode='markers', marker_size=5,  marker=dict(  #images
            color=shuffled_colors
            ))
fig.data[1].showlegend = False
fig.update_traces(marker=dict(
                              line=dict(width=0,
                                        )
                  ))

path='/home/onyxia/work/DetailCLIP/images/text + image/image.png'
fig.write_image(path, format='png', engine='kaleido')
fig.show()




'n_iter' was renamed to 'max_iter' in version 1.5 and will be removed in 1.7.



In [13]:
# text features PCA

text_features_pca = text_features
pca = PCA(n_components=2)
text_pca = pca.fit_transform(text_features_pca.to('cpu'))

fig = px.scatter(x=text_pca[:, 0], y=text_pca[:, 1], hover_name=[all_labels[d] for d in catalog][0])
fig.update_layout(
    title="PCA visualization of Custom Classification dataset",
    xaxis_title="First Principal Component",
    yaxis_title="Second Principal Component",
)
fig.show()

In [14]:
# image features PCA

image_features_pca = image_features_cat
print(image_features_pca.size())
pca = PCA(n_components=2)
image_pca = pca.fit_transform(image_features_pca.to('cpu'))

fig = px.scatter(x=image_pca[:, 0], y=image_pca[:, 1], color=shuffled_colors)
fig.update_layout(
    title="PCA visualization of Custom Classification dataset",
    xaxis_title="First Principal Component",
    yaxis_title="Second Principal Component",
)
fig.show()

torch.Size([5920, 512])


In [15]:
# t-SNE

text_features_tsne = text_features
tsne = TSNE(n_components=2, verbose=0, perplexity=40, n_iter=300)
text_tsne = tsne.fit_transform(text_features_tsne.to('cpu'))

fig = px.scatter(x=text_tsne[:, 0], y=text_tsne[:, 1], hover_name=[all_labels[d] for d in catalog][0])
fig.update_layout(
    title="t-SNE class names embeddings",
    xaxis_title="First Principal Component",
    yaxis_title="Second Principal Component",
)
cwd = '/home/onyxia/work/tsne_plot/'
path = os.path.join(cwd, 't-SNE class names embeddings.png')
fig.write_image(path, format='png', engine='kaleido')
fig.show()


'n_iter' was renamed to 'max_iter' in version 1.5 and will be removed in 1.7.



In [16]:
# t-SNE

image_features_tsne = image_features_cat
tsne = TSNE(n_components=2, verbose=0, perplexity=40, n_iter=300)
image_tsne = tsne.fit_transform(image_features_tsne.to('cpu'))

fig = px.scatter(x=image_tsne[:, 0], y=image_tsne[:, 1], color=new_colors)
fig.update_layout(
    title="t-SNE of image embeddings",
    xaxis_title="First Principal Component",
    yaxis_title="Second Principal Component",
)
path = os.path.join(cwd, 't-SNE of image embeddings.png')
fig.write_image(path, format='png', engine='kaleido')
fig.show()



'n_iter' was renamed to 'max_iter' in version 1.5 and will be removed in 1.7.



NameError: name 'new_colors' is not defined

In [None]:
# Cluster visualisation

nb=180
bleu = 0

nbs = [random.randint(40,200) for _ in range(10)]

for nb in nbs:
    colors=shuffled_colors.copy()
    for i,color in enumerate(colors):
        if color!=nb:
            colors[i]=bleu

    path = f'/home/onyxia/work/DetailCLIP/cluster_vis/{nb}.png'
    print(path)
    fig = px.scatter(x=image_tsne[:, 0], y=image_tsne[:, 1], color=colors)
    #fig.write_image(path, format='png', engine='kaleido')



/home/onyxia/work/DetailCLIP/cluster_vis/132.png


ValueError: All arguments should have the same length. The length of argument `color` is 5920, whereas the length of previously-processed arguments ['x', 'y'] is 200

In [None]:
# Plot text + image

text_tsne = image_text_tsne[:200,:]
image_tsne = image_text_tsne[200:, :]

nbs = [random.randint(1,199) for _ in range(20)]
bleu = 0

for nb in nbs:
    classes = shuffled_classes.copy()
    colors = shuffled_colors.copy()  
    for i,classe in enumerate(classes):
        if classe!=nb:
            classes[i] = bleu

    for i,color in enumerate(colors):
        if color!=nb:
            colors[i]=bleu

    fig = px.scatter(x=image_tsne[:, 0], y=image_tsne[:, 1], color=colors, opacity=0.6)
    fig.add_scatter(x=text_tsne[:, 0], y=text_tsne[:, 1], opacity = 0.7, mode='markers', marker_size=15,     marker=dict(
            size=15,
            color=classes
            ))
    fig.data[0].showlegend = False
    fig.data[1].showlegend = False

    # Légende manuelle : image embeddings
    fig.add_trace(go.Scatter(
        x=[None], y=[None],
        mode='markers',
        marker=dict(size=10, color='blue'),
        name='Image embeddings',
        showlegend=True,
    ))

    # Légende manuelle : text embeddings
    fig.add_trace(go.Scatter(
        x=[None], y=[None],
        mode='markers',
        marker=dict(size=15, color='blue'),
        name='Text embeddings',
        showlegend=True,
    ))

    fig.update_layout(
    legend=dict(
        title='Types de données',
        x=0.01,
        y=0.99,
        bgcolor='rgba(255,255,255,0.8)',
        bordercolor='black',
        borderwidth=1
    )
)

    path = f'/home/onyxia/work/DetailCLIP/images/text + image/{nb}.png'
    fig.write_image(path, format='png', engine='kaleido')


[[-7.206905    4.8652205 ]
 [-8.454845    2.5451024 ]
 [-7.398945    4.8050723 ]
 ...
 [ 5.0441008  -0.40579963]
 [ 6.1546197  -1.7271287 ]
 [ 4.63135    -1.6197265 ]]
200


In [None]:

# Plot text + image

nbs = [random.randint(1,199) for _ in range(10)]
bleu = 0

for nb in nbs:
    classes = shuffled_classes.copy()
    colors = shuffled_colors.copy()  
    for i,classe in enumerate(classes):
        if classe!=nb:
            classes[i] = bleu

    for i,color in enumerate(colors):
        if color!=nb:
            colors[i]=bleu


    fig = px.scatter(title='Cluster visualisation', x=image_tsne[:, 0], y=image_tsne[:, 1], color=colors, opacity=0.6)
    #fig.data[0].showlegend = False
    #fig.add_scatter(x=text_tsne[:, 0], y=text_tsne[:, 1], opacity = 0.7, mode='markers', marker_size=15,     marker=dict(
    #        size=15,
    #        color=classes, 
    #        colorbar=dict(title="Classe"),
    #        ))
    #fig.update_traces(
    #    marker=dict(coloraxis='coloraxis'),
    #    hovertemplate='Classe : %{marker.color}<extra></extra>'
    #)
    # Légende manuelle : image embeddings
    #fig.add_trace(go.Scatter(
    #    x=[None], y=[None],
    #    mode='markers',
    #    marker=dict(size=10, color='blue'),
    #    name='Image embeddings',
    #    showlegend=True,
    #))

    # Légende manuelle : text embeddings
    #fig.add_trace(go.Scatter(
    #    x=[None], y=[None],
    #    mode='markers',
    #    marker=dict(size=15, color='blue'),
    #    name='Text embeddings',
    #    showlegend=True,
    #))

    #fig.update_layout(
    #legend=dict(
    #    title='Types de données',
    #    x=0.01,
    #    y=0.99,
    #    bgcolor='rgba(255,255,255,0.8)',
    #    bordercolor='black',
    #    borderwidth=1
    #)
    #)

    path = f'/home/onyxia/work/DetailCLIP/images/cluster_images/{nb}.png'
    fig.write_image(path, format='png', engine='kaleido')