### This notebook aims to provide code for visualizing embeddings with TensorBoard. The embeddings are computed by a Vision Transformer Masked Autoencoder (ViT-MAE) neural network that was trained on ImageNet with an image size of 64 x 64. The embeddings can be explored through the PCA / t-SNE / UMAP projections of the TensorBoard projector, and we also provide code to display the image content and labels in TensorBoard.

In this notebook we compute the embeddings of the weather image recognition dataset, available <a href="https://www.kaggle.com/datasets/fceb22ab5e1d5288200c0f3016ccd626276983ca1fe8705ae2c32f7064d719de">here</a> under a CC0 licence.




# Imports 

In [61]:
import csv
import cv2


import tensorflow as tf
from collections import defaultdict
from PIL import Image
import torch
import torchvision.datasets as datasets
from torchvision import transforms
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
import numpy as np
import pickle
from tqdm import tqdm
import torch.nn.functional as F
import random

from models_mae import MaskedAutoencoderViT

from functools import partial
from torch import nn



import os 


import datetime


inet_mean = [0.485, 0.456, 0.406]
inet_std = [0.229, 0.224, 0.225]
import numpy as np

from torch.utils.tensorboard import SummaryWriter
from functools import partial
import torch.nn as nn
import timm.optim.optim_factory as optim_factory
from models_mae import MaskedAutoencoderViT

# Creating logs folders for tensorboard logs and data


In [2]:

# Resolve the working folders relative to the current directory.
# os.path.join picks the right separator for the host OS (the previous
# hard-coded '\\' only produced valid paths on Windows).
current = os.getcwd()
DATA_PATH = os.path.join(current, 'dataset')  # folder to store images
LOG_DIR = os.path.join(current, 'logs')  # folder holding the TensorBoard files

# exist_ok makes the cell safe to re-run.
os.makedirs(LOG_DIR, exist_ok=True)

if not os.path.exists(DATA_PATH):
    from zipfile import ZipFile
    # The weather image recognition dataset provides this "archive.zip" file
    # when it is downloaded from Kaggle.
    # NOTE(review): extraction happens in the current directory; this presumably
    # creates the "dataset" folder - confirm against the archive layout.
    with ZipFile("archive.zip", 'r') as zObject:
        zObject.extractall()

### Check that cuda is available to make things faster

In [3]:
import torch
# Evaluates to True when PyTorch can use a CUDA device; as the last expression
# of the cell, the value is displayed as the cell output.
torch.cuda.is_available()

True

# Load the model and the data

In [4]:
# Loading the ViT-MAE model pretrained on ImageNet 64*64

n_heads = 16      # number of attention heads of the decoder
patch_size = 8
img_size = 64
num_patch = (img_size // patch_size) ** 2  # number of patches per image
model = MaskedAutoencoderViT(
        img_size=img_size, patch_size=patch_size, embed_dim=240, depth=10, num_heads=12,
        decoder_embed_dim=160, decoder_depth=6, decoder_num_heads=n_heads,
        mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6))

to_load = True
file_name = 'checkpoint-99.pth'
if to_load:
    # NOTE(review): torch.load unpickles the file, which can execute arbitrary
    # code - only load checkpoints coming from a trusted source.
    checkpoint = torch.load(file_name, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    print('Model loaded.')

# Use the GPU when there is one, otherwise fall back to the CPU instead of
# crashing on a hard-coded 'cuda'.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
# The model is only used for inference here, so switch off training-only
# behaviour (e.g. dropout) to keep the embeddings deterministic w.r.t. it.
model.eval()

Model loaded.


In [5]:
# Loading the list of image files
import glob

data_path_length = len(DATA_PATH)  # kept for backward compatibility

# One sub-folder per class. Sorting makes the class indices reproducible
# (glob returns entries in an OS-dependent order) and stray files that are
# not folders are ignored.
classes_list = sorted(
    path for path in glob.glob(os.path.join(DATA_PATH, '*')) if os.path.isdir(path)
)

classes = {}    # str(class index) -> class (folder) name
imgs_list = []  # [image path, class index] pairs
for k, class_dir in enumerate(classes_list):
    # basename gives the bare folder name; slicing the path by the length of
    # DATA_PATH left a leading path separator in the class name.
    classes[str(k)] = os.path.basename(class_dir)
    for img_path in sorted(glob.glob(os.path.join(class_dir, '*.jpg'))):
        imgs_list.append([img_path, k])

In [27]:
## TensorBoard won't be able to display more than 1500 images, so it is recommended to sample some of them for visualization.
## Here we sample among the first 2662 entries of the list (the 3 first classes, according to the original note).
sample_pool = imgs_list[:2662]
# A dedicated seeded generator makes the selection reproducible across runs, and
# min() avoids a ValueError when fewer than 1500 images are available (for
# instance when this cell is run a second time on the already reduced list).
imgs_list = random.Random(0).sample(sample_pool, min(1500, len(sample_pool)))

In [28]:
# Filtering images with a wrong number of channels or that cannot be read/resized
img_size = 64
channels = 3
ref_shape = (img_size, img_size, channels)
imgs_good_shape = []
for entry in tqdm(imgs_list):
    try:
        # The context manager closes the file handle right after the resize,
        # instead of leaving one open handle per image.
        with Image.open(entry[0]) as src:
            img = src.resize((img_size, img_size))
    except OSError:
        # Unreadable or corrupted file: count it as a bad image rather than
        # aborting the whole loop.
        continue
    if np.array(img).shape == ref_shape:
        imgs_good_shape.append(entry)
print('Number of bad images : ', len(imgs_list)-len(imgs_good_shape), ' / ', len(imgs_list))
imgs_list = imgs_good_shape

100%|██████████| 1500/1500 [00:08<00:00, 168.31it/s]

Number of bad images :  38  /  1500





In [29]:


from torch.utils.data import Dataset

class DATA(Dataset):
    """Dataset serving resized images from a list of ``[path, label]`` entries.

    Args:
        img_list: sequence whose items hold the image path at index 0 and the
            label at index 1.
        transform: optional callable applied to the channel-first float tensor
            (values in [0, 1]) before the normalization. It used to be silently
            discarded.
        mean, std: per-channel statistics; when both are given the tensor is
            normalized with ``transforms.Normalize``.
        img_size: side length, in pixels, the images are resized to.
    """

    def __init__(self, img_list, transform = None, mean = None,std = None,img_size = 64):
        self.img_list = img_list
        self.transform = transform
        self.mean = mean
        self.std = std
        self.img_size = img_size
        self.normalize = None
        if self.mean is not None and self.std is not None:
            self.normalize = transforms.Normalize(mean=self.mean, std=self.std)

    def __len__(self):
        """Return the number of entries of the image list."""
        return len(self.img_list)

    def __getitem__(self, idx, pil=False):
        """Return ``(image, label)`` for the entry ``idx``.

        With ``pil=True`` the image is the resized PIL image; otherwise it is a
        float32 tensor laid out as (channels, height, width).
        """
        img_path = self.img_list[idx][0]
        label = self.img_list[idx][1]
        # Close the file as soon as the resized copy exists, and use the size
        # of this dataset rather than a notebook-level global.
        with Image.open(img_path) as source:
            img = source.resize((self.img_size, self.img_size))
        if pil:
            return img, label
        img = torch.from_numpy(np.array(img) / 255.).to(torch.float32)
        img = img.permute(2, 0, 1)  # switch channel position: hwc -> chw
        if self.transform is not None:
            img = self.transform(img)
        if self.normalize is not None:
            img = self.normalize(img)
        return img, label
          
# Dataset over the selected images, normalized with the ImageNet statistics.
weather = DATA(
    img_list=imgs_list,
    transform=None,
    mean=inet_mean,
    std=inet_std,
    img_size=64,
)

In [30]:
#Some utils

def get_embed(id,mask_ratio=0.75,grid_id=0):
    """Return the flattened encoder output of the image at index ``id``.

    Args:
        id: index of the image in the ``weather`` dataset.
        mask_ratio: forwarded to ``model.forward_encoder`` as ``mask_ratio``.
        grid_id: forwarded to ``model.forward_encoder`` as ``grid_idx``.

    Returns:
        A 1-D tensor holding the encoder output without its first token.
    """
    img = weather[id][0].unsqueeze(dim=0)  # add the batch dimension
    # Follow the device the model actually lives on instead of assuming 'cuda'.
    img = img.to(next(model.parameters()).device)
    with torch.no_grad():
        f0, _, _, _ = model.forward_encoder(img, mask_ratio=mask_ratio, grid_idx=grid_id)
    # Drop the first token along dim 1 (presumably the class token - TODO confirm).
    f0 = f0[:, 1:, :]
    return f0.flatten()

def get_img(id):
    """Return the resized PIL image stored at index ``id`` of ``weather``."""
    sample = weather.__getitem__(id, pil=True)
    return sample[0]
  
def get_label(id):
    """Return the label stored at index ``id`` of ``weather.img_list``."""
    entry = weather.img_list[id]
    return entry[1]

# Generate embeddings and sprites, might take some time depending on your GPU

In [31]:
images_pil = []         # resized images as numpy arrays
images_embeddings = []  # one flattened embedding per image
labels = []             # label of each image, in the same order
for idx in tqdm(range(len(weather))):
    picture = get_img(idx)
    embedding = get_embed(idx)
    images_pil.append(np.array(picture))
    images_embeddings.append(embedding.detach().cpu().numpy())
    # The label is taken as stored in the dataset.
    labels.append(get_label(idx))

100%|██████████| 1462/1462 [00:33<00:00, 43.85it/s]


In [32]:
def images_to_sprite(data):
    """Tile a batch of images into one square sprite image.

    Args:
        data: array of shape (count, height, width) or
            (count, height, width, channels). Single-channel batches are
            replicated over 3 channels.

    Returns:
        A uint8 array of shape (n * height, n * width, channels) with
        n = ceil(sqrt(count)); images fill the grid row by row and unused
        cells are black. Every image is min-max scaled to [0, 255] on its own.

    Raises:
        ValueError: if ``data`` holds no image.
    """
    if data.shape[0] == 0:
        raise ValueError("images_to_sprite needs at least one image")
    if data.ndim == 3:
        data = np.tile(data[..., np.newaxis], (1, 1, 1, 3))
    data = data.astype(np.float32)
    count = data.shape[0]

    # Per-image min-max scaling (local names no longer shadow min()/max()).
    lowest = data.reshape(count, -1).min(axis=1)
    data = data - lowest.reshape(count, 1, 1, 1)
    highest = data.reshape(count, -1).max(axis=1)
    # A constant image has a zero range: leave it at 0 instead of producing
    # NaN through a division by zero.
    highest[highest == 0] = 1.0
    data = data / highest.reshape(count, 1, 1, 1)

    # Pad with black images so that the batch fills an n x n grid.
    n = int(np.ceil(np.sqrt(count)))
    padding = ((0, n ** 2 - count), (0, 0),
               (0, 0)) + ((0, 0),) * (data.ndim - 3)
    data = np.pad(data, padding, mode='constant',
                  constant_values=0)

    # (n*n, h, w, c) -> (row, col, h, w, c) -> (row, h, col, w, c) -> (n*h, n*w, c)
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3)
            + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    data = (data * 255).astype(np.uint8)
    return data

In [33]:
sprite = images_to_sprite(np.array(images_pil))
# The images were loaded with PIL (RGB channel order) while cv2.imwrite expects
# BGR: convert first, otherwise red and blue are swapped in the saved sprite.
cv2.imwrite(os.path.join(LOG_DIR, 'sprite.jpg'), cv2.cvtColor(sprite, cv2.COLOR_RGB2BGR))

True

In [34]:
# One embedding per line, values separated by tabs.
# newline='' is required by the csv module: without it every row is followed
# by an extra '\r' on Windows.
with open(os.path.join(LOG_DIR, 'feature_vecs.tsv'), 'w', newline='') as fw:
    csv_writer = csv.writer(fw, delimiter='\t')
    csv_writer.writerows(images_embeddings)

In [35]:
# One label per line, in the same order as the embeddings.
# (To write class names instead, map each label through classes[str(label)].)
with open(f'{LOG_DIR}/metadata.tsv', 'w') as file:
    file.writelines(f"{label}\n" for label in labels)

In [36]:
# Number of thumbnails per row (and per column) of the square sprite grid.
# This is not the pixel size of a single image.
int(np.ceil(np.sqrt(len(images_pil))))

39

# Build proper config file for tensorboard and then visualize the embeddings


In [None]:
"""
This will be written in the config file.
embeddings {
  metadata_path: "metadata.tsv"
  tensor_path: "feature_vecs.tsv"
  sprite {
    image_path: "sprite.jpg"
    single_image_dim: 45
    single_image_dim: 45
  }
}
"""

text_file = open("logs/projector_config.pbtxt", "w")
 
text_file.write('embeddings {\n  metadata_path: "metadata.tsv"\n  tensor_path: "feature_vecs.tsv"\n  sprite {\n    image_path: "sprite.jpg"\n    single_image_dim: 45\n    single_image_dim: 45\n  }\n}')
 
text_file.close()

In [None]:
# Select TensorFlow 2.x when running in Colab; elsewhere the failure of the
# magic is deliberately ignored.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
    pass

# Load the tensorboard notebook extension, which provides the %tensorboard
# magic used in the cells below.
%load_ext tensorboard

In [None]:
#%reload_ext tensorboard #In case you ran some experiments and want to display another tensorboard.

In [None]:
%tensorboard --logdir ./logs #In some environments (e.g. JupyterLab) you might need to execute both this cell and the next one; the second one will display TensorBoard. Then just go to "Projector".


In [None]:
%tensorboard --logdir ./logs #TensorBoard will be displayed below this cell

In [None]:
#On Windows, you might not be able to kill the tensorboard process. In this case you should clear its data by removing this folder: C:\Users\USER\AppData\Local\Temp\.tensorboard-info