In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchsummary import summary
import torchvision.models as models
import glob
from tqdm import tqdm
from PIL import Image
import cv2
import os

import matplotlib.pyplot as plt
%matplotlib inline


from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import plotly.express as px
'''!pip install faiss-gpu
import faiss'''

'!pip install faiss-gpu\nimport faiss'

In [None]:
class AE_Linear01(torch.nn.Module):
  """Fully connected autoencoder for flattened 32x32 inputs.

  The encoder maps a 1024-value vector to a 512-value code through one hidden
  layer of 26*26 units; the decoder mirrors it and ends with Tanh, so the
  reconstruction lies in [-1, 1].
  """

  def __init__(self):
    super().__init__()
    input_dim = 32 * 32
    hidden_dim = 26 * 26
    code_dim = 512

    # Layer positions inside each Sequential determine the state-dict keys
    # (encoder.0, encoder.2, decoder.0, decoder.2), so the order is kept.
    encoder_layers = [
        nn.Linear(input_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, code_dim),
    ]
    decoder_layers = [
        nn.Linear(code_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, input_dim),
        nn.Tanh(),
    ]
    self.encoder = torch.nn.Sequential(*encoder_layers)
    self.decoder = torch.nn.Sequential(*decoder_layers)

  def forward(self, x):
    """Encode `x` and return its reconstruction."""
    return self.decoder(self.encoder(x))

In [None]:
'''
SHOW IMAGE
for img, label in new_loader:
  print(np.transpose(img[0], (1,2,0)).shape)
  print(img[0])
  print(f'label: {label[0]}')
  plt.imshow((img[0].detach().numpy().transpose(1, 2, 0)*255).astype(np.uint8))
  plt.show()
  break
'''

"\nSHOW IMAGE\nfor img, label in new_loader:\n  print(np.transpose(img[0], (1,2,0)).shape)\n  print(img[0])\n  print(f'label: {label[0]}')\n  plt.imshow((img[0].detach().numpy().transpose(1, 2, 0)*255).astype(np.uint8))\n  plt.show()\n  break\n"

In [None]:
def load_dog():
  """Download the Kaggle dog-breed image dataset and wrap it in a DataLoader.

  Returns:
    A two-element list ['dogs', dloader] where dloader yields shuffled batches
    of 10 images resized to 256x256 and converted to tensors.
  """
  # Shell step: sets the Kaggle credentials, downloads the archive into
  # data_dogs/, unzips it without overwriting existing files (-n) and deletes
  # the zip. "Name"/"Key" are placeholders that must be replaced with real
  # credentials; NOTE(review): prefer reading them from the environment
  # instead of writing them into the notebook.
  !export KAGGLE_USERNAME="Name" && export KAGGLE_KEY="Key" && mkdir -p data_dogs && cd data_dogs && kaggle datasets download -d eward96/dog-breed-images && unzip -n dog-breed-images.zip && rm dog-breed-images.zip
  # NOTE(review): this lists `data`, while the download above goes to
  # `data_dogs` — presumably `data_dogs` was intended; confirm.
  !ls data
  data_dir = 'data_dogs'
  # Used only for the image count printed below. glob is called without
  # recursive=True, so "**" matches a single directory level here.
  list_imgs = glob.glob(data_dir + "/**/*.jpg")
  tc = transforms.Compose([
          transforms.Resize((256, 256)),
          transforms.ToTensor()
      ])

  # ImageFolder derives the class labels from the sub-directory names.
  image_datasets = datasets.ImageFolder(data_dir, transform=tc)
  dloader = torch.utils.data.DataLoader(image_datasets, batch_size=10, shuffle=True)
  print(f"There are {len(list_imgs)} images in the dataset {data_dir}")
  return ['dogs', dloader]

def load_stl10():
  """Create a shuffled DataLoader over the torchvision STL10 dataset.

  The dataset is downloaded to ./data when it is not already present and each
  image is converted to a tensor.

  Returns:
    A two-element list ['STL10', dloader]; dloader yields batches of 10.
  """
  # Download the STL10 dataset (no split is passed, so the library default is used)
  stl10_set = datasets.STL10(
      root="./data",
      download=True,
      transform=transforms.ToTensor(),
  )

  # Shuffled loader used to feed the images to the embedding models
  stl10_loader = torch.utils.data.DataLoader(
      dataset=stl10_set,
      batch_size=10,
      shuffle=True,
  )
  return ['STL10', stl10_loader]

def load_cifar10():
  """Create a shuffled DataLoader over the torchvision CIFAR10 dataset.

  The dataset is downloaded to ./data when it is not already present and each
  image is converted to a tensor.

  Returns:
    A two-element list ['CIFAR10', dloader]; dloader yields batches of 10.
  """
  # Download the CIFAR10 dataset (no train flag is passed, so the library default is used)
  cifar10_set = datasets.CIFAR10(
      root="./data",
      download=True,
      transform=transforms.ToTensor(),
  )

  # Shuffled loader used to feed the images to the embedding models
  cifar10_loader = torch.utils.data.DataLoader(
      dataset=cifar10_set,
      batch_size=10,
      shuffle=True,
  )
  return ['CIFAR10', cifar10_loader]


In [None]:
def load_data(datasets=None):
  """Run every dataset loader and collect the results.

  Args:
    datasets: iterable of zero-argument callables, each returning a
      [name, dataloader] pair. Defaults to [load_dog] when omitted.

  Returns:
    A list with the result of every loader that succeeded, in input order.
    Loaders that raise are reported on stdout and skipped (best effort).
  """
  if datasets is None:
    # Resolved at call time so the default does not depend on definition order.
    datasets = [load_dog]

  data_loaders = []
  for loader_fn in datasets:
    try:
      data_loaders.append(loader_fn())
    except Exception as err:
      # Keep going with the remaining datasets, but show the real cause.
      loader_name = getattr(loader_fn, '__name__', str(loader_fn))
      print(f'Loading dataset with {loader_name} failed: {err!r}')

  # Returned after the loop so that every requested dataset is processed.
  return data_loaders


In [None]:
# Module-level buffer filled by the forward hooks below, one array per batch.
OUTPUTS = []
def copy_embeddings(m, i, o):
  """Forward hook that stores the hooked layer's output in OUTPUTS.

  Args:
    m: the hooked module (unused).
    i: the module's input (unused).
    o: the module's output; indexed as o[:, :, 0, 0], so it must have at
      least four dimensions (presumably (batch, channels, 1, 1) for the
      ResNet avgpool layer this hook is registered on — confirm for other layers).
  """
  # .cpu() keeps the hook working when the model runs on another device;
  # it is a no-op for tensors that already live on the CPU.
  batch_embeddings = o[:, :, 0, 0].detach().cpu().numpy()
  OUTPUTS.append(batch_embeddings)

def copy_embeddings_ae(m, i, o):
  """Forward hook that stores the hooked layer's full output in OUTPUTS.

  Args:
    m: the hooked module (unused).
    i: the module's input (unused).
    o: the module's output tensor; it is stored unchanged as a numpy array.
  """
  # .cpu() keeps the hook working when the model runs on another device;
  # it is a no-op for tensors that already live on the CPU.
  batch_embeddings = o.detach().cpu().numpy()
  OUTPUTS.append(batch_embeddings)

def load_resnet18():
  """Load a pretrained ResNet-18 from torch.hub as a frozen embedding model.

  A forward hook on the 'avgpool' layer copies that layer's output into the
  global OUTPUTS buffer on every forward pass.

  Returns:
    A two-element list ['resnet18', model] with the model in eval mode.
  """
  net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)

  # Freeze all weights: the network is only used for inference
  for weight in net.parameters():
    weight.requires_grad = False
  net.eval()

  # Capture the embeddings produced at the average-pooling layer
  pooling_layer = net._modules.get('avgpool')
  pooling_layer.register_forward_hook(copy_embeddings)
  return ['resnet18', net]

def load_ae(weights_path="/content/model_weights.pth"):
  """Load the trained AE_Linear01 autoencoder as a frozen embedding model.

  A forward hook on the 'encoder' sub-module copies the encoder output into
  the global OUTPUTS buffer on every forward pass.

  Args:
    weights_path: path of the saved state dict. The default is the location
      the notebook originally used.

  Returns:
    A two-element list ['ae', model] with the model in eval mode.
  """
  model = AE_Linear01()

  # map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
  # runtime; the hooks convert outputs to numpy, so the model stays on the CPU.
  # NOTE: torch.load unpickles the file — only load checkpoints you trust.
  state_dict = torch.load(weights_path, map_location='cpu')

  # strict=False tolerates key mismatches; report them instead of silently
  # continuing with randomly initialised layers.
  load_result = model.load_state_dict(state_dict, strict=False)
  if load_result.missing_keys or load_result.unexpected_keys:
    print(f'Warning: weights only partially loaded from {weights_path}: '
          f'missing={list(load_result.missing_keys)}, '
          f'unexpected={list(load_result.unexpected_keys)}')

  layer = model._modules.get('encoder')
  _ = layer.register_forward_hook(copy_embeddings_ae)  # capture the encoder output
  for param in model.parameters():
    param.requires_grad = False  # inference only, no gradients needed
  model.eval()
  return ['ae', model]

In [None]:
def load_models(models=[]):
  """Instantiate every model by calling its loader function.

  Args:
    models: iterable of zero-argument callables; the list is only read.

  Returns:
    A new list with the return value of each callable, in input order.
  """
  loaded = []
  for build_model in models:
    loaded.append(build_model())
  return loaded

In [None]:
def generate_embeddings(dloader, model, max_batches=102):
  """Run `model` over batches from `dloader` and collect the hooked embeddings.

  The embeddings themselves are written to the global OUTPUTS buffer by the
  forward hook registered on the model; this function runs the forward passes,
  gathers the buffer into one tensor and records a label for every row.

  Args:
    dloader: iterable yielding (X, Y) batches of image and label tensors.
    model: callable applied to each flattened batch.
    max_batches: maximum number of batches to process (None for all). The
      default matches the 102 batches the original loop processed.

  Returns:
    [list_embeddings, labels]: a float tensor with one embedding per row and
    a list with one integer label per row.
  """
  from itertools import islice

  global OUTPUTS
  OUTPUTS = []  # drop leftovers from an earlier or aborted run
  labels = []

  with torch.no_grad():
    for X, Y in islice(dloader, max_batches):
      flat = X.reshape(-1, 32*32)
      _ = model(flat)
      # The reshape turns one image with C channels into C rows, so every
      # label is repeated once per row to keep labels[i] aligned with row i.
      # NOTE(review): if the model expects one grayscale row per image,
      # convert X to a single channel before the reshape instead — confirm
      # against the training code.
      rows_per_sample = flat.shape[0] // X.shape[0]
      for y in Y:
        labels.extend([y.item()] * rows_per_sample)

  # Concatenate the per-batch arrays into a single (rows, dim) tensor
  if OUTPUTS:
    list_embeddings = torch.Tensor(np.concatenate(OUTPUTS, axis=0))
  else:
    list_embeddings = torch.empty(0)
  OUTPUTS = []

  print(f'Number of Embeddings: {len(list_embeddings)}')
  if len(list_embeddings) > 0:
    print(f'Embeddings Dimension: {len(list_embeddings[0])}')
  print(f'Number labels:{len(labels)}')
  return [list_embeddings, labels]


In [None]:
def pipeline():
  """Load data and models, embed every dataset with every model and evaluate.

  Returns:
    A list of [dataset_name, model_name, score] entries, one per
    dataset/model pair; empty when no dataset or model could be loaded.
  """
  # load data and models
  dloaders = load_data(datasets=[load_cifar10])
  embedders = load_models(models=[load_ae])
  if not dloaders or not embedders:
    print('Nothing to evaluate: no dataset or model could be loaded')
    return []

  results = []
  for data_name, dloader in dloaders:
    for model_name, model in embedders:
      # generate embeddings
      list_embeddings, labels = generate_embeddings(dloader, model)
      print(len(labels))
      # evaluate embeddings
      #plot_embeddings(list_embeddings, labels)
      score = evaluate_cos_similarity(list_embeddings, labels)
      results.append([data_name, model_name, score])

  # TODO: decide whether images should be normalized before they are fed to the network
  return results

# NOTE: evaluate_cos_similarity is defined in a later cell of this notebook;
# that cell has to be executed before this call.
pipeline();

Files already downloaded and verified
Number of Embeddings: 3060
Embeddings Dimension: 512
Number labels:1020
1020


IndexError: list index out of range

In [None]:
def plot_embeddings(list_embeddings, labels):
  """Scatter-plot the embeddings in 2-D, coloured by label.

  The embeddings are first reduced with PCA (to at most 50 components) and
  then projected to two dimensions with t-SNE.

  Args:
    list_embeddings: 2-D array-like or CPU tensor, one embedding per row.
    labels: one label per embedding row, used as the hue of the points.
  """
  import matplotlib.pyplot as plt
  import seaborn as sns
  import pandas as pd

  features = np.asarray(list_embeddings)
  label_values = np.asarray(labels)

  # PCA cannot produce more components than there are samples or features
  n_components = min(50, features.shape[0], features.shape[1])
  pca_result = PCA(n_components=n_components).fit_transform(features)

  tsne = TSNE(n_components=2, verbose=1, random_state=123)
  z = tsne.fit_transform(pca_result)

  df_subset = pd.DataFrame({
      'tsne-2d-one': z[:, 0],
      'tsne-2d-two': z[:, 1],
  })

  # One colour per distinct label instead of a fixed palette of 10
  n_classes = len(np.unique(label_values))

  plt.figure(figsize=(16,10))
  sns.scatterplot(
      x="tsne-2d-one", y="tsne-2d-two",
      hue=label_values,
      palette=sns.color_palette("hls", n_classes),
      data=df_subset,
      legend="full",
      alpha=0.3
  )

In [None]:
def evaluate_precision_recall(embeddings,labels):
  """Score embeddings with a nearest-class-mean classifier.

  Every class is represented by the mean of its embeddings; each embedding is
  then assigned to the class whose mean is closest in Euclidean distance.
  Despite the function name, the returned score is the accuracy: the fraction
  of samples whose assigned class equals their label.

  Args:
    embeddings: 2-D tensor or array-like, one embedding per row.
    labels: sequence of integer class ids, one per embedding row.

  Returns:
    The accuracy as a float in [0, 1].

  Raises:
    ValueError: if the input is empty, not 2-D, or the number of labels does
      not match the number of embeddings.
  """
  embeddings = torch.as_tensor(embeddings, dtype=torch.float32)
  label_ids = torch.as_tensor(labels, dtype=torch.long)
  if embeddings.ndim != 2 or embeddings.shape[0] == 0:
    raise ValueError('embeddings must be a non-empty 2-D collection')
  if embeddings.shape[0] != label_ids.shape[0]:
    raise ValueError(
        f'got {embeddings.shape[0]} embeddings but {label_ids.shape[0]} labels')

  # Only classes that actually occur get a mean vector, so empty classes and
  # classes with a single sample need no special handling.
  class_ids = torch.unique(label_ids)
  mean_vecs = torch.stack(
      [embeddings[label_ids == class_id].mean(dim=0) for class_id in class_ids])

  # Distance of every embedding to every class mean; the NEAREST mean wins.
  distances = torch.cdist(embeddings, mean_vecs)
  new_labels = class_ids[distances.argmin(dim=1)]

  correct = int((new_labels == label_ids).sum())
  score = correct / len(label_ids)  # normalize
  print(f'score: {score}')
  return score

def evaluate_cos_similarity(embeddings,labels):
  """Average intra-class cosine similarity of the embeddings.

  For every class the cosine similarity of each unordered pair of distinct
  embeddings is averaged; the result is the mean of these per-class averages.
  Pairs of a vector with itself are excluded because their similarity is
  always 1 and would inflate the score. Classes with fewer than two samples
  have no pairs and are skipped.

  Args:
    embeddings: 2-D tensor or array-like, one embedding per row.
    labels: sequence of integer class ids, one per embedding row.

  Returns:
    The mean of the per-class average similarities as a float.

  Raises:
    ValueError: if the embeddings are not 2-D, the number of labels does not
      match the number of embeddings, or no class has at least two samples.
  """
  embeddings = torch.as_tensor(embeddings, dtype=torch.float32)
  label_ids = torch.as_tensor(labels, dtype=torch.long)
  if embeddings.ndim != 2:
    raise ValueError('embeddings must be a 2-D collection')
  if embeddings.shape[0] != label_ids.shape[0]:
    raise ValueError(
        f'got {embeddings.shape[0]} embeddings but {label_ids.shape[0]} labels')

  # With unit-length rows, the dot product of two rows is their cosine similarity.
  unit_vecs = torch.nn.functional.normalize(embeddings, dim=1)

  average_similarity_classes = []
  for class_id in torch.unique(label_ids).tolist():
    members = unit_vecs[label_ids == class_id]
    n_members = members.shape[0]
    if n_members < 2:
      continue
    gram = members @ members.T
    # The off-diagonal entries contain every unordered pair twice.
    pair_sum = (gram.sum() - gram.diagonal().sum()).item() / 2
    n_pairs = n_members * (n_members - 1) / 2
    average_similarity_classes.append(pair_sum / n_pairs)

  if not average_similarity_classes:
    raise ValueError('need at least one class with two or more samples')

  average_similarity_in_classes = sum(average_similarity_classes)/len(average_similarity_classes)
  print(average_similarity_in_classes)
  return average_similarity_in_classes

In [None]:
'''list_embeddings = np.array(list_embeddings).astype(np.float32)
d = list_embeddings.shape[1]
print(list_embeddings.shape)
quantizer = faiss.IndexFlatL2(512)   # build the index
index = faiss.IndexIVFPQ(quantizer, d, 5, 8, 8)
index.train(list_embeddings)
print(index.is_trained)
index.add(list_embeddings)                  # add vectors to the index
print(index.ntotal)
k = 4                          # we want to see 4 nearest neighbors
D, I = index.search(list_embeddings[:2], k)     # actual search
print(D)                   # neighbors of the 5 first queries
print(I)                  # neighbors of the 5 last queries'''