# Siamese Neural Network (Testing)

## Importing libraries

In [None]:
import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import torchvision.utils
import numpy as np
import random
import pandas as pd
from PIL import Image
import torch
import PIL.ImageOps    
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from math import sqrt
from operator import itemgetter
from sklearn import decomposition
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import umap

## Configuration class

In [None]:
class Config:
    """Hold the image directories and the CSV metadata tables used by the notebook.

    Args:
        training_dir: Root folder of the training images.
        testing_dir: Root folder of the testing images.
        dir_training: Path of the CSV file describing the training images.
        dir_testing: Path of the CSV file describing the testing images.

    Both CSV files are read immediately, using their first column as index.
    """

    def __init__(self, training_dir, testing_dir, dir_training, dir_testing):
        self.training_dir, self.testing_dir = training_dir, testing_dir
        # Same loading options for both tables; the training CSV is read first.
        self.dset_training, self.dset_testing = (
            pd.read_csv(csv_path, index_col = 0)
            for csv_path in (dir_training, dir_testing)
        )

# Notebook-wide configuration: image folders plus the CSV metadata tables
# (Config reads both CSV files as soon as it is constructed).
conf = Config("./data/memes/training/", 
              "./data/memes/testing/",
              "./data/spb_training.csv", 
              "./data/spb_testing.csv")

# Per-channel normalisation appended to both image pipelines below.
# NOTE(review): these look like the usual ImageNet mean/std used with
# torchvision's pretrained models - confirm against the training notebook.
normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406],
                                 std = [0.229, 0.224, 0.225])

## Siamese Neural Network definition

In [None]:
class SiameseNetwork_ResNet(nn.Module):
    """ResNet-18 whose final ``fc`` layer is replaced by a new linear layer.

    Args:
        use_pretrained: Forwarded to ``models.resnet18`` as ``pretrained``.
        num_classes: Output width of the replacement ``fc`` layer.
    """

    def __init__(self, use_pretrained, num_classes):
        super().__init__()
        backbone = models.resnet18(pretrained = use_pretrained)
        # Swap the stock classifier for a fresh linear layer of the requested
        # width, reusing the input width of the layer being replaced.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        # The attribute name is part of the state-dict keys; keep it as is.
        self.model_ft = backbone

    def forward_once(self, x):
        """Run one batch through the backbone and return its output."""
        return self.model_ft(x)

    def forward(self, input):
        """Delegate to ``forward_once``."""
        return self.forward_once(input)

## Loading datasets

In [None]:
class SiameseNetworkDataset(Dataset):
    """Dataset yielding ``(image id, image, favorites)`` for each file of an image folder.

    Args:
        imageFolderDataset: Object exposing ``imgs``, a sequence whose items
            start with the image path (e.g. ``torchvision.datasets.ImageFolder``).
        dset_csv: Table indexed by image id that has a ``favorites`` column.
        transform: Optional callable applied to the RGB ``PIL`` image.
    """

    def __init__(self, imageFolderDataset, dset_csv, transform = None):
        self.imageFolderDataset = imageFolderDataset
        self.dset_csv = dset_csv
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(img_id, image, favorites)`` for the file at ``index``.

        Raises:
            KeyError: If the image id is missing from ``dset_csv``.
            ValueError: If the file name does not start with an integer id.
        """
        img_path = self.imageFolderDataset.imgs[index][0]
        img_id = self.obtain_id(img_path)
        # Single-cell lookup; avoids materialising the whole row as a Series.
        img_fav = self.dset_csv.loc[img_id, 'favorites']
        # Close the file handle deterministically. convert() returns an
        # independent, fully loaded image that outlives the context manager.
        with Image.open(img_path) as img_file:
            img_tensor = img_file.convert("RGB")
        if self.transform is not None:
            img_tensor = self.transform(img_tensor)
        return img_id, img_tensor, img_fav

    def __len__(self):
        """Number of image files listed by the wrapped folder dataset."""
        return len(self.imageFolderDataset.imgs)

    def obtain_id(self, img_route):
        """Extract the integer id from a path such as ``.../123.jpeg``.

        Both ``/`` and ``\\`` are accepted as directory separators so that
        paths produced on Windows resolve as well. The id is the part of the
        file name before its first dot.
        """
        file_name = img_route.replace("\\", "/").rsplit("/", 1)[-1]
        return int(file_name.split(".")[0])

In [None]:
# Training images: resized to 224x224, converted to tensors and normalised.
# batch_size = 1 because create_tensors calls .item() on the id and the
# favorites of every batch, which requires single-sample batches.
tr_folder_dataset = dset.ImageFolder(conf.training_dir)
tr_siamese_dataset = SiameseNetworkDataset(imageFolderDataset = tr_folder_dataset,
                                        dset_csv = conf.dset_training,
                                        transform = transforms.Compose([transforms.Resize((224, 224)),
                                                    transforms.ToTensor(), 
                                                    normalize]))
tr_dataloader = DataLoader(tr_siamese_dataset, num_workers = 2, batch_size = 1, shuffle = True)

In [None]:
# Testing images: same preprocessing as the training pipeline
# (224x224 resize, tensor conversion, normalisation), one sample per batch.
ver_folder_dataset = dset.ImageFolder(conf.testing_dir)
ver_siamese_dataset = SiameseNetworkDataset(imageFolderDataset = ver_folder_dataset,
                                        dset_csv = conf.dset_testing,
                                        transform = transforms.Compose([transforms.Resize((224, 224)),
                                                    transforms.ToTensor(), 
                                                    normalize]))
ver_dataloader = DataLoader(ver_siamese_dataset, num_workers = 2, batch_size = 1, shuffle = True)

## Creating tensors

In [None]:
def create_tensors(dataloader, dataset, net):
    """Embed every sample of ``dataloader`` with ``net`` and tabulate the results.

    Args:
        dataloader: Iterable yielding ``(id, image, favorites)`` batches that
            hold exactly one sample each (``.item()`` is called on the id and
            on the favorites of every batch).
        dataset: Not used any more; kept so existing calls keep working.
        net: Module mapping an image batch to an embedding batch.

    Returns:
        DataFrame indexed by image id with the columns ``tensors`` (detached
        CPU output of ``net``), ``vectors`` (first row of that output as a
        NumPy array) and ``favorites``.
    """
    # Run on whatever device the network lives on instead of assuming CUDA.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else None

    ids, tensors, favs, vectors = [], [], [], []
    # Inference only: without no_grad() an autograd graph is built per image.
    with torch.no_grad():
        for img_id, img, fav in dataloader:
            if device is not None:
                img = img.to(device)
            # One device-to-host transfer, shared by both output columns.
            embedding = net(img).detach().cpu()
            ids.append(img_id.item())
            tensors.append(embedding)
            vectors.append(embedding.numpy()[0])
            favs.append(fav.item())
    return pd.DataFrame(index = ids, data = {'tensors': tensors, 'vectors': vectors, 'favorites': favs})

## Dimensionality reduction

In [None]:
def reduce_dimensions(tensors):
    """Project the embedding vectors with UMAP and with t-SNE.

    Args:
        tensors: DataFrame with a ``vectors`` column holding one embedding
            per row.

    Returns:
        ``(meme_umap, meme_tsne)``: the UMAP projection (default settings) of
        the standardised vectors and the 2-component t-SNE projection of the
        raw, unscaled vectors.
    """
    vectors = tensors['vectors'].to_list()
    # UMAP, fitted on standardised features
    umap_projection = umap.UMAP().fit_transform(StandardScaler().fit_transform(vectors))
    # t-SNE, fitted on the vectors as they are
    tsne_projection = TSNE(n_components = 2).fit_transform(vectors)
    return umap_projection, tsne_projection

## Plotting

In [None]:
def plot_tensors(tensors, umap_v, tsne_v, plot_title):
    """Draw the UMAP and t-SNE projections side by side, styled by favorites.

    Args:
        tensors: DataFrame with a ``favorites`` column, row-aligned with the
            two projections.
        umap_v: Indexable of ``(x, y)`` UMAP coordinates, one per row.
        tsne_v: Indexable of ``(x, y)`` t-SNE coordinates, one per row.
        plot_title: Title shown above both panels.

    Rows whose favorites equal the maximum are left out of the plot; colour
    and marker size of the remaining points follow their favorites value.
    """
    favorites = tensors['favorites']
    top_fav = max(favorites.to_list())
    # (row position, favorites) of every row strictly below the maximum.
    kept = [(pos, fav) for pos, fav in enumerate(favorites.to_numpy()) if fav < top_fav]

    umap_x = [umap_v[pos][0] for pos, _ in kept]
    umap_y = [umap_v[pos][1] for pos, _ in kept]
    tsne_x = [tsne_v[pos][0] for pos, _ in kept]
    tsne_y = [tsne_v[pos][1] for pos, _ in kept]
    colors = np.array([fav for _, fav in kept])
    # Marker area scales linearly with the share of the maximum favorites.
    sizes = np.array([(fav / top_fav) * 500 for _, fav in kept])

    plt.figure(figsize = (20, 10))
    panels = (('UMAP', umap_x, umap_y), ('TSNE', tsne_x, tsne_y))
    for slot, (panel_title, xs, ys) in enumerate(panels, start = 1):
        plt.subplot(1, 2, slot)
        plt.scatter(xs, ys, c = colors, s = sizes, alpha = 0.5, cmap = 'viridis_r')
        plt.title(panel_title)
        plt.colorbar(label = 'Número de likes')
    plt.suptitle(plot_title, fontsize = 30)
    plt.show()

## Likes prediction

In [None]:
def dist(t0, t1, mode):
    """Return the distance between two embeddings as a Python float.

    Args:
        t0: First tensor.
        t1: Second tensor.
        mode: ``0`` selects the cosine distance
            (``1 - cosine_similarity`` along ``dim = 1``); any other value
            selects ``F.pairwise_distance``.

    The result must hold exactly one element because ``.item()`` is called
    on it.
    """
    if mode != 0:
        return F.pairwise_distance(t0, t1).item()
    return (1 - F.cosine_similarity(t0, t1, dim = 1)).item()

In [None]:
def likes_prediction(tr_tensors, ver_tensors, net, mode, n):
    """Predict the likes of each test item from its nearest training items.

    Args:
        tr_tensors: DataFrame with ``tensors`` and ``favorites`` columns
            (training set), indexed by image id.
        ver_tensors: DataFrame with the same columns for the items to predict.
        net: Unused; kept so existing calls keep working.
        mode: Forwarded to ``dist`` to select the distance function.
        n: Number of nearest neighbours to average; must be at least 1.

    Returns:
        DataFrame indexed like ``ver_tensors`` with the columns ``ids`` (up to
        ``n`` ``[training id, favorites, distance]`` lists, closest first),
        ``original`` (true favorites) and ``calculated`` (truncated mean of
        the neighbours' favorites, 0 when there is no training item).

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1, got {}".format(n))

    # Pull the training columns out once instead of doing a .loc row lookup
    # for every (test, training) pair.
    train_rows = list(zip(tr_tensors.index, tr_tensors['favorites'], tr_tensors['tensors']))

    ids, original, calculated = [], [], []
    for query_fav, query_tensor in zip(ver_tensors['favorites'], ver_tensors['tensors']):
        neighbours = sorted(
            ([train_id, fav, dist(query_tensor, train_tensor, mode)]
             for train_id, fav, train_tensor in train_rows),
            key = itemgetter(2),
        )[:n]
        ids.append(neighbours)
        original.append(query_fav)
        # Average over the neighbours actually found: with fewer than n
        # training items, dividing by n would underestimate the mean.
        total = sum(fav for _, fav, _ in neighbours)
        calculated.append(int(total / len(neighbours)) if neighbours else 0)
    return pd.DataFrame(index = ver_tensors.index, data = {'ids': ids, 'original': original, 'calculated': calculated})

## MSE (ECM) and RMSE

In [None]:
def ecm_rmse(tensors, likes_range):
    """Summarise the prediction error for items whose true likes fall in a range.

    Args:
        tensors: DataFrame with ``original`` (true likes) and ``calculated``
            (predicted likes) columns.
        likes_range: ``[low, high]`` bounds, both inclusive, applied to
            ``original``.

    Returns:
        A two-line string with the mean squared error (``ECM``), its square
        root (``RMSE``) and ``Acc``, the share of predictions within +/-10 %
        of the true value, all rounded to two decimals. When no row falls in
        the range the three figures are reported as ``nan``.
    """
    low, high = likes_range[0], likes_range[1]
    squared_error, hits, count = 0, 0, 0
    for original, calculated in zip(tensors['original'], tensors['calculated']):
        # Plain comparison instead of `in range(...)`: constant time, and it
        # does not silently drop non-integer values inside the bounds.
        if not low <= original <= high:
            continue
        squared_error += (original - calculated) ** 2
        if 0.9 * original <= calculated <= 1.1 * original:
            hits += 1
        count += 1

    if count == 0:
        # An empty bucket used to raise ZeroDivisionError; report it instead.
        ecm = rmse = positives = float('nan')
    else:
        ecm = round(squared_error / count, 2)
        rmse = round(sqrt(ecm), 2)
        positives = round(hits / count, 2)
    return 'Range({}, {})\n ECM = {}, RMSE = {}, Acc = {}'.format(low, high, ecm, rmse, positives)

## Visual comparison (meme and best match)

In [None]:
def plot_memes(likes_df, x, mode, n):
    """Show test memes next to their closest training meme.

    Args:
        likes_df: DataFrame indexed by test image id with an ``original``
            column and an ``ids`` column whose first entry is
            ``[training id, favorites, distance]`` of the best match.
        x: Test image ids to display.
        mode: ``0`` shows ``1 - distance`` in the title of the match; any
            other value shows the distance itself.
        n: Maximum number of memes to display. Fewer rows are drawn when
            ``x`` holds fewer ids; nothing is drawn when there is none.
    """
    pairs = []
    for test_id in list(x)[:max(n, 0)]:
        entry = likes_df.loc[test_id]
        best_id, best_fav, best_dist = entry['ids'][0]
        pairs.append((
            ("./data/memes/testing/testing/{}.jpeg".format(test_id), entry['original']),
            ("./data/memes/training/training/{}.jpeg".format(best_id), best_fav, best_dist),
        ))
    if not pairs:
        return

    # Size the grid by the pairs actually collected, so a short `x` no longer
    # runs past the end of the image list.
    rows = len(pairs)
    plt.figure(figsize = (10, rows * 5))
    for row_no, (query, match) in enumerate(pairs):
        # Left column: the test meme with its true likes.
        plt.subplot(rows, 2, 2 * row_no + 1)
        plt.imshow(mpimg.imread(query[0]))
        plt.axis('off')
        plt.title('{} likes'.format(query[1]))

        # Right column: the best match with its likes and distance.
        plt.subplot(rows, 2, 2 * row_no + 2)
        plt.imshow(mpimg.imread(match[0]))
        plt.axis('off')
        # NOTE(review): for mode 0 the title shows 1 - distance. With the
        # cosine distance computed by dist() that is the cosine similarity,
        # although the label says "Dist" - confirm which one is intended.
        shown = 1 - match[2] if mode == 0 else match[2]
        plt.title('{} likes (Dist = {})'.format(match[1], round(shown, 2)))
    plt.show()

## Cosine similarity

In [None]:
# Build the architecture only: load_state_dict (strict by default) overwrites
# every parameter and buffer with the checkpoint, so requesting the pretrained
# ImageNet weights first would only trigger a needless download.
net = SiameseNetwork_ResNet(False, 512).cuda()
net.load_state_dict(torch.load("./models/resnet_simcos.zip"))
# Evaluation mode for inference.
net.eval()

In [None]:
# Embed every training image with the cosine-similarity model (one row per image id).
resnet_tr_tensors_cos = create_tensors(tr_dataloader, tr_siamese_dataset, net)

In [None]:
# Embed every testing image with the same model.
resnet_ver_tensors_cos = create_tensors(ver_dataloader, ver_siamese_dataset, net)

In [None]:
# UMAP and t-SNE projections of the training embeddings.
resnet_tr_umap_cos, resnet_tr_tsne_cos = reduce_dimensions(resnet_tr_tensors_cos)

In [None]:
# UMAP and t-SNE projections of the testing embeddings.
resnet_ver_umap_cos, resnet_ver_tsne_cos = reduce_dimensions(resnet_ver_tensors_cos)

In [None]:
# Scatter plots of the training projections, styled by favorites.
plot_tensors(resnet_tr_tensors_cos, resnet_tr_umap_cos, resnet_tr_tsne_cos, "Training dataset")

In [None]:
# Scatter plots of the testing projections, styled by favorites.
plot_tensors(resnet_ver_tensors_cos, resnet_ver_umap_cos, resnet_ver_tsne_cos, "Testing dataset")

In [None]:
# Predict likes for each test meme from its 10 nearest training memes,
# using mode 0 (cosine distance) of dist().
resnet_cos_likes = likes_prediction(resnet_tr_tensors_cos, resnet_ver_tensors_cos, net, 0, 10)

In [None]:
# Error report per bucket of true likes; the last bucket is open-ended and
# runs up to the largest true value present in the predictions.
print(ecm_rmse(resnet_cos_likes, [750, 1000]))
print(ecm_rmse(resnet_cos_likes, [1001, 1500]))
print(ecm_rmse(resnet_cos_likes, [1501, 2000]))
print(ecm_rmse(resnet_cos_likes, [2001, int(max(resnet_cos_likes['original']))]))

In [None]:
# Pick 5 random test memes and show each next to its best training match.
# random.choice draws with replacement, so the same meme can appear twice.
# `x` is reused later so the Euclidean run displays the same memes.
x = []
for i in range(5):
    x.append(random.choice(resnet_cos_likes.index))
plot_memes(resnet_cos_likes, x, 0, 5)

## Euclidean distance

In [None]:
# Build the architecture only: load_state_dict (strict by default) overwrites
# every parameter and buffer with the checkpoint, so requesting the pretrained
# ImageNet weights first would only trigger a needless download.
net = SiameseNetwork_ResNet(False, 512).cuda()
net.load_state_dict(torch.load("./models/resnet_euc.zip"))
# Evaluation mode for inference.
net.eval()

In [None]:
# Embed every training image with the Euclidean-distance model (one row per image id).
resnet_tr_tensors_euc = create_tensors(tr_dataloader, tr_siamese_dataset, net)

In [None]:
# Embed every testing image with the same model.
resnet_ver_tensors_euc = create_tensors(ver_dataloader, ver_siamese_dataset, net)

In [None]:
# UMAP and t-SNE projections of the training embeddings.
resnet_tr_umap_euc, resnet_tr_tsne_euc = reduce_dimensions(resnet_tr_tensors_euc)

In [None]:
# UMAP and t-SNE projections of the testing embeddings.
resnet_ver_umap_euc, resnet_ver_tsne_euc = reduce_dimensions(resnet_ver_tensors_euc)

In [None]:
# Scatter plots of the training projections, styled by favorites.
plot_tensors(resnet_tr_tensors_euc, resnet_tr_umap_euc, resnet_tr_tsne_euc, "Training dataset")

In [None]:
# Scatter plots of the testing projections, styled by favorites.
plot_tensors(resnet_ver_tensors_euc, resnet_ver_umap_euc, resnet_ver_tsne_euc, "Testing dataset")

In [None]:
# Predict likes for each test meme from its 10 nearest training memes,
# using mode 1 (F.pairwise_distance) of dist().
resnet_euc_likes = likes_prediction(resnet_tr_tensors_euc, resnet_ver_tensors_euc, net, 1, 10)

In [None]:
# Error report per bucket of true likes; the last bucket is open-ended and
# runs up to the largest true value present in the predictions.
print(ecm_rmse(resnet_euc_likes, [750, 1000]))
print(ecm_rmse(resnet_euc_likes, [1001, 1500]))
print(ecm_rmse(resnet_euc_likes, [1501, 2000]))
print(ecm_rmse(resnet_euc_likes, [2001, int(max(resnet_euc_likes['original']))]))

In [None]:
# Same test memes `x` as sampled for the cosine run, now with their Euclidean best matches.
plot_memes(resnet_euc_likes, x, 1, 5)