In [1]:
import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import save_image
import matplotlib.pyplot as plt
import torchvision.utils
import numpy as np
import random
from PIL import Image
import torch
from torch.autograd import Variable
import PIL.ImageOps    
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm
import os
import glob
from sklearn.metrics import roc_curve, roc_auc_score
import math
from sklearn.metrics import confusion_matrix
import seaborn as sns


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


Plot function

In [3]:
def imshow(img,text=None,should_save=False):
    npimg = img.numpy()
    plt.axis("off")
    if text:
        plt.text(75, 8, text, style='italic',fontweight='bold',
            bbox={'facecolor':'white', 'alpha':0.8, 'pad':10})
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()  

Custom dataset for Siamese Networks


In [4]:
class SiameseNetworkDataset(Dataset):
    
    def __init__(self,imageFolderDataset,transform=None,should_invert=True):
        self.imageFolderDataset = imageFolderDataset    
        self.transform = transform
        self.should_invert = should_invert
        
    def __getitem__(self,index):
        img0_tuple = random.choice(self.imageFolderDataset.imgs)
        #we need to make sure approx 50% of images are in the same class
        should_get_same_class = random.randint(0,1) 
        if should_get_same_class:
            while True:
                #keep looping till the same class image is found
                img1_tuple = random.choice(self.imageFolderDataset.imgs) 
                if img0_tuple[1]==img1_tuple[1]:
                    break
        else:
            while True:
                #keep looping till a different class image is found
                
                img1_tuple = random.choice(self.imageFolderDataset.imgs) 
                if img0_tuple[1] !=img1_tuple[1]:
                    break
        
        img0 = Image.open(img0_tuple[0])
        img1 = Image.open(img1_tuple[0])
        img0 = img0.convert("L")
        img1 = img1.convert("L")
        
        if self.should_invert:
            img0 = PIL.ImageOps.invert(img0)
            img1 = PIL.ImageOps.invert(img1)

        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
        
        return img0, img1 , torch.from_numpy(np.array([int(img1_tuple[1]!=img0_tuple[1])],dtype=np.float32))
    
    def __len__(self):
        return len(self.imageFolderDataset.imgs)

Siamese Networks

In [5]:
#https://arxiv.org/pdf/1707.02131.pdf?ref=https://githubhelp.com
class SiameseNetwork128x128(nn.Module):
    def __init__(self):
        super(SiameseNetwork128x128, self).__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv2d(in_channels= 1, out_channels=96, kernel_size=(11,11), stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size= (3,3), stride=2),
            
            nn.Conv2d(in_channels = 96, out_channels = 256, kernel_size=(5,5), stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size= (3,3), stride=2),
            nn.Dropout2d(p= 0.3),

            nn.Conv2d(in_channels = 256, out_channels =384, kernel_size=(3,3), stride=1, padding=1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(in_channels =384, out_channels= 256, kernel_size=(3,3), stride=1, padding=1),
            nn.MaxPool2d(kernel_size=(3,3), stride=2),
            nn.Dropout2d(p=0.3)
        )

        self.fc1 = nn.Sequential(
            nn.Linear(in_features =43264, out_features=1024),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),

            nn.Linear(in_features=1024, out_features=128),
            nn.ReLU(inplace=True),

            nn.Linear(in_features=128, out_features=15),
            nn.Sigmoid())

    #2 forward functions

    def forward_once(self, x):
        output = self.cnn1(x).cuda()
        output = output.view(output.size()[0], -1).cuda()
        output = self.fc1(output).cuda()
        return output

    def forward(self, input1, input2):
        output1 = self.forward_once(input1).cuda()
        output2 = self.forward_once(input2).cuda()
        return output1, output2

In [6]:
#https://arxiv.org/pdf/1707.02131.pdf?ref=https://githubhelp.com
class SiameseNetwork32x32(nn.Module):
    def __init__(self):
        super(SiameseNetwork32x32, self).__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv2d(in_channels= 1, out_channels=96, kernel_size=(11,11), stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size= (3,3), stride=2),
            
            nn.Conv2d(in_channels = 96, out_channels = 256, kernel_size=(5,5), stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size= (3,3), stride=2),
            nn.Dropout2d(p= 0.3),

            nn.Conv2d(in_channels = 256, out_channels =384, kernel_size=(3,3), stride=1, padding=1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(in_channels =384, out_channels= 256, kernel_size=(3,3), stride=1, padding=1),
            nn.MaxPool2d(kernel_size=(3,3), stride=2),
            nn.Dropout2d(p=0.3)
        )

        self.fc1 = nn.Sequential(
            nn.Linear(in_features =256, out_features=1024),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),

            nn.Linear(in_features=1024, out_features=128),
            nn.ReLU(inplace=True),
            
            nn.Linear(in_features=128, out_features=15),
            nn.Sigmoid())

    #2 forward functions

    def forward_once(self, x):
        output = self.cnn1(x).cuda()
        output = output.view(output.size()[0], -1).cuda()
        output = self.fc1(output).cuda()
        return output

    def forward(self, input1, input2):
        output1 = self.forward_once(input1).cuda()
        output2 = self.forward_once(input2).cuda()
        return output1, output2

Push models to device

In [7]:
#models 128x128 without cropping
siamese_model128x128_2 = SiameseNetwork128x128().to(device)
siamese_model128x128_10 = SiameseNetwork128x128().to(device)
siamese_model128x128_30 = SiameseNetwork128x128().to(device)

#models 32x32 without cropping
siamese_model32x32_2 = SiameseNetwork32x32().to(device)
siamese_model32x32_10 = SiameseNetwork32x32().to(device)
siamese_model32x32_30 = SiameseNetwork32x32().to(device)

#models 128x128 with dlib cropping
siamese_model128x128_2dlib = SiameseNetwork128x128().to(device)
siamese_model128x128_10dlib = SiameseNetwork128x128().to(device)
siamese_model128x128_30dlib = SiameseNetwork128x128().to(device)

#models 32x32 with dlib cropping
siamese_model32x32_2dlib = SiameseNetwork32x32().to(device)
siamese_model32x32_10dlib = SiameseNetwork32x32().to(device)
siamese_model32x32_30dlib = SiameseNetwork32x32().to(device)

#models 128x128 with haar cropping
siamese_model128x128_2haar = SiameseNetwork128x128().to(device)
siamese_model128x128_10haar = SiameseNetwork128x128().to(device)
siamese_model128x128_30haar = SiameseNetwork128x128().to(device)

#models 32x32 with haar cropping
siamese_model32x32_2haar = SiameseNetwork32x32().to(device)
siamese_model32x32_10haar = SiameseNetwork32x32().to(device)
siamese_model32x32_30haar = SiameseNetwork32x32().to(device)


Load models


In [8]:
#models 128x128 without cropping
siamese_model128x128_2.load_state_dict(torch.load('/Siamese/Models/2train/128x128.pth'))
siamese_model128x128_10.load_state_dict(torch.load('/Siamese/Models/10train/128x128.pth'))
siamese_model128x128_30.load_state_dict(torch.load('/Siamese/Models/30train/128x128.pth'))

#models 32x32 without cropping
siamese_model32x32_2.load_state_dict(torch.load('/Siamese/Models/2train/32x32.pth'))
siamese_model32x32_10.load_state_dict(torch.load('/Siamese/Models/10train/32x32.pth'))
siamese_model32x32_30.load_state_dict(torch.load('/Siamese/Models/30train/32x32.pth'))

#models 128x128 with dlib cropping
siamese_model128x128_2dlib.load_state_dict(torch.load('/Siamese/Models/2Dlib/128x128.pth'))
siamese_model128x128_10dlib.load_state_dict(torch.load('/Siamese/Models/10Dlib/128x128.pth'))
siamese_model128x128_30dlib.load_state_dict(torch.load('/Siamese/Models/30Dlib/128x128.pth'))

#models 32x32 with dlib cropping
siamese_model32x32_2dlib.load_state_dict(torch.load('/2Dlib/32x32.pth'))
siamese_model32x32_10dlib.load_state_dict(torch.load('/Siamese/Models/10Dlib/32x32.pth'))
siamese_model32x32_30dlib.load_state_dict(torch.load('/Siamese/Models/30Dlib/32x32.pth'))

#models 128x128 with haar cropping
siamese_model128x128_2haar.load_state_dict(torch.load('/Siamese/Models/2Haar/128x128.pth'))
siamese_model128x128_10haar.load_state_dict(torch.load('/Siamese/Models/10Haar/128x128.pth'))
siamese_model128x128_30haar.load_state_dict(torch.load('/Siamese/Models/30Haar/128x128.pth'))

#models 32x32 with haar cropping
siamese_model32x32_2haar.load_state_dict(torch.load('/Siamese/Models/2Haar/32x32.pth'))
siamese_model32x32_10haar.load_state_dict(torch.load('/Siamese/Models/10Haar/32x32.pth'))
siamese_model32x32_30haar.load_state_dict(torch.load('/Siamese/Models/30Haar/32x32.pth'))

#models 128x128 without cropping
siamese_model128x128_2.eval()  
siamese_model128x128_10.eval()  
siamese_model128x128_30.eval() 

#models 32x32 without cropping
siamese_model32x32_2.eval() 
siamese_model32x32_10.eval() 
siamese_model32x32_30.eval() 

#models 128x128 with dlib cropping
siamese_model128x128_2dlib.eval() 
siamese_model128x128_10dlib.eval() 
siamese_model128x128_30dlib.eval() 

#models 32x32 with dlib cropping
siamese_model32x32_2dlib.eval() 
siamese_model32x32_10dlib.eval() 
siamese_model32x32_30dlib.eval() 

#models 128x128 with haar cropping
siamese_model128x128_2haar.eval() 
siamese_model128x128_10haar.eval() 
siamese_model128x128_30haar.eval()

#models 32x32 with haar cropping
siamese_model32x32_2haar.eval() 
siamese_model32x32_10haar.eval() 
siamese_model32x32_30haar.eval()

SiameseNetwork32x32(
  (cnn1): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=(3, 3), stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=(3, 3), stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Dropout2d(p=0.3, inplace=False)
    (7): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): MaxPool2d(kernel_size=(3, 3), stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Dropout2d(p=0.3, inplace=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=256, out_features=1024, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout2d(p=0.5, inplace=False)
    (3): Linear(in_features=1024, out_features=128, bias=True)
    (4): ReL

Group models

In [9]:
uncropped_models128x128 = [siamese_model128x128_2,  
siamese_model128x128_10, 
siamese_model128x128_30]

uncropped_models32x32 = [
siamese_model32x32_2,
siamese_model32x32_10,
siamese_model32x32_30]

dlib_models128x128 = [siamese_model128x128_2dlib,
siamese_model128x128_10dlib, 
siamese_model128x128_30dlib]

dlib_models32x32 = [
siamese_model32x32_2dlib,
siamese_model32x32_10dlib, 
siamese_model32x32_30dlib]

haar_models128x128 = [siamese_model128x128_2haar,
siamese_model128x128_10haar, 
siamese_model128x128_30haar]

haar_models32x32 = [
siamese_model32x32_2haar,
siamese_model32x32_10haar, 
siamese_model32x32_30haar]

In [None]:
influencers = ['Airrack', 'BellaPoarch', 'Larry', 'LexiHensler', 'LoganPaul', 'MarkRober', 'MrBeast', 'PiersonWodzynski', 'Preston', 'Quackity', 'RosannaPansino', 'RyanTrahan', 'SoffieDossi', 'ZachKing', 'Zhc' ]
male_if = ['Airrack', 'Larry', 'LoganPaul', 'MarkRober', 'MrBeast', 'Preston', 'Quackity','RyanTrahan','ZachKing', 'Zhc']
female_if = ['BellaPoarch',  'LexiHensler','PiersonWodzynski','RosannaPansino','SoffieDossi']
folder = '/Youtube1video/'

names = [name for name in os.listdir(folder) if os.path.isdir(os.path.join(folder, name))] #choose random names from people that appear in 1 video
random.seed(10)
choice = random.sample(names, 15)

Custom dataset to load images from specific directory

In [11]:
#imagefolder pytorch select only images from specific folder
class MyDataset(Dataset):
    def __init__(self, path, transform=None):
        self.image_paths = glob.glob(os.path.join(path, '*.jpg'))
        self.transform = transform
        
    def __getitem__(self, index):
        x = Image.open(self.image_paths[index])
        if self.transform:
            x = self.transform(x)
        return x
    
    def __len__(self):
        return len(self.image_paths)

test function

In [None]:
def test(person1, person2, model, size):
    
    dissim = []

    query_folder = 'C:/Users/Tychon Bos/Documents/Youtube1person/' + person1 + '/'
    reference_folder = 'C:/Users/Tychon Bos/Documents/Youtube1person/' + person2 + '/'
    
    dataset_query = MyDataset(query_folder, transform = transforms.Compose([transforms.Resize(size), transforms.Grayscale(num_output_channels=1) ,transforms.ToTensor()]))
    dataset_reference = MyDataset(reference_folder, transform=transforms.Compose([transforms.Resize(size), transforms.Grayscale(num_output_channels=1) ,transforms.ToTensor()]))

    dataset_query_loader = DataLoader(dataset_query,num_workers=0,batch_size=1,shuffle=True) #left image changes
    dataset_reference_loader = DataLoader(dataset_reference,num_workers=0,batch_size=1,shuffle=False) #right image order does not change

    dataiter_dataset_query_loader = iter(dataset_query_loader)
    dataiter_dataset_reference_loader = iter(dataset_reference_loader)

    #labels are person names; if person names not the same --> 1 else 0
    #calculate average dissimilarity score between the same persons for every model with nested for loop so that left image also changes    

    for i in range(1):
        x0 = next(dataiter_dataset_query_loader)
    
        for j in range(30):
            try:
                with torch.no_grad():
                    x1 = next(dataiter_dataset_reference_loader)
                    concatenated = torch.cat((x0,x1),0)
                    
                    output1,output2 = model(Variable(x0).to(device),Variable(x1).to(device))
                    euclidean_distance = F.pairwise_distance(output1, output2)
                    z = torch.tensor(euclidean_distance.item())
                    dissim.append(torch.sigmoid(z).item())
                    #imshow(torchvision.utils.make_grid(concatenated),'Dissimilarity: {:.2f}'.format(torch.sigmoid(z)))
            except StopIteration:
                pass
    
    avg = round(sum(dissim)/len(dissim), 4)
    #print(avg)
    overall.append(avg)

all average uncropped_models128x128

In [None]:
all_avg_uncropped_models128x128 = []
for name1, name2 in zip(choice, choice):
    influencer1 = name1
    influencer2 = name2
    print(influencer1, influencer2)
    input_size = (128,128)
    count = 0
    final_avg = []

    if influencer1 != influencer2:
        raise ValueError("Names are not the same!")
    
    for model in range(len(uncropped_models128x128)):
        overall = []
        current_model = uncropped_models128x128[model]
        for i in range(30):
            test(influencer1, influencer2, current_model, input_size)

        avg_overall = round(sum(overall)/len(overall),4)
        final_avg.append(avg_overall)
        
        if len(overall) != 30:
            raise ValueError('Overall length is not 30!')
        
        if count == 0:
            print("The average overall for {} to {} is: {} with 2 training images and uncropped models".format(influencer1, influencer2, avg_overall))
            count +=1
        elif count == 1:
            print("The average overall for {} to {} is: {} with 10 training images and uncropped models".format(influencer1, influencer2, avg_overall))
            count +=1
        else:
            print("The average overall for {} to {} is: {} with 30 training images and uncropped models".format(influencer1, influencer2, avg_overall))
            count +=1
            
    print(final_avg)
    overall_avg = round(sum(final_avg)/len(final_avg),4)
    print('Overall average for {} to {} is: {}'.format(influencer1, influencer2, overall_avg))
    all_avg_uncropped_models128x128.append(overall_avg)
    print(all_avg_uncropped_models128x128)
    print()
x = sum(all_avg_uncropped_models128x128)/len(influencers)
print('Overall average for all uncropped 128x128 models: {} '.format(x)) 

all average haar_models128x128

In [None]:
all_avg_haar_models128x128 = []
for name1, name2 in zip(choice, choice):
    influencer1 = name1
    influencer2 = name2
    input_size = (128,128)
    count = 0
    final_avg = []

    if influencer1 != influencer2:
        raise ValueError("Names are not the same!")
    
    for model in range(len(haar_models128x128)):
        overall = []
        current_model = haar_models128x128[model]
        for i in range(30):
            test(influencer1, influencer2, current_model, input_size)

        avg_overall = round(sum(overall)/len(overall),4)
        final_avg.append(avg_overall)
        
        if len(overall) != 30:
            raise ValueError('Overall length is not 30!')
        
        if count == 0:
            print("The average overall for {} to {} is: {} with 2 training images and haar models".format(influencer1, influencer2, avg_overall))
            count +=1
        elif count == 1:
            print("The average overall for {} to {} is: {} with 10 training images and haar models".format(influencer1, influencer2, avg_overall))
            count +=1
        else:
            print("The average overall for {} to {} is: {} with 30 training images and haar models".format(influencer1, influencer2, avg_overall))
            count +=1
            
    print(final_avg)
    overall_avg = round(sum(final_avg)/len(final_avg),4)
    print('Overall average for {} to {} is: {}'.format(influencer1, influencer2, overall_avg))
    all_avg_haar_models128x128.append(overall_avg)
    print(all_avg_haar_models128x128)
    print()
x = sum(all_avg_haar_models128x128)/len(influencers)
print('Overall average for all haar 128x128 models: {} '.format(x))

all average dlib_models128x128

In [None]:
all_avg_dlib_models128x128 = []
for name1, name2 in zip(choice, choice):
    influencer1 = name1
    influencer2 = name2
    input_size = (128,128)
    count = 0
    final_avg = []

    if influencer1 != influencer2:
        raise ValueError("Names are not the same!")
    
    for model in range(len(dlib_models128x128)):
        overall = []
        current_model = dlib_models128x128[model]
        for i in range(30):
            test(influencer1, influencer2, current_model, input_size)

        avg_overall = round(sum(overall)/len(overall),4)
        final_avg.append(avg_overall)
        
        if len(overall) != 30:
            raise ValueError('Overall length is not 30!')
        
        if count == 0:
            print("The average overall for {} to {} is: {} with 2 training images and dlib models".format(influencer1, influencer2, avg_overall))
            count +=1
        elif count == 1:
            print("The average overall for {} to {} is: {} with 10 training images and dlib models".format(influencer1, influencer2, avg_overall))
            count +=1
        else:
            print("The average overall for {} to {} is: {} with 30 training images and dlib models".format(influencer1, influencer2, avg_overall))
            count +=1
            
    print(final_avg)
    overall_avg = round(sum(final_avg)/len(final_avg),4)
    print('Overall average for {} to {} is: {}'.format(influencer1, influencer2, overall_avg))
    print()
    all_avg_dlib_models128x128.append(overall_avg)
    print(all_avg_dlib_models128x128)
x = sum(all_avg_dlib_models128x128)/len(influencers)
print('Overall average for all dlib 128x128 models: {} '.format(x))

all average uncropped_models32x32

In [None]:
all_avg_uncropped_models32x32 = []
for name1, name2 in zip(choice, choice):
    influencer1 = name1
    influencer2 = name2
    input_size = (32,32)
    count = 0
    final_avg = []

    if influencer1 != influencer2:
        raise ValueError("Names are not the same!")
    
    for model in range(len(uncropped_models32x32)):
        overall = []
        current_model = uncropped_models32x32[model]
        for i in range(30):
            test(influencer1, influencer2, current_model, input_size)

        avg_overall = round(sum(overall)/len(overall),4)
        final_avg.append(avg_overall)
        
        if len(overall) != 30:
            raise ValueError('Overall length is not 30!')
        
        if count == 0:
            print("The average overall for {} to {} is: {} with 2 training images and uncropped models".format(influencer1, influencer2, avg_overall))
            count +=1
        elif count == 1:
            print("The average overall for {} to {} is: {} with 10 training images and uncropped models".format(influencer1, influencer2, avg_overall))
            count +=1
        else:
            print("The average overall for {} to {} is: {} with 30 training images and uncropped models".format(influencer1, influencer2, avg_overall))
            count +=1
            
    print(final_avg)
    overall_avg = round(sum(final_avg)/len(final_avg),4)
    print('Overall average for {} to {} is: {}'.format(influencer1, influencer2, overall_avg))
    all_avg_uncropped_models32x32.append(overall_avg)
    print(all_avg_uncropped_models32x32)
    print()
x = sum(all_avg_uncropped_models32x32)/len(influencers)
print('Overall average for all uncropped 32x32 models: {} '.format(x))

all average haar_models32x32

In [None]:
all_avg_haar_models32x32 = []
for name1, name2 in zip(choice, choice):
    influencer1 = name1
    influencer2 = name2
    input_size = (32,32)
    count = 0
    final_avg = []

    if influencer1 != influencer2:
        raise ValueError("Names are not the same!")
    
    for model in range(len(haar_models32x32)):
        overall = []
        current_model = haar_models32x32[model]
        for i in range(30):
            test(influencer1, influencer2, current_model, input_size)

        avg_overall = round(sum(overall)/len(overall),4)
        final_avg.append(avg_overall)
        
        if len(overall) != 30:
            raise ValueError('Overall length is not 30!')
        
        if count == 0:
            print("The average overall for {} to {} is: {} with 2 training images and haar models".format(influencer1, influencer2, avg_overall))
            count +=1
        elif count == 1:
            print("The average overall for {} to {} is: {} with 10 training images and haar models".format(influencer1, influencer2, avg_overall))
            count +=1
        else:
            print("The average overall for {} to {} is: {} with 30 training images and haar models".format(influencer1, influencer2, avg_overall))
            count +=1
            
    print(final_avg)
    overall_avg = round(sum(final_avg)/len(final_avg),4)
    print('Overall average for {} to {} is: {}'.format(influencer1, influencer2, overall_avg))
    all_avg_haar_models32x32.append(overall_avg)
    print(all_avg_haar_models32x32)
    print()
x = sum(all_avg_haar_models32x32)/len(influencers)
print('Overall average for all haar 32x32 models: {} '.format(x))

all average dlib_models32x32

In [None]:
all_avg_dlib_models32x32 = []
for name1, name2 in zip(choice, choice):
    influencer1 = name1
    influencer2 = name2
    input_size = (32,32)
    count = 0
    final_avg = []
    
    if influencer1 != influencer2:
        raise ValueError("Names are not the same!")
    
    for model in range(len(dlib_models32x32)):
        overall = []
        current_model = dlib_models32x32[model]
        for i in range(30):
            test(influencer1, influencer2, current_model, input_size)

        avg_overall = round(sum(overall)/len(overall),4)
        final_avg.append(avg_overall)
        
        if len(overall) != 30:
            raise ValueError('Overall length is not 30!')
        
        if count == 0:
            print("The average overall for {} to {} is: {} with 2 training images and dlib models".format(influencer1, influencer2, avg_overall))
            count +=1
        elif count == 1:
            print("The average overall for {} to {} is: {} with 10 training images and dlib models".format(influencer1, influencer2, avg_overall))
            count +=1
        else:
            print("The average overall for {} to {} is: {} with 30 training images and dlib models".format(influencer1, influencer2, avg_overall))
            count +=1
            
    print(final_avg)
    overall_avg = round(sum(final_avg)/len(final_avg),4)
    print('Overall average for {} to {} is: {}'.format(influencer1, influencer2, overall_avg))
    all_avg_dlib_models32x32.append(overall_avg)
    print(all_avg_dlib_models32x32)
    print()
x = sum(all_avg_dlib_models32x32)/len(influencers)
print('Overall average for all 32x32 dlib models: {} '.format(x))

Plot scores person to person 

In [None]:
total_uncropped_models128x128 = round(sum(all_avg_uncropped_models128x128)/len(choice),4)
print(total_uncropped_models128x128)

total_haar_models128x128 = round(sum(all_avg_haar_models128x128)/len(choice),4)
print(total_haar_models128x128)

total_dlib_models128x128 = round(sum(all_avg_dlib_models128x128)/len(choice),4)
print(total_dlib_models128x128)

total_uncropped_models32x32 = round(sum(all_avg_uncropped_models32x32)/len(choice),4)
print(total_uncropped_models32x32)

total_haar_models32x32 = round(sum(all_avg_haar_models32x32)/len(choice),4)
print(total_haar_models32x32)

total_dlib_models32x32 = round(sum(all_avg_dlib_models32x32)/len(choice),4)
print(total_dlib_models32x32)

names = ['128x128 uncropped', '128x128 haar', '128x128 dlib', '32x32 uncropped', '32x32 haar', '32x32 dlib']
values = [total_uncropped_models128x128, total_haar_models128x128, total_dlib_models128x128, total_uncropped_models32x32, total_haar_models32x32, total_dlib_models32x32]

def addlabels(x,y):
    for i in range(len(x)):
        plt.text(i,y[i],y[i], ha='center', fontstyle='oblique')

plt.style.use('ggplot')
plt.figure(figsize=(20, 5))
plt.ylim([0.5, 0.8])
plt.bar(names, values, align='center', width=0.6, color=(0.2, 0.4, 0.6, 0.6), alpha=1, edgecolor='black')
addlabels(names, values)
plt.title('Average dissimilarity score computed for each influencer to itself averaged over 2, 10 and 30 images used in training for each model ')
plt.tight_layout()

Function that plots the roc curve

In [13]:
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

def plot_roc_curve(fpr, tpr):
    plt.plot(fpr, tpr, color='orange', label='ROC')
    plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend()
    plt.show()

Binary classification

Add manually paths for the query_folder, reference_folder and save_path variables 

In [37]:
def binary_classification(model, input_size, runs, treshold):
    # model: which model to use
    # input size: tuple which contains the input size
    # runs: how many runs to use
    # treshold: binary classification threshold
    # generate 30 pairs of people and for each pair compute 30 dissimilarity scores
    treshold = treshold
    labellist = []
    scorelist = []
    amount_of_images_pairs = 30 #30
    epochs = 30 #30
    tp = 0
    fn = 0
    tn = 0
    fp = 0
    count = 0
    
    for num in range(runs):

        for i in range(epochs): #30
            name1 = random.choice(influencers)
            #name1 = random.choice(choice)
            #name1 = random.choice(female_if)
            #we need to make sure approx 50% of images are in the same class
            should_get_same_class = random.randint(0,1) 
            if should_get_same_class:
                while True:
                    #keep looping till the same class image is found
                    name2 = random.choice(influencers) 
                    #name2 = random.choice(choice) 
                    #name2 = random.choice(female_if)
                    if name1==name2:
                        for n in range(amount_of_images_pairs):
                            labellist.append(0) #if names are the same append 0
                        break
            else:
                while True:
                    #keep looping till a different class image is found
                    name2 = random.choice(influencers) 
                    #name2 = random.choice(choice) 
                    #name2 = random.choice(female_if)
                    if name1 !=name2:
                        for n in range(amount_of_images_pairs):
                            labellist.append(1) #if names are different append 1
                        break

            query_folder = '/influencer_data/' + name1 + '/'
            reference_folder = '/influencer_data/' + name2 + '/'
            
            #query_folder = '/Youtube1video/' + name1 + '/'
            #reference_folder = '/Youtube1video/' + name2 + '/'
            dataset_query = MyDataset(query_folder, transform = transforms.Compose([transforms.Resize(input_size), transforms.Grayscale(num_output_channels=1) ,transforms.ToTensor()]))
            dataset_reference = MyDataset(reference_folder, transform=transforms.Compose([transforms.Resize(input_size), transforms.Grayscale(num_output_channels=1) ,transforms.ToTensor()]))

            dataset_query_loader = DataLoader(dataset_query,num_workers=0,batch_size=1,shuffle=False) 
            dataset_reference_loader = DataLoader(dataset_reference,num_workers=0,batch_size=1,shuffle=False) 

            dataiter_dataset_query_loader = iter(dataset_query_loader)
            dataiter_dataset_reference_loader = iter(dataset_reference_loader)
            
            temp_label = 5
            
            for i in range(1):
                x0 = next(dataiter_dataset_query_loader)
            
                for j in range(amount_of_images_pairs):
                    try:
                        with torch.no_grad():
                            if name1 == name2: #if names are the same (positive pairs) set temp_label variable to 0
                                temp_label = 0
                                # print(temp_label)
                                x1 = next(dataiter_dataset_reference_loader)
                                concatenated = torch.cat((x0,x1),0)
                                
                                output1,output2 = model(Variable(x0).to(device),Variable(x1).to(device))
                                euclidean_distance = F.pairwise_distance(output1, output2)
                                
                                z = torch.tensor(euclidean_distance.item())
                                sigmoid_value = round(torch.sigmoid(z).item(),4)
                                
                                if sigmoid_value < treshold:
                                    tp +=1
                                    grid = torchvision.utils.make_grid(concatenated,nrow=x0.size(0),padding=0)

                                    npimg = grid.detach().numpy() # to numpy array

                                    npimg = (npimg * 255).astype(np.uint8)
                                    
                                    fig, ax = plt.subplots(figsize = (12,8))
                                    ax.axis("off")
                                    output1,output2 = model(Variable(x0).to(device),Variable(x1).to(device))
                                    euclidean_distance = F.pairwise_distance(output1, output2)
                                    #scores.append(euclidean_distance.item())

                                    z = torch.tensor(euclidean_distance.item())
                                    z = torch.sigmoid(z)
                                    z = z.item()
                                    z= round(z,4)

                                    ax.set_title('Dissimilarity: ' + str(z), fontsize=45)
                                    imshow(torchvision.utils.make_grid(concatenated))

                                    savepath = "save here/" + str(count) + '.jpg'
                                    count +=1
                                    fig.savefig(savepath)
                                    
                                else:
                                    fn +=1
                                    grid = torchvision.utils.make_grid(concatenated,nrow=x0.size(0),padding=0)

                                    npimg = grid.detach().numpy() # to numpy array

                                    npimg = (npimg * 255).astype(np.uint8)
                                    
                                    fig, ax = plt.subplots(figsize = (12,8))
                                    ax.axis("off")
                                    output1,output2 = model(Variable(x0).to(device),Variable(x1).to(device))
                                    euclidean_distance = F.pairwise_distance(output1, output2)
                                    #scores.append(euclidean_distance.item())

                                    z = torch.tensor(euclidean_distance.item())
                                    z = torch.sigmoid(z)
                                    z = z.item()
                                    z= round(z,4)

                                    ax.set_title('Dissimilarity: ' + str(z), fontsize=45)
                                    imshow(torchvision.utils.make_grid(concatenated))

                                    savepath = "save here/" + str(count) + '.jpg'
                                    count +=1
                                    fig.savefig(savepath)
                                    
                                scorelist.append(sigmoid_value)
                            
                            elif name1 != name2: #if names are different (negative pairs) set temp_label variable to 1
                                temp_label = 1
                                
                                x1 = next(dataiter_dataset_reference_loader)
                                concatenated = torch.cat((x0,x1),0)
                                
                                output1,output2 = model(Variable(x0).to(device),Variable(x1).to(device))
                                euclidean_distance = F.pairwise_distance(output1, output2)
                                
                                z = torch.tensor(euclidean_distance.item())
                                sigmoid_value = round(torch.sigmoid(z).item(),4)
                                
                                if sigmoid_value > treshold:
                                    tn +=1
                                    grid = torchvision.utils.make_grid(concatenated,nrow=x0.size(0),padding=0)

                                    npimg = grid.detach().numpy() # to numpy array

                                    npimg = (npimg * 255).astype(np.uint8)
                                    
                                    fig, ax = plt.subplots(figsize = (12,8))
                                    ax.axis("off")
                                    output1,output2 = model(Variable(x0).to(device),Variable(x1).to(device))
                                    euclidean_distance = F.pairwise_distance(output1, output2)
                                    #scores.append(euclidean_distance.item())

                                    z = torch.tensor(euclidean_distance.item())
                                    z = torch.sigmoid(z)
                                    z = z.item()
                                    z= round(z,4)

                                    ax.set_title('Dissimilarity: ' + str(z), fontsize=45)
                                    imshow(torchvision.utils.make_grid(concatenated))

                                    savepath = "save here/" + str(count) + '.jpg'
                                    count +=1
                                    fig.savefig(savepath)
                                else:
                                    fp +=1 
                                    
                                    grid = torchvision.utils.make_grid(concatenated,nrow=x0.size(0),padding=0)

                                    npimg = grid.detach().numpy() # to numpy array

                                    npimg = (npimg * 255).astype(np.uint8)
                                    
                                    fig, ax = plt.subplots(figsize = (12,8))
                                    ax.axis("off")
                                    output1,output2 = model(Variable(x0).to(device),Variable(x1).to(device))
                                    euclidean_distance = F.pairwise_distance(output1, output2)
                                    #scores.append(euclidean_distance.item())

                                    z = torch.tensor(euclidean_distance.item())
                                    z = torch.sigmoid(z)
                                    z = z.item()
                                    z= round(z,4)

                                    ax.set_title('Dissimilarity: ' + str(z), fontsize=45)
                                    imshow(torchvision.utils.make_grid(concatenated))
                                    
                                    savepath = "save here/" + str(count) + '.jpg'
                                    count +=1
                                    fig.savefig(savepath) 
                                    
                                scorelist.append(sigmoid_value)
                            
                            #imshow(torchvision.utils.make_grid(concatenated),'Dissimilarity: {:.2f}'.format(torch.sigmoid(z)))
                            
                    except StopIteration:
                        pass

        total_img = epochs * amount_of_images_pairs
        accuracy = round((tp + tn)/ total_img,2)
        precision = round(tp / (tp + fp),4)
        recall = round(tp / (tp + fn),4)
        
    labellist = np.array(labellist)
    scorelist = np.array(scorelist)

    fpr, tpr, thresholds = roc_curve(labellist, scorelist)
    print('roc auc score: ', roc_auc_score(labellist, scorelist))
    optimal_idx = np.argmax(tpr - fpr)
    optimal_threshold = thresholds[optimal_idx]
    print("Threshold value is:", optimal_threshold)
    plot_roc_curve(fpr, tpr)

    fpr, tpr, _ = roc_curve(labellist, scorelist)
    auc = roc_auc_score(labellist, scorelist)
    plt.plot(fpr,tpr,label= "auc="+str(auc))
    plt.legend(loc=4)
    plt.show()  
    
    print('precision: ', precision)
    print('recall: ', recall)
    
    #Generate the confusion matrix
    prob_to_label = []
    for score in scorelist:
        if score < treshold:
            prob_to_label.append(0)
        else:
            prob_to_label.append(1)
        
    cf_matrix = confusion_matrix(labellist, prob_to_label)
    
    group_names = ['True Neg','False Pos','False Neg','True Pos']

    group_counts = ["{0:0.0f}".format(value) for value in
                    cf_matrix.flatten()]

    group_percentages = ["{0:.2%}".format(value) for value in
                        cf_matrix.flatten()/np.sum(cf_matrix)]

    labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in
            zip(group_names,group_counts,group_percentages)]

    labels = np.asarray(labels).reshape(2,2)

    ax = sns.heatmap(cf_matrix, annot=labels, fmt='', cmap='Blues')

    ax.set_title('Confusion Matrix')
    ax.set_xlabel('\nPredicted Values')
    ax.set_ylabel('Actual Values ')

    ax.xaxis.set_ticklabels(['False','True'])
    ax.yaxis.set_ticklabels(['False','True'])

    plt.show()
    
    print(tp, fn, tn, fp)
    
    return 'Average accuracy score is: {} over {} runs'.format(round(accuracy/runs, 2), runs)
