In [1]:
import cv2
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input

import warnings
warnings.filterwarnings("ignore")

def cosine_similarity(vector1, vector2):
    """Return the cosine similarity between two feature vectors.

    Both inputs are flattened first, so a feature map with more than one
    dimension is compared as one long vector instead of going through
    ``np.dot``'s matrix-product rules.

    Parameters
    ----------
    vector1, vector2 : array_like
        Feature arrays holding the same number of elements.

    Returns
    -------
    numpy floating scalar
        ``dot(v1, v2) / (||v1|| * ||v2||)``. If either input has zero norm
        the ratio is undefined; 0 is returned in that case instead of NaN.
    """
    flat1 = np.ravel(vector1)
    flat2 = np.ravel(vector2)
    dot_product = np.dot(flat1, flat2)
    denominator = np.linalg.norm(flat1) * np.linalg.norm(flat2)
    if denominator == 0:
        # 0/0 would yield NaN, and the RuntimeWarning is hidden by the
        # notebook-wide warnings filter. Return a zero of the same dtype.
        return denominator.dtype.type(0.0)
    return dot_product / denominator

# Read test_candidates

In [2]:
# os.path.join keeps the path portable across operating systems and avoids
# backslash escapes inside the string literal.
test_candidates = pd.read_csv(
    os.path.join('.', 'COMP90086_2023_TLLdataset', 'test_candidates.csv')
)
test_candidates.head()

Unnamed: 0,left,c0,c1,c2,c3,c4,c5,c6,c7,c8,...,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19
0,abm,kyr,qqo,xpv,tnd,xal,pvr,nsb,yzv,ahb,...,drb,vqu,vzr,kxe,mdq,oai,nmm,yzu,ihk,zwv
1,aci,jzn,hxw,iaj,edq,huu,huk,owo,ntd,uhk,...,mhg,exb,pny,kbo,tdb,sok,zsq,yfg,iqx,jnj
2,acn,ksm,tyj,hhy,rph,axt,dby,xiv,aoc,oxb,...,vsu,wrx,zem,rkq,vjq,duq,ncp,mst,wzd,gup
3,aco,tft,bxn,vkl,mdq,iqb,uev,mjn,ccy,nje,...,flo,ltz,sjz,ind,fbw,ahy,vwe,cog,xcj,boz
4,acu,apn,zip,cxx,cwh,wbu,azy,qoe,wnd,xoo,...,zwq,rqi,rei,poj,gos,hif,ami,fhc,ift,xcj


# VGG without pooling

## Load feature extraction results

In [3]:
# Read the saved feature-extraction arrays for the test images.

# Features from VGG without pooling
left_test, right_test = (
    np.load(feature_file)
    for feature_file in ('vgg_test_left_all.npy', 'vgg_test_right_all.npy')
)

## Create dictionary of feature arrays

In [4]:
# Map each test image name (first three characters of its file name) to its
# feature row, separately for the left and the right images.
# NOTE(review): row i of each feature array is paired with the i-th entry of
# os.listdir(), whose order is arbitrary -- confirm the arrays were saved
# using the same listing order.
path = os.path.join('.', 'COMP90086_2023_TLLdataset', 'test', 'left')
dir_list = os.listdir(path)
test_left_images_list = dir_list

path = os.path.join('.', 'COMP90086_2023_TLLdataset', 'test', 'right')
dir_list = os.listdir(path)
test_right_images_list = dir_list

left_array_dict = {
    file_name[:3]: left_test[row]
    for row, file_name in enumerate(test_left_images_list)
}

right_array_dict = {
    file_name[:3]: right_test[row]
    for row, file_name in enumerate(test_right_images_list)
}

In [5]:
# Create pairs of extracted features: every left image with each of its
# candidate right images.
# Columns are read positionally through .iloc[:, k]; integer keys on a
# string-labelled row Series (row[0]) are deprecated in pandas.
left_names = test_candidates.iloc[:, 0].to_numpy()
candidate_names = test_candidates.iloc[:, 1:21].to_numpy()
n_queries, n_candidates = candidate_names.shape

# Kept as a plain list of [left_features, right_features] references:
# converting it with np.array() would copy every feature vector once per pair.
# Direct indexing raises a KeyError naming a missing image instead of
# silently passing None on to the similarity computation.
pairs_for_test = [
    [left_array_dict[left_name], right_array_dict[right_name]]
    for left_name, candidate_row in zip(left_names, candidate_names)
    for right_name in candidate_row
]

# One cosine similarity per pair, in (left image, candidate) row-major order
overall_scores = np.array([
    cosine_similarity(left_features, right_features)
    for left_features, right_features in pairs_for_test
])

# Reshape to the Kaggle format: one row per left image, one column per candidate
overall_scores = overall_scores.reshape(n_queries, n_candidates)

overall_cos_similarity = pd.DataFrame(
    overall_scores,
    columns=['c{}'.format(k) for k in range(n_candidates)],
)

# Put the left image name in front of the score columns
overall_cos_similarity.insert(0, 'left', list(test_candidates['left']))

## Output CSV file

In [6]:
# Save the cosine similarity table as CSV (row index omitted).

# VGG without pooling
overall_cos_similarity.to_csv(path_or_buf='vgg_all_test_cos_similarity.csv', index=False)

# VGG with max pooling

## Load feature extraction results

In [7]:
# Read the saved feature-extraction arrays for the test images.

# Features from VGG with max pooling
left_test, right_test = (
    np.load(feature_file)
    for feature_file in ('vgg_test_left.npy', 'vgg_test_right.npy')
)

## Create dictionary of feature arrays

In [8]:
# Map each test image name (first three characters of its file name) to its
# feature row, separately for the left and the right images.
# NOTE(review): row i of each feature array is paired with the i-th entry of
# os.listdir(), whose order is arbitrary -- confirm the arrays were saved
# using the same listing order.
path = os.path.join('.', 'COMP90086_2023_TLLdataset', 'test', 'left')
dir_list = os.listdir(path)
test_left_images_list = dir_list

path = os.path.join('.', 'COMP90086_2023_TLLdataset', 'test', 'right')
dir_list = os.listdir(path)
test_right_images_list = dir_list

left_array_dict = {
    file_name[:3]: left_test[row]
    for row, file_name in enumerate(test_left_images_list)
}

right_array_dict = {
    file_name[:3]: right_test[row]
    for row, file_name in enumerate(test_right_images_list)
}

In [9]:
# Create pairs of extracted features: every left image with each of its
# candidate right images.
# Columns are read positionally through .iloc[:, k]; integer keys on a
# string-labelled row Series (row[0]) are deprecated in pandas.
left_names = test_candidates.iloc[:, 0].to_numpy()
candidate_names = test_candidates.iloc[:, 1:21].to_numpy()
n_queries, n_candidates = candidate_names.shape

# Kept as a plain list of [left_features, right_features] references:
# converting it with np.array() would copy every feature vector once per pair.
# Direct indexing raises a KeyError naming a missing image instead of
# silently passing None on to the similarity computation.
pairs_for_test = [
    [left_array_dict[left_name], right_array_dict[right_name]]
    for left_name, candidate_row in zip(left_names, candidate_names)
    for right_name in candidate_row
]

# One cosine similarity per pair, in (left image, candidate) row-major order
overall_scores = np.array([
    cosine_similarity(left_features, right_features)
    for left_features, right_features in pairs_for_test
])

# Reshape to the Kaggle format: one row per left image, one column per candidate
overall_scores = overall_scores.reshape(n_queries, n_candidates)

overall_cos_similarity = pd.DataFrame(
    overall_scores,
    columns=['c{}'.format(k) for k in range(n_candidates)],
)

# Put the left image name in front of the score columns
overall_cos_similarity.insert(0, 'left', list(test_candidates['left']))

## Output CSV file

In [10]:
# Save the cosine similarity table as CSV (row index omitted).

# VGG with max pooling
overall_cos_similarity.to_csv(path_or_buf='vgg_test_cos_similarity.csv', index=False)

# ResNet with max pooling

## Load feature extraction results

In [11]:
# Read the saved feature-extraction arrays for the test images.

# Features from ResNet with max pooling
left_test, right_test = (
    np.load(feature_file)
    for feature_file in ('resnet_test_left.npy', 'resnet_test_right.npy')
)

## Create dictionary of feature arrays

In [12]:
# Map each test image name (first three characters of its file name) to its
# feature row, separately for the left and the right images.
# NOTE(review): row i of each feature array is paired with the i-th entry of
# os.listdir(), whose order is arbitrary -- confirm the arrays were saved
# using the same listing order.
path = os.path.join('.', 'COMP90086_2023_TLLdataset', 'test', 'left')
dir_list = os.listdir(path)
test_left_images_list = dir_list

path = os.path.join('.', 'COMP90086_2023_TLLdataset', 'test', 'right')
dir_list = os.listdir(path)
test_right_images_list = dir_list

left_array_dict = {
    file_name[:3]: left_test[row]
    for row, file_name in enumerate(test_left_images_list)
}

right_array_dict = {
    file_name[:3]: right_test[row]
    for row, file_name in enumerate(test_right_images_list)
}

In [13]:
# Create pairs of extracted features: every left image with each of its
# candidate right images.
# Columns are read positionally through .iloc[:, k]; integer keys on a
# string-labelled row Series (row[0]) are deprecated in pandas.
left_names = test_candidates.iloc[:, 0].to_numpy()
candidate_names = test_candidates.iloc[:, 1:21].to_numpy()
n_queries, n_candidates = candidate_names.shape

# Kept as a plain list of [left_features, right_features] references:
# converting it with np.array() would copy every feature vector once per pair.
# Direct indexing raises a KeyError naming a missing image instead of
# silently passing None on to the similarity computation.
pairs_for_test = [
    [left_array_dict[left_name], right_array_dict[right_name]]
    for left_name, candidate_row in zip(left_names, candidate_names)
    for right_name in candidate_row
]

# One cosine similarity per pair, in (left image, candidate) row-major order
overall_scores = np.array([
    cosine_similarity(left_features, right_features)
    for left_features, right_features in pairs_for_test
])

# Reshape to the Kaggle format: one row per left image, one column per candidate
overall_scores = overall_scores.reshape(n_queries, n_candidates)

overall_cos_similarity = pd.DataFrame(
    overall_scores,
    columns=['c{}'.format(k) for k in range(n_candidates)],
)

# Put the left image name in front of the score columns
overall_cos_similarity.insert(0, 'left', list(test_candidates['left']))

## Output CSV file

In [14]:
# Save the cosine similarity table as CSV (row index omitted).

# ResNet with max pooling
overall_cos_similarity.to_csv(path_or_buf='resnet_test_cos_similarity.csv', index=False)

# DenseNet with max pooling

## Load feature extraction results

In [15]:
# Read the saved feature-extraction arrays for the test images.

# Features from DenseNet with max pooling
left_test, right_test = (
    np.load(feature_file)
    for feature_file in ('densenet_test_left.npy', 'densenet_test_right.npy')
)

## Create dictionary of feature arrays

In [16]:
# Map each test image name (first three characters of its file name) to its
# feature row, separately for the left and the right images.
# NOTE(review): row i of each feature array is paired with the i-th entry of
# os.listdir(), whose order is arbitrary -- confirm the arrays were saved
# using the same listing order.
path = os.path.join('.', 'COMP90086_2023_TLLdataset', 'test', 'left')
dir_list = os.listdir(path)
test_left_images_list = dir_list

path = os.path.join('.', 'COMP90086_2023_TLLdataset', 'test', 'right')
dir_list = os.listdir(path)
test_right_images_list = dir_list

left_array_dict = {
    file_name[:3]: left_test[row]
    for row, file_name in enumerate(test_left_images_list)
}

right_array_dict = {
    file_name[:3]: right_test[row]
    for row, file_name in enumerate(test_right_images_list)
}

In [17]:
# Create pairs of extracted features: every left image with each of its
# candidate right images.
# Columns are read positionally through .iloc[:, k]; integer keys on a
# string-labelled row Series (row[0]) are deprecated in pandas.
left_names = test_candidates.iloc[:, 0].to_numpy()
candidate_names = test_candidates.iloc[:, 1:21].to_numpy()
n_queries, n_candidates = candidate_names.shape

# Kept as a plain list of [left_features, right_features] references:
# converting it with np.array() would copy every feature vector once per pair.
# Direct indexing raises a KeyError naming a missing image instead of
# silently passing None on to the similarity computation.
pairs_for_test = [
    [left_array_dict[left_name], right_array_dict[right_name]]
    for left_name, candidate_row in zip(left_names, candidate_names)
    for right_name in candidate_row
]

# One cosine similarity per pair, in (left image, candidate) row-major order
overall_scores = np.array([
    cosine_similarity(left_features, right_features)
    for left_features, right_features in pairs_for_test
])

# Reshape to the Kaggle format: one row per left image, one column per candidate
overall_scores = overall_scores.reshape(n_queries, n_candidates)

overall_cos_similarity = pd.DataFrame(
    overall_scores,
    columns=['c{}'.format(k) for k in range(n_candidates)],
)

# Put the left image name in front of the score columns
overall_cos_similarity.insert(0, 'left', list(test_candidates['left']))

## Output CSV file

In [18]:
# Save the cosine similarity table as CSV (row index omitted).

# DenseNet with max pooling
overall_cos_similarity.to_csv(path_or_buf='densenet_test_cos_similarity.csv', index=False)