#### Import the required packages

In [1]:
import os
import numpy as np
import pandas as pd
from scipy.spatial import distance
from sklearn.metrics.pairwise import cosine_similarity

#### Define a function that will create the dataframe structure, renaming columns and setting the index

In [2]:
def StructureFolder(df, list_dir, index_name="image_names"):
    """Label a matrix of pairwise values with image names on both axes.

    Args:
        df: Array-like (or DataFrame) of values; wrapped with ``pd.DataFrame``.
        list_dir: Image names in row order. pandas rejects the index
            assignment below if its length differs from the number of rows.
        index_name: Name given to the resulting index.

    Returns:
        A new DataFrame indexed by the names in ``list_dir``, whose positional
        integer column labels have been replaced by the matching names.
    """
    df = pd.DataFrame(df)
    labels = list(list_dir)

    # Count columns (not len(df) - 1) so the last column is labelled too, and
    # never index past the end of the available names.
    n_labelled = min(df.shape[1], len(labels))
    df = df.rename(columns={pos: labels[pos] for pos in range(n_labelled)})

    df[index_name] = labels
    return df.set_index(index_name)

#### Define a function to loop through every pair of rows and calculate the Euclidean distance and the cosine similarity (1 - cosine distance) for the entire array

In [3]:
def AllAgainstAllSimilarities(df):
    """Compare every row of ``df`` with every row, itself included.

    Args:
        df: 2-D array-like of feature vectors, one vector per row.

    Returns:
        A tuple ``(euclidean_distance, cosine_distance)`` of flat lists in
        row-major pair order (row 0 vs all rows, then row 1 vs all rows, ...).
        The second list holds ``1 - scipy cosine distance`` for each pair.
    """
    vectors = np.array(df)
    row_ids = range(len(vectors))

    euclidean_distance = []
    cosine_distance = []
    for anchor in row_ids:
        reference = vectors[anchor, :]
        euclidean_distance.extend(
            distance.euclidean(reference, vectors[other, :]) for other in row_ids
        )
        cosine_distance.extend(
            1 - distance.cosine(reference, vectors[other, :]) for other in row_ids
        )
    return (euclidean_distance, cosine_distance)

#### Define a function to loop through a dataframe of image pairs, calculating the Euclidean distance and the cosine similarity (1 - cosine distance) for each pair

In [4]:
def ListOfSimilarities(df, list_dir, list_to_compare):
    """Compare the feature vectors of selected image pairs.

    Args:
        df: 2-D array-like of feature vectors, one vector per row.
        list_dir: Image names; the name at position ``k`` labels row ``k``.
        list_to_compare: Object with ``image1`` and ``image2`` columns holding
            the names to compare, pair by pair.

    Returns:
        A tuple ``(euclidean_distance, cosine_distance)`` of lists with one
        entry per pair. The second list holds ``1 - scipy cosine distance``.

    Raises:
        ValueError: If a requested name does not appear in ``list_dir``.
    """
    df_array = np.array(df)

    # Build the name -> row lookup once. A repeated name keeps its last
    # position, which is what a full scan for the final match would select.
    row_of = {name: position for position, name in enumerate(list_dir)}

    euclidean_distance = []
    cosine_distance = []

    for name1, name2 in zip(list_to_compare.image1, list_to_compare.image2):
        # Fail loudly on an unknown name rather than comparing against a row
        # left over from an earlier pair.
        missing = [name for name in (name1, name2) if name not in row_of]
        if missing:
            raise ValueError(
                "Image name(s) not found in list_dir: {}".format(missing)
            )

        first = df_array[row_of[name1], :]
        second = df_array[row_of[name2], :]
        euclidean_distance.append(distance.euclidean(first, second))
        cosine_distance.append(1 - distance.cosine(first, second))

    return (euclidean_distance, cosine_distance)

#### Define the path to our CSV file, a path to our images, and the output file names

In [89]:
# Directory holding the feature-vector CSV named by file_name
path = "C:/Users/Public/Documents/Deep_Learner_DIN_Project/Algorithms/Feature_Vector_Extraction/Average_Images/PassportDatabase/"
# Directory holding the images
path_of_images = "C:/Users/Public/Documents/Image_Databases/DIN_Image/PassportDatabase/average_images_cropped/"

# Input feature-vector file, and the stem shared by both output file names
file_name = "Face_Recognition_ResNet34_Feature_Vector.csv"
save_file = "Face_Recognition_ResNet34"

# Output file names: the shared stem plus a per-metric suffix
name_to_save_euclidean, name_to_save_cosine = (
    save_file + suffix
    for suffix in ("_Euclidean_Distance.csv", "_Cosine_Distance.csv")
)

In [90]:
# Create a dataframe which will contain two columns - image1 and image2
#list_to_compare = pd.DataFrame()
#image1 = ["S1_a.png", "S2_a.png", "S3_a.png"]
#image2 = ["S1_b.png", "S2_b.png", "S3_b.png"]

# Set the first column to the first list of images, and the second column to the second list of images
#list_to_compare["image1"] = image1
#list_to_compare["image2"] = image2

# Confirm the dataframe structure
#list_to_compare

#### Load the previously defined CSV file

In [91]:
# The CSV has no header row with column names, so header=None keeps the
# first line as data.
# os.path.join is used so the read does not depend on `path` ending with a
# separator.
df = pd.read_csv(os.path.join(path, file_name), header=None)

In [92]:
# Create a list of all the entries in the image directory
# NOTE(review): os.listdir returns names in arbitrary order, so they only line
# up with the rows of df if the feature vectors were written in this same
# order - confirm against the feature-extraction step.
list_dir = os.listdir(path_of_images)
# Keep one name per feature-vector row rather than a hard-coded count
list_dir = list_dir[:len(df)]
len(list_dir)

1600

In [93]:
# Compare every row of df with every row; both results come back as flat lists
euclidean_distance, cosine_distance = AllAgainstAllSimilarities(df)

In [94]:
#euclidean_distance_list, cosine_distance_list = ListOfSimilarities(df, list_dir, list_to_compare)

In [95]:
# Fold each flat result into a matrix with one row per row of df
euclidean_distance = np.asarray(euclidean_distance).reshape((len(df), -1))
cosine_distance = np.asarray(cosine_distance).reshape((len(df), -1))

In [96]:
# Confirm the shape of our euclidean and cosine distance arrays; after the
# all-against-all comparison both should be (len(df), len(df))
print(euclidean_distance.shape)
print(cosine_distance.shape)

(1600, 1600)
(1600, 1600)


#### Inspect one row of both the euclidean and cosine distance matrices, ensuring they make sense

In [97]:
# Row 5: Euclidean distances from the sixth feature vector to every vector
euclidean_distance[5]

array([0.12633435, 0.17820121, 0.15521694, ..., 0.75456654, 0.78180779,
       0.78012947])

In [98]:
# Despite the variable name, these values are cosine similarities
# (1 - cosine distance), so a cosine distance of 0 (e.g. a vector compared
# with itself) shows up here as 1
cosine_distance[5]

array([0.9969839 , 0.99401428, 0.9953771 , ..., 0.88701835, 0.87741511,
       0.87957998])

#### Call our structure function defined above to create a dataframe with the correct structure for both euclidean and cosine distances

In [99]:
# Pass in the numpy arrays created previously to our dataframe structure,
# labelling rows and columns with the image names in list_dir
df_euclidean = StructureFolder(euclidean_distance, list_dir)
df_cosine = StructureFolder(cosine_distance, list_dir)

#### Output our data to a CSV file in the respective directory

In [100]:
# Write both labelled matrices next to the input CSV. os.path.join avoids the
# doubled separator that path + "/" + name produces when `path` already ends
# with "/".
df_euclidean.to_csv(os.path.join(path, name_to_save_euclidean))
df_cosine.to_csv(os.path.join(path, name_to_save_cosine))