# Analyze the AgeDB Dataset

### Requirements

The required files are "AgeDB_Cleaned_Images.csv" and "AgeDB_Cleaned_Individual.csv", which are generated by the notebook "AgeDB_Code".

### Get both dataframes  

In [31]:
import pandas as pd
import numpy as np

# Per-image table: one row per image file (ID, File Name, Name, Age, Gender, Age Range).
# The path is relative, so the CSV must be in the notebook's working directory.
AgeDB_Images = pd.read_csv('AgeDB_Cleaned_Images.csv')
# Per-individual table: one row per person (age span, number of images, gender, ...).
AgeDB_Individual =  pd.read_csv('AgeDB_Cleaned_Individual.csv')

In [32]:
AgeDB_Images

Unnamed: 0.1,Unnamed: 0,ID,File Name,Name,Age,Gender,Age Range
0,0,4,10053_HelenHunt_39_f.jpg,HelenHunt,39,0,31-40
1,1,4,10052_HelenHunt_38_f.jpg,HelenHunt,38,0,31-40
2,2,4,10046_HelenHunt_34_f.jpg,HelenHunt,34,0,31-40
3,3,4,10051_HelenHunt_37_f.jpg,HelenHunt,37,0,31-40
4,4,4,10050_HelenHunt_37_f.jpg,HelenHunt,37,0,31-40
...,...,...,...,...,...,...,...
9823,9823,566,9971_ElkeSommer_62_f.jpg,ElkeSommer,62,0,61-70
9824,9824,566,9972_ElkeSommer_63_f.jpg,ElkeSommer,63,0,61-70
9825,9825,566,9973_ElkeSommer_64_f.jpg,ElkeSommer,64,0,61-70
9826,9826,566,9975_ElkeSommer_67_f.jpg,ElkeSommer,67,0,61-70


In [33]:
AgeDB_Individual

Unnamed: 0.1,Unnamed: 0,ID,Name,Min Age,Max Age,Age Span,Number of Age Ranges,Number of Images,Gender
0,0,4,HelenHunt,10,51,41,6,38,0.0
1,1,5,JaneBirkin,19,68,49,6,39,0.0
2,2,6,PaulAnka,15,72,57,7,42,1.0
3,3,7,JaneFonda,25,74,49,6,30,0.0
4,4,14,MarietteHartley,21,73,52,6,33,0.0
...,...,...,...,...,...,...,...,...,...
267,267,556,MegRyan,7,77,70,7,34,0.0
268,268,557,DorisDay,17,82,65,7,49,0.0
269,269,559,JaneAsher,6,67,61,7,34,0.0
270,270,565,AliMacGraw,25,74,49,6,34,0.0


### Face recognition

Now we need to analyze how similar images of the same person are across different age ranges. To compare images and find similarities/differences, we first need to compute the feature vector (embedding) of each image.

In [34]:
# Helper functions

def cosine_similarity(feat1, feat2):
    """Return the cosine similarity between the first vector of each input.

    Each argument may be a single 1-D feature vector or a 2-D array with
    one feature vector per row. When an argument has several rows, only
    its first row is compared.

    The previous implementation divided by ``np.linalg.norm`` of the whole
    2-D array, i.e. by the norm of all rows combined, which gave a wrong
    (too small) score whenever an input had more than one row.

    Args:
        feat1: Feature vector(s) of the first image.
        feat2: Feature vector(s) of the second image.

    Returns:
        The cosine similarity as a scalar, or ``nan`` when either vector
        has zero length (the similarity is undefined in that case).

    Raises:
        ValueError: If either input contains no feature vector at all.
    """
    first = np.atleast_2d(np.asarray(feat1))
    second = np.atleast_2d(np.asarray(feat2))

    if first.size == 0 or second.size == 0:
        raise ValueError(
            "cosine_similarity needs at least one feature vector per input"
        )

    # Compare exactly one vector from each side so the norms below belong
    # to the same vectors that enter the dot product.
    vec1 = first[0]
    vec2 = second[0]

    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        return float("nan")

    return np.dot(vec1, vec2) / denominator

def get_row_information(df, filename):
    """Look up the metadata stored for one image file.

    Selects the rows of ``df`` whose "File Name" equals ``filename`` and
    reads the values from the first of them.

    Returns:
        A tuple ``(ID, Name, Age, Age Range, Gender)``.

    Raises:
        IndexError: If no row has the given file name.
    """
    matches = df[df["File Name"] == filename]

    # The order of the columns here is the order of the returned tuple.
    wanted_columns = ("ID", "Name", "Age", "Age Range", "Gender")

    return tuple(matches[column].values[0] for column in wanted_columns)

def get_path(filename, path_to_folder=None):
    """Build the extension-less path of an image inside the image folder.

    The file extension is removed with ``os.path.splitext`` instead of
    cutting off the last four characters, so extensions of any length
    (".jpg", ".jpeg", ...) are handled and a name without an extension is
    left intact.

    Args:
        filename: Image file name, e.g. "10053_HelenHunt_39_f.jpg".
        path_to_folder: Folder that holds the images. Defaults to the
            location used when this notebook was written.

    Returns:
        The folder joined with the file name without its extension.
    """
    import os.path

    if path_to_folder is None:
        path_to_folder = '/Users/ivarblohm/Documents/Skola/Uppsala_Universitet/2022/ProjectFaceRec.nosync/AgeDB_resized/'

    # NOTE(review): the extension is dropped presumably because the image
    # loader used later appends it itself — confirm against insightface.
    stem, _extension = os.path.splitext(filename)

    return os.path.join(path_to_folder, stem)

In [35]:
# Function to gather all similarities and features

from insightface.app import FaceAnalysis
from insightface.data import get_image as ins_get_image

def face_recognition_and_comparison(df, insightface_model):
    """Compare every pair of images that belong to the same individual.

    For each pair of rows in ``df`` with the same ID (first image earlier
    in ``df`` than the second), the embedding of the first detected face
    of each image is extracted and the cosine similarity of the two
    embeddings is computed.

    Compared with the earlier version:
      * Metadata is looked up once per image instead of once per pair.
      * Candidate partners are found through an ID -> positions index
        instead of scanning all image pairs.
      * Each image is loaded and embedded at most once; the embedding is
        cached and reused for all pairs the image takes part in.
      * Images in which no face is detected no longer abort the run with
        an IndexError; their pairs are skipped and a warning is emitted.
    The order of the result rows and the result columns are unchanged.

    Args:
        df: Image table with the columns "File Name", "ID", "Name", "Age",
            "Age Range" and "Gender".
        insightface_model: Name of the insightface model pack passed to
            ``FaceAnalysis`` (e.g. 'buffalo_l').

    Returns:
        A DataFrame with one row per compared pair and the columns
        "ID", "Name", "Gender", "Model", "First Image", "First Age",
        "First Age Range", "First Image Features", "Second Image",
        "Second Age", "Second Age Range", "Second Image Features" and
        "Cosine Similarity".
    """
    import warnings

    list_of_images = df["File Name"].to_list()

    app = FaceAnalysis(name=insightface_model)
    app.prepare(ctx_id=0, det_size=(256, 256))

    # (id, name, age, age_range, gender) for every image, in file order.
    image_info = [get_row_information(df, filename) for filename in list_of_images]

    # Positions of all images of one individual, in file order, so that the
    # partners of an image can be found without scanning every other image.
    positions_by_id = {}
    for position, info in enumerate(image_info):
        positions_by_id.setdefault(info[0], []).append(position)

    # filename -> embedding of the first detected face, or None if no face
    # was detected. Filled lazily so images without a partner are never loaded.
    embedding_cache = {}

    def first_embedding(filename):
        """Return the cached first-face embedding for ``filename`` (or None)."""
        if filename not in embedding_cache:
            image = ins_get_image(get_path(filename))
            embeddings = [face.normed_embedding for face in app.get(image)]
            if embeddings:
                embedding_cache[filename] = embeddings[0]
            else:
                warnings.warn(
                    "No face detected in %s; pairs with this image are skipped"
                    % filename
                )
                embedding_cache[filename] = None
        return embedding_cache[filename]

    result_col = {
        "ID": [],
        "Name": [],
        "Gender": [],
        "Model": [],
        "First Image": [],
        "First Age": [],
        "First Age Range": [],
        "First Image Features": [],
        "Second Image": [],
        "Second Age": [],
        "Second Age Range": [],
        "Second Image Features": [],
        "Cosine Similarity": [],
    }

    for i, first_filename in enumerate(list_of_images):

        (id_individual, name_individual, first_age,
         first_age_range, gender_individual) = image_info[i]

        # Only later positions: every unordered pair is produced once, in
        # the same order as a nested loop over all images would give.
        for j in positions_by_id[id_individual]:
            if j <= i:
                continue

            second_filename = list_of_images[j]
            _, _, second_age, second_age_range, _ = image_info[j]

            first_feature = first_embedding(first_filename)
            second_feature = first_embedding(second_filename)
            if first_feature is None or second_feature is None:
                continue

            # Pass single-row 2-D arrays so the similarity refers to exactly
            # the two embeddings that are stored in the result.
            sims = cosine_similarity(
                np.array([first_feature]), np.array([second_feature])
            )

            result_col["ID"].append(id_individual)
            result_col["Name"].append(name_individual)
            result_col["Gender"].append(gender_individual)
            result_col["Model"].append(insightface_model)
            result_col["First Image"].append(first_filename)
            result_col["First Age"].append(first_age)
            result_col["First Age Range"].append(first_age_range)
            result_col["First Image Features"].append(first_feature)
            result_col["Second Image"].append(second_filename)
            result_col["Second Age"].append(second_age)
            result_col["Second Age Range"].append(second_age_range)
            result_col["Second Image Features"].append(second_feature)
            result_col["Cosine Similarity"].append(sims)

    result_df = pd.DataFrame(result_col)
    return result_df

## Use external resources

The command below runs the function above. The original run took around 60 hours to execute on a "regular" computer.

In [36]:
# Compare all same-person image pairs in AgeDB_Images with the 'buffalo_l' model.
AgeDB_Sim_Feat_Result = face_recognition_and_comparison(AgeDB_Images, 'buffalo_l')
# Bare expression as the last line of the cell so the notebook displays the table.
AgeDB_Sim_Feat_Result

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/ivarblohm/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/ivarblohm/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/ivarblohm/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/ivarblohm/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/ivarblohm/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None', 3, 112,

  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Unnamed: 0,ID,Name,Gender,Model,First Image,First Age,First Age Range,First Image Features,Second Image,Second Age,Second Age Range,Second Image Features,Cosine Similarity
0,4,HelenHunt,0,buffalo_l,10053_HelenHunt_39_f.jpg,39,31-40,"[-0.03122383, 0.010653337, 0.07720214, 0.02029...",10052_HelenHunt_38_f.jpg,38,31-40,"[-0.061942432, 0.015461457, 0.07515598, 0.0306...",0.709523
1,4,HelenHunt,0,buffalo_l,10053_HelenHunt_39_f.jpg,39,31-40,"[-0.03122383, 0.010653337, 0.07720214, 0.02029...",10046_HelenHunt_34_f.jpg,34,31-40,"[-0.023768721, -0.0039987536, 0.051619872, -0....",0.648489
2,4,HelenHunt,0,buffalo_l,10053_HelenHunt_39_f.jpg,39,31-40,"[-0.03122383, 0.010653337, 0.07720214, 0.02029...",10051_HelenHunt_37_f.jpg,37,31-40,"[-0.047966324, 0.016480953, -0.0040377695, 0.0...",0.684564
3,4,HelenHunt,0,buffalo_l,10053_HelenHunt_39_f.jpg,39,31-40,"[-0.03122383, 0.010653337, 0.07720214, 0.02029...",10050_HelenHunt_37_f.jpg,37,31-40,"[-0.055004273, 0.0368939, 0.027720187, -0.0416...",0.606415
4,4,HelenHunt,0,buffalo_l,10053_HelenHunt_39_f.jpg,39,31-40,"[-0.03122383, 0.010653337, 0.07720214, 0.02029...",10049_HelenHunt_36_f.jpg,36,31-40,"[-0.039865274, 0.010151083, 0.074160986, 0.009...",0.722155
...,...,...,...,...,...,...,...,...,...,...,...,...,...
176526,566,ElkeSommer,0,buffalo_l,9972_ElkeSommer_63_f.jpg,63,61-70,"[0.0769861, -0.08088149, -0.04463595, 0.018160...",9975_ElkeSommer_67_f.jpg,67,61-70,"[0.026405472, -0.05441061, -0.022071349, -0.01...",0.804749
176527,566,ElkeSommer,0,buffalo_l,9972_ElkeSommer_63_f.jpg,63,61-70,"[0.0769861, -0.08088149, -0.04463595, 0.018160...",9962_ElkeSommer_44_f.jpg,44,41-50,"[0.039438926, -0.06269737, -0.040195905, 0.025...",0.623625
176528,566,ElkeSommer,0,buffalo_l,9973_ElkeSommer_64_f.jpg,64,61-70,"[0.00781833, -0.0726525, -0.05035398, 0.026622...",9975_ElkeSommer_67_f.jpg,67,61-70,"[0.026405472, -0.05441061, -0.022071349, -0.01...",0.699521
176529,566,ElkeSommer,0,buffalo_l,9973_ElkeSommer_64_f.jpg,64,61-70,"[0.00781833, -0.0726525, -0.05035398, 0.026622...",9962_ElkeSommer_44_f.jpg,44,41-50,"[0.039438926, -0.06269737, -0.040195905, 0.025...",0.591983


In [37]:
# Saving work

# Write the pairwise results to a CSV file in the working directory.
# to_csv is called with its defaults here, so the row index is written as well.
# NOTE(review): the feature columns hold arrays, which CSV stores as text —
# confirm that whoever reads this file parses them back as needed.
AgeDB_Sim_Feat_Result.to_csv("AgeDB_Sim_Feat_Result.csv")