# How easy is it to test the accuracy of a face recognition algorithm?

## Part 2 - templating the image files to make them ready for matching



In [1]:
# Standard library
import re  # regex support for reformatting the face locations

# Third-party
import pandas as pd  # used to read the CSV into a dataframe

# Local: module providing the face matcher class
import face_matcher as fm

matcher = fm.FaceMatcher()

# Load the face metadata CSV and preview the first few rows
faces = pd.read_csv('./easy_faces_2015_one_face_only.csv')
faces.head(3)

Unnamed: 0,subject_id,image_sequence_number,gender,age,lighting,view,cropped,facial_emotion,year,part,occlusion,image_filters,level_of_difficulty,image_path,face_locations,faces_found
0,MotM,1984,m,21,i,fr,nc,sd,2015,1,e0,Gn,e,./images/MotM_01984_m_21_i_fr_nc_sd_2015_1_e0_...,"[(171, 379, 439, 111)]",1
1,PetK,2257,m,20,i,nf,nc,no,2015,1,e0,Ps,e,./images/PetK_02257_m_20_i_nf_nc_no_2015_1_e0_...,"[(118, 464, 341, 241)]",1
2,MosK,1975,m,20,o,nf,nc,sd,2015,1,e0,Gs,e,./images/MosK_01975_m_20_o_nf_nc_sd_2015_1_e0_...,"[(142, 489, 365, 266)]",1


In [2]:
# Check the dimensions (rows, columns) of the loaded dataframe
faces.shape

(1961, 16)

In [3]:
# Each row holds exactly one face, so the list/tuple wrapper around the location
# is unnecessary: turn e.g. "[(171, 379, 439, 111)]" into "171, 379, 439, 111".
# The pattern is a raw string so the backslashes reach the regex engine untouched,
# and the result is assigned back to the column rather than using a chained
# `inplace=True`, which does not update `faces` when pandas Copy-on-Write is active.
faces['face_locations'] = faces['face_locations'].replace(
    to_replace=r"[\[\]()']", value='', regex=True
)

In [4]:
# The column now holds a single location per row, so give it a singular name
faces = faces.rename(columns={'face_locations': 'face_location'})

In [5]:
# Preview the rows again to confirm the reformatted, renamed location column
faces.head(3)

Unnamed: 0,subject_id,image_sequence_number,gender,age,lighting,view,cropped,facial_emotion,year,part,occlusion,image_filters,level_of_difficulty,image_path,face_location,faces_found
0,MotM,1984,m,21,i,fr,nc,sd,2015,1,e0,Gn,e,./images/MotM_01984_m_21_i_fr_nc_sd_2015_1_e0_...,"171, 379, 439, 111",1
1,PetK,2257,m,20,i,nf,nc,no,2015,1,e0,Ps,e,./images/PetK_02257_m_20_i_nf_nc_no_2015_1_e0_...,"118, 464, 341, 241",1
2,MosK,1975,m,20,o,nf,nc,sd,2015,1,e0,Gs,e,./images/MosK_01975_m_20_o_nf_nc_sd_2015_1_e0_...,"142, 489, 365, 266",1


In [6]:
# Group the rows by subject and count the number of distinct identities
face_groups = faces.groupby('subject_id')
len(face_groups.groups)

64

In [7]:
# Extract the rows of a single identity ('MotM') to use as a small test case
id_MotM = face_groups.get_group('MotM')
id_MotM.head(3)

Unnamed: 0,subject_id,image_sequence_number,gender,age,lighting,view,cropped,facial_emotion,year,part,occlusion,image_filters,level_of_difficulty,image_path,face_location,faces_found
0,MotM,1984,m,21,i,fr,nc,sd,2015,1,e0,Gn,e,./images/MotM_01984_m_21_i_fr_nc_sd_2015_1_e0_...,"171, 379, 439, 111",1
5,MotM,1995,m,21,o,fr,nc,no,2015,1,e0,Gs,e,./images/MotM_01995_m_21_o_fr_nc_no_2015_1_e0_...,"142, 409, 409, 141",1
8,MotM,1994,m,21,i,fr,nc,hp,2015,1,en,nl,e,./images/MotM_01994_m_21_i_fr_nc_hp_2015_1_en_...,"134, 455, 455, 134",1


In [8]:
# Try template generation (biometric enrollment) on one image: the row with
# index label 0, which is part of the MotM group
image_path = id_MotM.loc[0, 'image_path']
location = id_MotM.loc[0, 'face_location'].split(", ")
template = matcher.create_template(image_path, location)
# Show the nesting of the returned template as well as its contents
print(type(template), type(template[0]), type(template[0][0]), "\n", template)

<class 'list'> <class 'numpy.ndarray'> <class 'numpy.float64'> 
 [array([-1.51017696e-01,  8.16482008e-02,  7.12076873e-02, -5.81135508e-04,
       -1.11955918e-01, -1.43141076e-02, -6.43797517e-02, -1.23610966e-01,
        1.56638816e-01, -7.63088167e-02,  2.06025094e-01,  5.99477105e-02,
       -2.33586609e-01, -7.22645596e-02,  8.12688321e-02,  9.96994972e-02,
       -1.52423099e-01, -1.57981887e-01, -6.77747577e-02, -4.63530719e-02,
        3.57958674e-02, -9.70868841e-02,  3.30040902e-02,  4.41798121e-02,
       -2.08102554e-01, -3.49925071e-01, -1.23185083e-01, -1.33675978e-01,
        2.48490497e-02, -9.97912735e-02,  1.19796190e-02,  4.34815884e-04,
       -1.72491133e-01, -2.70583481e-03,  3.07523236e-02,  1.03466302e-01,
        2.26588100e-02, -3.48281264e-02,  1.78748772e-01, -2.56113000e-02,
       -1.85807079e-01,  5.16017526e-02,  3.97023112e-02,  1.68589398e-01,
        2.03639880e-01,  7.55521432e-02,  9.00853127e-02, -5.27126193e-02,
        1.29495904e-01, -2.7228555

In [9]:
# Build one enrollment entry per dataframe row: [index, (image_path, location)].
# Keeping the dataframe index alongside each image lets a template be traced
# back to its row, and therefore to the identity of the person in the face.
enrollment_list = [
    [index, (row['image_path'], row['face_location'].split(", "))]
    for index, row in faces.iterrows()
]
enrollment_list[2]

[2,
 ('./images/MosK_01975_m_20_o_nf_nc_sd_2015_1_e0_Gs_e.jpg',
  ['142', '489', '365', '266'])]

In [10]:
# Pass the [index, (image_path, location)] entries to the matcher to enroll every
# image in one batch. Later cells look results up by dataframe index
# (e.g. enrollments[0]); presumably this is a dict keyed by index -- confirm
# against face_matcher.enroll_batch.
enrollments = matcher.enroll_batch(enrollment_list)

Enrolled... 0
Enrolled... 100
Enrolled... 200
Enrolled... 300
Enrolled... 400
Enrolled... 500
Enrolled... 600
Enrolled... 700
Enrolled... 800
Enrolled... 900
Enrolled... 1000
Enrolled... 1100
Enrolled... 1200
Enrolled... 1300
Enrolled... 1400
Enrolled... 1500
Enrolled... 1600
Enrolled... 1700
Enrolled... 1800
Enrolled... 1900
Finished enrolling 1961 in 55 seconds


## Part 3 - matching to generate scores
#### Unfortunately this algorithm does not generate a matching score out of the box. However, we can tune a tolerance setting, which allows us to derive a score by repeatedly attempting a match at different thresholds until the face_recognition module flips the result (from True to False, or from False to True).
#### Since mated pairs are more likely to match, we should start the process from 1.0 and work downwards. For dissimilar pairs we should start at 0.0 and work upwards.

In [11]:
# Dissimilar pair: compare the images at index 0 and index 1 at two
# neighbouring thresholds to see where the match result changes
probe_idx, other_idx = 0, 1
print(faces.loc[probe_idx, 'subject_id'], faces.loc[other_idx, 'subject_id'])
results = [
    matcher.single_match(enrollments[probe_idx], enrollments[other_idx], threshold=t)
    for t in (0.24, 0.23)
]
print(*results)
print(faces.loc[probe_idx, 'image_path'], faces.loc[other_idx, 'image_path'])


MotM PetK
[False] [True]
./images/MotM_01984_m_21_i_fr_nc_sd_2015_1_e0_Gn_e.jpg ./images/PetK_02257_m_20_i_nf_nc_no_2015_1_e0_Ps_e.jpg


##### From trial and error we can see these images match around a score of 0.23
##### and we know it is a different person from the ground truth
<p float="left">
  <img style="display: inline;" src="./images/MotM_01984_m_21_i_fr_nc_sd_2015_1_e0_Gn_e.jpg" width="200" />
  <img style="display: inline;" src="./images/PetK_02257_m_20_i_nf_nc_no_2015_1_e0_Ps_e.jpg" width="200" /> 
</p>

In [12]:
# Similar pair: compare the images at index 0 and index 8 at two
# neighbouring thresholds to see where the match result changes
probe_idx, other_idx = 0, 8
print(faces.loc[probe_idx, 'subject_id'], faces.loc[other_idx, 'subject_id'])
results = [
    matcher.single_match(enrollments[probe_idx], enrollments[other_idx], threshold=t)
    for t in (0.60, 0.59)
]
print(*results)
print(faces.loc[probe_idx, 'image_path'], faces.loc[other_idx, 'image_path'])

MotM MotM
[False] [True]
./images/MotM_01984_m_21_i_fr_nc_sd_2015_1_e0_Gn_e.jpg ./images/MotM_01994_m_21_i_fr_nc_hp_2015_1_en_nl_e.jpg


##### From trial and error we can see these images match around a score of 0.59
##### and we know it is the same person from the ground truth
<p float="left">
  <img style="display: inline;" src="./images/MotM_01984_m_21_i_fr_nc_sd_2015_1_e0_Gn_e.jpg" width="200" />
  <img style="display: inline;" src="./images/MotM_01994_m_21_i_fr_nc_hp_2015_1_en_nl_e.jpg" width="200" /> 
</p>

In [13]:
# Score a non-mated pair (index 0 is MotM, index 1 is PetK).
# With mate=False the logged thresholds start at 0.00 and increase;
# logging=True prints each threshold as it is tested.
# NOTE(review): the effect of new_threshold=False is not visible here -- confirm in face_matcher.
matcher.generate_score(enrollments[0], enrollments[1], mate=False, logging=True, new_threshold=False)

testing threshold... 0.00
testing threshold... 0.01
testing threshold... 0.02
testing threshold... 0.03
testing threshold... 0.04
testing threshold... 0.05
testing threshold... 0.06
testing threshold... 0.07
testing threshold... 0.08
testing threshold... 0.09
testing threshold... 0.10
testing threshold... 0.11
testing threshold... 0.12
testing threshold... 0.13
testing threshold... 0.14
testing threshold... 0.15
testing threshold... 0.16
testing threshold... 0.17
testing threshold... 0.18
testing threshold... 0.19
testing threshold... 0.20
testing threshold... 0.21
testing threshold... 0.22
testing threshold... 0.23
testing threshold... 0.24


'0.24'

In [14]:
# Score a mated pair (index 0 and index 8 are both MotM).
# With mate=True the logged thresholds start at 1.00 and decrease;
# logging=True prints each threshold as it is tested.
# NOTE(review): the effect of new_threshold=False is not visible here -- confirm in face_matcher.
matcher.generate_score(enrollments[0], enrollments[8], mate=True, logging=True, new_threshold=False)

testing threshold... 1.00
testing threshold... 0.99
testing threshold... 0.98
testing threshold... 0.97
testing threshold... 0.96
testing threshold... 0.95
testing threshold... 0.94
testing threshold... 0.93
testing threshold... 0.92
testing threshold... 0.91
testing threshold... 0.90
testing threshold... 0.89
testing threshold... 0.88
testing threshold... 0.87
testing threshold... 0.86
testing threshold... 0.85
testing threshold... 0.84
testing threshold... 0.83
testing threshold... 0.82
testing threshold... 0.81
testing threshold... 0.80
testing threshold... 0.79
testing threshold... 0.78
testing threshold... 0.77
testing threshold... 0.76
testing threshold... 0.75
testing threshold... 0.74
testing threshold... 0.73
testing threshold... 0.72
testing threshold... 0.71
testing threshold... 0.70
testing threshold... 0.69
testing threshold... 0.68
testing threshold... 0.67
testing threshold... 0.66
testing threshold... 0.65
testing threshold... 0.64
testing threshold... 0.63
testing thre

'0.59'

In [15]:
# Build the list of mated pairs to score, later saved as a CSV that should look like
# subject_id_1, subject_id_2, score
# Every subject forms one batch of mated images.
# NOTE: group_keys is deliberately left as the (uncalled) bound method; it is
# invoked as group_keys() wherever the unique subject ids are needed.
group_keys = faces.subject_id.unique
pairs = []
for group in group_keys():
    # Rows belonging to the current subject, and their dataframe indexes
    group_data = faces[faces['subject_id'] == group]
    indexes = group_data.index.values.tolist()
    # Every ordered combination of two different images of this subject
    pairs.extend(
        [face_num, other_face_num]
        for face_num in indexes
        for other_face_num in indexes
        if face_num != other_face_num
    )

print(pairs[0], pairs[1], pairs[2])

[0, 5] [0, 8] [0, 50]


In [16]:
# `pairs` is a list of [index, index] lists in which both images belong to the
# same subject, so every pair is a mated pair. Score each of them;
# 'mates_matched.csv' is presumably the output file -- confirm in face_matcher.
matcher.cross_match_pairs(enrollments, pairs, 'mates_matched.csv')

0 matches completed
5000 matches completed
10000 matches completed
15000 matches completed
20000 matches completed
25000 matches completed
30000 matches completed
35000 matches completed
40000 matches completed
45000 matches completed
50000 matches completed
55000 matches completed
60000 matches completed
65000 matches completed
70000 matches completed
75000 matches completed
80000 matches completed
85000 matches completed
90000 matches completed
95000 matches completed
100000 matches completed
105000 matches completed
110000 matches completed
115000 matches completed
120000 matches completed
125000 matches completed
130000 matches completed
Finished matching pairs in 104 seconds


In [19]:
# Imposter pairs combine each image of a subject with every image of every
# other subject, i.e. identities that aren't the same person
imposters = []
for group in group_keys():
    # Split the dataframe into the current subject and everybody else
    group_data = faces[faces['subject_id'] == group]
    imposter_data = faces[faces['subject_id'] != group]
    indexes = group_data.index.values.tolist()
    indexes_imposters = imposter_data.index.values.tolist()
    # Pair every image of this subject with every image of a different subject
    imposters.extend(
        [face_num, imposter]
        for face_num in indexes
        for imposter in indexes_imposters
    )

print(imposters[0], imposters[1], imposters[2], imposters[3], imposters[4])

[0, 1] [0, 2] [0, 3] [0, 4] [0, 6]


In [18]:
# Score every imposter (non-mated) pair. This is a much larger job than the
# mated run: the progress log below goes past three million matches.
# 'imposters_matched.csv' is presumably the output file -- confirm in face_matcher.
matcher.cross_match_pairs(enrollments, imposters, 'imposters_matched.csv')

0 matches completed
5000 matches completed
10000 matches completed
15000 matches completed
20000 matches completed
25000 matches completed
30000 matches completed
35000 matches completed
40000 matches completed
45000 matches completed
50000 matches completed
55000 matches completed
60000 matches completed
65000 matches completed
70000 matches completed
75000 matches completed
80000 matches completed
85000 matches completed
90000 matches completed
95000 matches completed
100000 matches completed
105000 matches completed
110000 matches completed
115000 matches completed
120000 matches completed
125000 matches completed
130000 matches completed
135000 matches completed
140000 matches completed
145000 matches completed
150000 matches completed
155000 matches completed
160000 matches completed
165000 matches completed
170000 matches completed
175000 matches completed
180000 matches completed
185000 matches completed
190000 matches completed
195000 matches completed
200000 matches completed


1620000 matches completed
1625000 matches completed
1630000 matches completed
1635000 matches completed
1640000 matches completed
1645000 matches completed
1650000 matches completed
1655000 matches completed
1660000 matches completed
1665000 matches completed
1670000 matches completed
1675000 matches completed
1680000 matches completed
1685000 matches completed
1690000 matches completed
1695000 matches completed
1700000 matches completed
1705000 matches completed
1710000 matches completed
1715000 matches completed
1720000 matches completed
1725000 matches completed
1730000 matches completed
1735000 matches completed
1740000 matches completed
1745000 matches completed
1750000 matches completed
1755000 matches completed
1760000 matches completed
1765000 matches completed
1770000 matches completed
1775000 matches completed
1780000 matches completed
1785000 matches completed
1790000 matches completed
1795000 matches completed
1800000 matches completed
1805000 matches completed
1810000 matc

3200000 matches completed
3205000 matches completed
3210000 matches completed
3215000 matches completed
3220000 matches completed
3225000 matches completed
3230000 matches completed
3235000 matches completed
3240000 matches completed
3245000 matches completed
3250000 matches completed
3255000 matches completed
3260000 matches completed
3265000 matches completed
3270000 matches completed
3275000 matches completed
3280000 matches completed
3285000 matches completed
3290000 matches completed
3295000 matches completed
3300000 matches completed
3305000 matches completed
3310000 matches completed
3315000 matches completed
3320000 matches completed
3325000 matches completed
3330000 matches completed
3335000 matches completed
3340000 matches completed
3345000 matches completed
3350000 matches completed
3355000 matches completed
3360000 matches completed
3365000 matches completed
3370000 matches completed
3375000 matches completed
3380000 matches completed
3385000 matches completed
3390000 matc

##### Please refer to part 4 for a brief analysis of scores generated from this dataset...
