# Finding Threshold

Several feature extractor:
1. Face Embedding: Facenet
2. Face Embedding: VGG Face
3. Face Embedding: VGG Face - VGG16
4. Face Embedding: VGG Face - RESNET50
5. LBPH (Local Binary Pattern Histogram)

In [1]:
from scipy.spatial.distance import euclidean
from sklearn.metrics import accuracy_score, f1_score
from keras import backend as K
from feature_extractor.face_feature_extractor import FaceFeatureExtractor
import numpy as np
import cv2 as cv
from mtcnn.mtcnn import MTCNN
import matplotlib.pyplot as plt

Using TensorFlow backend.


# Read Dataset

In [2]:
import os

In [3]:
DIR_PATH = '../lfw/'
image_path_list = []
labels = []
name_dictionary = {}
counter = 0
for root, dirs, files in os.walk(DIR_PATH):
    for filename in files:
        person_name = ' '.join(filename.split('.')[0].split('_')[0:-1]) 
        file_path = os.path.join(root, filename)
        if person_name not in name_dictionary:
            counter += 1
            name_dictionary[person_name] = counter
        image_path_list.append(file_path)
        labels.append(name_dictionary[person_name])

In [4]:
print(len(labels))
print(len(image_path_list))

13233
13233


In [5]:
image_path_list[0:5]

['../lfw/Ryan_Newman/Ryan_Newman_0001.jpg',
 '../lfw/Dimitar_Berbatov/Dimitar_Berbatov_0001.jpg',
 '../lfw/Ed_Rendell/Ed_Rendell_0001.jpg',
 '../lfw/Joe_Crede/Joe_Crede_0001.jpg',
 '../lfw/Norman_Mailer/Norman_Mailer_0001.jpg']

In [6]:
image_path_list = sorted(image_path_list)

In [7]:
image_path_list[914:956]

['../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0001.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0002.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0003.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0004.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0005.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0006.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0007.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0008.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0009.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0010.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0011.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0012.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0013.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0014.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Schwarzenegger_0015.jpg',
 '../lfw/Arnold_Schwarzenegger/Arnold_Sc

# Experiment

In [8]:
FACENET_MODEL_PATH = 'model_data/facenet/20180402-114759'
VGGFACE_MODEL_PATH = 'model_data/vgg_face_weights.h5'

In [9]:
mtcnn_detector = MTCNN()

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


## Facenet

In [10]:
feature_extractor = FaceFeatureExtractor(FACENET_MODEL_PATH, extractor_name='facenet')

Model directory: model_data/facenet/20180402-114759
Metagraph file: model-20180402-114759.meta
Checkpoint file: model-20180402-114759.ckpt-275
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Restoring parameters from model_data/facenet/20180402-114759/model-20180402-114759.ckpt-275


## Finding Threshold

### Base Feature Same Person

In [11]:
%%time
for idx, image_path in enumerate(image_path_list[914:915]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)

Checkpoint 0
CPU times: user 5.69 s, sys: 652 ms, total: 6.34 s
Wall time: 5.21 s


In [12]:
base_feature = feature_test.copy()
base_feature

array([-0.02005969, -0.05056864,  0.01026126,  0.01742446,  0.02215639,
        0.01717553,  0.0363065 ,  0.03492048, -0.00316124,  0.0156415 ,
        0.01338803,  0.0119026 , -0.01074322,  0.01188657, -0.04681126,
        0.0182586 ,  0.00043653,  0.02466263, -0.06686319, -0.03734707,
       -0.08993586,  0.07345945,  0.07387831, -0.0692407 , -0.06713409,
       -0.01115576, -0.055341  , -0.03616242,  0.00777087,  0.07503883,
        0.00155666,  0.00863445,  0.06811176, -0.09804851,  0.03359478,
        0.03583899,  0.04942855, -0.02359399, -0.00993581, -0.00890585,
       -0.01316803,  0.04314459,  0.04884369, -0.01185691, -0.00249397,
       -0.00734212,  0.02021011,  0.04437131, -0.00410194, -0.00865446,
        0.00823828,  0.01580766, -0.00934255, -0.04321346, -0.05499689,
        0.02872187, -0.00497116,  0.0262275 , -0.02250077,  0.02877656,
       -0.034068  , -0.02791355, -0.02561077, -0.03420937, -0.07754732,
       -0.00139099, -0.01407351,  0.03491594,  0.07275735, -0.01

### Find Distance With Same Person

In [13]:
%%time
same_person_distances = []
for idx, image_path in enumerate(image_path_list[915:956]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        distance = euclidean(feature_test, base_feature)
        same_person_distances.append(distance)
        print(distance)

Checkpoint 0
0.8130298852920532
0.7526906728744507
0.7656626105308533
0.9419419765472412
0.841714084148407
0.9273679852485657
1.246404767036438
0.8177686333656311
0.8727921843528748
1.1033804416656494
0.8299792408943176
0.7919194102287292
0.8341163992881775
0.9972293972969055
0.9661878347396851
0.8906254172325134
0.9554269313812256
0.8980324864387512
0.8356178998947144
0.9626778364181519
0.9394333958625793
0.8426283001899719
0.7394007444381714
1.078840732574463
0.8271809816360474
0.8836042284965515
0.8984346985816956
1.2705713510513306
1.0150607824325562
0.8812487721443176
0.8757113218307495
0.8704571723937988
0.8863216042518616
0.870261013507843
0.7747063040733337
0.8275177478790283
0.9177504777908325
0.8642610907554626
0.9547302722930908
0.7103040814399719
0.9604081511497498
CPU times: user 22.6 s, sys: 20.8 s, total: 43.4 s
Wall time: 6.31 s


In [14]:
print(np.mean(same_person_distances))

0.9008146175524083


### Find Distance With Diff Person

In [15]:
%%time
diff_person_distances = []
min_distance = np.inf
min_path = None
distance_path_tuples = []
for idx, image_path in enumerate(image_path_list[0:914]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        distance = euclidean(feature_test, base_feature)
        if distance < min_distance:
            min_distance = distance
            min_path = image_path
        distance_path_tuples.append((distance, image_path))
        diff_person_distances.append(distance)

Checkpoint 0
CPU times: user 11min 28s, sys: 9min 11s, total: 20min 39s
Wall time: 3min 20s


In [16]:
print(min_distance, min_path)

0.9111239314079285 ../lfw/Andres_DAlessandro/Andres_DAlessandro_0001.jpg


In [17]:
print("AVG:", np.mean(diff_person_distances))

AVG: 1.385503837068441


In [20]:
sorted(distance_path_tuples, key=lambda x: x[0])[0:5]

[(0.9111239314079285, '../lfw/Andres_DAlessandro/Andres_DAlessandro_0001.jpg'),
 (1.0896060466766357, '../lfw/Ariel_Sharon/Ariel_Sharon_0047.jpg'),
 (1.1000478267669678, '../lfw/Andrew_Firestone/Andrew_Firestone_0001.jpg'),
 (1.101060390472412, '../lfw/Anibal_Ibarra/Anibal_Ibarra_0001.jpg'),
 (1.1044726371765137, '../lfw/Andre_Agassi/Andre_Agassi_0012.jpg')]

Conclusion: Threshold Facenet=1.0 (Rounding for simplicity)

## LBPH

In [10]:
feature_extractor = FaceFeatureExtractor(None, extractor_name='lbph')

## Finding Threshold

### Base Feature Same Person

In [11]:
import time

In [12]:
start = time.time()
for idx, image_path in enumerate(image_path_list[914:915]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
end = time.time()
print(end-start)

Checkpoint 0
0.8437561988830566


  H = np.histogram(C, bins=2**self.lbp_operator.neighbors, range=(0, 2**self.lbp_operator.neighbors), normed=True)[0]


In [13]:
base_feature = feature_test.copy()
base_feature

array([0.04081633, 0.02040816, 0.        , ..., 0.        , 0.02040816,
       0.08163265])

### Find Distance With Same Person

In [14]:
start = time.time()
same_person_distances = []
for idx, image_path in enumerate(image_path_list[915:956]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        distance = euclidean(feature_test, base_feature)
        same_person_distances.append(distance)
        print(distance)
end = time.time()
print(end-start)

Checkpoint 0
3.3287209516085356
3.1102201510110343
2.977774117056493
3.231328320603648
3.01129347288532
3.0068642835449815
3.1053956080584864
3.000347057531731
3.093705234553012
2.7235533053801206
2.835633320182744
2.85889159311632
2.8006068242299498
2.882251191059374
2.84618891448599
2.9088613555260134
2.869506776060866
3.010878513170957
3.2412378279333582
3.0779136905346576
2.8206119421938354
3.109416580360493
2.7096016050290204
2.645830019696629
2.8368081035869217
3.0834566841234325
2.953336940431132
2.760916012151722
3.017372992371628
2.776110379286849
2.8748721066025036
2.7263045244158
3.0621767226462238
2.868200178083406
3.251245236086144
2.7936084603795512
3.138879008935572
2.9411838204577787
2.7422982909780935
3.0381442855316227
2.8248908527344128
1.9966025352478027


In [15]:
print(np.mean(same_person_distances))

2.948693591576009


### Find Distance With Diff Person

In [16]:
start = time.time()
diff_person_distances = []
min_distance = np.inf
min_path = None
distance_path_tuples = []
for idx, image_path in enumerate(image_path_list[0:914]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        distance = euclidean(feature_test, base_feature)
        if distance < min_distance:
            min_distance = distance
            min_path = image_path
        distance_path_tuples.append((distance, image_path))
        diff_person_distances.append(distance)
end = time.time()
print(end-start)

Checkpoint 0
42.20249342918396


In [17]:
print(min_distance, min_path)

2.4320845795152497 ../lfw/Angelo_Reyes/Angelo_Reyes_0004.jpg


In [18]:
print("AVG:", np.mean(diff_person_distances))

AVG: 3.0644682269666124


In [19]:
sorted(distance_path_tuples, key=lambda x: x[0])[0:5]

[(2.4320845795152497, '../lfw/Angelo_Reyes/Angelo_Reyes_0004.jpg'),
 (2.616227830925081, '../lfw/Alvaro_Uribe/Alvaro_Uribe_0012.jpg'),
 (2.6530612244897958, '../lfw/Andrew_Gilligan/Andrew_Gilligan_0001.jpg'),
 (2.6571397222464967, '../lfw/Anders_Ebbeson/Anders_Ebbeson_0001.jpg'),
 (2.657609915376478, '../lfw/Anibal_Ibarra/Anibal_Ibarra_0003.jpg')]

Conclusion: Threshold LBPH=2.5 (Rounding for simplicity)

## VGG Face - RESNET50

In [10]:
feature_extractor = FaceFeatureExtractor(None, extractor_name='vgg_face_resnet50')

## Finding Threshold

### Base Feature Same Person

In [11]:
import time

In [12]:
start = time.time()
for idx, image_path in enumerate(image_path_list[914:915]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
end = time.time()
print(end-start)

Checkpoint 0
2.1601386070251465


In [13]:
base_feature = feature_test.copy()
base_feature

array([0.        , 0.        , 0.81484425, ..., 4.7339234 , 0.        ,
       0.0314533 ], dtype=float32)

### Find Distance With Same Person

In [14]:
start = time.time()
same_person_distances = []
for idx, image_path in enumerate(image_path_list[915:956]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        distance = euclidean(feature_test, base_feature)
        same_person_distances.append(distance)
        print(distance)
end = time.time()
print(end-start)

Checkpoint 0
104.15174102783203
97.89521789550781
86.51909637451172
101.3939208984375
97.29674530029297
92.4189224243164
129.88865661621094
90.91893768310547
104.22125244140625
113.3676528930664
80.53562927246094
100.95791625976562
90.62042999267578
107.2735824584961
113.50267028808594
125.45764923095703
93.08356475830078
110.21676635742188
112.44432830810547
106.92987060546875
124.18465423583984
95.99147033691406
81.55104064941406
130.40789794921875
100.2811508178711
96.12640380859375
117.74563598632812
113.28272247314453
110.75565338134766
100.73863983154297
109.44210052490234
101.48326110839844
113.94477081298828
110.74088287353516
109.66216278076172
93.85488891601562
108.48161315917969
104.99198913574219
93.01565551757812
76.61616516113281
117.52426147460938
2.428919553756714


In [15]:
print(np.mean(same_person_distances))

104.14433102491425


### Find Distance With Diff Person

In [16]:
start = time.time()
diff_person_distances = []
min_distance = np.inf
min_path = None
distance_path_tuples = []
for idx, image_path in enumerate(image_path_list[0:914]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        distance = euclidean(feature_test, base_feature)
        if distance < min_distance:
            min_distance = distance
            min_path = image_path
        distance_path_tuples.append((distance, image_path))
        diff_person_distances.append(distance)
end = time.time()
print(end-start)

Checkpoint 0
45.05798029899597


In [17]:
print(min_distance, min_path)

117.9339599609375 ../lfw/Arlen_Specter/Arlen_Specter_0002.jpg


In [18]:
print("AVG:", np.mean(diff_person_distances))

AVG: 142.75825240888534


In [19]:
sorted(distance_path_tuples, key=lambda x: x[0])[0:5]

[(117.9339599609375, '../lfw/Arlen_Specter/Arlen_Specter_0002.jpg'),
 (118.80604553222656, '../lfw/Andy_Graves/Andy_Graves_0001.jpg'),
 (120.54059600830078,
  '../lfw/Alexandra_Vodjanikova/Alexandra_Vodjanikova_0002.jpg'),
 (122.3892593383789, '../lfw/Andrew_Cuomo/Andrew_Cuomo_0001.jpg'),
 (124.77921295166016,
  '../lfw/Andrei_Konchalovsky/Andrei_Konchalovsky_0001.jpg')]

Conclusion: Threshold VGG Face - RESNET50=110 (Rounding for simplicity)