# Comparison Using LFW Dataset - Facenet

Several feature extractor:
1. Face Embedding: Facenet
2. Face Embedding: VGG Face
3. Face Embedding: VGG Face - VGG16
4. Face Embedding: VGG Face - RESNET50
5. LBPH (Local Binary Pattern Histogram)

In [1]:
from scipy.spatial.distance import euclidean
from sklearn.metrics import accuracy_score, f1_score
from keras import backend as K
from feature_extractor.face_feature_extractor import FaceFeatureExtractor
import numpy as np
import cv2 as cv
from mtcnn.mtcnn import MTCNN
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Read Dataset

In [2]:
import os

In [3]:
DIR_PATH = '../lfw/'
image_path_list = []
labels = []
name_dictionary = {}
counter = 0
for root, dirs, files in os.walk(DIR_PATH):
    for filename in files:
        person_name = ' '.join(filename.split('.')[0].split('_')[0:-1]) 
        file_path = os.path.join(root, filename)
        if person_name not in name_dictionary:
            counter += 1
            name_dictionary[person_name] = counter
        image_path_list.append(file_path)
        labels.append(name_dictionary[person_name])

In [4]:
print(len(labels))
print(len(image_path_list))

13233
13233


In [5]:
image_path_list[0:5]

['../lfw/Ryan_Newman/Ryan_Newman_0001.jpg',
 '../lfw/Dimitar_Berbatov/Dimitar_Berbatov_0001.jpg',
 '../lfw/Ed_Rendell/Ed_Rendell_0001.jpg',
 '../lfw/Joe_Crede/Joe_Crede_0001.jpg',
 '../lfw/Norman_Mailer/Norman_Mailer_0001.jpg']

## Shuffle It

In [6]:
import random

In [7]:
temp = list(zip(image_path_list, labels))
random.Random(0).shuffle(temp) # custom seed
image_path_list, labels = zip(*temp)

In [8]:
image_path_list[0:5]

('../lfw/Tommy_Maddox/Tommy_Maddox_0001.jpg',
 '../lfw/David_Millar/David_Millar_0001.jpg',
 '../lfw/Gregg_Popovich/Gregg_Popovich_0004.jpg',
 '../lfw/Shimon_Peres/Shimon_Peres_0001.jpg',
 '../lfw/Rudolph_Giuliani/Rudolph_Giuliani_0024.jpg')

In [9]:
labels[0:5]

(3038, 1517, 3115, 834, 1016)

In [10]:
name_dictionary['John Ashcroft']

482

# Experiment

In [11]:
FACENET_MODEL_PATH = 'model_data/facenet/20180402-114759'
VGGFACE_MODEL_PATH = 'model_data/vgg_face_weights.h5'

In [12]:
mtcnn_detector = MTCNN()

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [13]:
def predict(image_representation_list, true_label, test_representation, min_distance):
    minimum_label = None
    minimum_distance = min_distance
    
    for idx, image_representation in enumerate(image_representation_list):
        distance = euclidean(image_representation, test_representation)
        if distance < minimum_distance:
            minimum_distance = distance
            minimum_label = true_label[idx]
    
    return minimum_label

In [14]:
def compute_score(predictions, labels):
    count_same = 0
    for idx, prediction in enumerate(predictions):
        if labels[idx] == prediction:
            count_same += 1
    return count_same / len(predictions)

In [15]:
def normalize_keypoints(point, x, y):
    return (point[0] - x, point[1] - y)

## Facenet

Based on experiment Facenet have distance below 0.9 for euclidean if two image are the same person

In [16]:
feature_extractor = FaceFeatureExtractor(FACENET_MODEL_PATH, extractor_name='facenet')

Model directory: model_data/facenet/20180402-114759
Metagraph file: model-20180402-114759.meta
Checkpoint file: model-20180402-114759.ckpt-275
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Restoring parameters from model_data/facenet/20180402-114759/model-20180402-114759.ckpt-275


### Testing Starts Here

In [17]:
THRESHOLD = 0.95

In [33]:
image_representation_database = []
image_representation_labels = []
prediction_result = []

In [34]:
%%time
for idx, image_path in enumerate(image_path_list):
    if idx % 1000 == 0 and idx > 0:
        print("Checkpoint", idx)
        print(compute_score(prediction_result, labels))
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        
        keypoints = face['keypoints']
        # Normalize position
        le_pos = normalize_keypoints(keypoints['left_eye'], x, y) 
        re_pos =  normalize_keypoints(keypoints['right_eye'], x, y)
        nose_pos =  normalize_keypoints(keypoints['nose'], x, y)
        ml_pos =  normalize_keypoints(keypoints['mouth_left'], x, y)
        mr_pos =  normalize_keypoints(keypoints['mouth_right'], x, y)
        
        dX = re_pos[0] - le_pos[0]
        dY = re_pos[1] - le_pos[1]
        angle = np.degrees(np.arctan2(dY, dX))
        
        # Affine transformation
        scale = 1
        eyes_center = ((le_pos[0] + re_pos[0]) // 2, (le_pos[1] + re_pos[1]) // 2)
        M = cv.getRotationMatrix2D(eyes_center, angle, scale)
        cropped_image = cv.warpAffine(cropped_image, M, (w, h), cv.INTER_CUBIC)
        break
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        prediction = predict(image_representation_database, image_representation_labels, feature_test, THRESHOLD)
        if prediction == None:
            label = labels[idx]
            if label not in image_representation_labels:
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(label)
            else: # false prediction
                prediction_result.append(-1) 
        else:
            prediction_result.append(prediction)
    else: # failed to detect faces
        prediction_result.append(-1)

Checkpoint 1000
0.348
Checkpoint 2000
0.3245
Checkpoint 3000
0.27466666666666667
Checkpoint 4000
0.248
Checkpoint 5000
0.2122
Checkpoint 6000
0.199
Checkpoint 7000
0.187
Checkpoint 8000
0.17925
Checkpoint 9000
0.17177777777777778
Checkpoint 10000
0.1657
Checkpoint 11000
0.15972727272727272
Checkpoint 12000
0.15508333333333332
Checkpoint 13000
0.14869230769230768
CPU times: user 2h 16min 30s, sys: 1h 58min 56s, total: 4h 15min 26s
Wall time: 46min 12s


In [35]:
image_path_list[37]

'../lfw/Abdullah_Gul/Abdullah_Gul_0007.jpg'

In [36]:
compute_score(prediction_result, labels)

0.14773671880903802

In [37]:
len(prediction_result)

13233

In [38]:
len(labels)

13233

In [24]:
# feature_extractor.extractor.close_session()

In [25]:
image_path_list = sorted(image_path_list)

In [26]:
image_path_list[31:50]

['../lfw/Abdullah_Gul/Abdullah_Gul_0001.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0002.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0003.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0004.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0005.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0006.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0007.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0008.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0009.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0010.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0011.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0012.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0013.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0014.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0015.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0016.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0017.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0018.jpg',
 '../lfw/Abdullah_Gul/Abdullah_Gul_0019.jpg']

In [27]:
%%time
for idx, image_path in enumerate(image_path_list[31:32]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        
        keypoints = face['keypoints']
        # Normalize position
        le_pos = normalize_keypoints(keypoints['left_eye'], x, y) 
        re_pos =  normalize_keypoints(keypoints['right_eye'], x, y)
        nose_pos =  normalize_keypoints(keypoints['nose'], x, y)
        ml_pos =  normalize_keypoints(keypoints['mouth_left'], x, y)
        mr_pos =  normalize_keypoints(keypoints['mouth_right'], x, y)
        
        dX = re_pos[0] - le_pos[0]
        dY = re_pos[1] - le_pos[1]
        angle = np.degrees(np.arctan2(dY, dX))
        
        # Affine transformation
        scale = 1
        eyes_center = ((le_pos[0] + re_pos[0]) // 2, (le_pos[1] + re_pos[1]) // 2)
        M = cv.getRotationMatrix2D(eyes_center, angle, scale)
        cropped_image = cv.warpAffine(cropped_image, M, (w, h), cv.INTER_CUBIC)
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        prediction = predict(image_representation_database, image_representation_labels, feature_test, THRESHOLD)
        if prediction == None:
            label = labels[idx]
            if label not in image_representation_labels:
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(label)
            else: # false prediction
                prediction_result.append(-1) 
        else:
            prediction_result.append(prediction)
    else: # failed to detect faces
        prediction_result.append(-1)

Checkpoint 0
CPU times: user 527 ms, sys: 593 ms, total: 1.12 s
Wall time: 178 ms


In [28]:
base_feature = feature_test.copy()
base_feature

array([-3.78268175e-02,  3.25670764e-02,  4.05604392e-02,  3.39724659e-03,
        5.37126437e-02,  3.68385874e-02, -3.60369831e-02, -1.73444413e-02,
        8.96531567e-02, -4.92382273e-02,  6.34373799e-02, -4.30092923e-02,
        1.38749322e-02, -2.25634258e-02,  5.25098713e-03, -5.53738847e-02,
       -2.98285335e-02,  3.73244248e-02,  2.42847018e-02, -8.39974079e-03,
       -4.18943465e-02,  2.79076807e-02,  1.68454982e-02, -9.15986672e-02,
       -6.12437464e-02,  4.28005122e-03,  3.90765490e-03, -7.52496067e-03,
       -4.01982926e-02,  2.21072044e-02,  1.55353844e-02,  8.68968144e-02,
        5.28657362e-02,  2.58001499e-02,  6.26789108e-02, -5.48725091e-02,
        2.25371365e-02, -6.22436218e-02,  1.20208515e-02,  2.69103255e-02,
        6.34750202e-02, -7.82843903e-02, -2.62630619e-02,  4.75831330e-02,
        2.00108066e-02, -3.07505000e-02, -5.62555566e-02,  1.24266259e-02,
       -4.64748703e-02, -7.86072314e-02, -3.78704518e-02, -4.81357537e-02,
       -3.58574651e-02,  

In [29]:
%%time
for idx, image_path in enumerate(image_path_list[31:50]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        
        keypoints = face['keypoints']
        # Normalize position
        le_pos = normalize_keypoints(keypoints['left_eye'], x, y) 
        re_pos =  normalize_keypoints(keypoints['right_eye'], x, y)
        nose_pos =  normalize_keypoints(keypoints['nose'], x, y)
        ml_pos =  normalize_keypoints(keypoints['mouth_left'], x, y)
        mr_pos =  normalize_keypoints(keypoints['mouth_right'], x, y)
        
        dX = re_pos[0] - le_pos[0]
        dY = re_pos[1] - le_pos[1]
        angle = np.degrees(np.arctan2(dY, dX))
        
        # Affine transformation
        scale = 1
        eyes_center = ((le_pos[0] + re_pos[0]) // 2, (le_pos[1] + re_pos[1]) // 2)
        M = cv.getRotationMatrix2D(eyes_center, angle, scale)
        cropped_image = cv.warpAffine(cropped_image, M, (w, h), cv.INTER_CUBIC)
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        distance = euclidean(feature_test, base_feature)
        print(distance)

Checkpoint 0
0.0
0.9450331330299377
0.7808054089546204
0.8431230187416077
0.719917356967926
0.6710638403892517
0.5184654593467712
0.855949342250824
0.992148220539093
0.7338433265686035
0.7143685817718506
0.5583601593971252
0.5010948777198792
0.691178560256958
0.7147900462150574
0.737917959690094
0.5430542230606079
0.7266527414321899
0.6774882674217224
CPU times: user 12.1 s, sys: 10.2 s, total: 22.2 s
Wall time: 3.34 s


In [30]:
%%time
scores = []
for idx, image_path in enumerate(image_path_list[0:30]):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        
        keypoints = face['keypoints']
        # Normalize position
        le_pos = normalize_keypoints(keypoints['left_eye'], x, y) 
        re_pos =  normalize_keypoints(keypoints['right_eye'], x, y)
        nose_pos =  normalize_keypoints(keypoints['nose'], x, y)
        ml_pos =  normalize_keypoints(keypoints['mouth_left'], x, y)
        mr_pos =  normalize_keypoints(keypoints['mouth_right'], x, y)
        
        dX = re_pos[0] - le_pos[0]
        dY = re_pos[1] - le_pos[1]
        angle = np.degrees(np.arctan2(dY, dX))
        
        # Affine transformation
        scale = 1
        eyes_center = ((le_pos[0] + re_pos[0]) // 2, (le_pos[1] + re_pos[1]) // 2)
        M = cv.getRotationMatrix2D(eyes_center, angle, scale)
        cropped_image = cv.warpAffine(cropped_image, M, (w, h), cv.INTER_CUBIC)
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        distance = euclidean(feature_test, base_feature)
        print(distance)
        scores.append(distance)
print("AVG:", np.mean(scores))

Checkpoint 0
1.4553213119506836
1.58197021484375
1.684726595878601
1.4819999933242798
1.422335147857666
1.4219071865081787
1.3882689476013184
1.4149330854415894
1.4556185007095337
1.3702315092086792
1.5404937267303467
1.5120075941085815
1.0301886796951294
1.5865408182144165
1.456681251525879
1.2860819101333618
1.4552093744277954
1.43538236618042
1.2492679357528687
1.4601813554763794
1.5615521669387817
1.3907543420791626
1.5302071571350098
1.0118813514709473
1.45500648021698
1.2846486568450928
1.2236195802688599
1.2333000898361206
1.2461092472076416
1.3176668882369995
AVG: 1.3981364488601684
CPU times: user 21.4 s, sys: 22.4 s, total: 43.8 s
Wall time: 7.78 s
