# Analytic Second Comparison Using LFW Dataset - VGGFace (RESNET50)

Several feature extractor:
1. Face Embedding: Facenet
2. Face Embedding: VGG Face
3. Face Embedding: VGG Face - VGG16
4. Face Embedding: VGG Face - RESNET50
5. LBPH (Local Binary Pattern Histogram)

In [1]:
from scipy.spatial.distance import euclidean
from sklearn.metrics import accuracy_score, f1_score
from keras import backend as K
from feature_extractor.face_feature_extractor import FaceFeatureExtractor
import numpy as np
import cv2 as cv
from mtcnn.mtcnn import MTCNN
import matplotlib.pyplot as plt
import time

Using TensorFlow backend.


# Read Dataset

In [2]:
import os

In [3]:
DIR_PATH = '../lfw/'
image_path_list = []
labels = []
name_dictionary = {}
counter = 0
for root, dirs, files in os.walk(DIR_PATH):
    for filename in files:
        person_name = ' '.join(filename.split('.')[0].split('_')[0:-1]) 
        file_path = os.path.join(root, filename)
        if person_name not in name_dictionary:
            counter += 1
            name_dictionary[person_name] = counter
        image_path_list.append(file_path)
        labels.append(name_dictionary[person_name])

In [4]:
print(len(labels))
print(len(image_path_list))

13233
13233


In [5]:
image_path_list[0:5]

['../lfw/Ryan_Newman/Ryan_Newman_0001.jpg',
 '../lfw/Dimitar_Berbatov/Dimitar_Berbatov_0001.jpg',
 '../lfw/Ed_Rendell/Ed_Rendell_0001.jpg',
 '../lfw/Joe_Crede/Joe_Crede_0001.jpg',
 '../lfw/Norman_Mailer/Norman_Mailer_0001.jpg']

# Experiment

In [6]:
FACENET_MODEL_PATH = 'model_data/facenet/20180402-114759'
VGGFACE_MODEL_PATH = 'model_data/vgg_face_weights.h5'

In [7]:
mtcnn_detector = MTCNN()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [8]:
def predict(image_representation_list, true_label, test_representation, min_distance):
    minimum_label = None
    minimum_distance = min_distance
    
    for idx, image_representation in enumerate(image_representation_list):
        distance = euclidean(image_representation, test_representation)
        if distance < minimum_distance:
            minimum_distance = distance
            minimum_label = true_label[idx]
    
    return minimum_label

In [9]:
def compute_score(predictions, labels):
    count_same = 0
    for idx, prediction in enumerate(predictions):
        if labels[idx] == prediction:
            count_same += 1
        elif labels[idx] == -2: # failed to detect face (still measure as valid)
            count_same += 1
    return count_same / len(predictions)

In [10]:
def write_checkpoint(filename, prediction_results):
    f = open(filename, 'w')
    for prediction in prediction_results:
        f.write(str(prediction) + '\n')
    f.close()

## VGGFace RESNET50

Based on experiment VGGFace RESNET50 have distance below 100 for euclidean if two image are the same person

In [11]:
feature_extractor = FaceFeatureExtractor(None, extractor_name='vgg_face_resnet50')

In [12]:
THRESHOLD = 100

### Experiment 1

### Shuffle Dataset Seed=0

In [13]:
import random

In [14]:
temp = list(zip(image_path_list, labels))
random.Random(0).shuffle(temp) # custom seed
image_path_list, labels = zip(*temp)

In [15]:
image_path_list[0:5]

('../lfw/Tommy_Maddox/Tommy_Maddox_0001.jpg',
 '../lfw/David_Millar/David_Millar_0001.jpg',
 '../lfw/Gregg_Popovich/Gregg_Popovich_0004.jpg',
 '../lfw/Shimon_Peres/Shimon_Peres_0001.jpg',
 '../lfw/Rudolph_Giuliani/Rudolph_Giuliani_0024.jpg')

In [16]:
labels[0:5]

(3038, 1517, 3115, 834, 1016)

In [17]:
name_dictionary['Tommy Maddox']

3038

### Experiment Starts

In [18]:
image_representation_database = []
image_representation_labels = []
prediction_result = []
error_dict = {'face_detection': 0, 'false_old_prediction': 0, 
             'should_be_new_person': 0, 'should_be_old_person': 0}

In [19]:
%%time
for idx, image_path in enumerate(image_path_list):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        prediction = predict(image_representation_database, image_representation_labels, feature_test, THRESHOLD)
        label = labels[idx]
        if prediction == None: # predict as new person
            if label not in image_representation_labels: # is new person
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(label)
            else: # failed to predict (should be old person)
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(-1)
                error_dict['should_be_old_person'] += 1
        else: # predict as old person (always add true label to galery)
            image_representation_labels.append(label)
            image_representation_database.append(feature_test)
            prediction_result.append(prediction)
            if prediction != label:
                error_dict['false_old_prediction'] += 1
            if label not in image_representation_labels:
                error_dict['should_be_new_person'] += 1
                
    else: # failed to detect faces
        prediction_result.append(-2)
        error_dict['face_detection'] += 1

Checkpoint 0
Checkpoint 1000
Checkpoint 2000
Checkpoint 3000
Checkpoint 4000
Checkpoint 5000
Checkpoint 6000
Checkpoint 7000
Checkpoint 8000
Checkpoint 9000
Checkpoint 10000
Checkpoint 11000
Checkpoint 12000
Checkpoint 13000
CPU times: user 34min 22s, sys: 1min 35s, total: 35min 58s
Wall time: 39min 1s


In [20]:
print(error_dict)

{'face_detection': 0, 'false_old_prediction': 1466, 'should_be_new_person': 0, 'should_be_old_person': 614}


In [21]:
image_path_list[37]

'../lfw/George_W_Bush/George_W_Bush_0233.jpg'

In [22]:
compute_score(prediction_result, labels)

0.8428171994256782

In [23]:
len(prediction_result)

13233

In [24]:
len(labels)

13233

In [25]:
import json
with open('Reason VGGFace RESNET - 1', 'w') as file:
     file.write(json.dumps(error_dict)) # use `json.loads` to do the reverse

### Experiment 2

### Shuffle Dataset Seed=1

In [26]:
import random

In [27]:
temp = list(zip(image_path_list, labels))
random.Random(1).shuffle(temp) # custom seed
image_path_list, labels = zip(*temp)

In [28]:
image_path_list[0:5]

('../lfw/Eddy_Merckx/Eddy_Merckx_0002.jpg',
 '../lfw/Rocco_Buttiglione/Rocco_Buttiglione_0001.jpg',
 '../lfw/Gerhard_Schroeder/Gerhard_Schroeder_0076.jpg',
 '../lfw/Mark_Hogan/Mark_Hogan_0001.jpg',
 '../lfw/Cameron_Diaz/Cameron_Diaz_0003.jpg')

In [29]:
labels[0:5]

(4804, 334, 25, 5684, 4615)

In [30]:
name_dictionary['Eddy Merckx']

4804

### Experiment Starts

In [31]:
image_representation_database = []
image_representation_labels = []
prediction_result = []
error_dict = {'face_detection': 0, 'false_old_prediction': 0, 
             'should_be_new_person': 0, 'should_be_old_person': 0}

In [32]:
%%time
for idx, image_path in enumerate(image_path_list):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        prediction = predict(image_representation_database, image_representation_labels, feature_test, THRESHOLD)
        label = labels[idx]
        if prediction == None: # predict as new person
            if label not in image_representation_labels: # is new person
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(label)
            else: # failed to predict (should be old person)
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(-1)
                error_dict['should_be_old_person'] += 1
        else: # predict as old person (always add true label to galery)
            image_representation_labels.append(label)
            image_representation_database.append(feature_test)
            prediction_result.append(prediction)
            if prediction != label:
                error_dict['false_old_prediction'] += 1
            if label not in image_representation_labels:
                error_dict['should_be_new_person'] += 1
                
    else: # failed to detect faces
        prediction_result.append(-2)
        error_dict['face_detection'] += 1

Checkpoint 0
Checkpoint 1000
Checkpoint 2000
Checkpoint 3000
Checkpoint 4000
Checkpoint 5000
Checkpoint 6000
Checkpoint 7000
Checkpoint 8000
Checkpoint 9000
Checkpoint 10000
Checkpoint 11000
Checkpoint 12000
Checkpoint 13000
CPU times: user 35min 26s, sys: 1min 33s, total: 37min
Wall time: 35min


In [33]:
print(error_dict)

{'face_detection': 0, 'false_old_prediction': 1525, 'should_be_new_person': 0, 'should_be_old_person': 601}


In [34]:
image_path_list[37]

'../lfw/Tom_Cruise/Tom_Cruise_0007.jpg'

In [35]:
compute_score(prediction_result, labels)

0.8393410413360538

In [36]:
len(prediction_result)

13233

In [37]:
len(labels)

13233

In [38]:
import json
with open('Reason VGGFace RESNET - 2', 'w') as file:
     file.write(json.dumps(error_dict)) # use `json.loads` to do the reverse

### Experiment 3

### Shuffle Dataset Seed=2

In [52]:
import random

In [53]:
temp = list(zip(image_path_list, labels))
random.Random(2).shuffle(temp) # custom seed
image_path_list, labels = zip(*temp)

In [54]:
image_path_list[0:5]

('../lfw/Jack_Straw/Jack_Straw_0022.jpg',
 '../lfw/Erin_Runnion/Erin_Runnion_0002.jpg',
 '../lfw/Alejandro_Toledo/Alejandro_Toledo_0027.jpg',
 '../lfw/Infanta_Cristina/Infanta_Cristina_0001.jpg',
 '../lfw/Charles_Richardson/Charles_Richardson_0001.jpg')

In [55]:
labels[0:5]

(1699, 3616, 1313, 3055, 5341)

In [56]:
name_dictionary['Colin Powell']

2196

### Experiment Starts

In [57]:
image_representation_database = []
image_representation_labels = []
prediction_result = []
error_dict = {'face_detection': 0, 'false_old_prediction': 0, 
             'should_be_new_person': 0, 'should_be_old_person': 0}

In [58]:
%%time
for idx, image_path in enumerate(image_path_list):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        prediction = predict(image_representation_database, image_representation_labels, feature_test, THRESHOLD)
        label = labels[idx]
        if prediction == None: # predict as new person
            if label not in image_representation_labels: # is new person
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(label)
            else: # failed to predict (should be old person)
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(-1)
                error_dict['should_be_old_person'] += 1
        else: # predict as old person (always add true label to galery)
            image_representation_labels.append(label)
            image_representation_database.append(feature_test)
            prediction_result.append(prediction)
            if prediction != label:
                error_dict['false_old_prediction'] += 1
            if label not in image_representation_labels:
                error_dict['should_be_new_person'] += 1
                
    else: # failed to detect faces
        prediction_result.append(-2)
        error_dict['face_detection'] += 1

Checkpoint 0
Checkpoint 1000
Checkpoint 2000
Checkpoint 3000
Checkpoint 4000
Checkpoint 5000
Checkpoint 6000
Checkpoint 7000
Checkpoint 8000
Checkpoint 9000
Checkpoint 10000
Checkpoint 11000
Checkpoint 12000
Checkpoint 13000
CPU times: user 32min 1s, sys: 1min 30s, total: 33min 31s
Wall time: 35min 52s


In [59]:
print(error_dict)

{'face_detection': 0, 'false_old_prediction': 1501, 'should_be_new_person': 0, 'should_be_old_person': 598}


In [60]:
image_path_list[37]

'../lfw/Tom_Harkin/Tom_Harkin_0002.jpg'

In [61]:
compute_score(prediction_result, labels)

0.8413813949973551

In [62]:
len(prediction_result)

13233

In [63]:
len(labels)

13233

In [64]:
import json
with open('Reason VGGFace RESNET - 3', 'w') as file:
     file.write(json.dumps(error_dict)) # use `json.loads` to do the reverse

### Experiment 4

### Shuffle Dataset Seed=3

In [65]:
import random

In [66]:
temp = list(zip(image_path_list, labels))
random.Random(3).shuffle(temp) # custom seed
image_path_list, labels = zip(*temp)

In [67]:
image_path_list[0:5]

('../lfw/Bernard_Law/Bernard_Law_0005.jpg',
 '../lfw/Jennifer_Capriati/Jennifer_Capriati_0019.jpg',
 '../lfw/Kjell_Magne_Bondevik/Kjell_Magne_Bondevik_0001.jpg',
 '../lfw/Tom_Ridge/Tom_Ridge_0019.jpg',
 '../lfw/Mike_Matthews/Mike_Matthews_0001.jpg')

In [68]:
labels[0:5]

(4978, 4158, 3955, 4890, 3662)

In [69]:
name_dictionary['Lance Armstrong']

4771

### Experiment Starts

In [70]:
image_representation_database = []
image_representation_labels = []
prediction_result = []
error_dict = {'face_detection': 0, 'false_old_prediction': 0, 
             'should_be_new_person': 0, 'should_be_old_person': 0}

In [71]:
%%time
for idx, image_path in enumerate(image_path_list):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        prediction = predict(image_representation_database, image_representation_labels, feature_test, THRESHOLD)
        label = labels[idx]
        if prediction == None: # predict as new person
            if label not in image_representation_labels: # is new person
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(label)
            else: # failed to predict (should be old person)
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(-1)
                error_dict['should_be_old_person'] += 1
        else: # predict as old person (always add true label to galery)
            image_representation_labels.append(label)
            image_representation_database.append(feature_test)
            prediction_result.append(prediction)
            if prediction != label:
                error_dict['false_old_prediction'] += 1
            if label not in image_representation_labels:
                error_dict['should_be_new_person'] += 1
                
    else: # failed to detect faces
        prediction_result.append(-2)
        error_dict['face_detection'] += 1

Checkpoint 0
Checkpoint 1000
Checkpoint 2000
Checkpoint 3000
Checkpoint 4000
Checkpoint 5000
Checkpoint 6000
Checkpoint 7000
Checkpoint 8000
Checkpoint 9000
Checkpoint 10000
Checkpoint 11000
Checkpoint 12000
Checkpoint 13000
CPU times: user 34min 49s, sys: 1min 30s, total: 36min 20s
Wall time: 34min 54s


In [72]:
print(error_dict)

{'face_detection': 0, 'false_old_prediction': 1457, 'should_be_new_person': 0, 'should_be_old_person': 621}


In [73]:
image_path_list[37]

'../lfw/Mark_Mulder/Mark_Mulder_0001.jpg'

In [74]:
compute_score(prediction_result, labels)

0.8429683367339228

In [75]:
len(prediction_result)

13233

In [76]:
len(labels)

13233

In [77]:
import json
with open('Reason VGGFace RESNET - 4', 'w') as file:
     file.write(json.dumps(error_dict)) # use `json.loads` to do the reverse

### Experiment 5

### Shuffle Dataset Seed=4

In [78]:
import random

In [79]:
temp = list(zip(image_path_list, labels))
random.Random(4).shuffle(temp) # custom seed
image_path_list, labels = zip(*temp)

In [80]:
image_path_list[0:5]

('../lfw/Kaye_Young/Kaye_Young_0001.jpg',
 '../lfw/Colin_Powell/Colin_Powell_0081.jpg',
 '../lfw/Win_Aung/Win_Aung_0003.jpg',
 '../lfw/Jose_Maria_Aznar/Jose_Maria_Aznar_0002.jpg',
 '../lfw/Dennis_Hastert/Dennis_Hastert_0005.jpg')

In [81]:
labels[0:5]

(3867, 2196, 4896, 4188, 1119)

In [82]:
name_dictionary['Takashi Yamamoto']

2730

### Experiment Starts

In [83]:
image_representation_database = []
image_representation_labels = []
prediction_result = []
error_dict = {'face_detection': 0, 'false_old_prediction': 0, 
             'should_be_new_person': 0, 'should_be_old_person': 0}

In [84]:
%%time
for idx, image_path in enumerate(image_path_list):
    if idx % 1000 == 0:
        print("Checkpoint", idx)
    img = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2RGB)
    detection_result = mtcnn_detector.detect_faces(img)
    cropped_image = None
    for face in detection_result:
        face_bbox = face['box']
        x, y, w, h = face_bbox
        if x < 0:
            x = 0
        if y < 0:
            y = 0
        cropped_image = img[y:y+h, x:x+w]
        break
    
    if not cropped_image is None:
        feature_test = feature_extractor.extract_image(cropped_image)
        prediction = predict(image_representation_database, image_representation_labels, feature_test, THRESHOLD)
        label = labels[idx]
        if prediction == None: # predict as new person
            if label not in image_representation_labels: # is new person
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(label)
            else: # failed to predict (should be old person)
                image_representation_labels.append(label)
                image_representation_database.append(feature_test)
                prediction_result.append(-1)
                error_dict['should_be_old_person'] += 1
        else: # predict as old person (always add true label to galery)
            image_representation_labels.append(label)
            image_representation_database.append(feature_test)
            prediction_result.append(prediction)
            if prediction != label:
                error_dict['false_old_prediction'] += 1
            if label not in image_representation_labels:
                error_dict['should_be_new_person'] += 1
                
    else: # failed to detect faces
        prediction_result.append(-2)
        error_dict['face_detection'] += 1

Checkpoint 0
Checkpoint 1000
Checkpoint 2000
Checkpoint 3000
Checkpoint 4000
Checkpoint 5000
Checkpoint 6000
Checkpoint 7000
Checkpoint 8000
Checkpoint 9000
Checkpoint 10000
Checkpoint 11000
Checkpoint 12000
Checkpoint 13000
CPU times: user 31min 52s, sys: 1min 31s, total: 33min 23s
Wall time: 31min 53s


In [85]:
print(error_dict)

{'face_detection': 0, 'false_old_prediction': 1493, 'should_be_new_person': 0, 'should_be_old_person': 621}


In [86]:
image_path_list[37]

'../lfw/Brendan_Gaughan/Brendan_Gaughan_0001.jpg'

In [87]:
compute_score(prediction_result, labels)

0.8402478651855211

In [88]:
len(prediction_result)

13233

In [89]:
len(labels)

13233

In [90]:
import json
with open('Reason VGGFace RESNET - 5', 'w') as file:
     file.write(json.dumps(error_dict)) # use `json.loads` to do the reverse