In [143]:
# pip install numpy
import numpy as np
import os
# pip install pandas
import pandas as pd

# pip install git+https://github.com/rcmalli/keras-vggface.git
from keras_vggface.vggface import VGGFace
from keras_vggface.utils import preprocess_input

# pip install Pillow
from PIL import Image

from glob import glob
from collections import defaultdict

import pickle
# pip install scipy
from scipy.spatial.distance import cosine, euclidean
# pip install tqdm
from tqdm import tqdm

In [132]:
# Shared VGGFace network used by every cell below: the 'resnet50' variant,
# built with include_top=False and pooling='avg', for 224x224 3-channel input.
model = VGGFace(
    model='resnet50',
    include_top=False,
    input_shape=(224, 224, 3),
    pooling='avg',
)

In [133]:
def get_encodings(file, target_size=(224, 224)):
    """Compute the face encoding of one image with the module-level ``model``.

    Args:
        file: Path (or file object) of the face image, as accepted by
            ``PIL.Image.open``.
        target_size: ``(width, height)`` the image must have before it is fed
            to the network. Images of any other size are resized to it.
            The default matches the ``input_shape`` the model is built with.
            Pass ``None`` to skip resizing entirely.

    Returns:
        Whatever ``model.predict`` returns for a batch containing this single
        image (the leading batch axis is kept, exactly as before).

    Raises:
        Any exception raised by PIL while opening/decoding the file, or by the
        model while predicting; callers are expected to handle them.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the pixels are decoded, instead of lingering until garbage collection.
    with Image.open(file) as raw:
        image = raw.convert('RGB')

    # A fixed-size network cannot consume other resolutions; resize instead of
    # failing. NOTE(review): this does not preserve the aspect ratio - confirm
    # the inputs are already tight, roughly square face crops.
    if target_size is not None and image.size != tuple(target_size):
        image = image.resize(tuple(target_size))

    # Add the batch axis expected by model.predict.
    face = np.expand_dims(np.asarray(image, dtype=np.float64), axis=0)
    face = preprocess_input(face, version=2)
    return model.predict(face)

In [141]:
# Merge the train and validation face lists into one de-duplicated table
# ordered by label.
train_df = pd.read_csv("datasets/train.csv", usecols=['face', 'label'])
val_df = pd.read_csv("datasets/val.csv", usecols=['face', 'label'])
df = (
    pd.concat([train_df, val_df])
    .drop_duplicates()
    .sort_values(by=['label'])
    # drop=True: do not keep the pre-sort index around as a stray 'index' column.
    .reset_index(drop=True)
)

# label -> list of encodings, filled in the (label-sorted) row order of df.
encodings_by_label = defaultdict(list)
for row in tqdm(df.itertuples(index=False), total=len(df)):
    try:
        encd = get_encodings(row.face)
    except Exception as e:
        # Best effort: report the image that failed and keep going.
        # tqdm.write keeps the messages from tearing the progress bar apart.
        tqdm.write("[ERROR] Failed to get encodings for: {}".format(row.face))
        tqdm.write(str(e))
    else:
        encodings_by_label[row.label].append(encd)

# Downstream cells index this positionally: a list of (label, [encodings]) pairs.
encodings = list(encodings_by_label.items())

with open("face_encodings.pkl", 'wb') as f:
    pickle.dump(encodings, f)

 76%|██████████████████████████████████████████████████████████▉                   | 1139/1506 [02:28<00:35, 10.35it/s]

[ERROR] Failed to get encodings...


100%|██████████████████████████████████████████████████████████████████████████████| 1506/1506 [03:18<00:00,  7.58it/s]


In [142]:
# Score how often two encodings of the SAME label fall within `threshold`
# cosine distance of each other.
#
# NOTE(review): as in the original cell, only same-label pairs are scored, and
# every ordered pair is counted - including each encoding against itself
# (distance 0, always "right"). Different-label pairs are never evaluated, so
# this is a same-identity acceptance rate rather than a full verification
# accuracy. Confirm whether cross-identity pairs should be scored as well.
threshold = 0.5
right = 0
wrong = 0

for _label, label_encodings in tqdm(encodings):
    # cosine() operates on 1-D vectors, while each stored encoding is the raw
    # model.predict output for a one-image batch; flatten once per label.
    vectors = [np.ravel(enc) for enc in label_encodings]
    for e1 in vectors:
        for e2 in vectors:
            if cosine(e1, e2) < threshold:
                right += 1
            else:
                wrong += 1

total = right + wrong
if total:
    # Both placeholders need a value: the threshold first, then the accuracy.
    print("[INFO] Accuracy (th:{}): {}".format(threshold, right / total))
else:
    # No encodings at all: avoid dividing by zero.
    print("[INFO] Accuracy (th:{}): n/a (no pairs to compare)".format(threshold))

100%|███████████████████████████████████████████████████████████████████████████████| 300/300 [00:00<00:00, 655.34it/s]

[INFO] Accuracy (th:0.5): 0.9997357643017571



