In [1]:
pip show --version deepface

Name: deepfaceNote: you may need to restart the kernel to use updated packages.

Version: 0.0.75
Summary: A Lightweight Face Recognition and Facial Attribute Analysis Framework (Age, Gender, Emotion, Race) for Python
Home-page: https://github.com/serengil/deepface
Author: Sefik Ilkin Serengil
Author-email: serengil@gmail.com
License: UNKNOWN
Location: c:\users\admin\anaconda3\lib\site-packages
Requires: fire, Flask, gdown, keras, mtcnn, numpy, opencv-python, pandas, Pillow, retina-face, tensorflow, tqdm
Required-by: 


In [2]:
import pandas as pd
import itertools
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
tqdm.pandas()


In [3]:
from deepface import DeepFace

# Summary

Face recognition models are regular convolutional neural networks models. They represent face photos as vectors. We find the distance between these two vectors to compare two faces. Finally, we classify two faces as same person whose distance is less than a threshold value.

The question is that how to determine the threshold. In this notebook, we will find the best split point for a threshold.

# Data set

In [4]:
# Ref: https://github.com/serengil/deepface/tree/master/tests/dataset
idendities = {
    "Angelina": ["img1.jpg", "img2.jpg", "img4.jpg", "img5.jpg", "img6.jpg", "img7.jpg", "img10.jpg", "img11.jpg"],
    "Scarlett": ["img8.jpg", "img9.jpg"],
    "Jennifer": ["img3.jpg", "img12.jpg"],
    "Mark": ["img13.jpg", "img14.jpg", "img15.jpg"],
    "Jack": ["img16.jpg", "img17.jpg"],
    "Elon": ["img18.jpg", "img19.jpg"],
    "Jeff": ["img20.jpg", "img21.jpg"],
    "Marissa": ["img22.jpg", "img23.jpg"],
    "Sundar": ["img24.jpg", "img25.jpg"]
}

# Positive samples
Find different photos of same people

In [5]:
positives = []

for key, values in idendities.items():
    
    #print(key)
    for i in range(0, len(values)-1):
        for j in range(i+1, len(values)):
            #print(values[i], " and ", values[j])
            positive = []
            positive.append(values[i])
            positive.append(values[j])
            positives.append(positive)

In [6]:
positives = pd.DataFrame(positives, columns = ["file_x", "file_y"])
positives["decision"] = "Yes"

# Negative samples
Compare photos of different people

In [7]:
samples_list = list(idendities.values())
print(idendities)

{'Angelina': ['img1.jpg', 'img2.jpg', 'img4.jpg', 'img5.jpg', 'img6.jpg', 'img7.jpg', 'img10.jpg', 'img11.jpg'], 'Scarlett': ['img8.jpg', 'img9.jpg'], 'Jennifer': ['img3.jpg', 'img12.jpg'], 'Mark': ['img13.jpg', 'img14.jpg', 'img15.jpg'], 'Jack': ['img16.jpg', 'img17.jpg'], 'Elon': ['img18.jpg', 'img19.jpg'], 'Jeff': ['img20.jpg', 'img21.jpg'], 'Marissa': ['img22.jpg', 'img23.jpg'], 'Sundar': ['img24.jpg', 'img25.jpg']}


In [8]:
negatives = []

for i in range(0, len(idendities) - 1):
    for j in range(i+1, len(idendities)):
        #print(samples_list[i], " vs ",samples_list[j]) 
        cross_product = itertools.product(samples_list[i], samples_list[j])
        cross_product = list(cross_product)
        #print(cross_product)
        
        for cross_sample in cross_product:
            #print(cross_sample[0], " vs ", cross_sample[1])
            negative = []
            negative.append(cross_sample[0])
            negative.append(cross_sample[1])
            negatives.append(negative)
        

In [9]:
negatives = pd.DataFrame(negatives, columns = ["file_x", "file_y"])
negatives["decision"] = "No"

# Merge Positives and Negative Samples

In [10]:
df = pd.concat([positives, negatives]).reset_index(drop = True)

In [11]:
df.shape

(300, 3)

In [12]:
df.decision.value_counts()

No     262
Yes     38
Name: decision, dtype: int64

In [13]:
df.file_x = "deepface/tests/dataset/"+df.file_x
df.file_y = "deepface/tests/dataset/"+df.file_y

# DeepFace

In [14]:
from deepface import DeepFace

In [15]:
instances = df[["file_x", "file_y"]].values.tolist()
print(instances)

[['deepface/tests/dataset/img1.jpg', 'deepface/tests/dataset/img2.jpg'], ['deepface/tests/dataset/img1.jpg', 'deepface/tests/dataset/img4.jpg'], ['deepface/tests/dataset/img1.jpg', 'deepface/tests/dataset/img5.jpg'], ['deepface/tests/dataset/img1.jpg', 'deepface/tests/dataset/img6.jpg'], ['deepface/tests/dataset/img1.jpg', 'deepface/tests/dataset/img7.jpg'], ['deepface/tests/dataset/img1.jpg', 'deepface/tests/dataset/img10.jpg'], ['deepface/tests/dataset/img1.jpg', 'deepface/tests/dataset/img11.jpg'], ['deepface/tests/dataset/img2.jpg', 'deepface/tests/dataset/img4.jpg'], ['deepface/tests/dataset/img2.jpg', 'deepface/tests/dataset/img5.jpg'], ['deepface/tests/dataset/img2.jpg', 'deepface/tests/dataset/img6.jpg'], ['deepface/tests/dataset/img2.jpg', 'deepface/tests/dataset/img7.jpg'], ['deepface/tests/dataset/img2.jpg', 'deepface/tests/dataset/img10.jpg'], ['deepface/tests/dataset/img2.jpg', 'deepface/tests/dataset/img11.jpg'], ['deepface/tests/dataset/img4.jpg', 'deepface/tests/dataset

In [16]:
model_name = "VGG-Face"
distance_metric = "cosine"

In [17]:
ls


 Volume in drive C has no label.
 Volume Serial Number is 0EAE-C834

 Directory of C:\Users\admin\projects\dp-new\dp-new

17-02-2024  22:30    <DIR>          .
17-02-2024  22:30    <DIR>          ..
14-03-2023  14:50    <DIR>          .github
21-11-2022  10:28               424 .gitignore
15-03-2023  07:18    <DIR>          .ipynb_checkpoints
14-03-2023  14:50    <DIR>          api
15-03-2023  07:18    <DIR>          deepface
21-11-2022  10:28               215 Dockerfile
17-02-2024  22:30            52,805 Fine-Tuning-Threshold.ipynb
14-03-2023  14:50    <DIR>          icon
21-11-2022  10:28             1,076 LICENSE
21-11-2022  10:28            21,277 README.md
21-11-2022  11:39               365 requirements.txt
14-03-2023  14:50    <DIR>          scripts
21-11-2022  10:28             1,089 setup.py
15-03-2023  07:59    <DIR>          tests
               7 File(s)         77,251 bytes
               9 Dir(s)  158,392,287,232 bytes free


In [18]:
resp_obj = DeepFace.verify(img1_path = "deepface/tests/dataset/img1.jpg", img2_path = "deepface/tests/dataset/img2.jpg", model_name = model_name, distance_metric = distance_metric)

ValueError: ('Confirm that ', 'dp-new/tests/dataset/img1.jpg', ' exists')

In [None]:
resp_obj = DeepFace.verify(instances, model_name = model_name, distance_metric = distance_metric)

In [None]:
distances = []
for i in range(0, len(instances)):
    distance = round(resp_obj["pair_%s" % (i+1)]["distance"], 4)
    distances.append(distance)

In [None]:
df["distance"] = distances

# Analyzing Distances

In [None]:
tp_mean = round(df[df.decision == "Yes"].mean().values[0], 4)
tp_std = round(df[df.decision == "Yes"].std().values[0], 4)
fp_mean = round(df[df.decision == "No"].mean().values[0], 4)
fp_std = round(df[df.decision == "No"].std().values[0], 4)

In [None]:
print("Mean of true positives: ", tp_mean)
print("Std of true positives: ", tp_std)
print("Mean of false positives: ", fp_mean)
print("Std of false positives: ", fp_std)

# Distribution

In [None]:
df[df.decision == "Yes"].distance.plot.kde()
df[df.decision == "No"].distance.plot.kde()

# Best Split Point

In [None]:
from chefboost import Chefboost as chef

In [None]:
config = {'algorithm': 'C4.5'}

In [None]:
tmp_df = df[['distance', 'decision']].rename(columns = {"decision": "Decision"}).copy()
model = chef.fit(tmp_df, config)

# Sigma

In [None]:
sigma = 2
#2 sigma corresponds 95.45% confidence, and 3 sigma corresponds 99.73% confidence

#threshold = round(tp_mean + sigma * tp_std, 4)
threshold = 0.3147 #comes from c4.5 algorithm
print("threshold: ", threshold)

In [None]:
df[df.decision == 'Yes'].distance.max()

In [None]:
df[df.decision == 'No'].distance.min()

# Evaluation

In [None]:
df["prediction"] = "No"

In [None]:
idx = df[df.distance <= threshold].index
df.loc[idx, 'prediction'] = 'Yes'

In [None]:
df.sample(5)

In [None]:
cm = confusion_matrix(df.decision.values, df.prediction.values)

In [None]:
cm

In [None]:
tn, fp, fn, tp = cm.ravel()

In [None]:
tn, fp, fn, tp

In [None]:
recall = tp / (tp + fn)
precision = tp / (tp + fp)
accuracy = (tp + tn)/(tn + fp +  fn + tp)
f1 = 2 * (precision * recall) / (precision + recall)

In [None]:
print("Precision: ", 100*precision,"%")
print("Recall: ", 100*recall,"%")
print("F1 score ",100*f1, "%")
print("Accuracy: ", 100*accuracy,"%")

In [None]:
df.to_csv("threshold_pivot.csv", index = False)

## Test results

### Threshold = 0.3147 (C4.5 best split point)

Precision:  100.0 %

Recall:  89.47368421052632 %

F1 score  94.44444444444444%

Accuracy:  98.66666666666667 %

### Threshold = 0.3751 (2 sigma)

Precision:  90.47619047619048 %

Recall:  100.0 %

F1 score  95.0 %

Accuracy:  98.66666666666667 %