## Imports

In [2]:
import os
import cv2

import numpy as np
import face_recognition

from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import pickle

## Dataset
The `Dataset` folder represents a fraction of the LFW dataset and will contain images used for training the model. The `labels` variable contains all the people (subfolders) in the dataset.

In [3]:
path_dataset = "dataset/"
path_fakes = "dataset-fakes/"

labels = os.listdir(path_dataset)
labels_fakes = os.listdir(path_fakes)

## Preprocessing

In [4]:
def preprocess(img):
    (t, r, b, l) = face_recognition.face_locations(img)[0]
    face_img = img[t:b, l:r]
    face_img = cv2.resize(face_img, (224, 224))
    encode = face_recognition.face_encodings(face_img)[0]

    return encode

X = []
y = []
for i, label in enumerate(labels):
    img_filenames = os.listdir(f"{path_dataset}{label}/")
    for filename in img_filenames:
        filepath = f"{path_dataset}{label}/{filename}"
        img = cv2.imread(filepath)
        
        try:
            encode = preprocess(img)
        except Exception as e:
            continue
        
        X.append(encode)
        y.append(i)

X = np.asarray(X)
y = np.asarray(y)

## Prepare for training
This step splits the data into a set for training and a set for testing and saves the testing part into files that will be used later.

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

np.save("test-data/X_test.npy", X_test)
np.save("test-data/y_test.npy", y_test)

## Training the model

In [6]:
svc_model = svm.SVC()
svc_model.fit(X_train, y_train)

## Preparing fakes

In [7]:
X_fake = []
y_fake = []
for i, label in enumerate(labels_fakes):
    img_filenames = os.listdir(f"{path_fakes}{label}/")
    for filename in img_filenames:
        filepath = f"{path_fakes}{label}/{filename}"
        img = cv2.imread(filepath)
        
        try:
            encode = preprocess(img)
        except Exception as e:
            continue
        
        X_fake.append(encode)
        y_fake.append(i)

X_fake = np.asarray(X_fake)
y_fake = np.asarray(y_fake)

X_train_fake, X_test_fake, y_train_fake, y_test_fake = train_test_split(X_fake, y_fake, test_size=0.2, random_state=42)

## Validating the model

In [12]:
pred = svc_model.predict(X_train)
train_acc = accuracy_score(y_train, pred)
print(f"Training Accuracy: {train_acc:.2%}")

pred = svc_model.predict(X_test)
test_acc = accuracy_score(y_test, pred)
print(f"Test Accuracy: {test_acc:.2%}")

def perf_measure(y_actual, y_hat):
   TP = 0
   FP = 0
   TN = 0
   FN = 0

   for i in range(len(y_hat)): 
      if y_actual[i]==y_hat[i]==1:
         TP += 1
      if y_hat[i]==1 and y_actual[i]!=y_hat[i]:
         FP += 1
      if y_actual[i]==y_hat[i]==0:
         TN += 1
      if y_hat[i]==0 and y_actual[i]!=y_hat[i]:
         FN += 1

   return (TP, FP, TN, FN)

threshold = 0.5

votes = np.array(svc_model.decision_function(X_test))
prob = np.exp(votes) / np.sum(np.exp(votes), axis=1, keepdims=True)
y_predicted = (np.max(prob, axis=1) >= threshold).astype(int)

votes = np.array(svc_model.decision_function(X_test_fake))
prob = np.exp(votes) / np.sum(np.exp(votes), axis=1, keepdims=True)
y_predicted_fake = (np.max(prob, axis=1) >= threshold).astype(int)

TP, FP, TN, FN = perf_measure(y_predicted_fake, y_predicted)

print((TP, FP, TN, FN))

print(f"Final FAR: {(FP / (FP + TN)):.2%}")
print(f"Final FRR: {(FN / (FN + TP)):.2%}")

Training Accuracy: 33.97%
Test Accuracy: 32.38%
(282, 18, 0, 15)
Final FAR: 100.00%
Final FRR: 5.05%


## Saving the model

In [None]:
model_name = f"svm-{str(int(test_acc*100))}.model"
pickle.dump(svc_model, open("models/" + model_name, "wb"))