In [1]:
import numpy as np
import cv2
from pathlib import Path


DATA_DIR = Path('data/')
NEGATIVE_DATA_DIR = Path('negative_data/')
GRAY_ROI_SIZE = (350, 350)


tmp_X = []
tmp_y = []

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
file_images = (file for file in (Path.cwd() / DATA_DIR).glob('**/*') if file.is_file())


for index, file in enumerate(file_images):

    img = cv2.imread(str(file))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x,y,w,h) in faces:
        img = cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = img[y:y+h, x:x+w]
        resized_roi_gray = cv2.resize(roi_gray, GRAY_ROI_SIZE)

        # cv2.imwrite(f'detected_faces/[{index}]{w}{h}_faces.jpg', resized_roi_gray)

        reshaped = resized_roi_gray.reshape(GRAY_ROI_SIZE[0] * GRAY_ROI_SIZE[1])
        tmp_X.append(reshaped)
        tmp_y.append(file.parent.name)


file_negative_images = (file for file in (Path.cwd() / NEGATIVE_DATA_DIR).glob('*') if file.is_file())

for index, file in enumerate(file_negative_images):
    img = cv2.imread(str(file))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized_roi_gray = cv2.resize(gray, GRAY_ROI_SIZE)
    reshaped = resized_roi_gray.reshape(GRAY_ROI_SIZE[0] * GRAY_ROI_SIZE[1])
    tmp_X.append(reshaped)
    tmp_y.append(file.parent.name)

In [2]:
from sklearn.model_selection import train_test_split


n_samples = tmp_X[0].shape
X = np.array(tmp_X)

target_names = set(tmp_y)
n_classes = len(target_names)
uid_name = {uid: name for uid, name in enumerate(target_names)}
name_uid = {name: uid for uid, name in uid_name.items()}
y = np.array(list(map(name_uid.get, tmp_y)))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [3]:
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from time import time


# #############################################################################
# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction

n_components = 300

print("Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0]))
t0 = time()
pca = PCA(n_components=n_components, svd_solver='randomized', whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

eigenfaces = pca.components_.reshape((n_components, *GRAY_ROI_SIZE))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

# #############################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

# #############################################################################
# Quantitative evaluation of the model quality on the test set

print("Predicting people's names on the test set")
t0 = time()
y_pred = clf.predict(X_test_pca)
print("done in %0.3fs" % (time() - t0))

print(classification_report(y_test, y_pred, target_names=target_names))


Extracting the top 300 eigenfaces from 458 faces
done in 47.417s
Projecting the input data on the eigenfaces orthonormal basis
done in 23.071s
Fitting the classifier to the training set
done in 51.804s
Best estimator found by grid search:
SVC(C=1000.0, class_weight='balanced', gamma=0.001)
Predicting people's names on the test set
done in 0.198s
                 precision    recall  f1-score   support

  negative_data       0.94      0.89      0.92       132
djenifer_lorens       0.87      0.72      0.79        46
    rand_person       0.47      0.84      0.60        19

       accuracy                           0.85       197
      macro avg       0.76      0.82      0.77       197
   weighted avg       0.88      0.85      0.86       197

