In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn import svm
import numpy as np
import os
import json
import joblib

# Load feature vector

In [2]:
feature_path = "/Users/trananhvu/Documents/CV/data/feature"
# HOG + Openface
hog_openface_feature_path = os.path.join(feature_path, "hog_openface")
# MTCNN + Facenet
mtcnn_facenet_feature_path = os.path.join(feature_path, "mtcnn_facenet")

In [3]:
def get_feature(feature_path, feature_extraction_type):
    list_feature = []
    list_label = []
    if feature_extraction_type=="openface":
        size = 128
    elif feature_extraction_type=="facenet":
        size = 512
    for i in os.listdir(feature_path):
        if i==".DS_Store":
            continue
        for j in os.listdir(os.path.join(feature_path, i)):
            if j==".DS_Store":
                continue
            list_label.append(i)
            feature=np.load(os.path.join(os.path.join(feature_path, i), j))
            list_feature.append(feature.reshape(size).tolist())
    return list_label, list_feature

In [4]:
list_label_hog, list_hog_openface_feature = get_feature(hog_openface_feature_path, feature_extraction_type="openface")
list_label_mtcnn, list_mtcnn_facenet_feature = get_feature(mtcnn_facenet_feature_path, feature_extraction_type="facenet")

# Prepare for training

In [5]:
# Face label to index
count=0
face_2_idx = {}
face_hog_idx_list = []
face_mtcnn_idx_list = []
for i in list_label_hog:
    if i not in face_2_idx:
        face_2_idx[i]=count
        count+=1
    face_hog_idx_list.append(face_2_idx[i])
for i in list_label_mtcnn:
    face_mtcnn_idx_list.append(face_2_idx[i])

In [6]:
face_2_idx

{'Robert Downey Jr': 0,
 'Chris Evans': 1,
 'Mark Ruffalo': 2,
 'Chris Hemsworth': 3,
 'Scarlett Johansson': 4,
 'Unknown': 5,
 'Tom Holland': 6}

In [7]:
model_path = "/Users/trananhvu/Documents/CV/CV_internship/face_recognition/step_by_step_face_recognition/model"
svm_model_hog_openface_path = os.path.join(model_path, "hog_openface_svm_model.sav")
svm_model_mtcnn_facenet_path = os.path.join(model_path, "mtcnn_facenet_svm_model.sav")
knn_model_hog_openface_path = os.path.join(model_path, "hog_openface_knn_model.sav")
knn_model_mtcnn_facenet_path = os.path.join(model_path, "mtcnn_facenet_knn_model.sav")

In [8]:
with open(os.path.join(model_path, "face2idx.json"), "w") as outfile:
    json.dump(face_2_idx, outfile)

# Face recognition

## SVM

### HOG + Openface 

In [9]:
# Train test split
x_train, x_test, y_train, y_test = train_test_split(list_hog_openface_feature, face_hog_idx_list, test_size=0.33,
                                                    random_state=4, stratify=face_hog_idx_list)
# Training SVM model
print("Fitting the classifier to the training set")
param_grid = {
         'C': [1e3, 5e3, 1e4, 5e4, 1e5],
          'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
          }
clf = GridSearchCV(svm.SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(x_train, y_train)
print("Best estimator found by grid search:")
print(clf.best_estimator_)
# Save model
joblib.dump(clf, svm_model_hog_openface_path)

Fitting the classifier to the training set
Best estimator found by grid search:
SVC(C=10000.0, class_weight='balanced', gamma=0.0001)


['/Users/trananhvu/Documents/CV/CV_internship/face_recognition/step_by_step_face_recognition/model/hog_openface_svm_model.sav']

In [10]:
print("HOG + Openface + SVM accuracy: ")
y_pred = clf.predict(x_test)
accuracy_score(y_test, y_pred)

HOG + Openface + SVM accuracy: 


0.892271662763466

### MTCNN + Facenet 

In [11]:
# Train test split
x_train, x_test, y_train, y_test = train_test_split(list_mtcnn_facenet_feature, face_mtcnn_idx_list, test_size=0.33,
                                                    random_state=4, stratify=face_mtcnn_idx_list)
# Training SVM model
print("Fitting the classifier to the training set")
param_grid = {
         'C': [1e3, 5e3, 1e4, 5e4, 1e5],
          'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
          }
clf = GridSearchCV(svm.SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(x_train, y_train)
print("Best estimator found by grid search:")
print(clf.best_estimator_)
# Save model
joblib.dump(clf, svm_model_mtcnn_facenet_path)

Fitting the classifier to the training set
Best estimator found by grid search:
SVC(C=1000.0, class_weight='balanced', gamma=0.1)


['/Users/trananhvu/Documents/CV/CV_internship/face_recognition/step_by_step_face_recognition/model/mtcnn_facenet_svm_model.sav']

In [12]:
print("MTCNN + Facenet + SVM accuracy: ")
y_pred_svm = clf.predict(x_test)
accuracy_score(y_test, y_pred_svm)

MTCNN + Facenet + SVM accuracy: 


0.9902439024390244

## KNN

### HOG + Openface 

In [13]:
# Train test split
x_train, x_test, y_train, y_test = train_test_split(list_hog_openface_feature, face_hog_idx_list, test_size=0.33,
                                                    random_state=4, stratify=face_hog_idx_list)
# Training KNN model
param_grid = {
        'n_neighbors': [3, 5, 7]
        }
clf = GridSearchCV(KNeighborsClassifier(), param_grid)
clf.fit(x_train, y_train)
print("Best estimator found by grid search:")
print(clf.best_estimator_)
# Save model
joblib.dump(clf, knn_model_hog_openface_path)

Best estimator found by grid search:
KNeighborsClassifier(n_neighbors=3)


['/Users/trananhvu/Documents/CV/CV_internship/face_recognition/step_by_step_face_recognition/model/hog_openface_knn_model.sav']

In [14]:
print("HOG + Openface + KNN accuracy: ")
y_pred = clf.predict(x_test)
accuracy_score(y_test, y_pred)

HOG + Openface + KNN accuracy: 


0.8946135831381733

### MTCNN + Facenet

In [15]:
# Train test split
x_train, x_test, y_train, y_test = train_test_split(list_mtcnn_facenet_feature, face_mtcnn_idx_list, test_size=0.33,
                                                    random_state=4, stratify=face_mtcnn_idx_list)
# Training KNN model
param_grid = {
        'n_neighbors': [3, 5, 7]
        }
clf = GridSearchCV(KNeighborsClassifier(), param_grid)
clf.fit(x_train, y_train)
print("Best estimator found by grid search:")
print(clf.best_estimator_)
# Save model
joblib.dump(clf, knn_model_mtcnn_facenet_path)

Best estimator found by grid search:
KNeighborsClassifier(n_neighbors=3)


['/Users/trananhvu/Documents/CV/CV_internship/face_recognition/step_by_step_face_recognition/model/mtcnn_facenet_knn_model.sav']

In [16]:
print("MTCNN + Facenet + KNN accuracy: ")
y_pred = clf.predict(x_test)
accuracy_score(y_test, y_pred)

MTCNN + Facenet + KNN accuracy: 


0.9780487804878049