In [44]:
import json
import os
import random
from collections import Counter

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

import create_directory

# Tạo Sample Data

In [45]:
# Build label2idx (class name -> integer index) and persist it as JSON
# next to the recognition models.
class_names = ['Chris Evans', 'Chris Hemsworth', 'Mark Ruffalo', 'Robert Downey Jr',
               'Scarlett Johansson', 'Tom Holland', 'Unknown']
label2idx = {name: idx for idx, name in enumerate(class_names)}
label2idx_file = os.path.join(create_directory.recognition_model_dir, "label2idx.json")
with open(label2idx_file, "w") as outfile:
    json.dump(label2idx, outfile)

In [46]:
def get_feature(feature_path, label2idx, feature_extraction_type):
    """Load stored feature vectors and their label indices from disk.

    The features are read from ``feature_path/<sub_dir>/<label name>/<file>``,
    where ``<sub_dir>`` is ``hog_openface`` for ``"openface"`` and
    ``mtcnn_facenet`` for ``"facenet"``. Every file is loaded with ``np.load``
    and reshaped to a flat vector (128 values for openface, 512 for facenet).

    Args:
        feature_path: Directory that contains the per-extractor sub-directories.
        label2idx: Mapping from label directory name to integer class index.
        feature_extraction_type: Either ``"openface"`` or ``"facenet"``.

    Returns:
        A tuple ``(list_label_idx, list_feature)`` of two parallel lists: the
        class index of each file and its feature vector as a list of numbers.

    Raises:
        ValueError: If ``feature_extraction_type`` is not a supported value.
        KeyError: If a label directory name is missing from ``label2idx``.
    """
    extractor_layout = {
        "openface": ("hog_openface", 128),
        "facenet": ("mtcnn_facenet", 512),
    }
    if feature_extraction_type not in extractor_layout:
        # Previously this fell through and failed later with an unbound `path`.
        raise ValueError(
            "feature_extraction_type must be one of {}, got {!r}".format(
                sorted(extractor_layout), feature_extraction_type))
    sub_dir, size = extractor_layout[feature_extraction_type]
    path = os.path.join(feature_path, sub_dir)

    list_feature = []
    list_label_idx = []
    # os.listdir order is arbitrary; sorting keeps the returned order (and so
    # any seeded split performed on it) identical across machines and runs.
    for label_name in sorted(os.listdir(path)):
        label_dir = os.path.join(path, label_name)
        if not os.path.isdir(label_dir):
            # Skips ".DS_Store" and any other stray file beside the label folders.
            continue
        for file_name in sorted(os.listdir(label_dir)):
            if file_name == ".DS_Store":
                continue
            feature = np.load(os.path.join(label_dir, file_name))
            list_label_idx.append(label2idx[label_name])
            list_feature.append(feature.reshape(size).tolist())
    return list_label_idx, list_feature

# Root folder holding one sub-directory of stored features per extractor.
feature_path = os.path.join(create_directory.marvel_data_dir, "feature")
# Each call returns (label indices, feature vectors) for one extractor.
hog_label, hog_feature = get_feature(feature_path, label2idx, 'openface')
mtcnn_label, mtcnn_feature = get_feature(feature_path, label2idx, 'facenet')

In [47]:
# Data statistics
def create_data_num(label):
    """Count how many times each value occurs in ``label``.

    Args:
        label: Iterable of hashable label values.

    Returns:
        A dict mapping each distinct value to its number of occurrences, with
        keys in order of first appearance.
    """
    data_num = {}
    for value in label:
        data_num[value] = data_num.get(value, 0) + 1
    return data_num
# Print the number of items per class index for each feature set.
print("THỐNG KÊ DỮ LIỆU")
for heading, labels in ((" -hog_openface:", hog_label),
                        (" -mtcnn_facenet:", mtcnn_label)):
    print(heading)
    print(create_data_num(labels))

THỐNG KÊ DỮ LIỆU
 -hog_openface:
{3: 229, 0: 160, 2: 173, 1: 157, 4: 194, 6: 186, 5: 187}
 -mtcnn_facenet:
{3: 229, 0: 160, 2: 173, 1: 157, 4: 194, 6: 186, 5: 187}


Ta sẽ lấy ngẫu nhiên khoảng 1/3 số ảnh (test_size=0.33, chia phân tầng theo nhãn) làm tập test; phần ảnh còn lại được chia thành các mẫu để làm tập train. Số thứ tự mẫu trùng với chỉ số trong `sample_percentage` ở phần code bên dưới, cụ thể:
- Sample 3: sử dụng toàn bộ ảnh còn lại làm tập train
- Sample 2: sử dụng 50% ảnh còn lại làm tập train
- Sample 1: sử dụng 25% ảnh còn lại làm tập train
- Sample 0: sử dụng 12.5% ảnh còn lại làm tập train

In [48]:
def get_train_test_data(feature, label, test_size=0.33, random_state=4):
    """Split features and labels into stratified train and test parts.

    Args:
        feature: Sequence of feature vectors.
        label: Sequence of labels parallel to ``feature``; also used as the
            stratification target so the split follows the class proportions.
        test_size: Passed to ``train_test_split``; defaults to 0.33, the value
            that was previously hard-coded.
        random_state: Seed passed to ``train_test_split``; defaults to 4, the
            value that was previously hard-coded.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        feature, label, test_size=test_size, random_state=random_state, stratify=label)
    return x_train, x_test, y_train, y_test

# Split each feature set into train and test parts with get_train_test_data.
x_train_hog, x_test_hog, y_train_hog, y_test_hog = get_train_test_data(hog_feature, hog_label)
x_train_mtcnn, x_test_mtcnn, y_train_mtcnn, y_test_mtcnn = get_train_test_data(mtcnn_feature, mtcnn_label)

In [49]:
def create_sample(feature, label, percentage, save_path, save_name, seed=None):
    """Write a random per-class subsample of the data to two JSON files.

    The (feature, label) pairs are shuffled, then for every class the first
    ``int(class_size * percentage)`` pairs encountered are kept. The kept
    features go to ``<save_name>_feature.json`` and the kept labels to
    ``<save_name>_label_idx.json``, both inside ``save_path``.

    Args:
        feature: Sequence of JSON-serialisable feature vectors.
        label: Sequence of hashable, JSON-serialisable labels parallel to
            ``feature``.
        percentage: Fraction of each class to keep (e.g. 0.25).
        save_path: Existing directory in which the two files are written.
        save_name: File name prefix.
        seed: Optional seed for a reproducible shuffle. When ``None`` (the
            default) the module-level ``random`` state is used, as before.

    Returns:
        None. The result is only written to disk.
    """
    pairs = list(zip(feature, label))
    if seed is None:
        random.shuffle(pairs)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(pairs)

    # Per-class quota, rounded down; a class with quota 0 contributes nothing.
    quota = {cls: int(total * percentage)
             for cls, total in Counter(cls for _, cls in pairs).items()}

    taken = Counter()
    sample_feature = []
    sample_label = []
    for feat, cls in pairs:
        if taken[cls] == quota[cls]:
            continue
        sample_feature.append(feat)
        sample_label.append(cls)
        taken[cls] += 1

    with open(os.path.join(save_path, save_name+"_feature"+".json"), "w") as outfile:
        json.dump(sample_feature, outfile)
    with open(os.path.join(save_path, save_name+"_label_idx.json"), "w") as outfile:
        json.dump(sample_label, outfile)

In [50]:
# Output folders for the sampled features, one sub-folder per extractor.
save_path = os.path.join(create_directory.marvel_data_dir, 'sample_feature')
save_path_hog = os.path.join(save_path, 'hog_openface')
save_path_mtcnn = os.path.join(save_path, 'mtcnn_facenet')
# makedirs also creates the parent `save_path`; exist_ok makes the cell safe
# to re-run without a separate existence check.
for directory in (save_path_hog, save_path_mtcnn):
    os.makedirs(directory, exist_ok=True)

In [51]:
# Save the test split of each feature set as JSON.
for target_dir, test_features, test_labels in (
        (save_path_hog, x_test_hog, y_test_hog),
        (save_path_mtcnn, x_test_mtcnn, y_test_mtcnn)):
    with open(os.path.join(target_dir, "test_feature.json"), "w") as outfile:
        json.dump(test_features, outfile)
    with open(os.path.join(target_dir, "test_label_idx.json"), "w") as outfile:
        json.dump(test_labels, outfile)

In [52]:
# Fraction of the training split kept for each sample; the list index is the
# sample number used in the output file names.
sample_percentage = [0.125, 0.25, 0.5, 1]
for idx, percentage in enumerate(sample_percentage):
    print("Sample {}: {}% train data".format(idx, percentage*100))
    sample_name = 'train_sample_{}'.format(idx)
    for banner, train_features, train_labels, target_dir in (
            ('    -hog openface', x_train_hog, y_train_hog, save_path_hog),
            ('    -mtcnn facenet', x_train_mtcnn, y_train_mtcnn, save_path_mtcnn)):
        print(banner)
        create_sample(train_features, train_labels, percentage,
                      save_path=target_dir,
                      save_name=sample_name)

Sample 0: 12.5% train data
    -hog openface
    -mtcnn facenet
Sample 1: 25.0% train data
    -hog openface
    -mtcnn facenet
Sample 2: 50.0% train data
    -hog openface
    -mtcnn facenet
Sample 3: 100% train data
    -hog openface
    -mtcnn facenet


# Face recognition

## Xây dựng hàm

In [54]:
# Model training function
def train_model(x_train, x_test, y_train, y_test, model_type, save, model_path=''):
    """Grid-search a classifier on the training data and predict the test data.

    Args:
        x_train: Training feature vectors.
        x_test: Test feature vectors to predict.
        y_train: Training labels parallel to ``x_train``.
        y_test: Test labels; returned unchanged alongside the predictions.
        model_type: ``'svm'`` (RBF ``SVC`` searched over ``C`` and ``gamma``)
            or ``'knn'`` (distance-weighted ``KNeighborsClassifier`` searched
            over ``n_neighbors``).
        save: When equal to 1, the fitted ``GridSearchCV`` object is written
            to ``model_path`` with ``joblib.dump``.
        model_path: Destination file for the fitted model; required when
            ``save`` is 1.

    Returns:
        The tuple ``(y_test, y_pred)``.

    Raises:
        ValueError: If ``model_type`` is unsupported, or if ``save`` is 1 and
            ``model_path`` is empty.
    """
    # Validate before training so a bad call fails immediately rather than
    # with an unbound `clf` or after a full grid search.
    if model_type not in ('svm', 'knn'):
        raise ValueError(
            "model_type must be 'svm' or 'knn', got {!r}".format(model_type))
    if save == 1 and not model_path:
        raise ValueError("model_path must be provided when save == 1")

    if model_type == 'svm':
        param_grid = {
                'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
                }
        clf = GridSearchCV(svm.SVC(kernel='rbf', class_weight='balanced'), param_grid)
    else:
        param_grid = {
                'n_neighbors': [3, 5, 7]
                }
        clf = GridSearchCV(KNeighborsClassifier(weights='distance'), param_grid)

    clf = clf.fit(x_train, y_train)
    if save == 1:
        joblib.dump(clf, model_path)
    y_pred = clf.predict(x_test)
    return y_test, y_pred

In [55]:
# Evaluation function
def evaluation(true, pred):
    """Compute accept/reject statistics and accuracy for a set of predictions.

    A prediction equal to ``label2idx["Unknown"]`` counts as a reject; any
    other prediction counts as an accept.

    Args:
        true: Ground-truth label indices.
        pred: Predicted label indices, parallel to ``true``.

    Returns:
        The tuple ``(fa, wa, fr, accept, reject, accuracy)`` where
        ``fa`` (false accept) counts Unknown faces predicted as a known class,
        ``wa`` (wrong answer) counts known faces predicted as a different
        known class, ``fr`` (false reject) counts known faces predicted as
        Unknown, ``accept``/``reject`` are the totals of each decision, and
        ``accuracy`` is ``accuracy_score(true, pred)``.
    """
    unknown = label2idx["Unknown"]
    fa = wa = fr = 0
    accept = reject = 0
    for actual, predicted in zip(true, pred):
        if predicted == unknown:
            # The system says the face is not in the database.
            reject += 1
            if actual != unknown:
                fr += 1
            continue
        # The system says the face is in the database.
        accept += 1
        if actual == unknown:
            # An Unknown face was recognised as somebody in the database.
            fa += 1
        elif actual != predicted:
            # A face in the database was recognised as the wrong person.
            wa += 1
    accuracy = accuracy_score(true, pred)
    # The goal is to keep fa and wa low.
    return (fa, wa, fr, accept, reject, accuracy)

In [56]:
sample_list = [0, 1, 2, 3]


def process(data_path, sample_list, model_type):
    """Train and evaluate one model per training sample and print the metrics.

    For every index ``i`` in ``sample_list`` the files
    ``train_sample_<i>_feature.json`` and ``train_sample_<i>_label_idx.json``
    are read from ``data_path``, a model is trained with ``train_model``
    (without saving it) and scored on the shared test split with
    ``evaluation``.

    Args:
        data_path: Directory holding the train sample and test JSON files.
        sample_list: Iterable of sample indices to run.
        model_type: Model type forwarded to ``train_model``.

    Returns:
        None. Results are printed.
    """
    # The test split is the same for every sample, so read it once up front
    # instead of once per loop iteration.
    with open(os.path.join(data_path, "test_feature.json"), "r") as infile:
        x_test = json.load(infile)
    with open(os.path.join(data_path, "test_label_idx.json"), "r") as infile:
        y_test = json.load(infile)

    for i in sample_list:
        print('Sample {}'.format(i))
        with open(os.path.join(data_path, "train_sample_{}_feature.json".format(i)), "r") as infile:
            x_train = json.load(infile)
        with open(os.path.join(data_path, "train_sample_{}_label_idx.json".format(i)), "r") as infile:
            y_train = json.load(infile)
        y_true, y_pred = train_model(x_train, x_test, y_train, y_test, model_type, save=0)
        fa, wa, fr, accept, reject, accuracy = evaluation(y_true, y_pred)
        print(' - Accept: {}, False Accept: {}, Wrong Answer: {}'.format(accept, fa, wa))
        print(' - Reject: {}, False Reject: {}'.format(reject, fr))
        print(' - Accuracy: {}'.format(accuracy))

## HOG + Openface 

In [57]:
# Folder with the HOG + OpenFace samples; components are joined separately
# (as where the folder is created) rather than embedding a "/" separator.
data_path = os.path.join(create_directory.marvel_data_dir, 'sample_feature', 'hog_openface')

### SVM

In [58]:
# Run process() with the SVM model on every sample in sample_list under data_path.
process(data_path, sample_list, model_type='svm')

Sample 0
 - Accept: 394, False Accept: 43, Wrong Answer: 23
 - Reject: 31, False Reject: 13
 - Accuracy: 0.8141176470588235
Sample 1
 - Accept: 363, False Accept: 17, Wrong Answer: 20
 - Reject: 62, False Reject: 18
 - Accuracy: 0.8705882352941177
Sample 2
 - Accept: 373, False Accept: 23, Wrong Answer: 15
 - Reject: 52, False Reject: 14
 - Accuracy: 0.8776470588235294
Sample 3
 - Accept: 357, False Accept: 13, Wrong Answer: 16
 - Reject: 68, False Reject: 20
 - Accuracy: 0.8847058823529412


### KNN

In [59]:
# Run process() with the KNN model on every sample in sample_list under data_path.
process(data_path, sample_list, model_type='knn')

Sample 0
 - Accept: 418, False Accept: 57, Wrong Answer: 25
 - Reject: 7, False Reject: 3
 - Accuracy: 0.8
Sample 1
 - Accept: 417, False Accept: 56, Wrong Answer: 18
 - Reject: 8, False Reject: 3
 - Accuracy: 0.8188235294117647
Sample 2
 - Accept: 389, False Accept: 34, Wrong Answer: 16
 - Reject: 36, False Reject: 9
 - Accuracy: 0.8611764705882353
Sample 3
 - Accept: 381, False Accept: 21, Wrong Answer: 15
 - Reject: 44, False Reject: 4
 - Accuracy: 0.9058823529411765


## MTCNN + Facenet 

In [60]:
# Folder with the MTCNN + FaceNet samples; components are joined separately
# (as where the folder is created) rather than embedding a "/" separator.
data_path = os.path.join(create_directory.marvel_data_dir, 'sample_feature', 'mtcnn_facenet')

### SVM

In [61]:
# Run process() with the SVM model on every sample in sample_list under data_path.
process(data_path, sample_list, model_type='svm')

Sample 0
 - Accept: 374, False Accept: 14, Wrong Answer: 3
 - Reject: 51, False Reject: 4
 - Accuracy: 0.9505882352941176
Sample 1
 - Accept: 367, False Accept: 7, Wrong Answer: 2
 - Reject: 58, False Reject: 4
 - Accuracy: 0.9694117647058823
Sample 2
 - Accept: 371, False Accept: 11, Wrong Answer: 4
 - Reject: 54, False Reject: 4
 - Accuracy: 0.9552941176470588
Sample 3
 - Accept: 362, False Accept: 1, Wrong Answer: 3
 - Reject: 63, False Reject: 3
 - Accuracy: 0.9835294117647059


### KNN

In [62]:
# Run process() with the KNN model on every sample in sample_list under data_path.
process(data_path, sample_list, model_type='knn')

Sample 0
 - Accept: 396, False Accept: 32, Wrong Answer: 1
 - Reject: 29, False Reject: 0
 - Accuracy: 0.9223529411764706
Sample 1
 - Accept: 399, False Accept: 35, Wrong Answer: 2
 - Reject: 26, False Reject: 0
 - Accuracy: 0.9129411764705883
Sample 2
 - Accept: 384, False Accept: 20, Wrong Answer: 1
 - Reject: 41, False Reject: 0
 - Accuracy: 0.9505882352941176
Sample 3
 - Accept: 369, False Accept: 5, Wrong Answer: 1
 - Reject: 56, False Reject: 0
 - Accuracy: 0.9858823529411764


# Save best model

Sample 3 cho ra kết quả tốt nhất, ta sẽ lưu lại mô hình knn và svm được huấn luyện trên sample 3

In [63]:
# Destination files for the four saved models, all inside the recognition
# model directory.
(svm_model_hog_openface_path,
 svm_model_mtcnn_facenet_path,
 knn_model_hog_openface_path,
 knn_model_mtcnn_facenet_path) = (
    os.path.join(create_directory.recognition_model_dir, model_file)
    for model_file in (
        "hog_openface_svm_model.sav",
        "mtcnn_facenet_svm_model.sav",
        "hog_openface_knn_model.sav",
        "mtcnn_facenet_knn_model.sav",
    )
)

In [64]:
def save_process(data_path, model_type, model_path, sample_idx=3):
    """Train a model on one training sample and save it to ``model_path``.

    Reads ``train_sample_<sample_idx>_feature.json``,
    ``train_sample_<sample_idx>_label_idx.json`` and the test split from
    ``data_path``, then calls ``train_model`` with saving enabled.

    Args:
        data_path: Directory holding the train sample and test JSON files.
        model_type: Model type forwarded to ``train_model``.
        model_path: File the fitted model is written to.
        sample_idx: Index of the training sample to use; defaults to 3, the
            value that was previously hard-coded.

    Returns:
        None.
    """
    with open(os.path.join(data_path, "train_sample_{}_feature.json".format(sample_idx)), "r") as infile:
        x_train = json.load(infile)
    with open(os.path.join(data_path, "test_feature.json"), "r") as infile:
        x_test = json.load(infile)
    with open(os.path.join(data_path, "train_sample_{}_label_idx.json".format(sample_idx)), "r") as infile:
        y_train = json.load(infile)
    with open(os.path.join(data_path, "test_label_idx.json"), "r") as infile:
        y_test = json.load(infile)
    train_model(x_train, x_test, y_train, y_test, model_type, 1, model_path=model_path)

In [65]:
# Train and save an SVM and a KNN model for each feature set, HOG + OpenFace
# first and MTCNN + FaceNet second.
for feature_dir, svm_model_path, knn_model_path in (
        ('sample_feature/hog_openface',
         svm_model_hog_openface_path, knn_model_hog_openface_path),
        ('sample_feature/mtcnn_facenet',
         svm_model_mtcnn_facenet_path, knn_model_mtcnn_facenet_path)):
    data_path = os.path.join(create_directory.marvel_data_dir, feature_dir)
    save_process(data_path, 'svm', svm_model_path)
    save_process(data_path, 'knn', knn_model_path)