In [1]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score
import json
import os
import ast
import pandas as pd
import numpy as np
import create_directory
import pathlib

# Load dữ liệu

In [2]:
# label2idx: JSON mapping loaded from the recognition-model directory; its 'Unknown'
# entry is the label used elsewhere in this notebook for rejected faces.
label2idx_path = os.path.join(create_directory.recognition_model_dir, 'label2idx.json')
with open(label2idx_path) as label_file:
    label2idx = json.load(label_file)

# Load the test data: features (x_test) and their label indices (y_test).
test_data_path = os.path.join(create_directory.marvel_data_dir, "distance_face_recognition/fixed_threshold")
x_test_path = os.path.join(test_data_path, "test_feature_big.json")
y_test_path = os.path.join(test_data_path, "test_label_idx_big.json")
with open(x_test_path, "r") as feature_file:
    x_test = json.load(feature_file)
with open(y_test_path, "r") as label_idx_file:
    y_test = json.load(label_idx_file)

# Face Recognition

In [3]:
# Hàm Load dữ liệu train
def create_train_feature(data_path, register_order_path, num_register):
    """Load the training feature table and a truncated registration order.

    Parameters
    ----------
    data_path : str
        Path to a CSV file that has a ``feature`` column whose cells are
        string-encoded Python literals (parsed with ``ast.literal_eval``).
    register_order_path : str
        Path to a JSON file holding the registration order (a sliceable
        sequence).
    num_register : int
        Number of leading entries of the registration order to keep.

    Returns
    -------
    tuple
        ``(data, order)``: the DataFrame with ``feature`` parsed into Python
        objects, and the first ``num_register`` entries of the order.
    """
    frame = pd.read_csv(data_path)
    # The CSV stores each feature as text; turn every cell back into a Python object.
    frame['feature'] = list(map(ast.literal_eval, frame["feature"]))

    with open(register_order_path, "r") as order_file:
        full_order = json.load(order_file)

    return frame, full_order[:num_register]

In [4]:
# Hàm cập nhật threshold
def update_threshold(data, order):
    """Register faces one at a time and derive a per-face similarity threshold.

    Faces are registered in the sequence given by ``order``. When a face is
    registered, its cosine similarity to every already-registered face that has
    a *different* label is computed. Each such similarity

    * raises the older face's threshold if it exceeds the current value, and
    * is a candidate for the new face's threshold, which ends up as the maximum
      of those similarities and the face's own starting ``threshold`` in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``idx``, ``label``, ``feature`` and
        ``threshold``. When several rows share an ``idx`` the first one is used.
    order : iterable
        ``idx`` values in registration order.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature``, ``label``, ``threshold``, ``idx``; one row per
        entry of ``order`` in registration order, indexed ``0..n-1``.

    Raises
    ------
    IndexError
        If an entry of ``order`` has no matching ``idx`` in ``data``.
    """
    order = list(order)

    # Look up each idx once instead of scanning the whole frame per registration;
    # setdefault keeps the first row for a given idx.
    first_by_idx = {}
    for record in data[data['idx'].isin(order)].to_dict(orient="records"):
        first_by_idx.setdefault(record['idx'], record)

    # Plain lists are filled in the loop and the DataFrame is built once at the
    # end: growing a frame with pd.concat per row copies it every time and
    # leaves every row with the same index label.
    features, labels, thresholds, idxs = [], [], [], []
    for register_idx in order:
        if register_idx not in first_by_idx:
            raise IndexError("idx {!r} from order not found in data".format(register_idx))
        record = first_by_idx[register_idx]
        new_threshold = record['threshold']

        if features:
            # One batched call: similarity of the new face to every registered face.
            similarities = cosine_similarity(
                np.array(features), np.array(record['feature']).reshape(1, -1)
            )[:, 0]
            for position, similarity in enumerate(similarities):
                # Only faces of other identities influence the thresholds.
                if labels[position] == record['label']:
                    continue
                thresholds[position] = max(thresholds[position], similarity)
                new_threshold = max(new_threshold, similarity)

        features.append(record['feature'])
        labels.append(record['label'])
        thresholds.append(new_threshold)
        idxs.append(record['idx'])

    threshold_data = pd.DataFrame(
        {'feature': features, 'label': labels, 'threshold': thresholds, 'idx': idxs}
    )
    if threshold_data.empty:
        # Keep the typed empty frame callers previously received for an empty order.
        return threshold_data.astype({"feature": object, "label": int, 'threshold': float, 'idx': int})
    return threshold_data.astype({'threshold': float})

In [5]:
# Hàm dự đoán
def predict(feat, threshold_data):
    """Predict the label of one feature vector against the registered faces.

    The registered face with the highest cosine similarity to ``feat`` is the
    candidate (the first one wins on ties). Its label is returned only when the
    similarity is strictly greater than that face's own ``threshold``.

    Parameters
    ----------
    feat : sequence of float
        Feature vector to classify.
    threshold_data : pandas.DataFrame
        Registered faces with the columns ``feature``, ``threshold`` and
        ``label``.

    Returns
    -------
    The ``label`` of the best-matching registered face, or
    ``label2idx['Unknown']`` (module-level mapping) when the best similarity
    does not exceed that face's threshold or when nothing is registered.
    """
    # With nothing registered there is no candidate; previously this crashed on max([]).
    if len(threshold_data) == 0:
        return label2idx['Unknown']

    # One batched call instead of one cosine_similarity call per registered face.
    gallery = np.array(threshold_data['feature'].tolist())
    query = np.array(feat).reshape(1, -1)
    similarities = cosine_similarity(gallery, query)[:, 0]

    best_position = int(np.argmax(similarities))  # first maximum, like list.index(max(...))
    best_match = threshold_data.iloc[best_position]
    if similarities[best_position] > best_match['threshold']:
        return best_match['label']
    return label2idx['Unknown']

In [6]:
def predict_test(x_test, data_path, order_path, num_register, save_threshold=False, save_path=""):
    """Build the threshold table for one registration run and predict every test feature.

    Parameters
    ----------
    x_test : iterable
        Feature vectors to classify; each is passed to ``predict``.
    data_path : str
        Training-feature CSV path, forwarded to ``create_train_feature``.
    order_path : str
        Registration-order JSON path, forwarded to ``create_train_feature``.
    num_register : int
        How many entries of the registration order to register.
    save_threshold : bool or str, optional
        Whether to write the threshold table to ``save_path``. The old default
        was the string ``"False"``, which is truthy and therefore always tried
        to save. For backward compatibility, strings are still accepted:
        ``""``, ``"false"``, ``"0"``, ``"no"`` and ``"none"`` (any case) mean
        "do not save"; any other string means "save".
    save_path : str, optional
        Destination CSV for the threshold table; required when saving.

    Returns
    -------
    list
        One prediction per element of ``x_test``, in order.

    Raises
    ------
    ValueError
        If saving is requested but ``save_path`` is empty.
    """
    if isinstance(save_threshold, str):
        save_threshold = save_threshold.strip().lower() not in ("", "false", "0", "no", "none")
    # Fail before the expensive threshold computation rather than inside to_csv("").
    if save_threshold and not save_path:
        raise ValueError("save_path must be provided when save_threshold is enabled")

    data, order = create_train_feature(data_path, order_path, num_register)
    threshold_data = update_threshold(data, order)
    if save_threshold:
        threshold_data.to_csv(save_path, index=False)
    return [predict(feat, threshold_data) for feat in x_test]

In [7]:
# Input and output locations for the adaptive-threshold experiment.
train_data_path = os.path.join(create_directory.marvel_data_dir, "distance_face_recognition/adaptive_threshold")

# Per-run threshold tables are written here; exist_ok keeps the cell re-runnable.
update_threshold_path = os.path.join(train_data_path, 'threshold', 'cosine')
os.makedirs(update_threshold_path, exist_ok=True)

data_path = os.path.join(train_data_path, "feature_cosine.csv")
order_path = os.path.join(train_data_path, "order")

# os.listdir returns entries in arbitrary order and may include stray files
# (e.g. .DS_Store on macOS) that json.load cannot parse. Keep only the JSON order
# files and sort them so the runs, and the rows of the result CSV, are reproducible.
order_path_list = [
    os.path.join(order_path, name)
    for name in sorted(os.listdir(order_path))
    if name.endswith(".json")
]

# Numbers of registered faces to evaluate for every registration order.
num_register_list = [30, 60, 120, 300, 600, 900]

In [8]:
# Run register -> predict for every (registration order, number of registered faces) pair.
# result maps order-file path -> {num_register -> list of predictions for x_test}.
result = {}
for path in order_path_list:
    # Same dict object is stored in result and filled below, so partial progress is visible.
    result[path] = predictions_by_count = {}
    print(path)
    order_name = pathlib.Path(path).stem
    for num_register in num_register_list:
        print("    -Num register: {}".format(num_register))
        # One threshold CSV per run, named after the order file and the register count.
        threshold_csv_name = order_name + '_num_register_' + str(num_register) + '.csv'
        save_threshold_path = os.path.join(update_threshold_path, threshold_csv_name)
        pred = predict_test(
            x_test,
            data_path,
            path,
            num_register,
            save_threshold=True,
            save_path=save_threshold_path,
        )
        predictions_by_count[num_register] = pred

/Users/trananhvu/Documents/Data-Face-Recognition/Marvel/distance_face_recognition/adaptive_threshold/order/register_order_1.json
    -Num register: 30
    -Num register: 60
    -Num register: 120
    -Num register: 300
    -Num register: 600
    -Num register: 900
/Users/trananhvu/Documents/Data-Face-Recognition/Marvel/distance_face_recognition/adaptive_threshold/order/register_order_0.json
    -Num register: 30
    -Num register: 60
    -Num register: 120
    -Num register: 300
    -Num register: 600
    -Num register: 900
/Users/trananhvu/Documents/Data-Face-Recognition/Marvel/distance_face_recognition/adaptive_threshold/order/register_order_3.json
    -Num register: 30
    -Num register: 60
    -Num register: 120
    -Num register: 300
    -Num register: 600
    -Num register: 900
/Users/trananhvu/Documents/Data-Face-Recognition/Marvel/distance_face_recognition/adaptive_threshold/order/register_order_2.json
    -Num register: 30
    -Num register: 60
    -Num register: 120
    -Num 

In [9]:
# Flatten the nested `result` dict into one row per (order file, num_register) run.
# Comprehensions are used so that no loop variable leaks into the notebook namespace:
# a plain for-loop over `order_path` / `num_register` here would overwrite the
# order-directory path and the register count defined by earlier cells.
flat_runs = [
    (run_order_path, register_count, run_pred)
    for run_order_path, preds_by_count in result.items()
    for register_count, run_pred in preds_by_count.items()
]
result_df = {
    "order_path": [run[0] for run in flat_runs],
    "num_register": [run[1] for run in flat_runs],
    "result": [run[2] for run in flat_runs],
}
df = pd.DataFrame(result_df)

# Kết quả

In [10]:
def metric(true, pred):
    """Count accept/reject outcomes of the recognition system.

    A prediction equal to ``label2idx["Unknown"]`` (module-level mapping) is a
    rejection; any other prediction is an acceptance.

    Parameters
    ----------
    true : iterable
        Ground-truth labels.
    pred : iterable
        Predicted labels, paired element-wise with ``true``.

    Returns
    -------
    tuple
        ``(fa, wa, fr, accept, reject)`` where

        * ``fa`` (false accept): accepted although the true label is Unknown,
        * ``wa`` (wrong answer): accepted a known face under the wrong label,
        * ``fr`` (false reject): rejected although the true label is not Unknown,
        * ``accept`` / ``reject``: total accepted / rejected predictions.
    """
    def is_unknown(label):
        return label == label2idx["Unknown"]

    false_accept = wrong_answer = false_reject = 0
    accepted = rejected = 0

    for actual, predicted in zip(true, pred):
        if is_unknown(predicted):
            # The system says the face is not in the database.
            rejected += 1
            if not is_unknown(actual):
                false_reject += 1
            continue

        # The system recognises the face as someone in the database.
        accepted += 1
        if is_unknown(actual):
            # An Unknown face was recognised as a face in the database.
            false_accept += 1
        elif actual != predicted:
            # A face in the database was recognised as the wrong person.
            wrong_answer += 1

    # The goal is to keep fa and wa low.
    return (false_accept, wrong_answer, false_reject, accepted, rejected)

In [11]:
# Directory that receives the summary CSV; exist_ok keeps the cell re-runnable.
save_path = os.path.join(create_directory.result_dir, 'test_threshold/adaptive_threshold')
os.makedirs(save_path, exist_ok=True)

# Predictions are read from the nested `result` dict (order path -> num_register -> predictions)
# rather than from `df`: this cell rebinds `df` to the summary table below, so reading
# `df` here would fail with a missing 'result' column on a second run. Distinct loop
# names also avoid overwriting `order_path` / `num_register` from earlier cells.
result_dict = {'order': [], 'num_register': [], 'accept': [], 'fa': [], 'wa': [],
               'reject': [], 'fr': [], 'accuracy': []}
for run_order_path, preds_by_count in result.items():
    for register_count, run_pred in preds_by_count.items():
        fa, wa, fr, accept, reject = metric(y_test, run_pred)
        acc = accuracy_score(y_test, run_pred)
        # Runs are identified by the order file's name without directory or extension.
        result_dict['order'].append(pathlib.Path(run_order_path).stem)
        result_dict['num_register'].append(register_count)
        result_dict['accept'].append(accept)
        result_dict['fa'].append(fa)
        result_dict['wa'].append(wa)
        result_dict['reject'].append(reject)
        result_dict['fr'].append(fr)
        result_dict['accuracy'].append(acc)

# One row per run; the column order matches the keys of result_dict.
df = pd.DataFrame(result_dict)
df.to_csv(os.path.join(save_path, 'adaptive-result-cosine.csv'), index=False)