In [3]:
import os
import numpy as np
import random
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score
from sklearn import linear_model
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from sklearn import preprocessing
from sklearn import metrics
import math
from sklearn import svm

In [4]:
def get_face_landmarks(csv_path):
    """Load the flattened face-landmark coordinates stored in a CSV file.

    The columns at positions 1-3 are read and must be named ``x``, ``y`` and
    ``z``. The face count is the number of rows divided by 468 using true
    division, so it is returned (and printed) as a float.

    Args:
        csv_path: path of the landmark CSV file.

    Returns:
        ``(face_x, face_y, face_z, face_count)``: three lists with the column
        values and the float face count.
    """
    landmarks = pd.read_csv(csv_path, usecols=[1, 2, 3])
    face_x, face_y, face_z = (landmarks[axis].values.tolist() for axis in ('x', 'y', 'z'))

    x_shape = np.array(face_x).shape
    face_count = x_shape[0] / 468

    print("There are {num} faces recorded from this sample.".format(num = face_count))
    print(x_shape)

    return face_x, face_y, face_z, face_count

def _mean_per_landmark(values, face_num, n_landmarks):
    """Sum every ``n_landmarks``-strided slice of ``values`` and divide it by ``face_num``."""
    return [np.sum(values[i::n_landmarks]) / face_num for i in range(n_landmarks)]


def get_avg_face_landmarks(x, y, z, face_num, n_landmarks=468):
    """Average each landmark coordinate over all recorded faces.

    The inputs are flattened face after face, so landmark ``i`` of every face
    sits at positions ``i, i + n_landmarks, i + 2 * n_landmarks, ...``.

    Each coordinate stream is sliced over its own full length. The previous
    version bounded the ``y`` and ``z`` slices by ``len(x)``, which silently
    dropped data whenever the streams differed in length.

    Args:
        x, y, z: flattened coordinate sequences (lists or 1-D arrays).
        face_num: divisor used for the average (number of recorded faces).
        n_landmarks: landmarks per face; defaults to 468 as before.

    Returns:
        ``(avg_x, avg_y, avg_z)``: three Python lists of length ``n_landmarks``.
        Lists are returned on purpose, so callers can join them with ``+``.
    """
    avg_x = _mean_per_landmark(x, face_num, n_landmarks)
    avg_y = _mean_per_landmark(y, face_num, n_landmarks)
    avg_z = _mean_per_landmark(z, face_num, n_landmarks)

    print(np.array(avg_x).shape)
    return avg_x, avg_y, avg_z

def get_iris_landmarks(csv_path):
    """Load the per-frame iris centre coordinates stored in a CSV file.

    The columns at positions 1-4 are read and must be named ``left_x``,
    ``left_y``, ``right_x`` and ``right_y``.

    Args:
        csv_path: path of the iris-location CSV file.

    Returns:
        ``(left_x, left_y, right_x, right_y, iris_count)``: four lists with the
        column values and the number of rows read.
    """
    iris_df = pd.read_csv(csv_path, usecols=[1, 2, 3, 4])
    iris_left_x, iris_left_y, iris_right_x, iris_right_y = (
        iris_df[column].values.tolist()
        for column in ('left_x', 'left_y', 'right_x', 'right_y')
    )

    iris_count = np.array(iris_df['left_x']).shape[0]

    print("There are {num} irises recorded from this sample.".format(num = iris_count))
    print(np.array(iris_left_x).shape)

    return iris_left_x, iris_left_y, iris_right_x, iris_right_y, iris_count

def get_avg_iris_landmarks(leftx, lefty, rightx, righty, iris_num):
    """Average the four iris coordinate streams over ``iris_num`` samples.

    Returns:
        ``(avg_leftx, avg_lefty, avg_rightx, avg_righty)``: each value is the
        sum of the corresponding stream divided by ``iris_num``.
    """
    streams = (leftx, lefty, rightx, righty)
    return tuple(np.sum(stream) / iris_num for stream in streams)

def get_physical_index(csv_path):
    """Return the ``TSfresh`` column (the file's second column) as a plain list."""
    return pd.read_csv(csv_path, usecols=[1])['TSfresh'].values.tolist()

def get_voice_feature(csv_path):
    """Return the ``MFCC`` column (the file's second column) as a plain list."""
    return pd.read_csv(csv_path, usecols=[1])['MFCC'].values.tolist()

def get_text_feature(csv_path):
    """Return the ``roberta`` column (the file's third column) as a plain list."""
    return pd.read_csv(csv_path, usecols=[2])['roberta'].values.tolist()


In [47]:
# Walk <parent of cwd>/Dataset/<class>/<sample>/ and collect one feature vector
# per modality (face, iris, physical index, MFCC, text) plus a class label.
if __name__ == '__main__':
    # One entry per sample for each modality; `iris` is filled flat (four
    # values per sample) and reshaped to (-1, 4) after the loop.
    face = []
    iris = []
    physical_index = []
    mfcc_ft = []
    text = []

    labels = []
    # Label mapping.
    # NOTE(review): this dict is never referenced in this cell (and its keys
    # are bytes); the labels are hard-coded in the face branch below.
    label = {b'healthy': 0, b'unhealthy': 1}

    # The dataset is expected next to the notebook's working directory.
    root = os.path.dirname(os.getcwd())
    print("root direction", root)

    dataset_class = os.listdir(root + '/Dataset')
    print(dataset_class)

    # Iterate over every class folder and every sample folder of the dataset.
    for sample_class in dataset_class:
        sample_class_path = root + '/Dataset' + '/' + sample_class
        # print(sample_class_path)
        sample_file = os.listdir(sample_class_path)
        # print(sample_file)

        for detail in sample_file:
            detail_path = sample_class_path + '/' + detail
            sample_detail = os.listdir(detail_path)
            print(detail_path)

            # Files are matched by substring, so each modality is picked up
            # from whichever file name contains its keyword.
            for csv_file in sample_detail:
                if 'face_landmarks' in csv_file:
                    # Open the CSV file and read the face landmarks.
                    print(detail_path + '/' + csv_file)
                    face_x, face_y, face_z, face_num = get_face_landmarks(detail_path + '/' + csv_file)
                    avg_x, avg_y, avg_z = get_avg_face_landmarks(face_x, face_y, face_z, face_num)
                    # List concatenation: the averaged x, y and z lists are joined end to end.
                    temp_face = avg_x + avg_y + avg_z
                    face.append(temp_face)
                    
                    # Add the label.
                    # NOTE(review): a label is appended only here, and only for
                    # the two known class names. A sample without a face file,
                    # or a folder with another name, leaves `labels` out of
                    # step with the feature lists.
                    if sample_class == 'healthy':
                        labels.append(0)
                    elif sample_class == 'unhealthy':
                        labels.append(1)

                if 'iris_location' in csv_file:
                    print(detail_path + '/' + csv_file)
                    left_x, left_y, right_x, right_y, iris_num = get_iris_landmarks(detail_path + '/' + csv_file)
                    
                    avg_leftx, avg_lefty, avg_rightx, avg_righty = get_avg_iris_landmarks(left_x, left_y, right_x, right_y, iris_num)
                    # temp_iris = avg_leftx + avg_lefty + avg_rightx + avg_righty
                    # print(np.array(temp_iris).shape)
                    # The four means are appended individually (flat list).
                    iris.append(avg_leftx)
                    iris.append(avg_lefty)
                    iris.append(avg_rightx)
                    iris.append(avg_righty)
                    print(np.array(iris).shape)

                if 'Physical_feature' in csv_file:
                    print(detail_path + '/' + csv_file)
                    temp_index = get_physical_index(detail_path + '/' + csv_file)
                    physical_index.append(temp_index)
                    print(np.array(physical_index).shape)

                if 'Voice_feature' in csv_file:
                    print(detail_path + '/' + csv_file)
                    temp_mfcc = get_voice_feature(detail_path + '/' + csv_file)
                    mfcc_ft.append(temp_mfcc)
                    print(np.array(mfcc_ft).shape)

                if 'text_feature' in csv_file:
                    print(detail_path + '/' + csv_file)
                    temp_text = get_text_feature(detail_path + '/' + csv_file)
                    text.append(temp_text)
                    print(np.array(text).shape)


                
    # Turn the flat iris list into one row of four values per sample.
    iris = np.array(iris).reshape(-1, 4)
    # Summary of the collected shapes.
    print("Face set:{face}".format(face = np.array(face).shape))
    print("Iris set:{iris}".format(iris = np.array(iris).shape))
    print("Physical index set:{index}".format(index = np.array(physical_index).shape))
    print("MFCC set:{mfcc}".format(mfcc = np.array(mfcc_ft).shape))
    print("Text set:{text}".format(text = np.array(text).shape))
    print("Label set:{label}".format(label = np.array(labels).shape))

root direction f:\Project\AMH
['healthy', 'unhealthy']
f:\Project\AMH/Dataset/healthy/00002-0101
f:\Project\AMH/Dataset/healthy/00002-0101/face_landmarks.csv
There are 209.0 faces recorded from this sample.
(97812,)
(468,)
f:\Project\AMH/Dataset/healthy/00002-0101/iris_location.csv
There are 209 irises recorded from this sample.
(209,)
(4,)
f:\Project\AMH/Dataset/healthy/00002-0101/Physical_feature.csv
(1, 4722)
f:\Project\AMH/Dataset/healthy/00002-0101/text_feature.csv
(1, 1024)
f:\Project\AMH/Dataset/healthy/00002-0101/Voice_feature.csv
(1, 9828)
f:\Project\AMH/Dataset/healthy/00002-0102
f:\Project\AMH/Dataset/healthy/00002-0102/face_landmarks.csv
There are 224.0 faces recorded from this sample.
(104832,)
(468,)
f:\Project\AMH/Dataset/healthy/00002-0102/iris_location.csv
There are 224 irises recorded from this sample.
(224,)
(8,)
f:\Project\AMH/Dataset/healthy/00002-0102/Physical_feature.csv
(2, 4722)
f:\Project\AMH/Dataset/healthy/00002-0102/text_feature.csv
(2, 1024)
f:\Project\AMH

In [60]:
# Min-max scale every modality independently. The same scaler object is re-fit
# for each modality, so after this cell it holds the statistics of `text` only.
min_max_scaler = preprocessing.MinMaxScaler()
face_c, iris_c, physical_index_c, mfcc_ft_c, text_c = [
    min_max_scaler.fit_transform(modality)
    for modality in (face, iris, physical_index, mfcc_ft, text)
]

# Report the shapes of the raw (unscaled) inputs and of the labels.
for _template, _field, _values in (
    ("Face set:{face}", "face", face),
    ("Iris set:{iris}", "iris", iris),
    ("Physical index set:{index}", "index", physical_index),
    ("MFCC set:{mfcc}", "mfcc", mfcc_ft),
    ("Text set:{text}", "text", text),
    ("Label set:{label}", "label", labels),
):
    print(_template.format(**{_field: np.array(_values).shape}))

Face set:(417, 1404)
Iris set:(417, 4)
Physical index set:(417, 4722)
MFCC set:(417, 9828)
Text set:(417, 1024)
Label set:(417,)


In [7]:
def pad_input(arr, std_len):
    """Zero-pad ``arr`` so that its length becomes a multiple of ``std_len``.

    Args:
        arr: 1-D sequence of numbers.
        std_len: target chunk length.

    Returns:
        ``(res, gap)``: ``res`` is ``arr`` followed by zeros, with length
        ``gap * std_len``. ``gap`` is ``ceil(len(arr) / std_len)`` when ``arr``
        is longer than ``std_len``, otherwise 1.
    """
    arr_len = len(arr)
    # Inputs that already fit into one chunk are padded up to exactly one chunk.
    gap = math.ceil(arr_len / std_len) if arr_len > std_len else 1
    padding = np.zeros(gap * std_len - arr_len)
    return np.append(arr, padding), gap

In [58]:
from dtaidistance import dtw

# Fuse the modalities into one row per sample. Each non-text modality is scaled
# by (1 + w), where w is that modality's share of the summed DTW distances
# between the sample's text vector and the four other modality vectors.
#
# The scaled matrices (face_c, ...) are NOT modified in place. The previous
# version overwrote their rows, so re-running the cell (or running another
# fusion cell afterwards) compounded the weights.
x = []
for i in range(len(text_c)):  # was a hard-coded 417
    dis_face = dtw.distance_fast(text_c[i], face_c[i])
    dis_iris = dtw.distance_fast(text_c[i], iris_c[i])
    dis_index = dtw.distance_fast(text_c[i], physical_index_c[i])
    dis_mfcc = dtw.distance_fast(text_c[i], mfcc_ft_c[i])

    dis = [dis_face, dis_iris, dis_index, dis_mfcc]
    dis_sum = np.sum(dis)
    # Normalised distances: the four weights sum to 1.
    weight = np.array(dis) / dis_sum
    print(weight)
    print("face:{dis_face},iris:{dis_iris},index:{dis_index},mfcc:{dis_mfcc}".format(dis_face=dis_face,dis_iris=dis_iris,dis_index=dis_index,dis_mfcc=dis_mfcc))

    x.append(np.concatenate((
        face_c[i] * (1 + weight[0]),
        iris_c[i] * (1 + weight[1]),
        physical_index_c[i] * (1 + weight[2]),
        mfcc_ft_c[i] * (1 + weight[3]),
        text_c[i],
    )))


In [61]:
from dtaidistance import dtw

# Fuse the modalities into one row per sample. Each non-text modality is scaled
# by w, its share of the summed DTW distances between the sample's text vector
# and the four other modality vectors.
#
# The scaled matrices (face_c, ...) are NOT modified in place. The previous
# version overwrote their rows, so re-running the cell (or running another
# fusion cell beforehand) compounded the weights.
x = []
for i in range(len(text_c)):  # was a hard-coded 417
    dis_face = dtw.distance_fast(text_c[i], face_c[i])
    dis_iris = dtw.distance_fast(text_c[i], iris_c[i])
    dis_index = dtw.distance_fast(text_c[i], physical_index_c[i])
    dis_mfcc = dtw.distance_fast(text_c[i], mfcc_ft_c[i])

    dis = [dis_face, dis_iris, dis_index, dis_mfcc]
    dis_sum = np.sum(dis)
    # Normalised distances: the four weights sum to 1.
    weight = np.array(dis) / dis_sum
    print(weight)
    print("face:{dis_face},iris:{dis_iris},index:{dis_index},mfcc:{dis_mfcc}".format(dis_face=dis_face,dis_iris=dis_iris,dis_index=dis_index,dis_mfcc=dis_mfcc))

    x.append(np.concatenate((
        face_c[i] * weight[0],
        iris_c[i] * weight[1],
        physical_index_c[i] * weight[2],
        mfcc_ft_c[i] * weight[3],
        text_c[i],
    )))


[0.12414827 0.14359001 0.32569423 0.40656749]
face:4.6344390142373015,iris:5.360196656494306,index:12.158123520530731,mfcc:15.17711178884801
[0.09826033 0.12826495 0.32919415 0.44428057]
face:3.717251225076382,iris:4.8523451792312855,index:12.45362560121485,mfcc:16.80741866065345
[0.11612229 0.14286359 0.24513392 0.4958802 ]
face:5.247004195204953,iris:6.455314209575252,index:11.076415588095138,mfcc:22.406426762875448
[0.10312087 0.21316304 0.32416206 0.35955403]
face:3.8185743975898183,iris:7.893445064127478,index:12.003748056601248,mfcc:13.314315583178026
[0.10301426 0.17094191 0.29519292 0.43085091]
face:4.119985612607899,iris:6.836706049016587,index:11.80604135942884,mfcc:17.231590729383523
[0.08490498 0.1334068  0.29177643 0.48991179]
face:3.700412003961111,iris:5.814265431874208,index:12.716485466174133,mfcc:21.35181439135505
[0.09228239 0.13470321 0.28249803 0.49051638]
face:3.9296163890862887,iris:5.7360017444665905,index:12.029477490351718,mfcc:20.88742275315101
[0.10738564 0.

In [65]:
# print(np.array(x).shape)
lr_acc = []
rbf_acc = []
poly_acc = []
linear_acc = []
rf_acc=[] 
gbdt_acc = []


min_max_scaler = preprocessing.MinMaxScaler()
face_c = min_max_scaler.fit_transform(face)
iris_c = min_max_scaler.fit_transform(iris)
physical_index_c = min_max_scaler.fit_transform(physical_index)
mfcc_ft_c = min_max_scaler.fit_transform(mfcc_ft)
text_c = min_max_scaler.fit_transform(text)

x = []
for i in range(0, 417):
    dis = []
    weight= []
    dis_face = dtw.distance_fast(text_c[i], face_c[i])
    dis_iris = dtw.distance_fast(text_c[i], iris_c[i])
    dis_index = dtw.distance_fast(text_c[i], physical_index_c[i])
    dis_mfcc = dtw.distance_fast(text_c[i], mfcc_ft_c[i])

    dis.append(dis_face)
    dis.append(dis_iris)
    dis.append(dis_index)
    dis.append(dis_mfcc)
    dis_sum = np.sum(dis)
    weight = dis / dis_sum
    print(weight)
    print("face:{dis_face},iris:{dis_iris},index:{dis_index},mfcc:{dis_mfcc}".format(dis_face=dis_face,dis_iris=dis_iris,dis_index=dis_index,dis_mfcc=dis_mfcc))

    face_c[i] = face_c[i] * (weight[0])
    iris_c[i] = iris_c[i] * (weight[1])
    physical_index_c[i] = physical_index_c[i] * ( weight[2])
    mfcc_ft_c[i] = mfcc_ft_c[i] * (weight[3])
    x.append(np.concatenate((face_c[i], iris_c[i], physical_index_c[i], mfcc_ft_c[i], text_c[i])))

x = np.array(x)
y = np.array(labels)
# print(np.array(y))
skf = StratifiedKFold(n_splits=10)
for train_index, test_index in skf.split(x, y):
    
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # LR
    logreg = linear_model.LogisticRegression(max_iter=50000, random_state=0, solver='liblinear', class_weight='balanced' )
    # logreg.fit(x_train, y_train)
    logreg.fit(x_train, y_train)
    score_lr = logreg.score(x_test, y_test)
    y_predicted = logreg.predict(x_test)
    print(y_predicted)
    print("The score of LR is : %f" % score_lr)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    lr_acc.append(score_lr)
    
    # kernel = 'rbf'
    clf_rbf = svm.SVC(kernel='rbf', class_weight='balanced')
    # clf_rbf.fit(x_train, y_train)
    clf_rbf.fit(x_train, y_train)
    score_rbf = clf_rbf.score(x_test, y_test)
    y_predicted = clf_rbf.predict(x_test)
    print(y_predicted)
    print("The score of SVM rbf is : %f" % score_rbf)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    rbf_acc.append(score_rbf)

    # kernel = 'linear'
    clf_linear = svm.SVC(kernel='linear', class_weight='balanced')
    # clf_linear.fit(x_train, y_train)
    clf_linear.fit(x_train, y_train)
    score_linear = clf_linear.score(x_test, y_test)
    y_predicted = clf_linear.predict(x_test)
    print(y_predicted)
    print("The score of SVM linear is : %f" % score_linear)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    linear_acc.append(score_linear)

    # kernel = 'poly'
    clf_poly = svm.SVC(kernel='poly', class_weight='balanced')
    # clf_poly.fit(x_train, y_train)
    clf_poly.fit(x_train, y_train)
    score_poly = clf_poly.score(x_test, y_test)
    y_predicted = clf_poly.predict(x_test)
    print(y_predicted)
    print("The score of SVM poly is : %f" % score_poly)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    poly_acc.append(score_poly)

    # random forest
    RF_clf = RandomForestClassifier(random_state=0, class_weight='balanced')
    RF_clf.fit(x_train, y_train)
    # RF_clf.fit(x_train, y_train)
    score_RF = RF_clf.score(x_test, y_test)
    y_predicted = RF_clf.predict(x_test)
    print(y_predicted)
    print("The score of random forest is : %f" % score_RF)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    rf_acc.append(score_RF)

    # GBDT
    gbdt_clf = GradientBoostingClassifier(random_state=0)
    gbdt_clf.fit(x_train, y_train)
    # gbdt_clf.fit(x_train, y_train)
    score_gbdt = gbdt_clf.score(x_test, y_test)
    y_predicted = gbdt_clf.predict(x_test)
    print(y_predicted)
    print("The score of GBDT is : %f" % score_gbdt)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    gbdt_acc.append(score_gbdt)

print("lr Max score:", np.max(lr_acc))
print("lr Avg score:", np.mean(lr_acc))

print("rbf Max score:", np.max(rbf_acc))
print("rbf Avg score:", np.mean(rbf_acc))

print("linear Max score:", np.max(linear_acc))
print("linear Avg score:", np.mean(linear_acc))

print("poly Max score:", np.max(poly_acc))
print("poly Avg score:", np.mean(poly_acc))

print("rf Max score:", np.max(rf_acc))
print("rf Avg score:", np.mean(rf_acc))

print("gbdt Max score:", np.max(gbdt_acc))
print("gbdt Avg score:", np.mean(gbdt_acc))


[0.12414827 0.14359001 0.32569423 0.40656749]
face:4.6344390142373015,iris:5.360196656494306,index:12.158123520530731,mfcc:15.17711178884801
[0.09826033 0.12826495 0.32919415 0.44428057]
face:3.717251225076382,iris:4.8523451792312855,index:12.45362560121485,mfcc:16.80741866065345
[0.11612229 0.14286359 0.24513392 0.4958802 ]
face:5.247004195204953,iris:6.455314209575252,index:11.076415588095138,mfcc:22.406426762875448
[0.10312087 0.21316304 0.32416206 0.35955403]
face:3.8185743975898183,iris:7.893445064127478,index:12.003748056601248,mfcc:13.314315583178026
[0.10301426 0.17094191 0.29519292 0.43085091]
face:4.119985612607899,iris:6.836706049016587,index:11.80604135942884,mfcc:17.231590729383523
[0.08490498 0.1334068  0.29177643 0.48991179]
face:3.700412003961111,iris:5.814265431874208,index:12.716485466174133,mfcc:21.35181439135505
[0.09228239 0.13470321 0.28249803 0.49051638]
face:3.9296163890862887,iris:5.7360017444665905,index:12.029477490351718,mfcc:20.88742275315101
[0.10738564 0.

In [67]:
# print(np.array(x).shape)
lr_acc = []
rbf_acc = []
poly_acc = []
linear_acc = []
rf_acc=[] 
gbdt_acc = []
x = np.array(x)
y = np.array(labels)
# print(np.array(y))

min_max_scaler = preprocessing.MinMaxScaler()
face_c = min_max_scaler.fit_transform(face)
iris_c = min_max_scaler.fit_transform(iris)
physical_index_c = min_max_scaler.fit_transform(physical_index)
mfcc_ft_c = min_max_scaler.fit_transform(mfcc_ft)
text_c = min_max_scaler.fit_transform(text)

x = []
for i in range(0, 417):
    dis = []
    weight= []
    dis_face = dtw.distance_fast(text_c[i], face_c[i])
    dis_iris = dtw.distance_fast(text_c[i], iris_c[i])
    dis_index = dtw.distance_fast(text_c[i], physical_index_c[i])
    dis_mfcc = dtw.distance_fast(text_c[i], mfcc_ft_c[i])

    dis.append(dis_face)
    dis.append(dis_iris)
    dis.append(dis_index)
    dis.append(dis_mfcc)
    dis_sum = np.sum(dis)
    weight = dis / dis_sum
    print(weight)
    print("face:{dis_face},iris:{dis_iris},index:{dis_index},mfcc:{dis_mfcc}".format(dis_face=dis_face,dis_iris=dis_iris,dis_index=dis_index,dis_mfcc=dis_mfcc))

    face_c[i] = face_c[i] * (1-weight[0])
    iris_c[i] = iris_c[i] * (1-weight[1])
    physical_index_c[i] = physical_index_c[i] * (1- weight[2])
    mfcc_ft_c[i] = mfcc_ft_c[i] * (1-weight[3])
    x.append(np.concatenate((face_c[i], iris_c[i], physical_index_c[i], mfcc_ft_c[i], text_c[i])))
x = np.array(x)
y = np.array(labels)
# print(np.array(y))    
skf = StratifiedKFold(n_splits=10)
for train_index, test_index in skf.split(x, y):
    
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # LR
    logreg = linear_model.LogisticRegression(max_iter=50000, random_state=0, solver='liblinear', class_weight='balanced' )
    # logreg.fit(x_train, y_train)
    logreg.fit(x_train, y_train)
    score_lr = logreg.score(x_test, y_test)
    y_predicted = logreg.predict(x_test)
    print(y_predicted)
    print("The score of LR is : %f" % score_lr)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    lr_acc.append(score_lr)
    
    # kernel = 'rbf'
    clf_rbf = svm.SVC(kernel='rbf', class_weight='balanced')
    # clf_rbf.fit(x_train, y_train)
    clf_rbf.fit(x_train, y_train)
    score_rbf = clf_rbf.score(x_test, y_test)
    y_predicted = clf_rbf.predict(x_test)
    print(y_predicted)
    print("The score of SVM rbf is : %f" % score_rbf)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    rbf_acc.append(score_rbf)

    # kernel = 'linear'
    clf_linear = svm.SVC(kernel='linear', class_weight='balanced')
    # clf_linear.fit(x_train, y_train)
    clf_linear.fit(x_train, y_train)
    score_linear = clf_linear.score(x_test, y_test)
    y_predicted = clf_linear.predict(x_test)
    print(y_predicted)
    print("The score of SVM linear is : %f" % score_linear)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    linear_acc.append(score_linear)

    # kernel = 'poly'
    clf_poly = svm.SVC(kernel='poly', class_weight='balanced')
    # clf_poly.fit(x_train, y_train)
    clf_poly.fit(x_train, y_train)
    score_poly = clf_poly.score(x_test, y_test)
    y_predicted = clf_poly.predict(x_test)
    print(y_predicted)
    print("The score of SVM poly is : %f" % score_poly)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    poly_acc.append(score_poly)

    # random forest
    RF_clf = RandomForestClassifier(random_state=0, class_weight='balanced')
    RF_clf.fit(x_train, y_train)
    # RF_clf.fit(x_train, y_train)
    score_RF = RF_clf.score(x_test, y_test)
    y_predicted = RF_clf.predict(x_test)
    print(y_predicted)
    print("The score of random forest is : %f" % score_RF)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    rf_acc.append(score_RF)

    # GBDT
    gbdt_clf = GradientBoostingClassifier(random_state=0)
    gbdt_clf.fit(x_train, y_train)
    # gbdt_clf.fit(x_train, y_train)
    score_gbdt = gbdt_clf.score(x_test, y_test)
    y_predicted = gbdt_clf.predict(x_test)
    print(y_predicted)
    print("The score of GBDT is : %f" % score_gbdt)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    gbdt_acc.append(score_gbdt)

print("lr Max score:", np.max(lr_acc))
print("lr Avg score:", np.mean(lr_acc))

print("rbf Max score:", np.max(rbf_acc))
print("rbf Avg score:", np.mean(rbf_acc))

print("linear Max score:", np.max(linear_acc))
print("linear Avg score:", np.mean(linear_acc))

print("poly Max score:", np.max(poly_acc))
print("poly Avg score:", np.mean(poly_acc))

print("rf Max score:", np.max(rf_acc))
print("rf Avg score:", np.mean(rf_acc))

print("gbdt Max score:", np.max(gbdt_acc))
print("gbdt Avg score:", np.mean(gbdt_acc))


[0.12414827 0.14359001 0.32569423 0.40656749]
face:4.6344390142373015,iris:5.360196656494306,index:12.158123520530731,mfcc:15.17711178884801
[0.09826033 0.12826495 0.32919415 0.44428057]
face:3.717251225076382,iris:4.8523451792312855,index:12.45362560121485,mfcc:16.80741866065345
[0.11612229 0.14286359 0.24513392 0.4958802 ]
face:5.247004195204953,iris:6.455314209575252,index:11.076415588095138,mfcc:22.406426762875448
[0.10312087 0.21316304 0.32416206 0.35955403]
face:3.8185743975898183,iris:7.893445064127478,index:12.003748056601248,mfcc:13.314315583178026
[0.10301426 0.17094191 0.29519292 0.43085091]
face:4.119985612607899,iris:6.836706049016587,index:11.80604135942884,mfcc:17.231590729383523
[0.08490498 0.1334068  0.29177643 0.48991179]
face:3.700412003961111,iris:5.814265431874208,index:12.716485466174133,mfcc:21.35181439135505
[0.09228239 0.13470321 0.28249803 0.49051638]
face:3.9296163890862887,iris:5.7360017444665905,index:12.029477490351718,mfcc:20.88742275315101
[0.10738564 0.

In [68]:
# print(np.array(x).shape)
lr_acc = []
rbf_acc = []
poly_acc = []
linear_acc = []
rf_acc=[] 
gbdt_acc = []


min_max_scaler = preprocessing.MinMaxScaler()
face_c = min_max_scaler.fit_transform(face)
iris_c = min_max_scaler.fit_transform(iris)
physical_index_c = min_max_scaler.fit_transform(physical_index)
mfcc_ft_c = min_max_scaler.fit_transform(mfcc_ft)
text_c = min_max_scaler.fit_transform(text)

x = []
for i in range(0, 417):
    dis = []
    weight= []
    dis_face = dtw.distance_fast(text_c[i], face_c[i])
    dis_iris = dtw.distance_fast(text_c[i], iris_c[i])
    dis_index = dtw.distance_fast(text_c[i], physical_index_c[i])
    dis_mfcc = dtw.distance_fast(text_c[i], mfcc_ft_c[i])

    dis.append(dis_face)
    dis.append(dis_iris)
    dis.append(dis_index)
    dis.append(dis_mfcc)
    dis_sum = np.sum(dis)
    weight = dis / dis_sum
    print(weight)
    print("face:{dis_face},iris:{dis_iris},index:{dis_index},mfcc:{dis_mfcc}".format(dis_face=dis_face,dis_iris=dis_iris,dis_index=dis_index,dis_mfcc=dis_mfcc))

    face_c[i] = face_c[i] * (1+weight[0])
    iris_c[i] = iris_c[i] * (1+weight[1])
    physical_index_c[i] = physical_index_c[i] * (1+ weight[2])
    mfcc_ft_c[i] = mfcc_ft_c[i] * (1+weight[3])
    x.append(np.concatenate((face_c[i], iris_c[i], physical_index_c[i], mfcc_ft_c[i], text_c[i])))

x = np.array(x)
y = np.array(labels)
# print(np.array(y))
skf = StratifiedKFold(n_splits=10)
for train_index, test_index in skf.split(x, y):
    
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # LR
    logreg = linear_model.LogisticRegression(max_iter=50000, random_state=0, solver='liblinear', class_weight='balanced' )
    # logreg.fit(x_train, y_train)
    logreg.fit(x_train, y_train)
    score_lr = logreg.score(x_test, y_test)
    y_predicted = logreg.predict(x_test)
    print(y_predicted)
    print("The score of LR is : %f" % score_lr)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    lr_acc.append(score_lr)
    
    # kernel = 'rbf'
    clf_rbf = svm.SVC(kernel='rbf', class_weight='balanced')
    # clf_rbf.fit(x_train, y_train)
    clf_rbf.fit(x_train, y_train)
    score_rbf = clf_rbf.score(x_test, y_test)
    y_predicted = clf_rbf.predict(x_test)
    print(y_predicted)
    print("The score of SVM rbf is : %f" % score_rbf)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    rbf_acc.append(score_rbf)

    # kernel = 'linear'
    clf_linear = svm.SVC(kernel='linear', class_weight='balanced')
    # clf_linear.fit(x_train, y_train)
    clf_linear.fit(x_train, y_train)
    score_linear = clf_linear.score(x_test, y_test)
    y_predicted = clf_linear.predict(x_test)
    print(y_predicted)
    print("The score of SVM linear is : %f" % score_linear)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    linear_acc.append(score_linear)

    # kernel = 'poly'
    clf_poly = svm.SVC(kernel='poly', class_weight='balanced')
    # clf_poly.fit(x_train, y_train)
    clf_poly.fit(x_train, y_train)
    score_poly = clf_poly.score(x_test, y_test)
    y_predicted = clf_poly.predict(x_test)
    print(y_predicted)
    print("The score of SVM poly is : %f" % score_poly)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    poly_acc.append(score_poly)

    # random forest
    RF_clf = RandomForestClassifier(random_state=0, class_weight='balanced')
    RF_clf.fit(x_train, y_train)
    # RF_clf.fit(x_train, y_train)
    score_RF = RF_clf.score(x_test, y_test)
    y_predicted = RF_clf.predict(x_test)
    print(y_predicted)
    print("The score of random forest is : %f" % score_RF)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    rf_acc.append(score_RF)

    # GBDT
    gbdt_clf = GradientBoostingClassifier(random_state=0)
    gbdt_clf.fit(x_train, y_train)
    # gbdt_clf.fit(x_train, y_train)
    score_gbdt = gbdt_clf.score(x_test, y_test)
    y_predicted = gbdt_clf.predict(x_test)
    print(y_predicted)
    print("The score of GBDT is : %f" % score_gbdt)
    # print("The weighted f1 score: ", f1_score(y_valid, y_predicted, average='weighted'))
    gbdt_acc.append(score_gbdt)

print("lr Max score:", np.max(lr_acc))
print("lr Avg score:", np.mean(lr_acc))

print("rbf Max score:", np.max(rbf_acc))
print("rbf Avg score:", np.mean(rbf_acc))

print("linear Max score:", np.max(linear_acc))
print("linear Avg score:", np.mean(linear_acc))

print("poly Max score:", np.max(poly_acc))
print("poly Avg score:", np.mean(poly_acc))

print("rf Max score:", np.max(rf_acc))
print("rf Avg score:", np.mean(rf_acc))

print("gbdt Max score:", np.max(gbdt_acc))
print("gbdt Avg score:", np.mean(gbdt_acc))


[0.12414827 0.14359001 0.32569423 0.40656749]
face:4.6344390142373015,iris:5.360196656494306,index:12.158123520530731,mfcc:15.17711178884801
[0.09826033 0.12826495 0.32919415 0.44428057]
face:3.717251225076382,iris:4.8523451792312855,index:12.45362560121485,mfcc:16.80741866065345
[0.11612229 0.14286359 0.24513392 0.4958802 ]
face:5.247004195204953,iris:6.455314209575252,index:11.076415588095138,mfcc:22.406426762875448
[0.10312087 0.21316304 0.32416206 0.35955403]
face:3.8185743975898183,iris:7.893445064127478,index:12.003748056601248,mfcc:13.314315583178026
[0.10301426 0.17094191 0.29519292 0.43085091]
face:4.119985612607899,iris:6.836706049016587,index:11.80604135942884,mfcc:17.231590729383523
[0.08490498 0.1334068  0.29177643 0.48991179]
face:3.700412003961111,iris:5.814265431874208,index:12.716485466174133,mfcc:21.35181439135505
[0.09228239 0.13470321 0.28249803 0.49051638]
face:3.9296163890862887,iris:5.7360017444665905,index:12.029477490351718,mfcc:20.88742275315101
[0.10738564 0.

KeyboardInterrupt: 

In [36]:
# Pad every modality to a multiple of the text-feature width and split it into
# chunks of that width, as preparation for element-wise fusion with the text
# features.
#
# `text` is converted with np.array() because the loading cell builds it as a
# plain list, which has no `.shape`. Converting also gives `merge_add` and
# `merge_mul` their own copies; previously both names aliased `text` itself.
std_len = np.array(text).shape[1]
merge_add = np.array(text)
merge_mul = np.array(text)
for i in range(len(text)):  # was a hard-coded 417
    temp_face, gap = pad_input(face[i],std_len)
    split_temp_face = np.array_split(temp_face, gap)

    temp_iris, _ = pad_input(iris[i], std_len)

    temp_index, gap = pad_input(physical_index[i], std_len)
    split_temp_index = np.array_split(temp_index, gap)

    temp_mfcc, gap = pad_input(mfcc_ft[i], std_len)
    split_temp_mfcc = np.array_split(temp_mfcc, gap)

    # The fusion itself is disabled, so merge_add and merge_mul currently hold
    # the unmodified text features.
    # merge_add[i] =  np.sum(split_temp_face, axis=0) + temp_iris + np.sum(split_temp_index, axis=0) \
    #                 + np.sum(split_temp_mfcc, axis=0) + merge_add[i]
    # merge_mul[i] =  split_temp_face[0] * split_temp_face[1] * temp_iris * split_temp_index[0] * \
    #     split_temp_index[1] * split_temp_index[2] * split_temp_index[3] * split_temp_index[4] * \
    #     split_temp_mfcc[0] * split_temp_mfcc[1] * split_temp_mfcc[2] * split_temp_mfcc[3] * split_temp_mfcc[4] \
    #     * split_temp_mfcc[5] * split_temp_mfcc[6] * split_temp_mfcc[7]  * split_temp_mfcc[8] * split_temp_mfcc[9]\
    #      * merge_mul[i]
print(np.array(merge_add).shape)
print(np.array(merge_mul).shape)
    

(417, 1024)
(417, 1024)


In [48]:
# Split the additively merged features 70 % train / 30 % test with a fixed seed.
# NOTE(review): the split is not stratified (no `stratify=labels`) — confirm
# that is intended for this imbalanced label set.
x_train, x_test, y_train, y_test = train_test_split(merge_add, labels, random_state=0, train_size=0.7)

In [56]:
# Split the multiplicatively merged features 70 % train / 30 % test with a
# fixed seed. This binds x_train / x_test / y_train / y_test.
# NOTE(review): the split is not stratified (no `stratify=labels`) — confirm
# that is intended for this imbalanced label set.
x_train, x_test, y_train, y_test = train_test_split(merge_mul, labels, random_state=0, train_size=0.7)

In [57]:
# Resample the training split only, using a seeded RandomOverSampler; the test
# split is not passed in, so it keeps its original class distribution.
ros = RandomOverSampler(random_state=0)
x_train_resampled, y_train_resampled = ros.fit_resample(x_train, y_train)

In [58]:
# LR
logreg = linear_model.LogisticRegression(max_iter=50000, random_state=0, solver='liblinear', class_weight='balanced' )
# logreg.fit(x_train, y_train)
logreg.fit(x_train_resampled, y_train_resampled)
score_lr = logreg.score(x_test, y_test)
y_predicted = logreg.predict(x_test)
print(y_predicted)
print("The score of LR is : %f" % score_lr)
print(metrics.classification_report(y_test, y_predicted))
print("The weighted f1 score: ", f1_score(y_test, y_predicted, average='weighted'))
print("The confusion matrix:\n ", metrics.confusion_matrix(y_test, y_predicted))

[1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1]
The score of LR is : 0.301587
              precision    recall  f1-score   support

           0       0.81      0.13      0.23        98
           1       0.23      0.89      0.36        28

    accuracy                           0.30       126
   macro avg       0.52      0.51      0.30       126
weighted avg       0.68      0.30      0.26       126

The weighted f1 score:  0.25790321213662176
The confusion matrix:
  [[13 85]
 [ 3 25]]


In [59]:
# SVM with an RBF kernel, fitted on the oversampled training data rather than
# on the raw x_train / y_train split.
clf_rbf = svm.SVC(
    class_weight='balanced',
    kernel='rbf',
).fit(x_train_resampled, y_train_resampled)

# Evaluate on the hold-out set: raw predictions plus mean accuracy.
y_predicted = clf_rbf.predict(x_test)
score_rbf = clf_rbf.score(x_test, y_test)

print(y_predicted)
print("The score of SVM is : %f" % score_rbf)
print(metrics.classification_report(y_test, y_predicted))
print(
    "The weighted f1 score: ",
    f1_score(y_test, y_predicted, average='weighted'),
)
print(
    "The confusion matrix:\n ",
    metrics.confusion_matrix(y_test, y_predicted),
)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1]
The score of SVM is : 0.246032
              precision    recall  f1-score   support

           0       0.71      0.05      0.10        98
           1       0.22      0.93      0.35        28

    accuracy                           0.25       126
   macro avg       0.47      0.49      0.22       126
weighted avg       0.60      0.25      0.15       126

The weighted f1 score:  0.1526832955404384
The confusion matrix:
  [[ 5 93]
 [ 2 26]]


In [60]:
# SVM with a linear kernel, fitted on the oversampled training data rather
# than on the raw x_train / y_train split.
clf_linear = svm.SVC(
    class_weight='balanced',
    kernel='linear',
).fit(x_train_resampled, y_train_resampled)

# Evaluate on the hold-out set: raw predictions plus mean accuracy.
y_predicted = clf_linear.predict(x_test)
score_linear = clf_linear.score(x_test, y_test)

print(y_predicted)
print("The score of SVM linear is : %f" % score_linear)
print(metrics.classification_report(y_test, y_predicted))
print(
    "The weighted f1 score: ",
    f1_score(y_test, y_predicted, average='weighted'),
)
print(
    "The confusion matrix:\n ",
    metrics.confusion_matrix(y_test, y_predicted),
)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
The score of SVM linear is : 0.769841
              precision    recall  f1-score   support

           0       0.78      0.99      0.87        98
           1       0.00      0.00      0.00        28

    accuracy                           0.77       126
   macro avg       0.39      0.49      0.43       126
weighted avg       0.60      0.77      0.68       126

The weighted f1 score:  0.6766317887394121
The confusion matrix:
  [[97  1]
 [28  0]]


In [61]:
# SVM with a polynomial kernel, fitted on the oversampled training data rather
# than on the raw x_train / y_train split.
clf_poly = svm.SVC(kernel='poly', class_weight='balanced')
clf_poly.fit(x_train_resampled, y_train_resampled)

# Evaluate on the hold-out set: mean accuracy plus raw predictions.
score_poly = clf_poly.score(x_test, y_test)
y_predicted = clf_poly.predict(x_test)
print(y_predicted)
print("The score of SVM poly is : %f" % score_poly)

# This model can end up predicting a single class for every test row, which
# leaves precision (and therefore F-score) undefined for the class that is
# never predicted. zero_division=0 reports those entries as 0 -- the same
# value scikit-learn falls back to by default -- without emitting an
# UndefinedMetricWarning on every metric call.
print(metrics.classification_report(y_test, y_predicted, zero_division=0))
print("The weighted f1 score: ", f1_score(y_test, y_predicted, average='weighted', zero_division=0))
print("The confusion matrix:\n ", metrics.confusion_matrix(y_test, y_predicted))

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
The score of SVM poly is : 0.777778
              precision    recall  f1-score   support

           0       0.78      1.00      0.88        98
           1       0.00      0.00      0.00        28

    accuracy                           0.78       126
   macro avg       0.39      0.50      0.44       126
weighted avg       0.60      0.78      0.68       126

The weighted f1 score:  0.6805555555555557
The confusion matrix:
  [[98  0]
 [28  0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [62]:
# Random forest (seeded), fitted on the oversampled training data rather than
# on the raw x_train / y_train split.
RF_clf = RandomForestClassifier(
    class_weight='balanced',
    random_state=0,
).fit(x_train_resampled, y_train_resampled)

# Evaluate on the hold-out set: raw predictions plus mean accuracy.
y_predicted = RF_clf.predict(x_test)
score_RF = RF_clf.score(x_test, y_test)

print(y_predicted)
print("The score of random forest is : %f" % score_RF)
print(metrics.classification_report(y_test, y_predicted))
print(
    "The weighted f1 score: ",
    f1_score(y_test, y_predicted, average='weighted'),
)
print(
    "The confusion matrix:\n ",
    metrics.confusion_matrix(y_test, y_predicted),
)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1]
The score of random forest is : 0.246032
              precision    recall  f1-score   support

           0       0.71      0.05      0.10        98
           1       0.22      0.93      0.35        28

    accuracy                           0.25       126
   macro avg       0.47      0.49      0.22       126
weighted avg       0.60      0.25      0.15       126

The weighted f1 score:  0.1526832955404384
The confusion matrix:
  [[ 5 93]
 [ 2 26]]


In [63]:
# Gradient-boosted trees (seeded), fitted on the oversampled training data
# rather than on the raw x_train / y_train split.
gbdt_clf = GradientBoostingClassifier(random_state=0).fit(
    x_train_resampled,
    y_train_resampled,
)

# Evaluate on the hold-out set: raw predictions plus mean accuracy.
y_predicted = gbdt_clf.predict(x_test)
score_gbdt = gbdt_clf.score(x_test, y_test)

print(y_predicted)
print("The score of GBDT is : %f" % score_gbdt)
print(metrics.classification_report(y_test, y_predicted))
print(
    "The weighted f1 score: ",
    f1_score(y_test, y_predicted, average='weighted'),
)
print(
    "The confusion matrix:\n ",
    metrics.confusion_matrix(y_test, y_predicted),
)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1]
The score of GBDT is : 0.246032
              precision    recall  f1-score   support

           0       0.80      0.04      0.08        98
           1       0.22      0.96      0.36        28

    accuracy                           0.25       126
   macro avg       0.51      0.50      0.22       126
weighted avg       0.67      0.25      0.14       126

The weighted f1 score:  0.14094683723927223
The confusion matrix:
  [[ 4 94]
 [ 1 27]]
