In [1]:
import os
import numpy as np
import random
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from collections import Counter
from sklearn.model_selection import train_test_split

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score
from sklearn import linear_model
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import ADASYN
from imblearn.under_sampling import RandomUnderSampler 
from imblearn.combine import SMOTEENN
from imblearn.combine import SMOTETomek
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from sklearn.neighbors import LocalOutlierFactor
from sklearn import preprocessing
from ast import Sub
from re import subn
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn.parameter import Parameter
from torch.nn.init import xavier_normal_
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [9]:
class SubNet(nn.Module):
    '''
    The subnetwork that is used in LMF for video and audio in the pre-fusion stage
    '''

    def __init__(self, in_size, hidden_size, dropout):
        '''
        Args:
            in_size: input dimension
            hidden_size: hidden layer dimension
            dropout: dropout probability
        Output:
            (return value in forward) a tensor of shape (batch_size, hidden_size)
        '''
        super(SubNet, self).__init__()
        self.norm = nn.BatchNorm1d(in_size)
        self.drop = nn.Dropout(p=dropout)
        self.linear_1 = nn.Linear(in_size, hidden_size)
        self.linear_2 = nn.Linear(hidden_size, hidden_size)
        self.linear_3 = nn.Linear(hidden_size, hidden_size)

    def forward(self, x):
        '''
        Args:
            x: tensor of shape (batch_size, in_size)
        '''
        normed = self.norm(x)
        dropped = self.drop(normed)
        y_1 = F.relu(self.linear_1(dropped))
        y_2 = F.relu(self.linear_2(y_1))
        y_3 = F.relu(self.linear_3(y_2))

        return y_3


class TextSubNet(nn.Module):
    '''
    The LSTM-based subnetwork that is used in LMF for text
    '''

    def __init__(self, in_size, hidden_size, out_size, num_layers=1, dropout=0.2, bidirectional=False):
        '''
        Args:
            in_size: input dimension
            hidden_size: hidden layer dimension
            num_layers: specify the number of layers of LSTMs.
            dropout: dropout probability
            bidirectional: specify usage of bidirectional LSTM
        Output:
            (return value in forward) a tensor of shape (batch_size, out_size)
        '''
        super(TextSubNet, self).__init__()
        self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.linear_1 = nn.Linear(hidden_size, out_size)

    def forward(self, x):
        '''
        Args:
            x: tensor of shape (batch_size, sequence_len, in_size)
        '''
        _, final_states = self.rnn(x)
        h = self.dropout(final_states[0].squeeze())
        y_1 = self.linear_1(h)
        return y_1


class LMF(nn.Module):
    '''
    Low-rank Multimodal Fusion
    '''

    def __init__(self, input_dims, hidden_dims, dropouts, output_dim, rank, use_softmax=False):
        '''
        Args:
            input_dims - a length-3 tuple, contains (audio_dim, video_dim, text_dim)
            hidden_dims - another length-3 tuple, hidden dims of the sub-networks
            text_out - int, specifying the resulting dimensions of the text subnetwork
            dropouts - a length-4 tuple, contains (audio_dropout, video_dropout, text_dropout, post_fusion_dropout)
            output_dim - int, specifying the size of output
            rank - int, specifying the size of rank in LMF
        Output:
            (return value in forward) a scalar value between -3 and 3
        '''
        super(LMF, self).__init__()

        # dimensions are specified in the order of audio, video and text
        self.audio_in = input_dims[0]
        self.video_in = input_dims[1]
        self.pert_in = input_dims[2]
        self.iris_in = input_dims[3]
        self.physical_in = input_dims[4]
        self.macbert_in = input_dims[5]
        self.robert_in = input_dims[6]
        self.graph_in = input_dims[7]

        self.audio_hidden = hidden_dims[0]
        self.video_hidden = hidden_dims[1]
        self.pert_hidden = hidden_dims[2]
        self.iris_hidden = hidden_dims[3]
        self.physical_hidden = hidden_dims[4]
        self.macbert_hidden = hidden_dims[5]
        self.robert_hidden = hidden_dims[6]
        self.graph_hidden = hidden_dims[7]

        # self.text_out= text_out
        self.output_dim = output_dim
        self.rank = rank
        self.use_softmax = use_softmax

        self.audio_prob = dropouts[0]
        self.video_prob = dropouts[1]
        self.pert_prob = dropouts[2]
        self.iris_prob = dropouts[3]
        self.physical_prob = dropouts[4]
        self.macbert_prob = dropouts[5]
        self.robert_prob = dropouts[6]
        self.graph_prob = dropouts[7]
        self.post_fusion_prob = dropouts[8]

        # define the pre-fusion subnetworks
        self.audio_subnet = SubNet(self.audio_in, self.audio_hidden, self.audio_prob)
        self.video_subnet = SubNet(self.video_in, self.video_hidden, self.video_prob)
        self.pert_subnet = SubNet(self.pert_in, self.pert_hidden, self.pert_prob)
        self.iris_subnet = SubNet(self.iris_in, self.iris_hidden, self.iris_prob)
        self.physical_subnet = SubNet(self.physical_in, self.physical_hidden, self.physical_prob)
        self.macbert_subnet = SubNet(self.macbert_in, self.macbert_hidden, self.macbert_prob)
        self.robert_subnet = SubNet(self.robert_in, self.robert_hidden, self.robert_prob)
        self.graph_subnet = SubNet(self.graph_in, self.graph_hidden, self.graph_prob)

        # define the post_fusion layers
        self.post_fusion_dropout = nn.Dropout(p=self.post_fusion_prob)
        self.audio_factor = Parameter(torch.Tensor(self.rank, self.audio_hidden + 1, self.output_dim))
        self.video_factor = Parameter(torch.Tensor(self.rank, self.video_hidden + 1, self.output_dim))
        self.pert_factor = Parameter(torch.Tensor(self.rank, self.pert_hidden + 1, self.output_dim))
        self.iris_factor = Parameter(torch.Tensor(self.rank, self.iris_hidden + 1, self.output_dim))
        self.physical_factor = Parameter(torch.Tensor(self.rank, self.physical_hidden + 1, self.output_dim))
        self.macbert_factor = Parameter(torch.Tensor(self.rank, self.macbert_hidden + 1, self.output_dim))
        self.robert_factor = Parameter(torch.Tensor(self.rank, self.robert_hidden + 1, self.output_dim))
        self.graph_factor = Parameter(torch.Tensor(self.rank, self.graph_hidden + 1, self.output_dim))
        self.fusion_weights = Parameter(torch.Tensor(1, self.rank))
        self.fusion_bias = Parameter(torch.Tensor(1, self.output_dim))

        # init teh factors
        xavier_normal_(self.audio_factor)
        xavier_normal_(self.video_factor)
        xavier_normal_(self.pert_factor)
        xavier_normal_(self.physical_factor)
        xavier_normal_(self.iris_factor)
        xavier_normal_(self.fusion_weights)
        xavier_normal_(self.macbert_factor)
        xavier_normal_(self.robert_factor)
        xavier_normal_(self.graph_factor)
        self.fusion_bias.data.fill_(0)

    def forward(self, audio_x, video_x, pert_x, iris_x, physical_x, macbert_x, robert_x, graph_x):
        '''
        Args:
            audio_x: tensor of shape (batch_size, audio_in)
            video_x: tensor of shape (batch_size, video_in)
            text_x: tensor of shape (batch_size, sequence_len, text_in)
        '''
        audio_h = self.audio_subnet(audio_x)
        video_h = self.video_subnet(video_x)
        pert_h = self.pert_subnet(pert_x)
        iris_h = self.iris_subnet(iris_x)
        physical_h = self.physical_subnet(physical_x)
        macbert_h = self.macbert_subnet(macbert_x)
        robert_h = self.robert_subnet(robert_x)
        graph_h = self.graph_subnet(graph_x)
        batch_size = audio_h.data.shape[0]

        # next we perform low-rank multimodal fusion
        # here is a more efficient implementation than the one the paper describes
        # basically swapping the order of summation and elementwise product
        if audio_h.is_cuda:
            DTYPE = torch.cuda.FloatTensor
        else:
            DTYPE = torch.FloatTensor

        _audio_h = torch.cat((Variable(torch.ones(batch_size, 1).type(DTYPE), requires_grad=False), audio_h), dim=1)
        _video_h = torch.cat((Variable(torch.ones(batch_size, 1).type(DTYPE), requires_grad=False), video_h), dim=1)
        _pert_h = torch.cat((Variable(torch.ones(batch_size, 1).type(DTYPE), requires_grad=False), pert_h), dim=1)
        _iris_h = torch.cat((Variable(torch.ones(batch_size, 1).type(DTYPE), requires_grad=False), iris_h), dim=1)
        _physical_h = torch.cat((Variable(torch.ones(batch_size, 1).type(DTYPE), requires_grad=False), physical_h), dim=1)
        _macbert_h = torch.cat((Variable(torch.ones(batch_size, 1).type(DTYPE), requires_grad=False), macbert_h), dim=1)
        _robert_h = torch.cat((Variable(torch.ones(batch_size, 1).type(DTYPE), requires_grad=False), robert_h), dim=1)
        _graph_h = torch.cat((Variable(torch.ones(batch_size, 1).type(DTYPE), requires_grad=False), graph_h), dim=1)
        
        fusion_audio = torch.matmul(_audio_h, self.audio_factor)
        fusion_video = torch.matmul(_video_h, self.video_factor)
        fusion_pert = torch.matmul(_pert_h, self.pert_factor)
        fusion_iris = torch.matmul(_iris_h, self.iris_factor)
        fusion_physical = torch.matmul(_physical_h, self.physical_factor)
        fusion_macbert = torch.matmul(_macbert_h, self.macbert_factor)
        fusion_robert = torch.matmul(_robert_h, self.robert_factor)
        fusion_graph = torch.matmul(_graph_h, self.graph_factor)

        fusion_zy = fusion_audio * fusion_video * fusion_pert * fusion_iris * fusion_physical * fusion_macbert * fusion_robert * fusion_graph
        # print(fusion_zy.shape)
        # output = torch.sum(fusion_zy, dim=0).squeeze()
        # use linear transformation instead of simple summation, more flexibility
        output = torch.matmul(self.fusion_weights, fusion_zy.permute(1, 0, 2)).squeeze() + self.fusion_bias
        # ft = output
        output = output.view(-1, self.output_dim)
        # print(output.shape)
        if self.use_softmax:
            output = F.softmax(output)
        return output


def get_face_landmarks(csv_path):
    df = pd.read_csv(csv_path, usecols=[1,2,3])
    # print(df.head(10))
    df_x = df['x']
    df_y = df['y']
    df_z = df['z']
    face_x = df_x.values.tolist()
    face_y = df_y.values.tolist()
    face_z = df_z.values.tolist()

    print("There are {num} faces recorded from this sample.".format(num = np.array(face_x).shape[0] / 468))
    print(np.array(face_x).shape)

    return face_x, face_y, face_z, (np.array(face_x).shape[0] / 468)

def get_avg_face_landmarks(x, y, z, face_num):
    avg_x = []
    avg_y = []
    avg_z = []

    for i in range(468):
        temp = x[i:len(x):468]
        avg_x.append(np.sum(temp)/face_num)
    for i in range(468):
        temp = y[i:len(x):468]
        avg_y.append(np.sum(temp)/face_num)
    for i in range(468):
        temp = z[i:len(x):468]
        avg_z.append(np.sum(temp)/face_num)

    print(np.array(avg_x).shape)
    return avg_x, avg_y, avg_z

def get_iris_landmarks(csv_path):
    df = pd.read_csv(csv_path, usecols=[1,2,3,4])
    # print(df.head(10))
    df_leftx = df['left_x']
    df_lefty = df['left_y']
    df_rightx = df['right_x']
    df_righty = df['right_y']
    iris_left_x = df_leftx.values.tolist()
    iris_left_y = df_lefty.values.tolist()
    iris_right_x = df_rightx.values.tolist()
    iris_right_y = df_righty.values.tolist()

    print("There are {num} irises recorded from this sample.".format(num = np.array(df_leftx).shape[0]))
    print(np.array(iris_left_x).shape)

    return iris_left_x, iris_left_y, iris_right_x, iris_right_y, (np.array(df_leftx).shape[0])

def get_avg_iris_landmarks(leftx, lefty, rightx, righty, iris_num):
    avg_leftx = np.sum(leftx) / iris_num
    avg_lefty = np.sum(lefty) / iris_num
    avg_rightx = np.sum(rightx) / iris_num
    avg_righty = np.sum(righty) / iris_num

    return avg_leftx, avg_lefty, avg_rightx, avg_righty

def get_physical_index(csv_path):
    df = pd.read_csv(csv_path, usecols=[1])
    df_index = df['TSfresh'].values.tolist()

    return df_index

def get_voice_feature(csv_path):
    df = pd.read_csv(csv_path, usecols=[1])
    df_mfcc = df['MFCC'].values.tolist()
    
    return df_mfcc

def get_text_feature(csv_path):
    df = pd.read_csv(csv_path, usecols=[2])
    df_text = df['roberta'].values.tolist()
    return df_text






In [4]:
root = os.path.dirname(os.getcwd())
print("root direction", root)

label_path = root + '/muldataset/label.csv'
print(label_path)

df = pd.read_csv(label_path)
# print(df)
df_folder = df['folder'].values.tolist()
df_dep = df['depression'].values.tolist()
df_inter = df['interpersonal tension'].values.tolist()
df_anx = df['anxiety'].values.tolist()
df_obs = df['obsessive-compulsive'].values.tolist()
df_par = df['paranoia'].values.tolist()
df_hos = df['hostility'].values.tolist()
df_aca = df['academic stress'].values.tolist()
df_mal = df['maladjustment'].values.tolist()
df_emo = df['emotional imbalance'].values.tolist()
df_psy = df['psychological imbalance'].values.tolist()
df_label = df['overall condition'].values.tolist()
df_sui = df['suicidal tendency'].values.tolist()
# print(df_folder)
# labels = df_label[412:-1]
labels = df_label
print(np.array(labels).shape)

root direction f:\Project\AMH
f:\Project\AMH/muldataset/label.csv
(927,)


In [7]:
def get_face_landmarks(csv_path):
    df = pd.read_csv(csv_path, usecols=[1,2,3])
    # print(df.head(10))
    df_x = df['x']
    df_y = df['y']
    df_z = df['z']
    face_x = df_x.values.tolist()
    face_y = df_y.values.tolist()
    face_z = df_z.values.tolist()

    print("There are {num} faces recorded from this sample.".format(num = np.array(face_x).shape[0] / 468))
    print(np.array(face_x).shape)

    return face_x, face_y, face_z, (np.array(face_x).shape[0] / 468)

def get_avg_face_landmarks(x, y, z, face_num):
    avg_x = []
    avg_y = []
    avg_z = []

    for i in range(468):
        temp = x[i:len(x):468]
        avg_x.append(np.sum(temp)/face_num)
    for i in range(468):
        temp = y[i:len(x):468]
        avg_y.append(np.sum(temp)/face_num)
    for i in range(468):
        temp = z[i:len(x):468]
        avg_z.append(np.sum(temp)/face_num)

    print(np.array(avg_x).shape)
    return avg_x, avg_y, avg_z

def get_iris_landmarks(csv_path):
    df = pd.read_csv(csv_path, usecols=[1,2,3,4])
    # print(df.head(10))
    df_leftx = df['left_x']
    df_lefty = df['left_y']
    df_rightx = df['right_x']
    df_righty = df['right_y']
    iris_left_x = df_leftx.values.tolist()
    iris_left_y = df_lefty.values.tolist()
    iris_right_x = df_rightx.values.tolist()
    iris_right_y = df_righty.values.tolist()

    print("There are {num} irises recorded from this sample.".format(num = np.array(df_leftx).shape[0]))
    print(np.array(iris_left_x).shape)

    return iris_left_x, iris_left_y, iris_right_x, iris_right_y, (np.array(df_leftx).shape[0])

def get_avg_iris_landmarks(leftx, lefty, rightx, righty, iris_num):
    avg_leftx = np.sum(leftx) / iris_num
    avg_lefty = np.sum(lefty) / iris_num
    avg_rightx = np.sum(rightx) / iris_num
    avg_righty = np.sum(righty) / iris_num

    return avg_leftx, avg_lefty, avg_rightx, avg_righty

def get_physical_index(csv_path):
    df = pd.read_csv(csv_path, usecols=[1])
    df_index = df['TSfresh'].values.tolist()

    return df_index

def get_voice_feature(csv_path):
    df = pd.read_csv(csv_path, usecols=[1])
    df_mfcc = df['MFCC'].values.tolist()
    
    return df_mfcc

def get_wav2vec(csv_path):
    df = pd.read_csv(csv_path, usecols=[1])
    df_wav2vec = df['wav2vec'].values.tolist()
    
    return df_wav2vec

def get_text_feature(csv_path):
    df = pd.read_csv(csv_path, usecols=[1,2,3])
    df_pert = df['pert'].values.tolist()
    df_roberta = df['roberta'].values.tolist()
    df_macberta = df['macberta'].values.tolist()
    return df_pert, df_roberta, df_macberta

def get_graph_feature(csv_path):
    df = pd.read_csv(csv_path, usecols=[1])
    df_graph = df['graph'].values.tolist()
    
    return df_graph

if __name__ == '__main__':
    face = []
    iris = []
    physical_index = []
    mfcc_ft = []
    wav2vec = []
    pert = []
    roberta = []
    macberta = []
    graph = []

    root = os.path.dirname(os.getcwd())
    print("root direction", root)
    dataset = os.listdir(root + '/muldataset')
    del dataset[-1]
    for sample in dataset:
        # if '00002' in sample:
        #     continue
        sample_path = root + '/muldataset' + '/' + sample #样本文件夹
        # print(sample_path)
        sample_file = os.listdir(sample_path) #样本文件夹内文件列表
        # print(sample_file)
        for detail in sample_file:
                sample_path = root + '/muldataset' + '/' + sample
                if 'face_landmarks' in detail:
                    # 打开csv文件并读取人脸信息
                    print(sample_path + '/' + detail)
                    face_x, face_y, face_z, face_num = get_face_landmarks(sample_path + '/' + detail)
                    avg_x, avg_y, avg_z = get_avg_face_landmarks(face_x, face_y, face_z, face_num)
                    temp_face = avg_x + avg_y + avg_z
                    face.append(temp_face)
                    print(np.array(face).shape)
                    

                if 'iris_location' in detail:
                    print(sample_path + '/' + detail)
                    left_x, left_y, right_x, right_y, iris_num = get_iris_landmarks(sample_path + '/' + detail)
                    
                    # avg_leftx, avg_lefty, avg_rightx, avg_righty = get_avg_iris_landmarks(left_x, left_y, right_x, right_y, iris_num)
                    # temp_iris = avg_leftx + avg_lefty + avg_rightx + avg_righty
                    # print(np.array(temp_iris).shape)
                    iris.append(left_x)
                    iris.append(left_y)
                    iris.append(right_x)
                    iris.append(right_y)
                    
                    print(np.array(iris).shape)

                if 'Physical_feature' in detail:
                    print(sample_path + '/' + detail)
                    temp_index = get_physical_index(sample_path + '/' + detail)
                    physical_index.append(temp_index)
                    print(np.array(physical_index).shape)

                if 'mfcc' in detail:
                    print(sample_path + '/' + detail)
                    temp_mfcc = get_voice_feature(sample_path + '/' + detail)
                    mfcc_ft.append(temp_mfcc)
                    print(np.array(mfcc_ft).shape)

                if 'graph' in detail:
                    print(sample_path + '/' + detail)
                    temp_graph = get_graph_feature(sample_path + '/' + detail)
                    graph.append(temp_graph)
                    print(np.array(graph).shape)
                # if 'wav2vec' in detail:
                #     print(sample_path + '/' + detail)
                #     temp_wav2vec = get_wav2vec(sample_path + '/' + detail)
                #     wav2vec.append(temp_wav2vec)
                #     print(np.array(wav2vec).shape)

                if 'text_feature' in detail:
                    print(sample_path + '/' + detail)
                    temp_pert, temp_roberta, temp_macberta = get_text_feature(sample_path + '/' + detail)
                    pert.append(temp_pert)
                    roberta.append(temp_roberta)
                    macberta.append(temp_macberta)
                    print(np.array(pert).shape, np.array(roberta).shape, np.array(macberta).shape)

    iris = np.array(iris).reshape(-1, 694*4)            
    # iris = np.array(iris).reshape(-1, 1)
    print("Face set:{face}".format(face = np.array(face).shape))
    print("Iris set:{iris}".format(iris = np.array(iris).shape))
    print("Physical index set:{index}".format(index = np.array(physical_index).shape))
    print("MFCC set:{mfcc}".format(mfcc = np.array(mfcc_ft).shape))
    print("Graph set:{gra}".format(gra = np.array(graph).shape))
    print("Pert set:{pert}, Roberta:{robert} and Macberta:{mac}".format(pert = np.array(pert).shape, \
        robert = np.array(roberta).shape, mac = np.array(macberta).shape))
    print("Label set:{label}".format(label = np.array(labels).shape))

root direction f:\Project\AMH
f:\Project\AMH/muldataset/00002-0101/face_landmarks.csv
There are 212.0 faces recorded from this sample.
(99216,)
(468,)
(1, 1404)
f:\Project\AMH/muldataset/00002-0101/graph.csv
(1, 49)
f:\Project\AMH/muldataset/00002-0101/iris_location.csv
There are 694 irises recorded from this sample.
(694,)
(4, 694)
f:\Project\AMH/muldataset/00002-0101/mfcc.csv
(1, 11988)
f:\Project\AMH/muldataset/00002-0101/Physical_feature.csv
(1, 4722)
f:\Project\AMH/muldataset/00002-0101/text_feature.csv
(1, 1024) (1, 1024) (1, 1024)
f:\Project\AMH/muldataset/00002-0102/face_landmarks.csv
There are 227.0 faces recorded from this sample.
(106236,)
(468,)
(2, 1404)
f:\Project\AMH/muldataset/00002-0102/graph.csv
(2, 49)
f:\Project\AMH/muldataset/00002-0102/iris_location.csv
There are 694 irises recorded from this sample.
(694,)
(8, 694)
f:\Project\AMH/muldataset/00002-0102/mfcc.csv
(2, 11988)
f:\Project\AMH/muldataset/00002-0102/Physical_feature.csv
(2, 4722)
f:\Project\AMH/muldataset

In [27]:
face_x_train, face_x_test, face_y_train, face_y_test = \
        train_test_split(face, labels, random_state=0, train_size=0.7)
iris_x_train, iris_x_test, iris_y_train, iris_y_test = \
        train_test_split(iris, labels, random_state=0, train_size=0.7)
physical_index_x_train, physical_index_x_test, physical_index_y_train, physical_index_y_test = \
        train_test_split(physical_index, labels, random_state=0, train_size=0.7)
mfcc_ft_x_train, mfcc_ft_x_test, mfcc_ft_y_train, mfcc_ft_y_test = \
        train_test_split(mfcc_ft, labels, random_state=0, train_size=0.7)
pert_x_train, pert_x_test, pert_y_train, pert_y_test = \
        train_test_split(pert, labels, random_state=0, train_size=0.7)
roberta_x_train, roberta_x_test, roberta_y_train, roberta_y_test = \
        train_test_split(roberta, labels, random_state=0, train_size=0.7)
macberta_x_train, macberta_x_test, macberta_y_train, macberta_y_test = \
        train_test_split(macberta, labels, random_state=0, train_size=0.7)

ros = RandomOverSampler(random_state=0)
sm = SMOTE(random_state=0)
face_x_train, face_y_train = sm.fit_resample(face_x_train, face_y_train)
iris_x_train, iris_y_train = sm.fit_resample(iris_x_train, iris_y_train)
physical_index_x_train, physical_index_y_train = sm.fit_resample(physical_index_x_train, \
        physical_index_y_train)
mfcc_ft_x_train, mfcc_ft_y_train = sm.fit_resample(mfcc_ft_x_train, mfcc_ft_y_train)
pert_x_train, pert_y_train = sm.fit_resample(pert_x_train, pert_y_train)
roberta_x_train, roberta_y_train = sm.fit_resample(roberta_x_train, roberta_y_train)
macberta_x_train, macberta_y_train = sm.fit_resample(macberta_x_train, macberta_y_train)

dtype = torch.FloatTensor

face_x_train, face_x_test, face_y_train, face_y_test = np.array(face_x_train), np.array(face_x_test),\
        np.array(face_y_train), np.array(face_y_test)
iris_x_train, iris_x_test, iris_y_train, iris_y_test = np.array(iris_x_train), np.array(iris_x_test),\
        np.array(iris_y_train), np.array(iris_y_test)
physical_index_x_train, physical_index_x_test, physical_index_y_train, physical_index_y_test = \
        np.array(physical_index_x_train), np.array(physical_index_x_test), \
            np.array(physical_index_y_train), np.array(physical_index_y_test)
mfcc_ft_x_train, mfcc_ft_x_test, mfcc_ft_y_train, mfcc_ft_y_test = np.array(mfcc_ft_x_train), \
        np.array(mfcc_ft_x_test), np.array(mfcc_ft_y_train), np.array(mfcc_ft_y_test)
pert_x_train, pert_x_test, pert_y_train, pert_y_test = np.array(pert_x_train), np.array(pert_x_test),\
        np.array(pert_y_train), np.array(pert_y_test)
roberta_x_train, roberta_x_test, roberta_y_train, roberta_y_test = np.array(roberta_x_train), np.array(roberta_x_test),\
        np.array(roberta_y_train), np.array(roberta_y_test)
macberta_x_train, macberta_x_test, macberta_y_train, macberta_y_test = np.array(macberta_x_train), np.array(macberta_x_test),\
        np.array(macberta_y_train), np.array(macberta_y_test)

pert_x_train, pert_y_train = torch.from_numpy(pert_x_train).type(dtype), \
        torch.from_numpy(pert_y_train).type(dtype)
roberta_x_train, roberta_y_train = torch.from_numpy(roberta_x_train).type(dtype), \
        torch.from_numpy(roberta_y_train).type(dtype)
macberta_x_train, macberta_y_train = torch.from_numpy(macberta_x_train).type(dtype), \
        torch.from_numpy(macberta_y_train).type(dtype)
mfcc_ft_x_train, mfcc_ft_y_train = torch.from_numpy(mfcc_ft_x_train).type(dtype), \
        torch.from_numpy(mfcc_ft_y_train).type(dtype)
face_x_train, face_y_train = torch.from_numpy(face_x_train).type(dtype), \
        torch.from_numpy(face_y_train).type(dtype)
iris_x_train, iris_y_train = torch.from_numpy(iris_x_train).type(dtype), \
        torch.from_numpy(iris_y_train).type(dtype)
physical_index_x_train, physical_index_y_train = torch.from_numpy(physical_index_x_train).type(dtype), \
        torch.from_numpy(physical_index_y_train).type(dtype)

pert_x_test, pert_y_test = torch.from_numpy(pert_x_test).type(dtype), \
        torch.from_numpy(pert_y_test).type(dtype)
roberta_x_test, roberta_y_test = torch.from_numpy(roberta_x_test).type(dtype), \
        torch.from_numpy(roberta_y_test).type(dtype)
macberta_x_test, macberta_y_test = torch.from_numpy(macberta_x_test).type(dtype), \
        torch.from_numpy(macberta_y_test).type(dtype)
mfcc_ft_x_test, mfcc_ft_y_test = torch.from_numpy(mfcc_ft_x_test).type(dtype), \
        torch.from_numpy(mfcc_ft_y_test).type(dtype)
face_x_test, face_y_test = torch.from_numpy(face_x_test).type(dtype), \
        torch.from_numpy(face_y_test).type(dtype)
iris_x_test, iris_y_test = torch.from_numpy(iris_x_test).type(dtype), \
        torch.from_numpy(iris_y_test).type(dtype)
physical_index_x_test, physical_index_y_test = torch.from_numpy(physical_index_x_test).type(dtype), \
        torch.from_numpy(physical_index_y_test).type(dtype)

y_train = pert_y_train
y_test = pert_y_test
y_train = y_train.unsqueeze(-1)
y_test = y_test.unsqueeze(-1)

    

In [6]:
model = LMF(input_dims=(9828, 1404, 1024, 4, 4722, 1024, 1024), hidden_dims=(1024, 32, 1024, 4, 4, 1024, 1024),\
         dropouts=(0.1, 0.1, 1, 0.3, 0.3, 1, 1, 0.5), output_dim=1, rank=1)

loss_func = torch.nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters())

epochs = 50
temp_loss = 1
for t in range(epochs+1):
    out = model(mfcc_ft_x_train, face_x_train, text_x_train, iris_x_train, physical_index_x_train)
    loss = loss_func(out, y_train)
    print("train loss:%f" %loss.detach().numpy())
    # print(ft.detach().numpy())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if t % 10 == 0 and t > 0:
        model.train(False)
        pred_y = out.detach().numpy()
        for item in range(pred_y.shape[0]):
            if pred_y[item] > 0.5:
                pred_y[item] = 1
            else:
                pred_y[item] = 0
        target_y = y_train.data.numpy()
        print("epoch %d, train acc %.4f" % (t, accuracy_score(pred_y, target_y)))

        
        out = model(mfcc_ft_x_test, face_x_test, text_x_test, iris_x_test, physical_index_x_test)
        loss = loss_func(out, y_test)
        print("test loss:%f" %loss.detach().numpy())
        loss_val = loss_func(out, y_test).detach().numpy()
        
        pred_y = out.detach().numpy()
        for item in range(pred_y.shape[0]):
            if pred_y[item] > 0.5:
                pred_y[item] = 1
            else:
                pred_y[item] = 0
        target_y = y_test.data.numpy()
        test_score = accuracy_score(target_y, pred_y)
        
        test_f1 = f1_score(target_y, pred_y, average='weighted')
        if loss_val < temp_loss:
            acc =  test_score
            f1_val = test_f1
            temp_loss = loss_val
            print(temp_loss)
            print(acc)
        print("test acc %.4f" % test_score)
        print("test f1-score %f" %test_f1 )


print("Best:{acc},{f1}".format(acc=acc, f1=f1_val))

NameError: name 'text_x_train' is not defined

In [8]:
from tsmoothie.smoother import *
import matplotlib.pyplot as plt
from numpy.fft import *
np.set_printoptions(threshold=np.inf)
smoother = ExponentialSmoother(window_len=20, alpha=0.3)
smoother = KalmanSmoother(component='level_trend', 
                          component_noise={'level':0.1, 'trend':0.1})

def filter_signal(signal, threshold=1e8):
    fourier = rfft(signal)
    frequencies = rfftfreq(signal.size, d=20e-3/signal.size)
    fourier[frequencies > threshold] = 0
    return irfft(fourier)
from sklearn import preprocessing

skf = StratifiedKFold(n_splits=10)
min_max_scaler = preprocessing.MinMaxScaler()
scaler = preprocessing.StandardScaler()
x = np.array(face)
y = labels
y = np.array(y)
face_x_train_skf = []
face_x_test_skf = []
face_y_train_skf = []
face_y_test_skf = []
# pca = PCA(n_components=2)
for train_index, test_index in skf.split(x, y):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)
    # for i in range(0, 375):
    #     x_train[i] = filter_signal(x_train[i], threshold=1e3)
    # for i in range(0, 41):
    #     x_test[i] = filter_signal(x_test[i], threshold=1e3)
    # smoother.smooth(x_train)
    # x_train = smoother.smooth_data
    # smoother.smooth(x_test)
    # x_test = smoother.smooth_data
    # x_train = pca.fit_transform(x_train)
    # x_test = pca.fit_transform(x_test)
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)
    # x_train = preprocessing.scale(x_train)
    # x_test = preprocessing.scale(x_test)
    face_x_train_skf.append(x_train)
    face_x_test_skf.append(x_test)
    face_y_train_skf.append(y_train)
    face_y_test_skf.append(y_test)
print("face shape:", np.array(face_x_train_skf[0]).shape)
print("face shape:", np.array(face_x_test_skf[0]).shape)

x = np.array(iris)
y = np.array(y)
iris_x_train_skf = []
iris_x_test_skf = []
iris_y_train_skf = []
iris_y_test_skf = []
for train_index, test_index in skf.split(x, y):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)
    # for i in range(0, 375):
    #     x_train[i] = filter_signal(x_train[i], threshold=1e3)
    # for i in range(0, 41):
    #     x_test[i] = filter_signal(x_test[i], threshold=1e3)
    # x_train = pca.fit_transform(x_train)
    # x_test = pca.fit_transform(x_test)
    # x_train = preprocessing.scale(x_train)
    # x_test = preprocessing.scale(x_test)
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)
    iris_x_train_skf.append(x_train)
    iris_x_test_skf.append(x_test)
    iris_y_train_skf.append(y_train)
    iris_y_test_skf.append(y_test)
print("iris shape:", np.array(iris_x_train_skf[0]).shape)
print("iris shape:", np.array(iris_x_test_skf[0]).shape)

x = np.array(physical_index)
y = np.array(y)
physical_x_train_skf = []
physical_x_test_skf = []
physical_y_train_skf = []
physical_y_test_skf = []
for train_index, test_index in skf.split(x, y):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)
    # for i in range(0, 375):
    #     x_train[i] = filter_signal(x_train[i], threshold=1e3)
    # for i in range(0, 41):
    #     x_test[i] = filter_signal(x_test[i], threshold=1e3)
    # smoother.smooth(x_train)
    # x_train = smoother.smooth_data
    # smoother.smooth(x_test)
    # x_test = smoother.smooth_data
    # x_train = pca.fit_transform(x_train)
    # x_test = pca.fit_transform(x_test)

    # x_train = preprocessing.scale(x_train)
    # x_test = preprocessing.scale(x_test)
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)
    physical_x_train_skf.append(x_train)
    physical_x_test_skf.append(x_test)
    physical_y_train_skf.append(y_train)
    physical_y_test_skf.append(y_test)
print("physical index shape:", np.array(physical_x_train_skf[0]).shape)
print("physical index shape:", np.array(physical_x_test_skf[0]).shape)

x = np.array(mfcc_ft)
y = np.array(y)
mfcc_ft_x_train_skf = []
mfcc_ft_x_test_skf = []
mfcc_ft_y_train_skf = []
mfcc_ft_y_test_skf = []
for train_index, test_index in skf.split(x, y):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)
    # for i in range(0, 375):
    #     x_train[i] = filter_signal(x_train[i], threshold=1e3)
    # for i in range(0, 41):
    #     x_test[i] = filter_signal(x_test[i], threshold=1e3)
    # smoother.smooth(x_train)
    # x_train = smoother.smooth_data
    # smoother.smooth(x_test)
    # x_test = smoother.smooth_data
    # x_train = pca.fit_transform(x_train)
    # x_test = pca.fit_transform(x_test)
    # x_train = preprocessing.scale(x_train)
    # x_test = preprocessing.scale(x_test)
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)
    mfcc_ft_x_train_skf.append(x_train)
    mfcc_ft_x_test_skf.append(x_test)  
    mfcc_ft_y_train_skf.append(y_train)
    mfcc_ft_y_test_skf.append(y_test)
print("mfcc shape:", np.array(mfcc_ft_x_train_skf[0]).shape)
print("mfcc shape:", np.array(mfcc_ft_x_test_skf[0]).shape)

x = np.array(pert)
y = np.array(y)
pert_x_train_skf = []
pert_x_test_skf = []
pert_y_train_skf = []
pert_y_test_skf = []
for train_index, test_index in skf.split(x, y):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)
    # for i in range(0, 375):
    #     x_train[i] = filter_signal(x_train[i], threshold=1e3)
    # for i in range(0, 41):
    #     x_test[i] = filter_signal(x_test[i], threshold=1e3)
    
    # smoother.smooth(x_train)
    # x_train = smoother.smooth_data
    # smoother.smooth(x_test)
    # x_test = smoother.smooth_data
    # x_train = pca.fit_transform(x_train)
    # x_test = pca.fit_transform(x_test)
    # x_train = preprocessing.scale(x_train)
    # x_test = preprocessing.scale(x_test)
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)
    pert_x_train_skf.append(x_train)
    pert_x_test_skf.append(x_test)
    pert_y_train_skf.append(y_train)
    pert_y_test_skf.append(y_test)
print("pert shape:", np.array(pert_x_train_skf[0]).shape)
print("pert shape:", np.array(pert_x_test_skf[0]).shape)

x = np.array(roberta)
y = np.array(y)
roberta_x_train_skf = []
roberta_x_test_skf = []
roberta_y_train_skf = []
roberta_y_test_skf = []
for train_index, test_index in skf.split(x, y):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)
    # for i in range(0, 375):
    #     x_train[i] = filter_signal(x_train[i], threshold=1e3)
    # for i in range(0, 41):
    #     x_test[i] = filter_signal(x_test[i], threshold=1e3)
    # smoother.smooth(x_train)
    # x_train = smoother.smooth_data
    # smoother.smooth(x_test)
    # x_test = smoother.smooth_data
    # x_train = pca.fit_transform(x_train)
    # x_test = pca.fit_transform(x_test)
    # x_train = preprocessing.scale(x_train)
    # x_test = preprocessing.scale(x_test)
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)
    roberta_x_train_skf.append(x_train)
    roberta_x_test_skf.append(x_test)
    roberta_y_train_skf.append(y_train)
    roberta_y_test_skf.append(y_test)
print("roberta shape:", np.array(roberta_x_train_skf[0]).shape)
print("roberta shape:", np.array(roberta_x_test_skf[0]).shape)

x = np.array(macberta)
y = np.array(y)
macberta_x_train_skf = []
macberta_x_test_skf = []
macberta_y_train_skf = []
macberta_y_test_skf = []
for train_index, test_index in skf.split(x, y):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)

    # for i in range(0, 375):
    #     x_train[i] = filter_signal(x_train[i], threshold=1e3)
    # for i in range(0, 41):
    #     x_test[i] = filter_signal(x_test[i], threshold=1e3)
    # smoother.smooth(x_train)
    # x_train = smoother.smooth_data
    # smoother.smooth(x_test)
    # x_test = smoother.smooth_data

    # x_train = pca.fit_transform(x_train)
    # x_test = pca.fit_transform(x_test)
    # x_train = preprocessing.scale(x_train)
    # x_test = preprocessing.scale(x_test)
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)
    macberta_x_train_skf.append(x_train)
    macberta_x_test_skf.append(x_test)
    macberta_y_train_skf.append(y_train)
    macberta_y_test_skf.append(y_test)
print("macberta shape:", np.array(macberta_x_train_skf[0]).shape)
print("macberta shape:", np.array(macberta_x_test_skf[0]).shape)

x = np.array(graph)
y = np.array(y)
graph_x_train_skf = []
graph_x_test_skf = []
graph_y_train_skf = []
graph_y_test_skf = []
for train_index, test_index in skf.split(x, y):
#     print("TRAIN:", train_index, "TEST:", test_index)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # x_train = min_max_scaler.fit_transform(x_train)
    # x_test = min_max_scaler.fit_transform(x_test)

    # for i in range(0, 375):
    #     x_train[i] = filter_signal(x_train[i], threshold=1e3)
    # for i in range(0, 41):
    #     x_test[i] = filter_signal(x_test[i], threshold=1e3)
    # smoother.smooth(x_train)
    # x_train = smoother.smooth_data
    # smoother.smooth(x_test)
    # x_test = smoother.smooth_data

    # x_train = pca.fit_transform(x_train)
    # x_test = pca.fit_transform(x_test)
    # x_train = preprocessing.scale(x_train)
    # x_test = preprocessing.scale(x_test)
    # x_train = scaler.fit_transform(x_train)
    # x_test = scaler.fit_transform(x_test)
    graph_x_train_skf.append(x_train)
    graph_x_test_skf.append(x_test)
    graph_y_train_skf.append(y_train)
    graph_y_test_skf.append(y_test)
print("graph shape:", np.array(graph_x_train_skf[0]).shape)
print("graph shape:", np.array(graph_x_test_skf[0]).shape)



face shape: (834, 1404)
face shape: (93, 1404)
iris shape: (834, 2776)
iris shape: (93, 2776)
physical index shape: (834, 4722)
physical index shape: (93, 4722)
mfcc shape: (834, 11988)
mfcc shape: (93, 11988)
pert shape: (834, 1024)
pert shape: (93, 1024)
roberta shape: (834, 1024)
roberta shape: (93, 1024)
macberta shape: (834, 1024)
macberta shape: (93, 1024)
graph shape: (834, 49)
graph shape: (93, 49)


In [18]:
total_acc = []
total_f1 = []
for j in range(0, 1):
        ACC = []
        F1 = []
        for i in range(0, 10):
                print("The {round} group of {time}".format(round=i, time = j))
                sm = SMOTE(random_state=0)
                ros = RandomOverSampler(random_state=0)

                face_x_train, face_y_train = ros.fit_resample(face_x_train_skf[i], face_y_train_skf[i])
                iris_x_train, iris_y_train = ros.fit_resample(iris_x_train_skf[i], iris_y_train_skf[i])
                physical_x_train, physical_y_train = ros.fit_resample(physical_x_train_skf[i], \
                        physical_y_train_skf[i])
                mfcc_ft_x_train, mfcc_ft_y_train = ros.fit_resample(mfcc_ft_x_train_skf[i], mfcc_ft_y_train_skf[i])
                pert_x_train, pert_y_train = ros.fit_resample(pert_x_train_skf[i], pert_y_train_skf[i])
                macberta_x_train, macberta_y_train = ros.fit_resample(macberta_x_train_skf[i], macberta_y_train_skf[i])
                roberta_x_train, roberta_y_train = ros.fit_resample(roberta_x_train_skf[i], roberta_y_train_skf[i])
                graph_x_train, graph_y_train = ros.fit_resample(graph_x_train_skf[i], graph_y_train_skf[i])

                dtype = torch.FloatTensor
                ltype = torch.LongTensor

                face_x_train, face_x_test, face_y_train, face_y_test = np.array(face_x_train), np.array(face_x_test_skf[i]),\
                        np.array(face_y_train), np.array(face_y_test_skf[i])
                iris_x_train, iris_x_test, iris_y_train, iris_y_test = np.array(iris_x_train), np.array(iris_x_test_skf[i]),\
                        np.array(iris_y_train), np.array(iris_y_test_skf[i])
                physical_index_x_train, physical_index_x_test, physical_index_y_train, physical_index_y_test = \
                        np.array(physical_x_train), np.array(physical_x_test_skf[i]), \
                        np.array(physical_y_train), np.array(physical_y_test_skf[i])
                mfcc_ft_x_train, mfcc_ft_x_test, mfcc_ft_y_train, mfcc_ft_y_test = np.array(mfcc_ft_x_train), \
                        np.array(mfcc_ft_x_test_skf[i]), np.array(mfcc_ft_y_train), np.array(mfcc_ft_y_test_skf[i])
                pert_x_train, pert_x_test, pert_y_train, pert_y_test = np.array(pert_x_train), np.array(pert_x_test_skf[i]),\
                        np.array(pert_y_train), np.array(pert_y_test_skf[i])
                macberta_x_train, macberta_x_test, macberta_y_train, macberta_y_test = np.array(macberta_x_train), np.array(macberta_x_test_skf[i]),\
                        np.array(macberta_y_train), np.array(macberta_y_test_skf[i])
                roberta_x_train, roberta_x_test, roberta_y_train, roberta_y_test = np.array(roberta_x_train), np.array(roberta_x_test_skf[i]),\
                        np.array(roberta_y_train), np.array(roberta_y_test_skf[i])
                graph_x_train, graph_x_test, graph_y_train, graph_y_test = np.array(graph_x_train), np.array(graph_x_test_skf[i]),\
                        np.array(graph_y_train), np.array(graph_y_test_skf[i])

                pert_x_train, pert_y_train = torch.from_numpy(pert_x_train).type(dtype), \
                        torch.from_numpy(pert_y_train).type(dtype)
                macberta_x_train, macberta_y_train = torch.from_numpy(macberta_x_train).type(dtype), \
                        torch.from_numpy(macberta_y_train).type(dtype)
                roberta_x_train, roberta_y_train = torch.from_numpy(roberta_x_train).type(dtype), \
                        torch.from_numpy(roberta_y_train).type(dtype)
                mfcc_ft_x_train, mfcc_ft_y_train = torch.from_numpy(mfcc_ft_x_train).type(dtype), \
                        torch.from_numpy(mfcc_ft_y_train).type(dtype)
                face_x_train, face_y_train = torch.from_numpy(face_x_train).type(dtype), \
                        torch.from_numpy(face_y_train).type(dtype)
                iris_x_train, iris_y_train = torch.from_numpy(iris_x_train).type(dtype), \
                        torch.from_numpy(iris_y_train).type(dtype)
                physical_index_x_train, physical_index_y_train = torch.from_numpy(physical_index_x_train).type(dtype), \
                        torch.from_numpy(physical_index_y_train).type(dtype)
                graph_x_train, graph_y_train = torch.from_numpy(graph_x_train).type(dtype), \
                        torch.from_numpy(graph_y_train).type(ltype)

                pert_x_test, pert_y_test = torch.from_numpy(pert_x_test).type(dtype), \
                        torch.from_numpy(pert_y_test).type(dtype)
                macberta_x_test, macberta_y_test = torch.from_numpy(macberta_x_test).type(dtype), \
                        torch.from_numpy(macberta_y_test).type(dtype)
                roberta_x_test, roberta_y_test = torch.from_numpy(roberta_x_test).type(dtype), \
                        torch.from_numpy(roberta_y_test).type(dtype)
                mfcc_ft_x_test, mfcc_ft_y_test = torch.from_numpy(mfcc_ft_x_test).type(dtype), \
                        torch.from_numpy(mfcc_ft_y_test).type(dtype)
                face_x_test, face_y_test = torch.from_numpy(face_x_test).type(dtype), \
                        torch.from_numpy(face_y_test).type(dtype)
                iris_x_test, iris_y_test = torch.from_numpy(iris_x_test).type(dtype), \
                        torch.from_numpy(iris_y_test).type(dtype)
                physical_index_x_test, physical_index_y_test = torch.from_numpy(physical_index_x_test).type(dtype), \
                        torch.from_numpy(physical_index_y_test).type(dtype)
                graph_x_test, graph_y_test = torch.from_numpy(graph_x_test).type(dtype), \
                        torch.from_numpy(graph_y_test).type(ltype)

                y_train = pert_y_train
                y_test = pert_y_test
                y_train = y_train.unsqueeze(-1)
                y_test = y_test.unsqueeze(-1)

                # model = torch.load('./')
                model = LMF(input_dims=(11988, 1404, 1024, 2776, 4722, 1024, 1024, 49), hidden_dims=(4, 4, 4, 4, 4, 32, 32, 4),\
                        dropouts=(0.1, 0.1, 0.1, 0.1, 0.1, 0.1 ,0.1, 0.1, 0.5), output_dim=1, rank=1)

                device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
                model.to(device)

                loss_func = torch.nn.L1Loss()
                optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

                epochs = 20000
                temp_acc = 0  
                for t in range(epochs+1):

                        out = model(mfcc_ft_x_train.to(device), face_x_train.to(device), pert_x_train.to(device), \
                                iris_x_train.to(device), physical_index_x_train.to(device), macberta_x_train.to(device), roberta_x_train.to(device), graph_x_train.to(device))
                        loss = loss_func(out, y_train.to(device))
                        # print("train loss:%f" %loss.detach().cpu().numpy())
                        # print(ft.detach().numpy())

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                        if t % 10 == 0 and t > 0:
                                model.train(False)
                                pred_y = out.detach().cpu().numpy()
                                for item in range(pred_y.shape[0]):
                                        if pred_y[item] > 0.5:
                                                pred_y[item] = 1
                                        else:
                                                pred_y[item] = 0
                                target_y = y_train.data.numpy()
                                print("epoch %d, train acc %.4f" % (t, accuracy_score(pred_y, target_y)))
                        # pred_y = np.array(prediction)
                        # target_y = y_train.cpu().data.numpy()
                        # print("epoch %d, train acc %.4f" % (t, accuracy_score(pred_y, target_y)))
                
                                out = model(mfcc_ft_x_test.to(device), face_x_test.to(device), pert_x_test.to(device), \
                                        iris_x_test.to(device), physical_index_x_test.to(device), macberta_x_test.to(device), roberta_x_test.to(device), graph_x_test.to(device))
                                loss = loss_func(out,  y_test.to(device))
                                print("test loss:%f" %loss.detach().cpu().numpy())
                        
                                pred_y = out.detach().cpu().numpy()
                                for item in range(pred_y.shape[0]):
                                        if pred_y[item] > 0.5:
                                                pred_y[item] = 1
                                        else:
                                                pred_y[item] = 0
                                target_y = y_test.data.numpy()
                                test_score = accuracy_score(target_y, pred_y)
                                test_f1 = f1_score(target_y, pred_y, average='weighted')
                                if test_score > temp_acc:
                                        acc =  test_score
                                        f1 = test_f1
                                        temp_acc = test_score
                                        print('Round:{round} saved a model checkpoint at step: {step}. The best acc: {acc} and f1 score:{f1}'.format(round=i, time = j, step=t, acc=test_score,f1=test_f1))
                                # print("test acc %.4f" % test_score)
                #     print("test f1-score %f" %test_f1 )
                ACC.append(acc)
                F1.append(f1)
        print("LMF Max score:", np.max(ACC))
        print("LMF Min score:", np.min(ACC))
        print("LMF Avg score:", np.mean(ACC))
        print("LMF Max score:", np.max(F1))
        print("LMF Min score:", np.min(F1))
        print("LMF Avg F1-score:", np.mean(F1))
        total_acc.append(np.mean(ACC))
        total_f1.append(np.mean(F1))
# print("Average acc:{avg}, Max acc:{best}, Min acc:{mini}".format(avg=np.mean(total_acc), \
#         best=np.max(total_acc),mini=np.min(total_acc)))
        

The 0 group of 0
epoch 10, train acc 0.5000
test loss:0.258277
Round:0 saved a model checkpoint at step: 10. The best acc: 0.7419354838709677 and f1 score:0.6320191158900836
epoch 20, train acc 0.5000
test loss:0.258333
epoch 30, train acc 0.5000
test loss:0.258413
epoch 40, train acc 0.5000
test loss:0.258697
epoch 50, train acc 0.5000
test loss:0.259775
epoch 60, train acc 0.5000
test loss:0.263552
epoch 70, train acc 0.5000
test loss:0.275696
epoch 80, train acc 0.5041
test loss:0.312172
epoch 90, train acc 0.6321
test loss:0.418799
epoch 100, train acc 0.6953
test loss:0.433631
epoch 110, train acc 0.7496
test loss:0.436462
epoch 120, train acc 0.7934
test loss:0.424395
epoch 130, train acc 0.8225
test loss:0.415577
epoch 140, train acc 0.8420
test loss:0.410977
epoch 150, train acc 0.8728
test loss:0.395854
epoch 160, train acc 0.8922
test loss:0.401840
epoch 170, train acc 0.9068
test loss:0.401376
epoch 180, train acc 0.9157
test loss:0.404266
epoch 190, train acc 0.9230
test lo

KeyboardInterrupt: 

In [87]:
hidden_name = '32_16_512_1_1_'
drop_name = '0_0_0_0_0'
torch.save(model, 'LMF_' + hidden_name + drop_name + '.pth')

In [38]:
total_acc = []
for j in range(0, 10):
        ACC = []
        for i in range(0, 10):
                print("The {round} group of {time}".format(round=i, time = j))
                sm = SMOTE(random_state=0)
                ros = RandomOverSampler(random_state=0)
                face_x_train, face_y_train = ros.fit_resample(face_x_train_skf[i], face_y_train_skf[i])
                iris_x_train, iris_y_train = ros.fit_resample(iris_x_train_skf[i], iris_y_train_skf[i])
                physical_x_train, physical_y_train = ros.fit_resample(physical_x_train_skf[i], \
                        physical_y_train_skf[i])
                mfcc_ft_x_train, mfcc_ft_y_train = ros.fit_resample(mfcc_ft_x_train_skf[i], mfcc_ft_y_train_skf[i])
                text_x_train, text_y_train = ros.fit_resample(text_x_train_skf[i], text_y_train_skf[i])

                dtype = torch.FloatTensor

                face_x_train, face_x_test, face_y_train, face_y_test = np.array(face_x_train), np.array(face_x_test_skf[i]),\
                        np.array(face_y_train), np.array(face_y_test_skf[i])
                iris_x_train, iris_x_test, iris_y_train, iris_y_test = np.array(iris_x_train), np.array(iris_x_test_skf[i]),\
                        np.array(iris_y_train), np.array(iris_y_test_skf[i])
                physical_index_x_train, physical_index_x_test, physical_index_y_train, physical_index_y_test = \
                        np.array(physical_x_train), np.array(physical_x_test_skf[i]), \
                        np.array(physical_y_train), np.array(physical_y_test_skf[i])
                mfcc_ft_x_train, mfcc_ft_x_test, mfcc_ft_y_train, mfcc_ft_y_test = np.array(mfcc_ft_x_train), \
                        np.array(mfcc_ft_x_test_skf[i]), np.array(mfcc_ft_y_train), np.array(mfcc_ft_y_test_skf[i])
                text_x_train, text_x_test, text_y_train, text_y_test = np.array(text_x_train), np.array(text_x_test_skf[i]),\
                        np.array(text_y_train), np.array(text_y_test_skf[i])

                text_x_train, text_y_train = torch.from_numpy(text_x_train).type(dtype), \
                        torch.from_numpy(text_y_train).type(dtype)
                mfcc_ft_x_train, mfcc_ft_y_train = torch.from_numpy(mfcc_ft_x_train).type(dtype), \
                        torch.from_numpy(mfcc_ft_y_train).type(dtype)
                face_x_train, face_y_train = torch.from_numpy(face_x_train).type(dtype), \
                        torch.from_numpy(face_y_train).type(dtype)
                iris_x_train, iris_y_train = torch.from_numpy(iris_x_train).type(dtype), \
                        torch.from_numpy(iris_y_train).type(dtype)
                physical_index_x_train, physical_index_y_train = torch.from_numpy(physical_index_x_train).type(dtype), \
                        torch.from_numpy(physical_index_y_train).type(dtype)

                text_x_test, text_y_test = torch.from_numpy(text_x_test).type(dtype), \
                        torch.from_numpy(text_y_test).type(dtype)
                mfcc_ft_x_test, mfcc_ft_y_test = torch.from_numpy(mfcc_ft_x_test).type(dtype), \
                        torch.from_numpy(mfcc_ft_y_test).type(dtype)
                face_x_test, face_y_test = torch.from_numpy(face_x_test).type(dtype), \
                        torch.from_numpy(face_y_test).type(dtype)
                iris_x_test, iris_y_test = torch.from_numpy(iris_x_test).type(dtype), \
                        torch.from_numpy(iris_y_test).type(dtype)
                physical_index_x_test, physical_index_y_test = torch.from_numpy(physical_index_x_test).type(dtype), \
                        torch.from_numpy(physical_index_y_test).type(dtype)

                y_train = text_y_train
                y_test = text_y_test
                y_train = y_train.unsqueeze(-1)
                y_test = y_test.unsqueeze(-1)

                model = LMF(input_dims=(9828, 1404, 1024, 4, 4722), hidden_dims=(32,16,512,1,1),\
                        dropouts=(0, 0, 0, 0, 0, 0.5), output_dim=1, rank=1)

                device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
                model.to(device)

                loss_func = torch.nn.L1Loss()
                optimizer = torch.optim.Adam(model.parameters())

                epochs = 500
                temp_acc = 0  
                for t in range(epochs+1):
                        model.train(True)
                        out = model(mfcc_ft_x_train.to(device), face_x_train.to(device), text_x_train.to(device), iris_x_train.to(device), physical_index_x_train.to(device))
                        loss = loss_func(out, y_train.to(device))
                        # print("train loss:%f" %loss.detach().numpy())
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                        if t % 10 == 0 and t > 0:
                                model.train(False)
                                pred_y = out.detach().cpu().numpy()
                                for item in range(pred_y.shape[0]):
                                        if pred_y[item] > 0.5:
                                                pred_y[item] = 1
                                        else:
                                                pred_y[item] = 0
                                target_y = y_train.data.numpy()
                                print("epoch %d, train acc %.4f" % (t, accuracy_score(pred_y, target_y)))

                
                                out = model(mfcc_ft_x_test.to(device), face_x_test.to(device), text_x_test.to(device), iris_x_test.to(device), physical_index_x_test.to(device))
                                loss = loss_func(out,  y_test.to(device))
                                print("test loss:%f" %loss.detach().cpu().numpy())
                
                                pred_y = out.detach().cpu().numpy()
                                for item in range(pred_y.shape[0]):
                                        if pred_y[item] > 0.5:
                                                pred_y[item] = 1
                                        else:
                                                pred_y[item] = 0
                                target_y = y_test.data.numpy()
                                test_score = accuracy_score(target_y, pred_y)
                                if test_score > temp_acc:
                                        acc =  test_score
                                        temp_acc = test_score
                                        print(acc)
                                print("test acc %.6f" % test_score)
                                # test_f1 = f1_score(target_y, pred_y, average='weighted')
                #     print("test f1-score %f" %test_f1 )
                ACC.append(acc)
        # print("LMF Max score:", np.max(ACC))
        # print("LMF Min score:", np.min(ACC))
        print("LMF Avg score:", np.mean(ACC))
        total_acc.append(np.mean(ACC))
print("Average acc:{avg}, Best acc:{best}".format(avg=np.mean(total_acc), best=np.max(total_acc)))
        

The 0 group of 0
epoch 10, train acc 0.8547
test loss:0.234430
0.7857142857142857
test acc 0.785714
epoch 20, train acc 0.9567
test loss:0.254450
test acc 0.761905
epoch 30, train acc 0.9827
test loss:0.290430
test acc 0.761905
epoch 40, train acc 0.9931
test loss:0.351640
test acc 0.690476
epoch 50, train acc 0.9931
test loss:0.381479
test acc 0.738095
epoch 60, train acc 0.9931
test loss:0.403748
test acc 0.714286
epoch 70, train acc 0.9965
test loss:0.365567
test acc 0.666667
epoch 80, train acc 0.9965
test loss:0.404863
test acc 0.690476
epoch 90, train acc 0.9965
test loss:0.437672
test acc 0.714286
epoch 100, train acc 0.9965
test loss:0.408429
test acc 0.666667
epoch 110, train acc 0.9965
test loss:0.398363
test acc 0.714286
epoch 120, train acc 0.9965
test loss:0.425715
test acc 0.738095
epoch 130, train acc 0.9965
test loss:0.420091
test acc 0.714286
epoch 140, train acc 0.9965
test loss:0.417778
test acc 0.690476
epoch 150, train acc 0.9965
test loss:0.408497
test acc 0.69047

KeyboardInterrupt: 