In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score, roc_curve
from ML_util import stack_ydata_from_stride, latefusion
from tqdm import tqdm
import json

# Load the "Different" feature table, discard rows with missing values, then
# shuffle deterministically (seed 5) and renumber the index from zero.
path = "data/Result1/Different_All.csv"
data = (
    pd.read_csv(path)
    .dropna(axis=0)
    .sample(frac=1, random_state=5)
    .reset_index(drop=True)
)

# Preview the first rows as the cell output.
data.head()

Unnamed: 0,x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,...,pupil_left_std,pupil_right_avg,pupil_right_max,pupil_right_min,pupil_right_std,pupil_together_avg,pupil_together_max,pupil_together_min,pupil_together_std,participant
0,954.0,494.0,953.0,496.0,952.0,496.0,950.0,495.0,952.0,496.0,...,0.021262,2.926812,2.992,2.832,0.031753,2.957859,2.994,2.882,0.024621,22
1,941.0,566.0,945.0,564.0,935.0,558.0,950.0,577.0,950.0,559.0,...,0.152624,3.43643,3.82,3.02,0.231824,3.449081,3.712,3.263,0.157124,31
2,966.0,594.0,955.0,587.0,958.0,585.0,949.0,610.0,943.0,665.0,...,0.046774,2.826464,2.885,2.726,0.040748,2.930738,2.97,2.817,0.041995,12
3,851.222222,441.666667,852.333333,438.0,853.444444,434.333333,854.555556,430.666667,855.666667,427.0,...,0.092154,4.608286,4.761,4.437,0.075058,4.726923,4.835,4.484,0.080101,32
4,987.0,399.0,989.0,389.0,988.0,394.0,985.0,399.0,987.0,398.0,...,0.087643,2.636894,2.739,2.515,0.055507,2.730212,2.839,2.62,0.069882,12


In [2]:
class LoadResult1Data:
    """Load a Result1 feature table and run Random-Forest experiments on it.

    The CSV is read, rows containing any missing value are dropped, and the
    remaining rows are shuffled with a fixed seed. The ``participant`` column
    is the label; every column whose name is not in ``self.exceptlist`` is
    treated as a feature.
    """

    def __init__(self, path) -> None:
        """Read the CSV at ``path``, drop incomplete rows and shuffle (seed 5)."""
        data = pd.read_csv(path)
        data = data.dropna(axis=0)
        data = data.sample(frac=1, random_state=5).reset_index(drop=True)

        self.data = data
        # Names of the columns that take_x() leaves out of the feature matrix.
        self.exceptlist = ['participant']

    def set_domain(self, domain_name:str):
        """Choose which feature columns take_x() returns.

        Args:
            domain_name: ``'all'`` keeps every column except ``participant``;
                ``'mfcc'`` keeps ``mfcc1`` .. ``mfcc12``; any other name keeps
                whichever of ``<name>``, ``<name>_avg``, ``<name>_max``,
                ``<name>_min`` and ``<name>_std`` exist in the table.

        Raises:
            ValueError: if an MFCC column is missing, or if no column matches
                ``domain_name``. ``self.exceptlist`` is left unchanged then.
        """
        all_columns = self.data.columns.to_list()

        if domain_name == 'all':
            # Reset explicitly so that 'all' also undoes an earlier selection.
            self.exceptlist = ['participant']
            return

        if domain_name == "mfcc":
            wanted = [domain_name + str(i) for i in range(1, 13)]
            missing = [name for name in wanted if name not in all_columns]
            if missing:
                raise ValueError(f"MFCC columns not found in data: {missing}")
        else:
            candidates = [domain_name] + [
                domain_name + suffix for suffix in ('_avg', '_max', '_min', '_std')
            ]
            wanted = [name for name in candidates if name in all_columns]
            if not wanted:
                # Without this check every column would be excluded and
                # take_x() would silently return a frame with no features.
                raise ValueError(f"no column matches domain '{domain_name}'")

        # Build the exclusion list in one step so a failure above never leaves
        # it half-updated.
        wanted_names = set(wanted)
        self.exceptlist = [name for name in all_columns if name not in wanted_names]

    def take_x(self, data=None):
        """Return the feature columns of ``data`` (or of ``self.data``).

        ``self.data`` is used when ``data`` is None or an empty DataFrame.
        """
        frame = self.data if data is None or data.empty else data
        # The mask is built from the frame being sliced, so a caller-supplied
        # frame with a different column order or set is filtered correctly.
        return frame.loc[:, ~frame.columns.isin(self.exceptlist)]

    def take_y(self, data=None):
        """Return the ``participant`` label column of ``data`` (or of ``self.data``)."""
        frame = self.data if data is None or data.empty else data
        return frame['participant']

    def take_individual(self, individual, data=None):
        """Return one-vs-rest labels: 1 where participant == ``individual``, else 0."""
        frame = self.data if data is None or data.empty else data
        return (frame['participant'] == individual).astype('int64')

    def get_data(self):
        """Return the loaded (filtered and shuffled) DataFrame."""
        return self.data

    def split_data(self, train_data_ratio, valid_data_ratio, test_data_ratio):
        """Split row positions into train / validation / test index lists.

        The rows are cut into the 10 test folds of a ``StratifiedKFold``
        (stratified on ``participant``). The first ``round(train * 10)`` folds
        become the training block, the next ones up to
        ``round((train + valid) * 10)`` the validation block, the rest the
        test block.

        Args:
            train_data_ratio, valid_data_ratio, test_data_ratio: fractions in
                [0, 1] that sum to 1; they take effect in steps of 0.1.

        Returns:
            Three lists of integer row positions: (train, valid, test).

        Raises:
            ValueError: if a ratio is outside [0, 1] or the sum is not 1.
        """
        ratios = (train_data_ratio, valid_data_ratio, test_data_ratio)
        if any(ratio < 0 or ratio > 1 for ratio in ratios):
            raise ValueError("train, valid and test ratios should each be within [0, 1].")
        # Tolerant comparison: e.g. 0.7 + 0.2 + 0.1 is 0.9999999999999999 in
        # floating point and must not be rejected.
        if not np.isclose(sum(ratios), 1.0, rtol=0.0, atol=1e-9):
            raise ValueError("train + valid + test should be equal 1.")

        n_folds = 10
        kf = StratifiedKFold(n_splits=n_folds)
        x_data = self.take_x().to_numpy()
        y_data = self.take_y().to_numpy()
        index_blocks = [test.tolist() for _, test in kf.split(x_data, y_data)]

        # round() rather than int(): (0.7 + 0.2) * 10 evaluates to
        # 8.999999999999998 and truncation would misplace a whole fold.
        train_end = int(round(float(train_data_ratio) * n_folds))
        valid_end = int(round(float(train_data_ratio + valid_data_ratio) * n_folds))

        def _flatten(blocks):
            # Concatenate consecutive folds into one flat index list.
            return [index for block in blocks for index in block]

        train_data_block = _flatten(index_blocks[:train_end])
        valid_data_block = _flatten(index_blocks[train_end:valid_end])
        test_data_block = _flatten(index_blocks[valid_end:])

        return train_data_block, valid_data_block, test_data_block

    def ml_train(self, train_data_block):
        """Fit a RandomForestClassifier (random_state=0) on the given row positions."""
        x_train_data = self.take_x().to_numpy()[train_data_block]
        y_train_data = self.take_y().to_numpy()[train_data_block]
        # warnings.filterwarnings('ignore')
        rf_model = RandomForestClassifier(random_state=0)  # Random Forest
        rf_model.fit(x_train_data, y_train_data)
        return rf_model

    def ml_test(self, model:"RandomForestClassifier", test_data_block):
        """Score ``model`` on the given row positions.

        Returns:
            dict with ``"acc"`` (accuracy) and ``"f1"`` (macro-averaged F1).
        """
        test_x = self.take_x().to_numpy()[test_data_block]
        test_y = self.take_y().to_numpy()[test_data_block]
        pred_y = model.predict(test_x)
        accuracy = accuracy_score(test_y, pred_y)
        f1 = f1_score(test_y, pred_y, average='macro')
        results = {"acc":accuracy, "f1":f1}
        return results

    def CrossValidation(self):
        """Run stratified 10-fold cross-validation and print mean accuracy and F1."""
        kf = StratifiedKFold(n_splits=10)
        x_data = self.take_x().to_numpy()
        y_data = self.take_y().to_numpy()
        accuracyList = []
        f1List = []
        for train, test in kf.split(x_data, y_data):
            thisModel = self.ml_train(train)
            thisResult = self.ml_test(thisModel, test)
            accuracyList.append(thisResult['acc'])
            f1List.append(thisResult['f1'])
        print("Accuracy", np.mean(accuracyList))
        print("F1", np.mean(f1List))

    def get_indexlist(self):
        """Return the DataFrame index as a list."""
        return self.data.index.to_list()

    def ml_test_individual_latefusion(self, train_data_ratio, valid_data_ratio, test_data_ratio, individual):
        """Train a one-vs-rest forest for ``individual`` and return its late-fusion EER.

        ``compute_eer`` is not defined in this class; it must exist at module
        level by the time this method is called.
        """
        trb, _, teb = self.split_data(train_data_ratio, valid_data_ratio, test_data_ratio)

        # Build the feature matrix and the binary target once, then slice.
        x_all = self.take_x().to_numpy()
        y_all = self.take_individual(individual).to_numpy()
        train_x, train_y = x_all[trb], y_all[trb]
        test_x, test_y = x_all[teb], y_all[teb]

        # warnings.filterwarnings('ignore')
        rf_model = RandomForestClassifier(random_state=0)  # Random Forest
        rf_model.fit(train_x, train_y)

        sampleSize = len(teb)
        # NOTE(review): stack_ydata_from_stride / latefusion come from ML_util;
        # presumably 3 is the number of consecutive samples fused - confirm.
        stack_index, stack_y = stack_ydata_from_stride(test_y, 3, sampleSize)

        results = latefusion(rf_model, test_x, stack_index, stack_y)
        # Column 1 of the fused probabilities is used as the positive-class score.
        y_scores = np.array(results['multiply_proba'])[:, 1]

        eer = compute_eer(stack_y, y_scores)
        return eer


In [14]:
def compute_eer(y_true, y_scores):
    """Return the false-positive rate at the ROC point where FPR and FRR are closest.

    Args:
        y_true: binary ground-truth labels.
        y_scores: scores for the positive class.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_scores)
    false_rej_rate = 1 - true_pos_rate
    # Index of the operating point with the smallest |FRR - FPR|, ignoring NaNs.
    crossover = np.nanargmin(np.abs(false_rej_rate - false_pos_rate))
    return false_pos_rate[crossover]

In [20]:
# Sanity check of set_domain: keep only the mfcc1..mfcc12 columns and show
# the resulting feature frame. `path` is not set in this cell; it is whatever
# a previously executed cell left in the kernel.
test1 = LoadResult1Data(path)
test1.set_domain("mfcc")
test1.take_x()

Unnamed: 0,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12
0,540492.388143,301065.056335,175705.949479,-23433.816881,-8703.613937,80033.564828,34278.115101,193.161685,-17377.442583,-8091.885376,463.990473,-6629.912974
1,428247.585872,253669.088271,114074.590541,7332.881977,-259.127266,9158.536101,28090.178949,1621.189095,-8855.748057,822.617423,725.226580,8608.905349
2,665844.602931,309549.556252,157585.130819,-44480.185153,14682.319897,63519.250222,-6122.357675,-21235.430671,-13242.129214,-30301.820380,-28204.055364,-40920.208176
3,272217.012224,57878.969666,29878.048661,13877.215061,25931.636403,-10061.717459,-4425.320890,1131.410250,-13236.426911,6379.289146,4070.461031,-2426.862106
4,471461.674071,208665.197740,99125.963057,46989.094344,21204.330549,-27138.593894,-19474.258639,17668.386988,35426.906157,7677.679485,34201.705315,103690.926724
...,...,...,...,...,...,...,...,...,...,...,...,...
32368,231199.781142,73294.473495,-5631.945029,-12361.869496,-21038.575734,83.492805,41736.617457,51597.926025,56054.420137,9537.258336,6322.634531,-3489.388947
32369,332343.141670,204802.736966,106861.131448,44151.780603,6872.771359,2590.120038,-15334.449328,-27094.857242,-15385.919045,-29751.454854,-7320.503832,82782.856395
32370,260620.639778,124496.358946,69565.956169,5664.623612,-1641.467686,-21615.976832,8702.900479,10079.431902,-3414.769237,-30468.914403,-16780.569104,-22730.464742
32371,398042.009666,293943.081846,152312.342337,161108.707635,129540.350056,77701.020281,27174.062795,11851.603989,9761.860139,25960.463781,2285.360637,30750.454981


In [23]:
# Feature domains to evaluate one at a time; 'all' uses every feature column.
gazeFeatureList = [
    'gaze_motion', 'gaze_velocity', 'gaze_rotation', 'reaction_time',
    'fixation_duration', 'fixation_dispersion', 'fixation_count',
    'saccade_duration', 'saccade_velocity', 'saccade_amplitude',
    'saccade_dispersion', 'saccade_count',
    'mfcc', 'pupil_left', 'pupil_right', 'pupil_together', 'all',
]
path = "data/Result1/Different_All.csv"
train_data_ratio, valid_data_ratio, test_data_ratio = 0.9, 0, 0.1

for gazeFeature in gazeFeatureList:
    print("========")
    print(gazeFeature)
    # A fresh loader per domain: set_domain changes the loader's column filter.
    differentResult1 = LoadResult1Data(path)
    differentResult1.set_domain(gazeFeature)
    trb, vdb, teb = differentResult1.split_data(
        train_data_ratio, valid_data_ratio, test_data_ratio
    )
    model = differentResult1.ml_train(trb)
    # print("train done")
    testResult = differentResult1.ml_test(model, teb)
    this_acc, this_f1 = testResult['acc'], testResult['f1']
    print(f"Acc: {this_acc}, F1: {this_f1}")

gaze_motion
Acc: 0.05872602415344542, F1: 0.058337808106777966
gaze_velocity
Acc: 0.18233483305706844, F1: 0.17873416868591963
gaze_rotation
Acc: 0.0639355908122188, F1: 0.06373176010658285
reaction_time
Acc: 0.07695950745915227, F1: 0.06586245639987988
fixation_duration
Acc: 0.10158654984608098, F1: 0.09964146663030206
fixation_dispersion
Acc: 0.09069381955955481, F1: 0.08950243086155549
fixation_count
Acc: 0.07814349988160076, F1: 0.0285268187836659
saccade_duration
Acc: 0.13544873312810798, F1: 0.11694945240369403
saccade_velocity
Acc: 0.08453705896282264, F1: 0.07238464995550416
saccade_amplitude
Acc: 0.09306180440445182, F1: 0.08136289340130842
saccade_dispersion
Acc: 0.1004025574236325, F1: 0.09720227105794713
saccade_count
Acc: 0.07388112716078617, F1: 0.027866216602870513
mfcc
Acc: 0.18422922093298602, F1: 0.17281048077526986
pupil_left
Acc: 0.20909306180440446, F1: 0.20421781830464644
pupil_right
Acc: 0.22969452995500828, F1: 0.22870680278035777
pupil_together
Acc: 0.206488278

In [16]:
# Same domain list as the hold-out experiment, here cross-validated on the
# "Similar" table.
gazeFeatureList = [
    'gaze_motion', 'gaze_velocity', 'gaze_rotation', 'reaction_time',
    'fixation_duration', 'fixation_dispersion', 'fixation_count',
    'saccade_duration', 'saccade_velocity', 'saccade_amplitude',
    'saccade_dispersion', 'saccade_count',
    'mfcc', 'pupil_left', 'pupil_right', 'pupil_together', 'all',
]
path = "data/Result1/Similar_All.csv"

for gazeFeature in gazeFeatureList:
    print("========")
    print(gazeFeature)
    # Reload per domain so each run starts from the loader's default filter.
    differentResult1 = LoadResult1Data(path)
    differentResult1.set_domain(gazeFeature)
    differentResult1.CrossValidation()


gaze_motion


10it [01:40, 10.05s/it]


Accuracy 0.06310777389979932
F1 0.06293964482032828
gaze_velocity


10it [01:39,  9.95s/it]


Accuracy 0.18073676375097003
F1 0.1751139592551451
gaze_rotation


10it [01:42, 10.27s/it]


Accuracy 0.06088388332634
F1 0.06085569950959557
reaction_time


10it [00:16,  1.62s/it]


Accuracy 0.06903928728645756
F1 0.058314228896421384
fixation_duration


10it [01:10,  7.04s/it]


Accuracy 0.09822985580369656
F1 0.09588404058925407
fixation_dispersion


10it [01:04,  6.44s/it]


Accuracy 0.09313309683834402
F1 0.09282425234808706
fixation_count


10it [00:06,  1.62it/s]


Accuracy 0.07120105833129638
F1 0.025509500715478268
saccade_duration


10it [00:22,  2.21s/it]


Accuracy 0.1331356022274111
F1 0.11657443148407313
saccade_velocity


10it [01:41, 10.10s/it]


Accuracy 0.07988131554106385
F1 0.06969570159626723
saccade_amplitude


10it [01:25,  8.56s/it]


Accuracy 0.08760373369755928
F1 0.07713388630915516
saccade_dispersion


10it [01:31,  9.14s/it]


Accuracy 0.09603663859600516
F1 0.09431731397608724
saccade_count


10it [00:06,  1.57it/s]


Accuracy 0.07722462997807737
F1 0.03131055142973301
mfcc


10it [02:43, 16.32s/it]


Accuracy 0.18116912940878352
F1 0.1686709691388652
pupil_left


10it [01:40, 10.09s/it]


Accuracy 0.22793706302379663
F1 0.22186111332698957
pupil_right


10it [01:40, 10.08s/it]


Accuracy 0.24146706081226127
F1 0.23662942429824607
pupil_together


10it [01:41, 10.12s/it]


Accuracy 0.21832983094062
F1 0.2127187222973584
all


10it [07:26, 44.66s/it]

Accuracy 0.766749842530668
F1 0.7643103194328715





In [14]:
# One CSV per modality; each is cross-validated with all of its columns.
pathList = [
    "EyeMovement",
    'Fixation',
    "MFCC",
    'Pupil',
    "RawGaze",
    "Saccade",
]
# The ratios are set here but CrossValidation() itself does not read them.
train_data_ratio, valid_data_ratio, test_data_ratio = 0.9, 0, 0.1

for pathName in pathList:
    print("========")
    print(pathName)
    path = f"data/Result1/Similar_{pathName}.csv"
    differentResult1 = LoadResult1Data(path)
    differentResult1.set_domain('all')
    differentResult1.CrossValidation()



EyeMovement


10it [02:23, 14.39s/it]


Accuracy 0.23550420168067226
F1 0.22534323077144203
Fixation


10it [01:39,  9.95s/it]


Accuracy 0.23613445378151257
F1 0.23014879205611166
MFCC


10it [02:56, 17.67s/it]


Accuracy 0.18478391356542617
F1 0.1712562039816248
Pupil


10it [02:13, 13.33s/it]


Accuracy 0.5051125488317121
F1 0.4988625959293559
RawGaze


10it [06:25, 38.50s/it]


Accuracy 0.4900360144057623
F1 0.4810797648221678
Saccade


10it [02:22, 14.22s/it]

Accuracy 0.231812725090036
F1 0.22263431810322212





Both options: Similar and Different data combined

In [11]:
gazeFeatureList = [
    'gaze_motion', 'gaze_velocity', 'gaze_rotation', 'reaction_time',
    'fixation_duration', 'fixation_dispersion', 'fixation_count',
    'saccade_duration', 'saccade_velocity', 'saccade_amplitude',
    'saccade_dispersion', 'saccade_count',
    'mfcc', 'pupil_left', 'pupil_right', 'pupil_together', 'all',
]

pathSVC = "data/Result1/Similar_All.csv"
pathMVC = "data/Result1/Different_All.csv"

# Take a seeded random half of each table (after dropping incomplete rows).
dataMVC = (
    pd.read_csv(pathMVC)
    .dropna(axis=0)
    .sample(frac=0.5, random_state=5)
    .reset_index(drop=True)
)
dataSVC = (
    pd.read_csv(pathSVC)
    .dropna(axis=0)
    .sample(frac=0.5, random_state=5)
    .reset_index(drop=True)
)

# Stack both halves and shuffle the combined rows.
dataBoth = (
    pd.concat([dataSVC, dataMVC])
    .sample(frac=1, random_state=5)
    .reset_index(drop=True)
)

# The loader is built from a path only to obtain an instance; its table is
# then replaced by the combined frame.
Result1Both = LoadResult1Data(pathMVC)
Result1Both.data = dataBoth


# Disabled per-domain sweep, kept for reference:
# for gazeFeature in gazeFeatureList:
#     print("========")
#     print(gazeFeature)
#     Result1Both.data = dataBoth
#     Result1Both.set_domain(gazeFeature)
#     Result1Both.CrossValidation()

gazeFeature = 'all'
print("========")
print(gazeFeature)
Result1Both.set_domain(gazeFeature)
print(Result1Both.take_x().columns.to_list())
Result1Both.CrossValidation()

all
['x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4', 'x5', 'y5', 'x6', 'y6', 'x7', 'y7', 'x8', 'y8', 'x9', 'y9', 'x10', 'y10', 'x11', 'y11', 'x12', 'y12', 'x13', 'y13', 'x14', 'y14', 'x15', 'y15', 'x16', 'y16', 'x17', 'y17', 'x18', 'y18', 'x19', 'y19', 'x20', 'y20', 'x21', 'y21', 'x22', 'y22', 'x23', 'y23', 'x24', 'y24', 'x25', 'y25', 'x26', 'y26', 'x27', 'y27', 'x28', 'y28', 'x29', 'y29', 'x30', 'y30', 'x31', 'y31', 'x32', 'y32', 'x33', 'y33', 'x34', 'y34', 'x35', 'y35', 'x36', 'y36', 'x37', 'y37', 'x38', 'y38', 'x39', 'y39', 'x40', 'y40', 'x41', 'y41', 'x42', 'y42', 'x43', 'y43', 'x44', 'y44', 'x45', 'y45', 'x46', 'y46', 'x47', 'y47', 'x48', 'y48', 'x49', 'y49', 'x50', 'y50', 'x51', 'y51', 'x52', 'y52', 'x53', 'y53', 'x54', 'y54', 'x55', 'y55', 'x56', 'y56', 'x57', 'y57', 'x58', 'y58', 'x59', 'y59', 'x60', 'y60', 'x61', 'y61', 'x62', 'y62', 'x63', 'y63', 'x64', 'y64', 'x65', 'y65', 'x66', 'y66', 'x67', 'y67', 'x68', 'y68', 'x69', 'y69', 'x70', 'y70', 'x71', 'y71', 'x72', 'y72', 'x73'

In [5]:
pathList = [
    "EyeMovement",
    'Fixation',
    "MFCC",
    'Pupil',
    "RawGaze",
    "Saccade",
]
# The ratios are set here but CrossValidation() itself does not read them.
train_data_ratio, valid_data_ratio, test_data_ratio = 0.9, 0, 0.1

for pathName in pathList:
    print("========")
    print(pathName)
    pathSVC = f"data/Result1/Similar_{pathName}.csv"
    pathMVC = f"data/Result1/Different_{pathName}.csv"

    # Seeded random half of each table, incomplete rows dropped first.
    dataMVC, dataSVC = (
        pd.read_csv(csv_path)
        .dropna(axis=0)
        .sample(frac=0.5, random_state=5)
        .reset_index(drop=True)
        for csv_path in (pathMVC, pathSVC)
    )

    # Stack both halves and shuffle the combined rows.
    dataBoth = (
        pd.concat([dataSVC, dataMVC])
        .sample(frac=1, random_state=5)
        .reset_index(drop=True)
    )

    # The loader instance is created from a path, then given the mixed table.
    BothResult1 = LoadResult1Data(pathSVC)
    BothResult1.data = dataBoth
    BothResult1.set_domain('all')
    BothResult1.CrossValidation()


EyeMovement
Accuracy 0.2329515877147319
F1 0.22176680583772929
Fixation
Accuracy 0.23198854763144197
F1 0.22640888018584468
MFCC
Accuracy 0.18630921395106717
F1 0.17225398973172384
Pupil
Accuracy 0.493002846235044
F1 0.48717002267120435
RawGaze
Accuracy 0.48027069234773556
F1 0.47005001660245177
Saccade
Accuracy 0.2377407600208225
F1 0.2268864579911758


Feature Selection by Sequential Feature Selector

In [None]:
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.ensemble import RandomForestClassifier

# Sequential feature selection over every feature column of the "Different"
# table, scored with a Random Forest under 10-fold CV on all CPU cores.
# This cell has no recorded execution count or output.
path = "data/Result1/Different_All.csv"
thisData = LoadResult1Data(path)
thisData.set_domain("all")
# X stays a DataFrame (column names preserved); y is the participant label.
X = thisData.take_x()
y = thisData.take_y()

rf_model = RandomForestClassifier(random_state=0)  # Random Forest
# NOTE(review): how many features "auto" keeps depends on the installed
# scikit-learn version and its `tol` default - confirm against the docs.
sfs = SequentialFeatureSelector(rf_model, n_features_to_select="auto", cv=10, n_jobs=-1)
sfs.fit(X, y)
# The mask returned here is discarded: only the cell's last expression is displayed.
sfs.get_support()
# Shape of X reduced to the selected columns (the cell output).
sfs.transform(X).shape

In [13]:
# List every column name of the raw "Different" table (read directly with
# pandas, so no NaN filtering or shuffling is applied here).
path = "data/Result1/Different_All.csv"
df = pd.read_csv(path)
print(df.columns.to_list())

['x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4', 'x5', 'y5', 'x6', 'y6', 'x7', 'y7', 'x8', 'y8', 'x9', 'y9', 'x10', 'y10', 'x11', 'y11', 'x12', 'y12', 'x13', 'y13', 'x14', 'y14', 'x15', 'y15', 'x16', 'y16', 'x17', 'y17', 'x18', 'y18', 'x19', 'y19', 'x20', 'y20', 'x21', 'y21', 'x22', 'y22', 'x23', 'y23', 'x24', 'y24', 'x25', 'y25', 'x26', 'y26', 'x27', 'y27', 'x28', 'y28', 'x29', 'y29', 'x30', 'y30', 'x31', 'y31', 'x32', 'y32', 'x33', 'y33', 'x34', 'y34', 'x35', 'y35', 'x36', 'y36', 'x37', 'y37', 'x38', 'y38', 'x39', 'y39', 'x40', 'y40', 'x41', 'y41', 'x42', 'y42', 'x43', 'y43', 'x44', 'y44', 'x45', 'y45', 'x46', 'y46', 'x47', 'y47', 'x48', 'y48', 'x49', 'y49', 'x50', 'y50', 'x51', 'y51', 'x52', 'y52', 'x53', 'y53', 'x54', 'y54', 'x55', 'y55', 'x56', 'y56', 'x57', 'y57', 'x58', 'y58', 'x59', 'y59', 'x60', 'y60', 'x61', 'y61', 'x62', 'y62', 'x63', 'y63', 'x64', 'y64', 'x65', 'y65', 'x66', 'y66', 'x67', 'y67', 'x68', 'y68', 'x69', 'y69', 'x70', 'y70', 'x71', 'y71', 'x72', 'y72', 'x73', 'y

In [None]:
# Save the features chosen by the fitted selector `sfs`.
# get_support() returns a boolean mask, which str.join cannot consume, so the
# mask is mapped onto the column names of `X` (the frame `sfs` was fitted on
# in the feature-selection cell) and those names are written space-separated.
selected_feature = sfs.get_support()
selected_names = X.columns[selected_feature].to_list()
str_sf = " ".join(map(str, selected_names))
with open("data/Result1/SelectedFeature.txt", 'w') as f:
    f.write(str_sf)

AttributeError: 'SequentialFeatureSelector' object has no attribute 'support_'

In [16]:

# pathList = ["data/Result1/Different_All_Encoding.csv", "data/Result1/Different_EyeMovement_Encoding.csv", "data/Result1/Different_Fixation_Encoding.csv", 
#             "data/Result1/Different_MFCC_Encoding.csv", "data/Result1/Different_Pupil_Encoding.csv", "data/Result1/Different_RawGaze_Encoding.csv",
#             "data/Result1/Different_Saccade_Encoding.csv"]

pathList = ["data/Result1/Different_All.csv", "data/Result1/Different_EyeMovement.csv", "data/Result1/Different_Fixation.csv", 
            "data/Result1/Different_MFCC.csv", "data/Result1/Different_Pupil.csv", "data/Result1/Different_RawGaze.csv",
            "data/Result1/Different_Saccade.csv"]

# The split ratios are the same for every file, so they are set once up front.
train_data_ratio = 0.9
valid_data_ratio = 0
test_data_ratio = 0.1

for path in pathList:
    print(path)
    result1_df=LoadResult1Data(path)
    trb, vdb, teb = result1_df.split_data(train_data_ratio, valid_data_ratio, test_data_ratio)

    # The feature matrix does not depend on the participant: build it once per
    # file instead of re-copying it on each of the 34 iterations.
    x_all = result1_df.take_x().to_numpy()
    train_x = x_all[trb]
    test_x = x_all[teb]

    for i in range(34):
        # One-vs-rest target: 1 for participant i, 0 for everyone else.
        y_all = result1_df.take_individual(i).to_numpy()
        train_y = y_all[trb]
        test_y = y_all[teb]

        rf_model = RandomForestClassifier(random_state=0)  # Random Forest
        rf_model.fit(train_x, train_y)
        # Probability of the positive class is the verification score.
        y_scores = rf_model.predict_proba(test_x)[:, 1]
        eer = compute_eer(test_y, y_scores)
        print(f"participant {i}, {eer}")

data/Result1/Different_All.csv
participant 0, 0.115966796875
participant 1, 0.07158509861212564
participant 2, 0.03028083028083028
participant 3, 0.09377289377289377
participant 4, 0.1078838174273859
participant 5, 0.1171875
participant 6, 0.06129426129426129
participant 7, 0.09201855015865268
participant 8, 0.14188034188034188
participant 9, 0.11575091575091576
participant 10, 0.030472940029254023
participant 11, 0.12560856864654332
participant 12, 0.0615234375
participant 13, 0.047619047619047616
participant 14, 0.07936507936507936
participant 15, 0.0485958485958486
participant 16, 0.056654456654456654
participant 17, 0.05401459854014599
participant 18, 0.1072039072039072
participant 19, 0.059152872444011684
participant 20, 0.04468864468864469
participant 21, 0.1426129426129426
participant 22, 0.0971916971916972
participant 23, 0.02531645569620253
participant 24, 0.04844206426484907
participant 25, 0.017094017094017096
participant 26, 0.10151293313811616
participant 27, 0.11819291819

In [8]:

pathList = ["data/Result1/Similar_All_Encoding.csv", "data/Result1/Similar_EyeMovement_Encoding.csv", "data/Result1/Similar_Fixation_Encoding.csv", 
            "data/Result1/Similar_MFCC_Encoding.csv", "data/Result1/Similar_Pupil_Encoding.csv", "data/Result1/Similar_RawGaze_Encoding.csv",
            "data/Result1/Similar_Saccade_Encoding.csv"]

# The split ratios are the same for every file, so they are set once up front.
train_data_ratio = 0.9
valid_data_ratio = 0
test_data_ratio = 0.1

for path in pathList:
    print(path)
    result1_df=LoadResult1Data(path)
    trb, vdb, teb = result1_df.split_data(train_data_ratio, valid_data_ratio, test_data_ratio)

    # The feature matrix does not depend on the participant: build it once per
    # file instead of re-copying it on each of the 34 iterations.
    x_all = result1_df.take_x().to_numpy()
    train_x = x_all[trb]
    test_x = x_all[teb]

    for i in range(34):
        # One-vs-rest target: 1 for participant i, 0 for everyone else.
        y_all = result1_df.take_individual(i).to_numpy()
        train_y = y_all[trb]
        test_y = y_all[teb]

        rf_model = RandomForestClassifier(random_state=0)  # Random Forest
        rf_model.fit(train_x, train_y)
        # Probability of the positive class is the verification score.
        y_scores = rf_model.predict_proba(test_x)[:, 1]
        eer = compute_eer(test_y, y_scores)
        print(f"participant {i}, {eer}")

data/Result1/Similar_All_Encoding.csv
participant 0, 0.16661357120101944
participant 1, 0.04251269035532995
participant 2, 0.044281618349792926
participant 3, 0.12742911755336095
participant 4, 0.11302133078637376
participant 5, 0.13853503184713375
participant 6, 0.05097164702134438
participant 7, 0.11436763300414145
participant 8, 0.11819050653074227
participant 9, 0.1207390888818095
participant 10, 0.046193055113093344
participant 11, 0.08314749920356801
participant 12, 0.03751987281399046
participant 13, 0.03791016247212488
participant 14, 0.08282892640968462
participant 15, 0.1120607787274454
participant 16, 0.059891685250079645
participant 17, 0.05356576862123613
participant 18, 0.0821917808219178
participant 19, 0.06817457789104811
participant 20, 0.038228735266008285
participant 21, 0.11181905065307422
participant 22, 0.07337992376111817
participant 23, 0.05001592863969417
participant 24, 0.05573248407643312
participant 25, 0.030264415418923225
participant 26, 0.0874125874125874

In [18]:

pathList = ["data/Result1/Similar_All.csv", "data/Result1/Similar_EyeMovement.csv", "data/Result1/Similar_Fixation.csv", 
            "data/Result1/Similar_MFCC.csv", "data/Result1/Similar_Pupil.csv", "data/Result1/Similar_RawGaze.csv",
            "data/Result1/Similar_Saccade.csv"]

# The split ratios are the same for every file, so they are set once up front.
train_data_ratio = 0.9
valid_data_ratio = 0
test_data_ratio = 0.1

for path in pathList:
    print(path)
    result1_df=LoadResult1Data(path)
    trb, vdb, teb = result1_df.split_data(train_data_ratio, valid_data_ratio, test_data_ratio)

    # The feature matrix does not depend on the participant: build it once per
    # file instead of re-copying it on each of the 34 iterations.
    x_all = result1_df.take_x().to_numpy()
    train_x = x_all[trb]
    test_x = x_all[teb]

    for i in range(34):
        # One-vs-rest target: 1 for participant i, 0 for everyone else.
        y_all = result1_df.take_individual(i).to_numpy()
        train_y = y_all[trb]
        test_y = y_all[teb]

        rf_model = RandomForestClassifier(random_state=0)  # Random Forest
        rf_model.fit(train_x, train_y)
        # Probability of the positive class is the verification score.
        y_scores = rf_model.predict_proba(test_x)[:, 1]
        eer = compute_eer(test_y, y_scores)
        print(f"participant {i}, {eer}")

data/Result1/Similar_All.csv
participant 0, 0.13889773813316342
participant 1, 0.05615482233502538
participant 2, 0.03376871615164065
participant 3, 0.13252628225549537
participant 4, 0.11524992040751353
participant 5, 0.13343949044585987
participant 6, 0.04300732717425932
participant 7, 0.11277476903472443
participant 8, 0.11882765211850908
participant 9, 0.11659764256132527
participant 10, 0.039821599235425297
participant 11, 0.06817457789104811
participant 12, 0.04642289348171701
participant 13, 0.046193055113093344
participant 14, 0.08665179993628544
participant 15, 0.098448876226654
participant 16, 0.05957311245619624
participant 17, 0.045007923930269415
participant 18, 0.08346607199745142
participant 19, 0.05129021981522778
participant 20, 0.0407773176170755
participant 21, 0.1223319528512265
participant 22, 0.07496823379923762
participant 23, 0.05001592863969417
participant 24, 0.06687898089171974
participant 25, 0.03663587129659127
participant 26, 0.08773045136681501
participan

In [9]:
# 10-fold cross-validation on the encoded "Similar" table. set_domain is not
# called, so the loader's initial filter applies: every column except
# 'participant' is used as a feature.
# Note: the variable is named differentResult1 although it holds Similar data.
path = "data/Result1/Similar_All_Encoding.csv"
differentResult1 = LoadResult1Data(path)
differentResult1.CrossValidation()

10it [06:52, 41.26s/it]

Accuracy 0.7655759542183558
F1 0.7632256964634287





In [11]:
# 10-fold cross-validation on the encoded "Different" table. set_domain is not
# called, so the loader's initial filter applies: every column except
# 'participant' is used as a feature.
path = "data/Result1/Different_All_Encoding.csv"
differentResult1 = LoadResult1Data(path)
differentResult1.CrossValidation()

10it [09:26, 56.64s/it]

Accuracy 0.7647561054094102
F1 0.7628027298482525





Encoding: Similar and Different data combined

In [2]:
import pandas as pd

pathMVC = "data/Result1/Different_All_Encoding.csv"
pathSVC = "data/Result1/Similar_All_Encoding.csv"

# Seeded random half of each encoded table, incomplete rows dropped first.
dataMVC = (
    pd.read_csv(pathMVC)
    .dropna(axis=0)
    .sample(frac=0.5, random_state=5)
    .reset_index(drop=True)
)
dataSVC = (
    pd.read_csv(pathSVC)
    .dropna(axis=0)
    .sample(frac=0.5, random_state=5)
    .reset_index(drop=True)
)

# Stack both halves and shuffle the combined rows.
dataBoth = (
    pd.concat([dataSVC, dataMVC])
    .sample(frac=1, random_state=5)
    .reset_index(drop=True)
)

# Only the size of the combined table is reported; the run itself is disabled.
print(dataBoth.shape)
# Result1Both = LoadResult1Data(pathMVC)
# Result1Both.data = dataBoth

# Result1Both.CrossValidation()

(37304, 293)


In [None]:
path = "data/Result1/Different_All.csv"
# Not used below; kept because other cells read `differentResult1` from the kernel.
differentResult1 = LoadResult1Data(path)


result1_df=LoadResult1Data(path)

train_data_ratio = 0.9
valid_data_ratio = 0
test_data_ratio = 0.1

trb, vdb, teb = result1_df.split_data(train_data_ratio, valid_data_ratio, test_data_ratio)

# The feature matrix does not depend on the participant: build it once instead
# of re-copying it on each of the 34 iterations.
x_all = result1_df.take_x().to_numpy()
train_x = x_all[trb]
test_x = x_all[teb]

for i in range(34):
    # One-vs-rest target: 1 for participant i, 0 for everyone else.
    y_all = result1_df.take_individual(i).to_numpy()
    train_y = y_all[trb]
    test_y = y_all[teb]

    # warnings.filterwarnings('ignore')
    rf_model = RandomForestClassifier(random_state=0)  # Random Forest
    rf_model.fit(train_x, train_y)
    # Probability of the positive class is the verification score.
    y_scores = rf_model.predict_proba(test_x)[:, 1]
    eer = compute_eer(test_y, y_scores)
    print(f"participant {i}, {eer}")

In [21]:
# Late-fusion EER for each participant id 0..33 on the "Different" table.
path = "data/Result1/Different_All.csv"
differentResult1 = LoadResult1Data(path)
train_data_ratio, valid_data_ratio, test_data_ratio = 0.9, 0, 0.1

for i in range(34):
    eer = differentResult1.ml_test_individual_latefusion(
        train_data_ratio, valid_data_ratio, test_data_ratio, i
    )
    print(f"participant {i}, {eer}")

participant 0, 0.016588532275513886
participant 1, 0.006481814908174289
participant 2, 0.0
participant 3, 0.0569780021637216
participant 4, 0.025243418680129824
participant 5, 0.02993148214929679
participant 6, 0.0010818608005769925
participant 7, 0.06419040750090155
participant 8, 0.10638297872340426
participant 9, 0.03281644428416877
participant 10, 0.001081081081081081
participant 11, 0.06661865322290242
participant 12, 0.020555355210962856
participant 13, 0.0018031013342949874
participant 14, 0.02271907681211684
participant 15, 0.016588532275513886
participant 16, 0.003966822935448972
participant 17, 0.0010799136069114472
participant 18, 0.05192931842769564
participant 19, 0.0028808066258552397
participant 20, 0.00288496213487198
participant 21, 0.06130544536602957
participant 22, 0.007573025604038947
participant 23, 0.007562117392870004
participant 24, 0.020165646380986675
participant 25, 0.02271907681211684
participant 26, 0.055155010814708
participant 27, 0.08186080057699242
par

In [22]:
# Late-fusion EER for each participant id 0..33 on the "Similar" table.
# Note: the variable is named differentResult1 although it holds Similar data.
path = "data/Result1/Similar_All.csv"
differentResult1 = LoadResult1Data(path)
train_data_ratio, valid_data_ratio, test_data_ratio = 0.9, 0, 0.1

for i in range(34):
    eer = differentResult1.ml_test_individual_latefusion(
        train_data_ratio, valid_data_ratio, test_data_ratio, i
    )
    print(f"participant {i}, {eer}")

participant 0, 0.045155221072436504
participant 1, 0.0014084507042253522
participant 2, 0.0023518344308560675
participant 3, 0.022107243650047036
participant 4, 0.01317027281279398
participant 5, 0.025870178739416744
participant 6, 0.03386641580432737
participant 7, 0.09454374412041393
participant 8, 0.018344308560677328
participant 9, 0.04186265286923801
participant 10, 0.0009407337723424271
participant 11, 0.0042333019755409216
participant 12, 0.01550751879699248
participant 13, 0.0009407337723424271
participant 14, 0.004703668861712135
participant 15, 0.07176360225140713
participant 16, 0.0042333019755409216
participant 17, 0.0004692632566870014
participant 18, 0.0051740357478833494
participant 19, 0.06396989651928504
participant 20, 0.0018814675446848542
participant 21, 0.08701787394167451
participant 22, 0.0056364490371066224
participant 23, 0.0023518344308560675
participant 24, 0.004703668861712135
participant 25, 0.021636876763875823
participant 26, 0.03853383458646616
participa

In [3]:
import pandas as pd

pathSVC = "data/Result1/Similar_All.csv"
pathMVC = "data/Result1/Different_All.csv"

# Row counts of both tables after dropping rows with missing values.
dataMVC = pd.read_csv(pathMVC).dropna(axis=0)
dataSVC = pd.read_csv(pathSVC).dropna(axis=0)

print(len(dataSVC), len(dataMVC))

32373 42237


In [5]:
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score

# Majority-class baseline on the same 90/10 split as the Random Forest runs.
zeroR_model = DummyClassifier(strategy='most_frequent', random_state=0)  # ZeroR

train_data_ratio, valid_data_ratio, test_data_ratio = 0.9, 0, 0.1

pathSVC = "data/Result1/Similar_All.csv"
pathMVC = "data/Result1/Different_All.csv"

gazeFeature = 'all'
differentResult1 = LoadResult1Data(pathMVC)
differentResult1.set_domain(gazeFeature)
trb, vdb, teb = differentResult1.split_data(
    train_data_ratio, valid_data_ratio, test_data_ratio
)

# Materialise features and labels once, then slice out both partitions.
x_all = differentResult1.take_x().to_numpy()
y_all = differentResult1.take_y().to_numpy()
train_x, train_y = x_all[trb], y_all[trb]
test_x, test_y = x_all[teb], y_all[teb]

pred_y = zeroR_model.fit(train_x, train_y).predict(test_x)

# Baseline accuracy is the cell output.
accuracy_score(test_y, pred_y)


0.030310206014681505