In [42]:
import itertools
import os

import elm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from imblearn.over_sampling import SMOTE
from sklearn import svm
from sklearn.model_selection import KFold

import bandpower
import dataloader

# Leave-one-subject-out (LOSO) cross-validation

In [50]:
# Labelling scheme requested from the loader; the same value is later embedded
# in the result file names.
label_format = 3
X, Y, df_all = dataloader.read_data(label_format=label_format)

# Band edges: 49 adjacent unit-width bands, band k running from low[k] to high[k]
# (1-2, 2-3, ..., 49-50; presumably in Hz — confirm against bandpower.get_bandpower).
low = [edge for edge in range(1, 50)]
high = [edge for edge in range(2, 51)]

# Convert the time series into per-band power on a dB scale.
# NOTE(review): the positional 500 is presumably the sampling rate in Hz — confirm.
powers = bandpower.get_bandpower(X, 500, low=low, high=high, dB_scale=True)

Load data from .mat files...


  val = np.array(val, copy=False)
  return array(a, dtype, copy=False, order=order)


Calculating the bandpower of time-series data...
freqs:  [0.000e+00 2.000e-01 4.000e-01 ... 2.496e+02 2.498e+02 2.500e+02]


In [51]:
# Experiment grid: every (model, sampling, CV mode) combination is trained,
# evaluated fold by fold, printed, and saved to its own CSV file.
model_names = ['svm', 'xgboost', 'elm']
sampling_names = ['original', 'SMOTE']
cv_modes = ['KFold', 'LOSO']

RESULTS_DIR = './results'   # one CSV per configuration is written here
SEED = 23                   # shared by the KFold shuffle and by SMOTE
N_SPLITS = 10               # number of folds in 'KFold' mode

# Create the output folder up front: a missing directory would otherwise only
# surface in to_csv(), after a whole configuration has already been trained.
os.makedirs(RESULTS_DIR, exist_ok=True)

# The features do not depend on the configuration, so build them once.
# Average band power over channels
num_sample = len(X)
X_power = np.zeros((num_sample, len(low)))
for i in range(num_sample):
    X_power[i,:] = np.mean(powers[i],0)

# Subject ID of every sample, used to build the leave-one-subject-out splits.
S = df_all['subject'].values
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

for model_name, sampling_name, cv_mode in itertools.product(model_names, sampling_names, cv_modes):

    file_name = 'label%d_%s_%s_%s.csv'%(label_format, model_name, sampling_name, cv_mode)
    description = 'Label: %d, Model: %s, Sampling: %s, CV mode: %s'%(label_format, model_name, sampling_name, cv_mode)

    # 'KFold': iterate over (train_indices, val_indices) pairs; the split is
    # requested anew for every configuration because it is consumed by the loop.
    # 'LOSO': iterate over the distinct subject IDs, holding one subject out at a time.
    folds = kf.split(X_power) if cv_mode=='KFold' else np.unique(S)

    train_acc_list = []
    val_acc_list = []
    num_val_list = []
    chance_list = []
    fold_name_list = []

    print('Sub  \t Chance\t | Train | Val')
    for i_fold, fold in enumerate(folds):
        if cv_mode == 'KFold':
            train_indices, val_indices = fold[0], fold[1]
        else:
            subID = fold
            train_indices = np.where(S!=subID)[0]
            val_indices = np.where(S==subID)[0]

        X_train, Y_train = X_power[train_indices,...], Y[train_indices]
        X_val, Y_val = X_power[val_indices,...], Y[val_indices]

        # Resample training data only; the validation split is left untouched.
        if sampling_name == 'SMOTE':
            sm = SMOTE(random_state=SEED)
            X_train, Y_train = sm.fit_resample(X_train, Y_train)

        # Train classifier
        if model_name in ['svm','xgboost']:
            if model_name == 'svm':
                clf = svm.SVC()
            else:
                clf = xgb.XGBClassifier()
            clf.fit(X_train, Y_train)

            # Test classifier
            train_acc = clf.score(X_train, Y_train)
            val_acc = clf.score(X_val, Y_val)
        elif model_name == 'elm':
            clf = elm.ELMKernel()
            # The label is prepended as the first column of the matrices handed to elm
            # (presumably the layout the elm package expects — confirm).
            train_elm_data = np.concatenate((Y_train[:,np.newaxis], X_train), axis=1)
            val_elm_data = np.concatenate((Y_val[:,np.newaxis], X_val), axis=1)
            clf.search_param(train_elm_data, cv="kfold", of="accuracy", eval=10)
            train_acc = clf.train(train_elm_data).get_accuracy()
            val_acc = clf.test(val_elm_data).get_accuracy()
        else:
            # Without this guard an unknown name would silently reuse the
            # accuracies of the previous fold/configuration.
            raise ValueError('Unknown model name: %s'%(model_name))

        fold_name = 'Fold %d'%(i_fold) if cv_mode == 'KFold' else 'Sub %d'%(subID)

        # Larger of the two class shares in the labels the model was fitted on
        # (treats Y_train as 0/1 labels: the mean is the share of ones).
        # NOTE(review): this is taken after resampling, so with SMOTE it is always
        # 50% and is not a majority-class baseline for the validation accuracy —
        # consider computing it from the un-resampled labels instead.
        chance = np.sum(Y_train)/len(Y_train)
        chance = (1-chance) if chance<0.5 else chance

        train_acc_list.append(train_acc)
        val_acc_list.append(val_acc)
        num_val_list.append(len(Y_val))
        fold_name_list.append(fold_name)
        chance_list.append(chance)

        print('%s\t %.1f%%\t | %.1f%% | %.1f%%'%(fold_name, chance*100, train_acc*100, val_acc*100))

    # Validation accuracy weighted by the size of each validation split.
    avg_acc = sum(acc*num_val for acc, num_val in zip(val_acc_list, num_val_list))/num_sample
    print('Average val acc: %.1f%%'%(avg_acc*100))

    # Save result as csv file; the final row carries only the average in the first column.
    df_result = pd.DataFrame({description: fold_name_list, 'Chance': chance_list, 
                              'Train': train_acc_list, 'Val': val_acc_list})
    df_result.loc[len(df_result),description] = 'Average val acc: %.1f%%'%(avg_acc*100)
    df_result.to_csv(os.path.join(RESULTS_DIR, file_name))

Sub  	 Chance	 | Train | Val
Fold 0	 80.3%	 | 80.3% | 76.9%
Fold 1	 79.5%	 | 79.5% | 84.6%
Fold 2	 79.5%	 | 79.5% | 84.6%
Fold 3	 81.2%	 | 81.2% | 69.2%
Fold 4	 80.3%	 | 80.3% | 76.9%
Fold 5	 78.6%	 | 78.6% | 92.3%
Fold 6	 79.5%	 | 79.5% | 84.6%
Fold 7	 81.2%	 | 81.2% | 69.2%
Fold 8	 79.5%	 | 79.5% | 84.6%
Fold 9	 80.3%	 | 80.3% | 76.9%
Average val acc: 80.0%
Sub  	 Chance	 | Train | Val
Sub 1	 80.0%	 | 80.0% | 80.0%
Sub 2	 79.8%	 | 79.8% | 100.0%
Sub 3	 80.6%	 | 80.6% | 66.7%
Sub 4	 79.8%	 | 79.8% | 83.3%
Sub 5	 79.0%	 | 79.0% | 100.0%
Sub 6	 81.6%	 | 81.6% | 40.0%
Sub 7	 80.0%	 | 80.0% | 80.0%
Sub 8	 79.4%	 | 79.4% | 100.0%
Sub 9	 80.5%	 | 80.5% | 50.0%
Sub 10	 79.5%	 | 79.5% | 100.0%
Sub 11	 79.8%	 | 79.8% | 83.3%
Sub 12	 82.3%	 | 82.3% | 33.3%
Sub 13	 79.2%	 | 79.2% | 100.0%
Sub 14	 79.7%	 | 79.7% | 100.0%
Sub 16	 80.6%	 | 80.6% | 0.0%
Sub 17	 80.3%	 | 80.3% | 66.7%
Sub 18	 80.2%	 | 80.2% | 75.0%
Sub 19	 80.0%	 | 80.0% | 80.0%
Sub 20	 80.5%	 | 80.5% | 75.0%
Sub 21	 78.5%	 | 78.5% |



Kernel function:  linear  best cv value:  0.7416666666666667
Kernel function:  poly  best cv value:  0.45555555555555555
##### Search complete #####

Regressor Parameters

Regularization coefficient:  2.2483734909356063
Kernel Function:  rbf
Kernel parameters:  [0.6232831104148938]

CV error:  0.8055555555555556

Fold 0	 80.3%	 | 100.0% | 76.9%
elmk
##### Start search #####
Kernel function:  rbf  best cv value:  0.8
Kernel function:  linear  best cv value:  0.7444444444444445
Kernel function:  poly  best cv value:  0.3555555555555555
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.801277403007913
Kernel Function:  rbf
Kernel parameters:  [-0.5349111372992282]

CV error:  0.8

Fold 1	 79.5%	 | 79.5% | 84.6%
elmk
##### Start search #####
Kernel function:  rbf  best cv value:  0.7972222222222223
Kernel function:  linear  best cv value:  0.7666666666666666
Kernel function:  poly  best cv value:  0.35833333333333334
##### Search complete #####

Regressor P

Kernel function:  linear  best cv value:  0.7483516483516484
Kernel function:  poly  best cv value:  0.32857142857142857
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.13489110986295721
Kernel Function:  rbf
Kernel parameters:  [0.4373265991829188]

CV error:  0.8076923076923077

Sub 11	 79.8%	 | 79.8% | 83.3%
elmk
##### Start search #####
Kernel function:  rbf  best cv value:  0.8307692307692307
Kernel function:  linear  best cv value:  0.7934065934065935
Kernel function:  poly  best cv value:  0.4879120879120879
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.0721437982225028
Kernel Function:  rbf
Kernel parameters:  [0.06553583448723488]

CV error:  0.8307692307692307

Sub 12	 82.3%	 | 82.3% | 33.3%
elmk
##### Start search #####
Kernel function:  rbf  best cv value:  0.8
Kernel function:  linear  best cv value:  0.7442307692307693
Kernel function:  poly  best cv value:  0.7067307692307693
##### Search complete ##



Kernel function:  rbf  best cv value:  0.558204334365325
Kernel function:  linear  best cv value:  0.7572755417956656
Kernel function:  poly  best cv value:  0.7226006191950465
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.4631899868223306
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7572755417956656

Fold 0	 50.0%	 | 86.7% | 53.8%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5796491228070175
Kernel function:  linear  best cv value:  0.7073684210526315
Kernel function:  poly  best cv value:  0.6810526315789474
##### Search complete #####

Regressor Parameters

Regularization coefficient:  0.25686796877040985
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7073684210526315

Fold 1	 50.0%	 | 86.0% | 69.2%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.64
Kernel function:  linear  best cv value:  0.7287719298245614
Kernel function:  poly  best cv value:  0.7192982456140351
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.2470406711758763
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7287719298245614

Fold 2	 50.0%	 | 87.6% | 61.5%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.7263157894736842
Kernel function:  linear  best cv value:  0.7578947368421052
Kernel function:  poly  best cv value:  0.6789473684210525
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.27433601835557014
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7578947368421052

Fold 3	 50.0%	 | 85.8% | 46.2%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6758513931888546
Kernel function:  linear  best cv value:  0.7613003095975233
Kernel function:  poly  best cv value:  0.7238390092879257
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.0610407147943657
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7613003095975233

Fold 4	 50.0%	 | 89.4% | 61.5%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.511336032388664
Kernel function:  linear  best cv value:  0.7530364372469636
Kernel function:  poly  best cv value:  0.7271255060728745
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.5000807909715973
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7530364372469636

Fold 5	 50.0%	 | 88.6% | 61.5%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5877192982456141
Kernel function:  linear  best cv value:  0.7126315789473684
Kernel function:  poly  best cv value:  0.7049122807017545
##### Search complete #####

Regressor Parameters

Regularization coefficient:  0.783821777455634
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7126315789473684

Fold 6	 50.0%	 | 83.3% | 69.2%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6105263157894736
Kernel function:  linear  best cv value:  0.7894736842105263
Kernel function:  poly  best cv value:  0.6631578947368422
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.5206474310761426
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7894736842105263

Fold 7	 50.0%	 | 86.3% | 46.2%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6031578947368421
Kernel function:  linear  best cv value:  0.8038596491228069
Kernel function:  poly  best cv value:  0.6575438596491228
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -4.134760881684312
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.8038596491228069

Fold 8	 50.0%	 | 85.5% | 46.2%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6114551083591331
Kernel function:  linear  best cv value:  0.7804953560371517
Kernel function:  poly  best cv value:  0.7179566563467492
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.0607158021706709
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7804953560371517

Fold 9	 50.0%	 | 89.9% | 23.1%
Average val acc: 53.8%
Sub  	 Chance	 | Train | Val
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5700000000000001
Kernel function:  linear  best cv value:  0.74
Kernel function:  poly  best cv value:  0.6799999999999999
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.1431489413335159
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.74

Sub 1	 50.0%	 | 82.0% | 60.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6064425770308123
Kernel function:  linear  best cv value:  0.7515406162464985
Kernel function:  poly  best cv value:  0.7218487394957984
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.959673145911506
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7515406162464985

Sub 2	 50.0%	 | 85.4% | 100.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.7350000000000001
Kernel function:  linear  best cv value:  0.77
Kernel function:  poly  best cv value:  0.655
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.03769587417941711
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.77

Sub 3	 50.0%	 | 88.5% | 50.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5855555555555556
Kernel function:  linear  best cv value:  0.7077777777777777
Kernel function:  poly  best cv value:  0.7216666666666667
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.6374410753623754
Kernel Function:  poly
Kernel parameters:  [-1.0903323557343925, 1.3159396028031913]

CV error:  0.7216666666666667

Sub 4	 50.0%	 | 86.9% | 50.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.51125
Kernel function:  linear  best cv value:  0.7374999999999999
Kernel function:  poly  best cv value:  0.7075
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.35475903310152535
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7374999999999999

Sub 5	 50.0%	 | 87.2% | 83.3%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6076190476190476
Kernel function:  linear  best cv value:  0.7209523809523809
Kernel function:  poly  best cv value:  0.698095238095238
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.98563908316252
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7209523809523809

Sub 6	 50.0%	 | 86.3% | 60.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5400000000000001
Kernel function:  linear  best cv value:  0.72
Kernel function:  poly  best cv value:  0.7100000000000001
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.39031566738715084
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.72

Sub 7	 50.0%	 | 86.5% | 40.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.595
Kernel function:  linear  best cv value:  0.735
Kernel function:  poly  best cv value:  0.655
##### Search complete #####

Regressor Parameters

Regularization coefficient:  0.0763961584592386
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.735

Sub 8	 50.0%	 | 87.5% | 50.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5896358543417367
Kernel function:  linear  best cv value:  0.7350140056022408
Kernel function:  poly  best cv value:  0.6778711484593838
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.6892136279251826
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7350140056022408

Sub 9	 50.0%	 | 85.9% | 0.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.552014652014652
Kernel function:  linear  best cv value:  0.756043956043956
Kernel function:  poly  best cv value:  0.6432234432234434
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -5.667320583583194
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.756043956043956

Sub 10	 50.0%	 | 80.7% | 66.7%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5922222222222222
Kernel function:  linear  best cv value:  0.7355555555555555
Kernel function:  poly  best cv value:  0.6655555555555555
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.2195701048925423
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7355555555555555

Sub 11	 50.0%	 | 82.3% | 50.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5171428571428571
Kernel function:  linear  best cv value:  0.8085714285714285
Kernel function:  poly  best cv value:  0.7057142857142857
##### Search complete #####

Regressor Parameters

Regularization coefficient:  0.8553795182969423
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.8085714285714285

Sub 12	 50.0%	 | 91.2% | 33.3%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6405555555555555
Kernel function:  linear  best cv value:  0.7261111111111112
Kernel function:  poly  best cv value:  0.6661111111111111
##### Search complete #####

Regressor Parameters

Regularization coefficient:  0.4989670438880793
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7261111111111112

Sub 13	 50.0%	 | 86.9% | 60.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6028571428571429
Kernel function:  linear  best cv value:  0.7409523809523809
Kernel function:  poly  best cv value:  0.7123809523809522
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.01969464410238208
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7409523809523809

Sub 14	 50.0%	 | 86.8% | 0.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6245614035087719
Kernel function:  linear  best cv value:  0.7488721804511278
Kernel function:  poly  best cv value:  0.687468671679198
##### Search complete #####

Regressor Parameters

Regularization coefficient:  0.12326161288072754
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7488721804511278

Sub 16	 50.0%	 | 88.5% | 0.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6542857142857142
Kernel function:  linear  best cv value:  0.7733333333333333
Kernel function:  poly  best cv value:  0.6657142857142857
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.6899830057291936
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7733333333333333

Sub 17	 50.0%	 | 88.7% | 66.7%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5615384615384617
Kernel function:  linear  best cv value:  0.7465201465201464
Kernel function:  poly  best cv value:  0.6663003663003663
##### Search complete #####

Regressor Parameters

Regularization coefficient:  0.9584576051404583
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7465201465201464

Sub 18	 50.0%	 | 88.1% | 50.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6449999999999999
Kernel function:  linear  best cv value:  0.74
Kernel function:  poly  best cv value:  0.705
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.1033674067418366
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.74

Sub 19	 50.0%	 | 87.0% | 60.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5631578947368421
Kernel function:  linear  best cv value:  0.7578947368421052
Kernel function:  poly  best cv value:  0.7526315789473685
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -4.350286933491249
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7578947368421052

Sub 20	 50.0%	 | 82.6% | 41.7%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.7105263157894737
Kernel function:  linear  best cv value:  0.731578947368421
Kernel function:  poly  best cv value:  0.6631578947368422
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.1470498753767244
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.731578947368421

Sub 21	 50.0%	 | 84.7% | 33.3%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.6272445820433437
Kernel function:  linear  best cv value:  0.7396284829721362
Kernel function:  poly  best cv value:  0.6470588235294118
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -0.47166419411895305
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7396284829721362

Sub 22	 50.0%	 | 84.6% | 60.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5405555555555556
Kernel function:  linear  best cv value:  0.7277777777777776
Kernel function:  poly  best cv value:  0.6799999999999999
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.103958154956739
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7277777777777776

Sub 23	 50.0%	 | 84.8% | 80.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.595
Kernel function:  linear  best cv value:  0.7422222222222222
Kernel function:  poly  best cv value:  0.6972222222222222
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.2341635685310886
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7422222222222222

Sub 24	 50.0%	 | 85.9% | 50.0%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.58125
Kernel function:  linear  best cv value:  0.72
Kernel function:  poly  best cv value:  0.67
##### Search complete #####

Regressor Parameters

Regularization coefficient:  0.13112022233332488
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.72

Sub 25	 50.0%	 | 84.7% | 33.3%
elmk
##### Start search #####




Kernel function:  rbf  best cv value:  0.5175824175824176
Kernel function:  linear  best cv value:  0.7703296703296703
Kernel function:  poly  best cv value:  0.6835164835164835
##### Search complete #####

Regressor Parameters

Regularization coefficient:  0.8433896642097217
Kernel Function:  linear
Kernel parameters:  []

CV error:  0.7703296703296703

Sub 26	 50.0%	 | 91.1% | 42.9%
Average val acc: 50.0%
