In [47]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import KFold
import xgboost as xgb
import elm
from imblearn.over_sampling import SMOTE 

import dataloader
import bandpower

# Cross-validation of band-power classifiers (K-fold, with a leave-one-subject-out variant)

In [44]:
# Load the dataset. Downstream cells use X as the raw signals, Y as the labels
# and S as the per-sample subject IDs (presumably; confirm in dataloader).
X, Y, S = dataloader.read_data(label_format=1)

# Convert the time series into band power, one band per (low, high) pair.
# NOTE(review): the edges look like frequencies in Hz; confirm in bandpower.
band_edges = [(3, 7), (7, 13), (13, 30)]
X_power = bandpower.get_bandpower(
    X,
    500,  # second positional argument, presumably the sampling rate; TODO confirm
    low=[lower for lower, _ in band_edges],
    high=[upper for _, upper in band_edges],
)

Load data from .mat files...
Calculating the bandpower of time-series data...
Shape of psd:  (8, 1251)


In [48]:
# Cross-validated evaluation of one classifier on the flattened band-power
# features. For every split: optionally oversample the training part, fit the
# selected model, then report the chance level, training accuracy and
# validation accuracy. The final figure is the validation accuracy averaged
# over splits, weighted by the number of validation samples in each split.

# --- Experiment configuration ---------------------------------------------
model_name = 'svm'          # 'svm', 'xgboost' or 'elm'
sampling_name = 'original'  # 'SMOTE' oversamples each training split; any other value trains on the data as-is
cv_name = 'kfold'           # 'kfold' (shuffled 10-fold) or 'loso' (leave-one-subject-out, grouped by S)

num_sample = len(X)
# One flat feature vector per sample (a no-op when X_power is already 2-D,
# so re-running this cell is safe).
X_power = X_power.reshape((X_power.shape[0], -1))

# --- Build the (split id, train indices, val indices) triples --------------
# The split id is what gets printed per row: the 1-based fold number for
# K-fold, the subject ID for leave-one-subject-out. Previously the row label
# came from a `subID` variable that the K-fold loop never defined.
if cv_name == 'kfold':
    kf = KFold(n_splits=10, shuffle=True, random_state=23)
    split_label = 'Fold'
    splits = [
        (fold_no, fold_train, fold_val)
        for fold_no, (fold_train, fold_val) in enumerate(kf.split(X_power), start=1)
    ]
elif cv_name == 'loso':
    split_label = 'Sub'
    splits = [
        (subject, np.where(S != subject)[0], np.where(S == subject)[0])
        for subject in np.unique(S)
    ]
else:
    raise ValueError("Unknown cv_name %r (expected 'kfold' or 'loso')" % (cv_name,))

train_acc_list = []
val_acc_list = []
num_val_list = []
print('%-5s\t Chance\t | Train | Val' % split_label)
for split_id, train_indices, val_indices in splits:
    X_train, Y_train = X_power[train_indices, ...], Y[train_indices]
    X_val, Y_val = X_power[val_indices, ...], Y[val_indices]

    # Resample training data only; the validation split is never touched.
    if sampling_name == 'SMOTE':
        sm = SMOTE(random_state=23)
        X_train, Y_train = sm.fit_resample(X_train, Y_train)

    # Train and score the classifier
    if model_name in ['svm', 'xgboost']:
        clf = svm.SVC() if model_name == 'svm' else xgb.XGBClassifier()
        clf.fit(X_train, Y_train)
        train_acc = clf.score(X_train, Y_train)
        val_acc = clf.score(X_val, Y_val)
    elif model_name == 'elm':
        clf = elm.ELMKernel()
        # The elm calls below receive one matrix with the label in column 0
        # followed by the features.
        train_elm_data = np.concatenate((Y_train[:, np.newaxis], X_train), axis=1)
        val_elm_data = np.concatenate((Y_val[:, np.newaxis], X_val), axis=1)
        clf.search_param(train_elm_data, cv="kfold", of="accuracy", eval=10)
        train_acc = clf.train(train_elm_data).get_accuracy()
        val_acc = clf.test(val_elm_data).get_accuracy()
    else:
        # Fail loudly instead of reusing accuracies left over from an earlier
        # iteration or an earlier kernel run.
        raise ValueError("Unknown model_name %r (expected 'svm', 'xgboost' or 'elm')" % (model_name,))

    train_acc_list.append(train_acc)
    val_acc_list.append(val_acc)
    num_val_list.append(len(Y_val))

    # Chance level = share of the most frequent class in the training labels
    # actually used for fitting (i.e. after resampling, when enabled). For 0/1
    # labels this equals max(p, 1 - p) with p = mean(Y_train); counting
    # classes also keeps it correct for other label encodings.
    _, class_counts = np.unique(Y_train, return_counts=True)
    chance = class_counts.max() / len(Y_train)
    print('%s %d\t %.1f%%\t | %.1f%% | %.1f%%' % (split_label, split_id, chance*100, train_acc*100, val_acc*100))

# Every sample must have been validated exactly once, otherwise dividing by
# num_sample would not yield a proper weighted mean.
assert sum(num_val_list) == num_sample, 'validation splits do not cover every sample exactly once'
avg_acc = sum(acc * n_val for acc, n_val in zip(val_acc_list, num_val_list)) / num_sample
print('Average val acc: %.1f%%' % (avg_acc*100))

Sub  	 Chance	 | Train | Val
Sub 26	 65.4%	 | 68.6% | 64.7%
Sub 26	 67.3%	 | 69.3% | 52.9%
Sub 26	 66.7%	 | 68.6% | 58.8%
Sub 26	 65.4%	 | 67.3% | 70.6%
Sub 26	 65.4%	 | 66.7% | 70.6%
Sub 26	 66.0%	 | 66.0% | 64.7%
Sub 26	 65.4%	 | 67.3% | 70.6%
Sub 26	 66.7%	 | 69.3% | 58.8%
Sub 26	 66.0%	 | 68.0% | 58.8%
Sub 26	 64.7%	 | 66.7% | 76.5%
Average val acc: 64.7%
