In [0]:
"""
Set up for using CoLab GPU and Google drive
"""
# !pip install braindecode
# !pip install pandas==0.23.0 #needed for functionality of dataframes code

# Mount Google Drive inside the Colab VM (prompts for an authorization code).
from google.colab import drive
drive.mount('/content/drive')
# Work from the project's scripts folder: later cells import local modules
# (preprocessing, utils, eegnet, experiment, ...) and use '..//'-relative paths.
%cd /content/drive/My Drive/ColabProjects/Study_2a/scripts/
# In this cell TensorFlow is only used to check that a GPU runtime is attached.
import tensorflow as tf
device_name = tf.test.gpu_device_name()
# Abort early when the runtime does not expose GPU 0.
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive
/content/drive/My Drive/ColabProjects/Study_2a/scripts
Found GPU at: /device:GPU:0


In [0]:
import numpy as np
import pandas as pd
print(pd.__version__)
from preprocessing import load_subject_eeg, eeg_to_3d, format_data, down_and_normal, balanced_subsample
from utils_2 import current_acc
import warnings
from imblearn.over_sampling import SMOTE, ADASYN
warnings.filterwarnings('ignore', category=FutureWarning)
import logging  
import time
import sys 
from utils import balanced_subsample, current_loss

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score

#####import network architectures#####
from braindecode.models.shallow_fbcsp import ShallowFBCSPNet
from braindecode.models.deep4 import Deep4Net
from eegnet import EEGNetv4
from braindecode.torch_ext.optimizers import AdamW
from braindecode.torch_ext.functions import square, safe_log
from braindecode.experiments.stopcriteria import MaxEpochs, NoDecrease, Or, And
from braindecode.experiments.monitors import LossMonitor, MisclassMonitor, RuntimeMonitor, compute_pred_labels_from_trial_preds
from braindecode.torch_ext.constraints import MaxNormDefaultConstraint 
from experiment_sans_test import Experiment 
from experiment import Experiment as op_exp # experiemnt for saving optimized models
from braindecode.experiments.monitors import LossMonitor, MisclassMonitor, RuntimeMonitor 
from braindecode.datautil.iterators import BalancedBatchSizeIterator
from braindecode.datautil.signal_target import SignalAndTarget
from braindecode.torch_ext.util import set_random_seeds, np_to_var, var_to_np

from torch.nn.functional import elu, relu6, leaky_relu, relu, rrelu
import torch 
import torch.nn.functional as F
from torch.nn.functional import cross_entropy
from torch.nn.functional import nll_loss
from torch import optim

from tensorflow.keras.utils import normalize
torch.backends.cudnn.deterministic = True

log = logging.getLogger(__name__)

0.23.0




In [0]:
def results_df(index, index_name, columns_list, column_names):
    """
    Create an empty tiered (MultiIndex) dataframe for hyper-parameter results.

    Parameters
    ----------
    index : list
        Row labels; wrapped in a single-level MultiIndex named ``index_name``.
    index_name : str
        Name given to the row level.
    columns_list : list of lists
        One list of values per hyper-parameter; the columns are their
        Cartesian product.
    column_names : list of str
        Level name for each entry of ``columns_list`` (same length).

    Returns
    -------
    pandas.DataFrame
        All-NaN frame whose rows and columns are sorted by level.
        NOTE: because the columns are sorted, their order generally differs
        from the order in which ``columns_list`` values were supplied, so
        callers must not assume positional alignment with the input order.

    Raises
    ------
    AssertionError
        If ``columns_list`` and ``column_names`` differ in length.
    """
    assert len(columns_list) == len(column_names), "Unequal length for columns/names!"
    miindex = pd.MultiIndex.from_product([index], names=[index_name])
    micol = pd.MultiIndex.from_product(columns_list, names=column_names)
    # DataFrame.sortlevel() was deprecated in pandas 0.20 and removed in 1.0;
    # sort_index(level=0) is the call it forwarded to, so behaviour is unchanged
    # on old versions and the function keeps working on new ones.
    empty = pd.DataFrame(index=miindex, columns=micol)
    return empty.sort_index(level=0).sort_index(axis=1, level=0)

def param_scores_df(columns_list, index):
    """
    Build the (empty) table of mean scores per hyper-parameter value.

    Rows are the entries of ``index`` followed by "Mean" and "Std.";
    columns are two-level, obtained by splitting each ``"name, value"``
    label in ``columns_list`` on ``', '``.

    NOTE: "Mean" and "Std." are appended to ``index`` in place, so the
    caller's list is two items longer after this call.
    """
    for summary_label in ("Mean", "Std."):
        index.append(summary_label)

    scores = pd.DataFrame(index=index, columns=columns_list)
    split_labels = scores.columns.str.split(', ', expand=True).values

    # Labels without a ', ' separator come back as (label, NaN); move the label
    # to the second level and use '' for the first.
    two_level = []
    for label in split_labels:
        if pd.isnull(label[1]):
            two_level.append(('', label[0]))
        else:
            two_level.append(label)
    scores.columns = pd.MultiIndex.from_tuples(two_level)
    return scores

def get_col_list(hyp_params):
    """
    Return one list of column labels per hyper-parameter, in key order.

    A hyper-parameter whose first value is callable is represented by the
    ``__name__`` of each of its values; any other hyper-parameter's value
    list is passed through as-is (the same list object).
    """
    labels_per_param = []
    for values in hyp_params.values():
        if callable(values[0]):
            labels_per_param.append([fn.__name__ for fn in values])
        else:
            labels_per_param.append(values)
    return labels_per_param

def get_loss_acc_df(hyp_params,index_name,num_folds):
    """
    Create the two empty inner-fold result tables (loss and accuracy).

    Rows are numbered 1 .. num_folds*num_folds, followed by "Mean" and
    "Std."; columns are the hyper-parameter grid built by ``results_df``.
    Returns ``(lossdf, accdf)``, two independent frames of identical layout.
    """
    row_labels = list(range(1, num_folds * num_folds + 1)) + ["Mean", "Std."]
    hp_values = get_col_list(hyp_params)
    hp_names = list(hyp_params)

    lossdf = results_df(row_labels, index_name, hp_values, hp_names)
    accdf = results_df(row_labels, index_name, hp_values, hp_names)
    return lossdf, accdf

def get_results_df(hyp_params,index_name,subjects,num_folds):
    """
    Create the per-subject result tables.

    Parameters
    ----------
    hyp_params : dict
        Hyper-parameter name -> list of candidate values.
    index_name : str
        Unused here; kept so the signature matches ``get_loss_acc_df``.
    subjects : list
        Subject identifiers used as row labels.
    num_folds : int
        Number of outer folds (one ``fold<i>`` column each).

    Returns
    -------
    final_resultsdf : DataFrame, subjects x fold columns (outer-fold accuracies).
    paramsdf : DataFrame, subjects x hyper-parameter names (best value per subject).
    paramscoresdf : DataFrame from ``param_scores_df`` with one
        ``(name, value)`` column per hyper-parameter value.
    subjects : the list that was passed in.
        NOTE: ``param_scores_df`` appends "Mean" and "Std." to this list in
        place, so the returned list (and the caller's) carries those two
        extra entries; callers are expected to strip them.
    """
    # 1 -- Final accuracies DataFrame
    folds = [f'fold{i}' for i in range(1, num_folds + 1)]
    final_resultsdf = pd.DataFrame(index=subjects, columns=folds)

    # 3 -- DataFrame for storing best HPs by subject
    names = list(hyp_params.keys())
    paramsdf = pd.DataFrame(index=subjects, columns=names)

    # 4 -- DataFrame for storing HP-specific mean accuracy scores per subject.
    # One "name, value" label per candidate value, in hyp_params order. This
    # replaces a hard-coded 4/4/4/2 layout and yields the same labels for that
    # layout while also supporting any other grid size.
    columns_list = get_col_list(hyp_params)
    col = [f'{name}, {value}'
           for name, values in zip(names, columns_list)
           for value in values]
    paramscoresdf = param_scores_df(col, subjects)
    return final_resultsdf, paramsdf, paramscoresdf, subjects

def call_model(model_type, activation):
    """
    Build an untrained network of the requested architecture.

    Parameters
    ----------
    model_type : str
        One of 'shallow', 'deep' or 'eegnet'.
    activation : callable
        Non-linearity passed to the architecture's ``*_nonlin`` argument(s).

    Returns
    -------
    The object returned by the architecture's ``create_network()``.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.

    Notes
    -----
    Reads the module-level names ``n_chans``, ``n_classes`` and
    ``input_time_length``, which must be set before calling.
    """
    if model_type == 'shallow':
        # NOTE(review): `drop_prob` is not assigned anywhere in the visible
        # notebook, so this branch raises NameError unless it is defined
        # elsewhere -- confirm the intended value.
        model =  ShallowFBCSPNet(in_chans=n_chans, n_classes=n_classes, input_time_length=input_time_length,
                     n_filters_time=40, filter_time_length=25, n_filters_spat=40, 
                     pool_time_length=75, pool_time_stride=15, final_conv_length='auto',
                     conv_nonlin=activation, pool_mode='mean', pool_nonlin=safe_log, 
                     split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,
                     drop_prob=drop_prob).create_network()

    elif model_type == 'deep':
        model = Deep4Net(in_chans=n_chans, n_classes=n_classes, input_time_length=input_time_length,
                     final_conv_length='auto', n_filters_time=25, n_filters_spat=25, filter_time_length=10,
                     pool_time_length=3, pool_time_stride=3, n_filters_2=50, filter_length_2=10,
                     n_filters_3=100, filter_length_3=10, n_filters_4=200, filter_length_4=10,
                     first_nonlin=activation, first_pool_mode='max', first_pool_nonlin=safe_log, later_nonlin=activation,
                     later_pool_mode='max', later_pool_nonlin=safe_log, drop_prob=0.1, 
                     double_time_convs=False, split_first_layer=False, batch_norm=True, batch_norm_alpha=0.1,
                     stride_before_pool=False).create_network() #filter_length_4 changed from 15 to 10

    elif model_type == 'eegnet':
        model = EEGNetv4(in_chans=n_chans, n_classes=n_classes, final_conv_length='auto', 
                     input_time_length=input_time_length, pool_mode='mean', F1=16, D=2, F2=32,
                     kernel_length=64, third_kernel_size=(8,4), conv_nonlin=activation, drop_prob=0.5).create_network()

    else:
        # Previously an unknown name fell through to `return model` and failed
        # with an UnboundLocalError that hid the real cause.
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected 'shallow', 'deep' or 'eegnet'")

    return model

In [0]:
# Subject identifiers; interpolated into the per-subject data and results paths.
subjects = ['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15'] 
# Passed to format_data and used in result file names.
data_type = 'vowels'

# Architecture name understood by call_model ('shallow', 'deep' or 'eegnet').
model_type = 'eegnet'
# Oversampler applied to each subject's flattened trials before cross-validation.
s = SMOTE(sampling_strategy='minority', random_state=10, k_neighbors=3)
# NOTE(review): presumably the recording sampling rate in Hz; not referenced
# elsewhere in the visible notebook.
fs = 1024
# Second argument to down_and_normal (the downsample/normalise step).
dec = 8

# Fixed training settings shared by the inner and outer training functions.
# 'best_loss' is read into a local there but not otherwise used.
parameters = dict(best_loss = 100.0,
                  batch_size = 64,
                  monitors = [LossMonitor(), MisclassMonitor(), RuntimeMonitor()],
                  model_constraint = MaxNormDefaultConstraint(),
                  max_increase_epochs = 30,
                 cuda = True)

"""
Instantiate dataframes for storing accuracies and hyper-parameter results. 
"""
# Grid searched by train_inner; its loops nest in this key order
# (activation -> lr -> epochs -> loss).
hyp_params = dict(activation = [elu, square, leaky_relu, relu],
                  lr=[0.001,0.01,0.1,1],
                  epochs=[20,40,60,80],
                  loss = [cross_entropy, nll_loss]) # model hyper-parameters
# The same splitter (and fold count) is used for both the outer and inner loops.
num_folds = 4
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=10)
index_name = 'Fold'
# Accumulators filled once per subject by the training loop.
all_outer_accuracies, all_best_scores, all_new_scores = [], [], []
hyp_param_means_list = []
BestParamsList = []
final_resultsdf, paramsdf, paramscoresdf, subjects = get_results_df(hyp_params,index_name,subjects,num_folds)
# Separate copies for the two alternative accuracy measures returned by train_outer.
final_resultsdf_1 = final_resultsdf.copy()
final_resultsdf_2 = final_resultsdf.copy()
# get_results_df (via param_scores_df) appended "Mean" and "Std." to `subjects`
# in place; drop those two entries so only real subject ids remain.
subjects = subjects[:-2]

In [0]:
def train_inner(train_set, val_set, hyp_params, parameters, model_type='eegnet'):
    """
    Train one model per hyper-parameter combination on an inner fold.

    Parameters
    ----------
    train_set, val_set : SignalAndTarget
        Inner-fold training and validation data.
    hyp_params : dict
        Must contain 'activation', 'lr', 'epochs' and 'loss', each a list of
        candidate values.
    parameters : dict
        Fixed settings: 'batch_size', 'monitors', 'cuda',
        'model_constraint' and 'max_increase_epochs'.
    model_type : str, optional
        Architecture name forwarded to ``call_model``. Defaults to 'eegnet',
        the value that used to be hard-coded.

    Returns
    -------
    val_loss, val_acc : list, list
        One entry per combination, ordered by the nested loops below
        (activation -> lr -> epochs -> loss, each in ``hyp_params`` list
        order). Values are whatever ``current_loss`` / ``1 - current_acc``
        return for the run's validation loss / misclassification columns.
    """
    batch_size = parameters["batch_size"]
    monitors   = parameters["monitors"]
    cuda       = parameters["cuda"]
    model_constraint    = parameters["model_constraint"]
    max_increase_epochs = parameters['max_increase_epochs']

    iterator = BalancedBatchSizeIterator(batch_size=batch_size)
    val_acc, val_loss = [], []

    for activation in hyp_params['activation']:
        for lr in hyp_params['lr']:
            for n_epochs in hyp_params['epochs']:
                for loss in hyp_params['loss']:
                    # Seed *before* building the network so every combination
                    # starts from the same initial weights; seeding afterwards
                    # left initialisation dependent on the previous run.
                    set_random_seeds(seed=20190629, cuda=cuda)
                    model = call_model(model_type, activation)

                    if cuda:
                        model.cuda()
                        torch.backends.cudnn.deterministic = True

                    # The old call used str.format on a '%s' template, which
                    # logged the literal text and never the model.
                    log.info("Model: \n%s", model)

                    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0, eps=1e-8, amsgrad=False)
                    # Stop at n_epochs, or earlier when validation misclass has
                    # not decreased for max_increase_epochs epochs.
                    stop_criterion = Or([MaxEpochs(n_epochs),
                                         NoDecrease('valid_misclass', max_increase_epochs)])

                    #####Setup to run the selected model#####
                    model_test = Experiment(model, train_set, val_set, test_set=None, iterator=iterator,
                                            loss_function=loss, optimizer=optimizer,
                                            model_constraint=model_constraint, monitors=monitors,
                                            stop_criterion=stop_criterion, remember_best_column='valid_misclass',
                                            run_after_early_stop=True, model_loss_function=None, cuda=cuda)
                    model_test.run()

                    model_acc = model_test.epochs_df['valid_misclass'].astype('float')
                    model_loss = model_test.epochs_df['valid_loss'].astype('float')

                    # current_acc is fed the misclassification series, hence 1 - ...
                    val_acc.append(1 - current_acc(model_acc))
                    val_loss.append(current_loss(model_loss))

    return val_loss, val_acc

In [0]:
def train_outer(trainsetlist,testsetlist,BestParams,subject,data_type,model_type):
    """
    Retrain with the selected hyper-parameters on each outer fold and test it.

    Parameters
    ----------
    trainsetlist, testsetlist : list of SignalAndTarget
        Matching outer-fold training and test sets.
    BestParams : sequence
        ``(activation name, learning rate, epochs, loss name)``.
    subject, data_type, model_type
        Forwarded to the experiment (``subject_id``, ``data_type``,
        ``model_type``); ``model_type`` also selects the architecture.

    Returns
    -------
    test_scores, best_scores, new_scores : list, list, list
        Per-fold accuracies in percent:
        ``test_scores`` from the last entry of the experiment's ``class_acc``,
        ``best_scores`` from the minimum of ``class_acc``,
        ``new_scores`` from the minimum of the ``test_misclass`` column.
        NOTE(review): the two minima are selected using test-set results, so
        they are optimistic estimates -- confirm which one should be reported.
    optimised_model
        The experiment object of the last fold (None if no folds were given).

    Notes
    -----
    Reads the module-level ``parameters`` dict.
    """
    test_scores, best_scores, new_scores = [], [], []
    batch_size = parameters["batch_size"]
    monitors   = parameters["monitors"]
    cuda       = parameters["cuda"]
    model_constraint = parameters["model_constraint"]

    iterator = BalancedBatchSizeIterator(batch_size=batch_size)

    # `square` and `safe_log` are braindecode functions, not members of
    # torch.nn.functional, so a plain getattr on the latter raised
    # AttributeError whenever 'square' was the selected activation.
    braindecode_functions = {fn.__name__: fn for fn in (square, safe_log)}

    def resolve(name):
        if name in braindecode_functions:
            return braindecode_functions[name]
        return getattr(torch.nn.functional, name)

    #####These are the learned hyper-parameters -- only set once for testing#####
    stop_criterion = MaxEpochs(BestParams[2])
    activation = resolve(BestParams[0])
    loss_function = resolve(BestParams[3])

    optimised_model = None
    model_number = 1
    for trainset, testset in zip(trainsetlist, testsetlist):
        # Seed before building the network so its initial weights are reproducible.
        set_random_seeds(seed=20190629, cuda=cuda)
        model = call_model(model_type, activation)

        if cuda:
            model.cuda()
            torch.backends.cudnn.deterministic = True

        # Build the optimizer after the model has been moved to its device.
        optimizer = optim.Adam(model.parameters(), lr=BestParams[1], weight_decay=0, eps=1e-8, amsgrad=False)

        # The old call used str.format on a '%s' template and never logged the model.
        log.info("Model: \n%s", model)

        #####Setup to run the selected model#####
        # Hold out 20% of the outer-fold training data as a validation set.
        trainset_X, valset_X, trainset_y, valset_y = train_test_split(trainset.X, trainset.y, test_size=0.2,
                                                                      shuffle=True, random_state=42, stratify=trainset.y)
        train_set = SignalAndTarget(trainset_X, trainset_y)
        val_set = SignalAndTarget(valset_X, valset_y)

        optimised_model = op_exp(model, train_set, val_set, test_set=testset, iterator=iterator,
                                loss_function=loss_function, optimizer=optimizer,
                                model_constraint=model_constraint, monitors=monitors,
                                stop_criterion=stop_criterion, remember_best_column='valid_misclass',
                                run_after_early_stop=True, model_loss_function=None, cuda=cuda,
                                data_type=data_type, subject_id=subject, model_type=model_type, 
                                model_number=str(model_number))

        optimised_model.run()

        # class_acc holds misclassification rates (hence 1 - ...); pop() removes
        # the last entry after the minimum has been taken.
        best_accuracy = (1 - np.min(np.array(optimised_model.class_acc)))*100
        test_accuracy = round((1 - optimised_model.class_acc.pop())*100,3)
        new_accuracy = round((1 - optimised_model.epochs_df['test_misclass'].min())*100,3)

        best_scores.append(best_accuracy)
        test_scores.append(test_accuracy) # k accuracy scores for this param set.
        new_scores.append(new_accuracy)
        model_number += 1
    return test_scores, best_scores, new_scores, optimised_model

In [0]:
def _aligned_fold_scores(fold_scores, hyp_params, target_columns):
    """
    Label train_inner's score lists and reorder them to `target_columns`.

    train_inner returns scores in the nesting order of its loops, which is the
    Cartesian product of the hyp_params value lists in key order. The result
    frames built by results_df have *sorted* columns, so the scores must be
    matched by label rather than by position.
    """
    run_order = pd.MultiIndex.from_product(get_col_list(hyp_params),
                                           names=list(hyp_params.keys()))
    scores = pd.DataFrame(fold_scores, columns=run_order, dtype=float)
    return scores.reindex(columns=target_columns)


def _write_fold_scores(df, scores):
    """Copy per-fold rows into `df` and fill its "Mean" / "Std." rows from all of them."""
    row_labels = list(df.index.get_level_values(0))
    for row_pos, row in enumerate(scores.values):
        df.iloc[row_pos] = row
    df.iloc[row_labels.index("Mean")] = scores.mean(axis=0).values
    df.iloc[row_labels.index("Std.")] = scores.std(axis=0).values


def _summarise_fold_results(results, per_subject_scores):
    """Fill one row per subject, then add Mean/Std. computed from the fold columns only."""
    fold_columns = list(results.columns)
    for row_pos, scores in enumerate(per_subject_scores):
        results.iloc[row_pos] = scores
    fold_values = results[fold_columns].astype(float)
    results['Mean'] = fold_values.mean(axis=1, skipna=True)
    results['Std.'] = fold_values.std(axis=1, skipna=True)


start = time.time()
for subject in subjects:
    print(f"Training Subject {subject} on {data_type}")
    data_folder = f'..//imagined_speech/S{subject}/post_ica/'
    start1 = time.time()
    _,w_data,_,w_labels = load_subject_eeg(data_folder) #swap depending on data type

    data, labels = format_data(w_data,w_labels,data_type,4096) #reshape for CNN
    data = down_and_normal(data, dec) #downsample and normalise

    # NOTE(review): SMOTE is fitted on the whole data set before any CV split,
    # so synthetic trials in a training fold can be interpolated from trials
    # that end up in the matching validation/test fold. Consider resampling
    # inside each training fold instead.
    drs = data.reshape((data.shape[0],data.shape[1]*data.shape[2])) #2D for SMOTE
    X, y = s.fit_resample(drs, labels)
    X = X.reshape((X.shape[0],data.shape[1],data.shape[2]))

    # call_model reads these three module-level names.
    n_classes = len(np.unique(labels))
    n_chans   = int(data.shape[1])
    input_time_length = data.shape[2]
    lossdf, accdf = get_loss_acc_df(hyp_params,index_name,num_folds)

    trainsetlist, testsetlist = [],[]
    inner_fold_acc,inner_fold_loss = [],[]

    #####Outer-Fold#####
    for inner_ind, outer_index in skf.split(X, y):
        inner_fold, outer_fold     = X[inner_ind], X[outer_index]
        inner_labels, outer_labels = y[inner_ind], y[outer_index]

        trainsetlist.append(SignalAndTarget(inner_fold, inner_labels))
        testsetlist.append(SignalAndTarget(outer_fold, outer_labels))

        #####Inner-Fold#####
        for train_idx, valid_idx in skf.split(inner_fold, inner_labels):
            X_Train, X_val = inner_fold[train_idx], inner_fold[valid_idx]
            y_train, y_val = inner_labels[train_idx], inner_labels[valid_idx]
            train_set = SignalAndTarget(X_Train, y_train)
            val_set = SignalAndTarget(X_val, y_val)

            hyp_param_loss, hyp_param_acc = train_inner(train_set, val_set,hyp_params,parameters)

            inner_fold_loss.append(hyp_param_loss)
            inner_fold_acc.append(hyp_param_acc)

        # Elapsed time is measured from the start of this subject.
        print(f"Fold run time: {(time.time()-start1) / 60} minutes")

    ####Assigns each fold to DataFrame and computes mean####
    # Scores are aligned by hyper-parameter label (the frames' columns are
    # sorted, the scores are not), and Mean/Std. cover every inner-fold row.
    fold_losses = _aligned_fold_scores(inner_fold_loss, hyp_params, lossdf.columns)
    _write_fold_scores(lossdf, fold_losses)
    # The results directory must already exist.
    lossdf.to_excel(f"..//results/S{subject}/{model_type}_{data_type}/HP_loss.xlsx")

    fold_accs = _aligned_fold_scores(inner_fold_acc, hyp_params, accdf.columns)
    _write_fold_scores(accdf, fold_accs)
    accdf.to_excel(f"..//results/S{subject}/{model_type}_{data_type}/HP_acc.xlsx")

    #####Finds best hyper-parameter set for subject#####
    # nanargmin skips combinations whose mean loss is NaN (e.g. diverged runs);
    # it raises ValueError if every combination is NaN.
    mean_loss = fold_losses.mean(axis=0)
    BestParams = lossdf.columns[int(np.nanargmin(mean_loss.values))]
    BestParamsList.append(list(BestParams))

    # Mean accuracy per hyper-parameter value, selecting columns on the value's
    # own level so every hyper-parameter (including the last one) is covered.
    columns_list = get_col_list(hyp_params)
    mean_acc = fold_accs.mean(axis=0)
    hyp_param_means = []
    for hp_name, hp_values in zip(hyp_params.keys(), columns_list):
        level_values = mean_acc.index.get_level_values(hp_name)
        for hp_value in hp_values:
            hyp_param_means.append(mean_acc[level_values == hp_value].values.mean())
    hyp_param_means_list.append(hyp_param_means)

    #####Run best hyper-params on entire inner-fold#####
    outer_accuracies, best_scores, new_scores, optimised_model = train_outer(trainsetlist,testsetlist,BestParams,subject,data_type,model_type)
    all_outer_accuracies.append(outer_accuracies) #k-fold accuracies for all subjects.
    all_best_scores.append(best_scores)
    all_new_scores.append(new_scores)

    # Elapsed time is measured from the start of the whole run.
    print(f"subject run time: {(time.time()-start) / 60} minutes")

#####Compute Final Mean And Standard Deviation of Outer Fold Results#####
# Std. is taken over the fold columns only (previously it also included the
# freshly added 'Mean' column).
_summarise_fold_results(final_resultsdf, all_outer_accuracies)
final_resultsdf.to_excel(f'..//results/{model_type}_{data_type}_final_results.xlsx')

_summarise_fold_results(final_resultsdf_1, all_best_scores)
final_resultsdf_1.to_excel(f'..//results/{model_type}_{data_type}_final_results_1.xlsx')

_summarise_fold_results(final_resultsdf_2, all_new_scores)
final_resultsdf_2.to_excel(f'..//results/{model_type}_{data_type}_final_results_2.xlsx')

for i,j in enumerate(BestParamsList):
    paramsdf.iloc[i] = j
paramsdf.to_excel(f"..//results/{model_type}_{data_type}_params.xlsx")

##### Means of each hyper-parameter#####
for i,j in enumerate(hyp_param_means_list):
    paramscoresdf.iloc[i] = j
paramscoresdf.to_excel(f"..//results/{model_type}_{data_type}_paramscores.xlsx")
print(f"run time: {(time.time()-start) / 60} minutes")

Training Subject 01 on vowels


  input = module(input)


Fold run time: 7.9249866286913555 minutes


  input = module(input)


Fold run time: 15.170343367258708 minutes


  input = module(input)


Fold run time: 23.455295034249623 minutes


  input = module(input)


Fold run time: 30.88637411991755 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 31.094328184922535 minutes
Training Subject 02 on vowels


  input = module(input)


Fold run time: 5.888937064011892 minutes


  input = module(input)


Fold run time: 11.6874986966451 minutes


  input = module(input)


Fold run time: 17.43274978796641 minutes


  input = module(input)


Fold run time: 23.49900647799174 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 54.73709511359532 minutes
Training Subject 03 on vowels


  input = module(input)


Fold run time: 7.3946011940638225 minutes


  input = module(input)


Fold run time: 14.57887532711029 minutes


  input = module(input)


Fold run time: 22.00837979714076 minutes


  input = module(input)


Fold run time: 28.679277698198955 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 83.5871349453926 minutes
Training Subject 04 on vowels


  input = module(input)


Fold run time: 5.641532667477926 minutes


  input = module(input)


Fold run time: 11.55327440102895 minutes


  input = module(input)


Fold run time: 17.971861545244852 minutes


  input = module(input)


Fold run time: 23.87020038763682 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 107.61988495190938 minutes
Training Subject 05 on vowels


  input = module(input)


Fold run time: 6.811366804440817 minutes


  input = module(input)


Fold run time: 13.419100324312845 minutes


  input = module(input)


Fold run time: 20.048412903149924 minutes


  input = module(input)


Fold run time: 26.12866568962733 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 133.98622844219207 minutes
Training Subject 06 on vowels


  input = module(input)


Fold run time: 7.1027605652809145 minutes


  input = module(input)


Fold run time: 14.009677223364513 minutes


  input = module(input)


Fold run time: 21.014112663269042 minutes


  input = module(input)


Fold run time: 27.836992776393892 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 161.98239254554113 minutes
Training Subject 07 on vowels


  input = module(input)


Fold run time: 6.5225813627243046 minutes


  input = module(input)


Fold run time: 13.206719907124837 minutes


  input = module(input)


Fold run time: 19.687029004096985 minutes


  input = module(input)


Fold run time: 25.56460276444753 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 187.71628708442051 minutes
Training Subject 08 on vowels


  input = module(input)


Fold run time: 6.232553827762604 minutes


  input = module(input)


Fold run time: 12.624326022466024 minutes


  input = module(input)


Fold run time: 19.410241889953614 minutes


  input = module(input)


Fold run time: 25.411933143933613 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 213.29097394943238 minutes
Training Subject 09 on vowels


  input = module(input)


Fold run time: 6.683462011814117 minutes


  input = module(input)


Fold run time: 13.88858557542165 minutes


  input = module(input)


Fold run time: 20.435198215643563 minutes


  input = module(input)


Fold run time: 27.61863247950872 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 241.0806014418602 minutes
Training Subject 10 on vowels


  input = module(input)


Fold run time: 7.023008422056834 minutes


  input = module(input)


Fold run time: 13.611684656143188 minutes


  input = module(input)


Fold run time: 20.50451749563217 minutes


  input = module(input)


Fold run time: 27.239591932296754 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 268.4649108966192 minutes
Training Subject 11 on vowels


  input = module(input)


Fold run time: 7.398387610912323 minutes


  input = module(input)


Fold run time: 14.185014514128367 minutes


  input = module(input)


Fold run time: 21.15505658388138 minutes


  input = module(input)


Fold run time: 27.968397720654806 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 296.5854327201843 minutes
Training Subject 12 on vowels


  input = module(input)


Fold run time: 6.773257772127788 minutes


  input = module(input)


Fold run time: 13.274000155925751 minutes


  input = module(input)


Fold run time: 19.852378141880035 minutes


  input = module(input)


Fold run time: 26.33035631974538 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 323.0460660735766 minutes
Training Subject 13 on vowels


  input = module(input)


Fold run time: 6.721330038706461 minutes


  input = module(input)


Fold run time: 13.652339919408162 minutes


  input = module(input)


Fold run time: 20.135908448696135 minutes


  input = module(input)


Fold run time: 26.379280547300976 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 349.55478875637056 minutes
Training Subject 14 on vowels


  input = module(input)


Fold run time: 6.8450182716051735 minutes


  input = module(input)


Fold run time: 14.144464993476868 minutes


  input = module(input)


Fold run time: 21.54734296798706 minutes


  input = module(input)


Fold run time: 27.93216275771459 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 377.64052727619804 minutes
Training Subject 15 on vowels


  input = module(input)


Fold run time: 7.621236590544383 minutes


  input = module(input)


Fold run time: 14.744380402565003 minutes


  input = module(input)


Fold run time: 21.691618422667187 minutes


  input = module(input)


Fold run time: 28.63861999511719 minutes


  ret = ret.dtype.type(ret / rcount)
  input = module(input)
  input = module(input)
  input = module(input)
  input = module(input)


subject run time: 406.44045574267705 minutes
run time: 406.4504487315814 minutes


In [0]:
def train_outer_inter(trainsetlist,testsetlist,BestParams,subject,data_type,model_type):
    """
    Inter-subject variant of the outer-fold evaluation.

    Trains and tests one model per outer fold with a fixed hyper-parameter
    set; identical to the intra-subject routine except that the experiment is
    given ``model_type=f"{model_type}_inter"``.

    Parameters
    ----------
    trainsetlist, testsetlist : list of SignalAndTarget
        Matching outer-fold training and test sets.
    BestParams : sequence
        ``(activation name, learning rate, epochs, loss name)``.
    subject, data_type, model_type
        Forwarded to the experiment; ``model_type`` also selects the
        architecture via ``call_model``.

    Returns
    -------
    test_scores, best_scores, new_scores : list, list, list
        Per-fold accuracies in percent:
        ``test_scores`` from the last entry of the experiment's ``class_acc``,
        ``best_scores`` from the minimum of ``class_acc``,
        ``new_scores`` from the minimum of the ``test_misclass`` column.
        NOTE(review): the two minima are selected using test-set results, so
        they are optimistic estimates -- confirm which one should be reported.
    optimised_model
        The experiment object of the last fold (None if no folds were given).

    Notes
    -----
    Reads the module-level ``parameters`` dict.
    """
    test_scores, best_scores, new_scores = [], [], []
    batch_size = parameters["batch_size"]
    monitors   = parameters["monitors"]
    cuda       = parameters["cuda"]
    model_constraint = parameters["model_constraint"]

    iterator = BalancedBatchSizeIterator(batch_size=batch_size)

    # `square` and `safe_log` are braindecode functions, not members of
    # torch.nn.functional, so a plain getattr on the latter raised
    # AttributeError whenever 'square' was the selected activation.
    braindecode_functions = {fn.__name__: fn for fn in (square, safe_log)}

    def resolve(name):
        if name in braindecode_functions:
            return braindecode_functions[name]
        return getattr(torch.nn.functional, name)

    #####These are the learned hyper-parameters -- only set once for testing#####
    stop_criterion = MaxEpochs(BestParams[2])
    activation = resolve(BestParams[0])
    loss_function = resolve(BestParams[3])

    optimised_model = None
    model_number = 1
    for trainset, testset in zip(trainsetlist, testsetlist):
        # Seed before building the network so its initial weights are reproducible.
        set_random_seeds(seed=20190629, cuda=cuda)
        model = call_model(model_type, activation)

        if cuda:
            model.cuda()
            torch.backends.cudnn.deterministic = True

        # Build the optimizer after the model has been moved to its device.
        optimizer = optim.Adam(model.parameters(), lr=BestParams[1], weight_decay=0, eps=1e-8, amsgrad=False)

        # The old call used str.format on a '%s' template and never logged the model.
        log.info("Model: \n%s", model)

        #####Setup to run the selected model#####
        # Hold out 20% of the outer-fold training data as a validation set.
        trainset_X, valset_X, trainset_y, valset_y = train_test_split(trainset.X, trainset.y, test_size=0.2,
                                                                      shuffle=True, random_state=42, stratify=trainset.y)
        train_set = SignalAndTarget(trainset_X, trainset_y)
        val_set = SignalAndTarget(valset_X, valset_y)

        optimised_model = op_exp(model, train_set, val_set, test_set=testset, iterator=iterator,
                                loss_function=loss_function, optimizer=optimizer,
                                model_constraint=model_constraint, monitors=monitors,
                                stop_criterion=stop_criterion, remember_best_column='valid_misclass',
                                run_after_early_stop=True, model_loss_function=None, cuda=cuda,
                                data_type=data_type, subject_id=subject, model_type=f"{model_type}_inter", 
                                model_number=str(model_number))

        optimised_model.run()

        # class_acc holds misclassification rates (hence 1 - ...); pop() removes
        # the last entry after the minimum has been taken.
        best_accuracy = (1 - np.min(np.array(optimised_model.class_acc)))*100
        test_accuracy = round((1 - optimised_model.class_acc.pop())*100,3)
        new_accuracy = round((1 - optimised_model.epochs_df['test_misclass'].min())*100,3)

        best_scores.append(best_accuracy)
        test_scores.append(test_accuracy) # k accuracy scores for this param set.
        new_scores.append(new_accuracy)
        model_number += 1
    return test_scores, best_scores, new_scores, optimised_model

In [0]:
# Inter-subject evaluation: one fixed hyper-parameter set for every subject.
# NOTE(review): `BestParamsInter` is not defined anywhere in the visible
# notebook; it must be assigned (activation name, lr, epochs, loss name)
# before this cell is run.
all_inter_accuracies = []

# Results table built here (it was previously expected to exist already), so
# the cell works on a fresh kernel and can be re-run without the Mean/Std.
# columns of an earlier run getting in the way.
inter_resultsdf = pd.DataFrame(index=subjects,
                               columns=[f'fold{i}' for i in range(1, num_folds + 1)])

for subject in subjects:
    data_folder = f'..//imagined_speech/S{subject}/post_ica/'

    # NOTE(review): this unpacks positions 0 and 2 of load_subject_eeg's result,
    # whereas the intra-subject cell unpacks positions 1 and 3 for the same
    # data_type -- confirm which pair is intended.
    w_data,_,w_labels,_ = load_subject_eeg(data_folder)

    data, labels = format_data(w_data,w_labels,data_type,4096) #reshape for CNN
    data = down_and_normal(data, dec) #downsample and normalise

    # NOTE(review): SMOTE is fitted before the CV split, so synthetic training
    # trials can be derived from trials that land in the test fold.
    drs = data.reshape((data.shape[0],data.shape[1]*data.shape[2])) #2D for SMOTE
    X, y = s.fit_resample(drs, labels)
    X = X.reshape((X.shape[0],data.shape[1],data.shape[2]))

    # call_model reads these three module-level names.
    n_classes = len(np.unique(labels))
    n_chans   = int(data.shape[1])
    input_time_length = data.shape[2]

    trainsetlist, testsetlist = [],[]

    #####Outer-Fold#####
    for inner_ind, outer_index in skf.split(X, y):
        inner_fold, outer_fold     = X[inner_ind], X[outer_index]
        inner_labels, outer_labels = y[inner_ind], y[outer_index]

        trainsetlist.append(SignalAndTarget(inner_fold, inner_labels))
        testsetlist.append(SignalAndTarget(outer_fold, outer_labels))

    #####Run best hyper-params on entire inner-fold#####
    _, _, new_scores, _ = train_outer_inter(trainsetlist,testsetlist,BestParamsInter,subject,data_type,model_type)
    all_inter_accuracies.append(new_scores) #k-fold accuracies for all subjects.

for i,j in enumerate(all_inter_accuracies):
    inter_resultsdf.iloc[i] = j

#####Compute Final Mean And Standard Deviation of Outer Fold Results#####
# Both statistics come from the fold columns only; previously Std. was computed
# after 'Mean' had been added and therefore included it.
inter_fold_values = inter_resultsdf.astype(float)
inter_resultsdf['Mean'] = inter_fold_values.mean(axis=1,skipna=True)
inter_resultsdf['Std.'] = inter_fold_values.std(axis=1,skipna=True)
inter_resultsdf.to_excel(f'..//results/inter_subjects/{model_type}_{data_type}_final_inter_results.xlsx')