In [2]:
"""
Set up for using CoLab GPU and Google drive
"""
from google.colab import drive
drive.mount('/content/drive') # required to access files in your Google drive

%cd /content/drive/My Drive/ColabProjects/Study_2a/scripts/

"""
Quick test to ensure GPU is present
"""
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive
/content/drive/My Drive/ColabProjects/Study_2a/scripts
Found GPU at: /device:GPU:0


Install packages not already available with Colab -- requires restarting runtime

In [0]:
"""
Use the !pip install command to retrieve packages not installed
"""
!pip install braindecode
!pip install pandas==0.23.0 # facilitates version control 

Alternatively use a try/except clause during import

In [0]:
try:
  import pywt
except:
   !pip install PyWavelets
import pywt

Install all remaining required packages - including those in your own Google
drive.

In [0]:
import numpy as np
import pandas as pd
from preprocessing import load_subject_eeg, eeg_to_3d, format_data, down_and_normal, balanced_subsample
from utils_2 import current_acc
import warnings
from imblearn.over_sampling import SMOTE, ADASYN
warnings.filterwarnings('ignore', category=FutureWarning)
import logging  
import time
import sys 
from utils import balanced_subsample, current_loss

from sklearn.model_selection import train_test_split, StratifiedKFold

#####import network architectures#####
from braindecode.models.shallow_fbcsp import ShallowFBCSPNet
from braindecode.models.deep4 import Deep4Net
#from eegnet import EEGNetv4
from braindecode.torch_ext.optimizers import AdamW
from braindecode.torch_ext.functions import square, safe_log
from braindecode.experiments.stopcriteria import MaxEpochs, NoDecrease, Or, And
from braindecode.experiments.monitors import LossMonitor, MisclassMonitor, RuntimeMonitor
from braindecode.torch_ext.constraints import MaxNormDefaultConstraint 
from experiment_sans_test import Experiment 
#from experiment import Experiment as op_exp # experiemnt for saving optimized models
from braindecode.experiments.monitors import LossMonitor, MisclassMonitor, RuntimeMonitor 
from braindecode.datautil.iterators import BalancedBatchSizeIterator
from braindecode.datautil.signal_target import SignalAndTarget
from braindecode.torch_ext.util import set_random_seeds, np_to_var

from torch.nn.functional import elu, relu6, leaky_relu, relu, rrelu
import torch 
import torch.nn.functional as F 
from torch import optim

from tensorflow.keras.utils import normalize
torch.backends.cudnn.deterministic = True

log = logging.getLogger(__name__)

Define functions required for storing results and calling models

In [0]:
def results_df(index, index_name, columns_list, column_names):
    """
    create tiered dataframe for hyper-parameter results.
    """
    assert len(columns_list) == len(column_names), "Unequal length for columns/names!"
    miindex = pd.MultiIndex.from_product([index],names=[index_name])
    micol = pd.MultiIndex.from_product(columns_list,names=column_names)
    return pd.DataFrame(index=miindex, columns=micol).sortlevel().sortlevel(axis=1)

def param_scores_df(columns_list, index):
    """
    Creates dataframe for storing the mean scores for each hyper-parameter
    for each subject. Mean and Std. of each hyper-parameter is then stored for plotting.
    """
    index.append("Mean")
    index.append("Std.")
    df = pd.DataFrame(index=index, columns=columns_list)
    a = df.columns.str.split(', ', expand=True).values

    #swap values in NaN and replace NAN to ''
    df.columns = pd.MultiIndex.from_tuples([('', x[0]) if pd.isnull(x[1]) else x for x in a])
    return df

def get_col_list(hyp_params):
    """
    returns a list of lists containing hyper-parameters of XD.
    """
    y, a = [],[]
    for n in range(len(list(hyp_params.keys()))):
        x = hyp_params[list(hyp_params.keys())[n]]
        if callable(x[0]):
            a.append([x[s].__name__ for s in range(len(x))])
            y.append(a[0])
        else:
            y.append(x)
    return y

def get_results_df(hyp_params,index_name,subjects,num_folds):
    
    #1 - Final accuracies DataFrame
    folds = []
    for i in range(1,num_folds+1):
        folds.append(f'fold{i}')
    final_resultsdf = pd.DataFrame(index=subjects, columns=folds)
    
    # 2 -- Main Accruacy DataFrame for innerfold
    index = list(n+1 for n in range(num_folds*num_folds))
    index.append("Mean")
    index.append("Std.")
    columns_list = get_col_list(hyp_params)
    names = list(hyp_params.keys())

    lossdf = results_df(index,index_name,columns_list,names)
    accdf  = results_df(index,index_name,columns_list,names)
    # 3 -- DataFrame for storing best HPs by subject
    paramsdf = pd.DataFrame(index=subjects, columns=names)
    
    # 4 -- DataFrame for storing HP-specific mean accuracy scores per subject.
    # Hard-coded at present.
    col =[f'{list(hyp_params.keys())[0]}, {columns_list[0][0]}',f'{list(hyp_params.keys())[0]}, {columns_list[0][1]}',
          f'{list(hyp_params.keys())[0]}, {columns_list[0][2]}',f'{list(hyp_params.keys())[0]}, {columns_list[0][3]}',
          f'{list(hyp_params.keys())[1]}, {columns_list[1][0]}',f'{list(hyp_params.keys())[1]}, {columns_list[1][1]}',
          f'{list(hyp_params.keys())[1]}, {columns_list[1][2]}',f'{list(hyp_params.keys())[1]}, {columns_list[1][3]}', 
          f'{list(hyp_params.keys())[2]}, {columns_list[2][0]}',f'{list(hyp_params.keys())[2]}, {columns_list[2][1]}',
          f'{list(hyp_params.keys())[2]}, {columns_list[2][2]}',f'{list(hyp_params.keys())[2]}, {columns_list[2][3]}']
    paramscoresdf = param_scores_df(col, subjects)
    return final_resultsdf, lossdf, accdf, paramsdf, paramscoresdf, subjects
  
def call_model(model_type, activation):
    if model_type == 'shallow':
        model =  ShallowFBCSPNet(in_chans=n_chans, n_classes=n_classes, input_time_length=input_time_length,
                     n_filters_time=80, filter_time_length=40, n_filters_spat=80, 
                     pool_time_length=75, pool_time_stride=25, final_conv_length='auto',
                     conv_nonlin=square, pool_mode='max', pool_nonlin=safe_log, 
                     split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,
                     drop_prob=drop_prob).create_network()
       
    elif model_type == 'deep':
        model = Deep4Net(in_chans=n_chans, n_classes=n_classes, input_time_length=input_time_length,
                     final_conv_length='auto', n_filters_time=25, n_filters_spat=25, filter_time_length=10,
                     pool_time_length=3, pool_time_stride=3, n_filters_2=50, filter_length_2=10,
                     n_filters_3=100, filter_length_3=10, n_filters_4=200, filter_length_4=10,
                     first_nonlin=activation, first_pool_mode='mean', first_pool_nonlin=safe_log, later_nonlin=activation,
                     later_pool_mode='mean', later_pool_nonlin=safe_log, drop_prob=0.1, 
                     double_time_convs=False, split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,
                     stride_before_pool=False).create_network() #filter_length_4 changed from 15 to 10

    elif model_type == 'eegnet':
        model = EEGNetv4(in_chans=n_chans, n_classes=n_classes, final_conv_length='auto', 
                     input_time_length=input_time_length, pool_mode='mean', F1=16, D=2, F2=32,
                     kernel_length=64, third_kernel_size=(8,4), drop_prob=drop_prob).create_network()
        
    return model

In [0]:
subjects = ['01','02']#,'03','04','05','06','07','08','09','10','11','12','13','14','15'] 
data_type = 'words'
s = SMOTE(sampling_strategy='minority', random_state=10, k_neighbors=3)
fs = 1024
dec = 8

parameters = dict(best_loss = 100.0,
                  batch_size = 64,
                  monitors = [LossMonitor(), MisclassMonitor(), RuntimeMonitor()],
                  model_constraint = MaxNormDefaultConstraint(),
                  max_increase_epochs = 30,
                  cuda = True)

In [0]:
for subject in subjects:
    data_folder = f'..//imagined_speech/S{subject}/post_ica/'

    w_data,_,w_labels,_ = load_subject_eeg(data_folder)
    
    data, labels = format_data(w_data,w_labels,data_type,4096) #reshape for CNN
    data = down_and_normal(data, dec) #downsample and normalise

    drs = data.reshape((data.shape[0],data.shape[1]*data.shape[2])) #2D for SMOTE
    X, y = s.fit_resample(drs, labels)
    X = X.reshape((X.shape[0],data.shape[1],data.shape[2]))
    
    unique, counts = np.unique(labels, return_counts=True)
    n_classes = len(unique)
    n_chans   = int(data.shape[1])
    input_time_length = data.shape[2]
    
    print(X.shape)

(306, 6, 512)
(262, 6, 512)


In [0]:
"""
Instantiate dataframes for storing accuracies and hyper-parameter results. 
"""
hyp_params = dict(activation = [elu, relu6, leaky_relu, relu],
                  lr=[0.001,0.01,0.1,1],
                  epochs=[20,40,60,80]) # model hyper-parameters
num_folds = 4
index_name = 'Fold'
final_resultsdf, lossdf, accdf, paramsdf, paramscoresdf, subjects = get_results_df(hyp_params,index_name,subjects,num_folds)
subjects = subjects[:-2]

In [0]:
def train_inner(train_set, val_set, hyp_params, parameters):
    """
    Function for performing training on inner loop and 
    applying nested hyper-parameters.
    """
    best_loss  = parameters["best_loss"]
    batch_size = parameters["batch_size"]
    monitors   = parameters["monitors"]
    cuda       = parameters["cuda"]
    model_constraint    = parameters["model_constraint"]
    max_increase_epochs = parameters['max_increase_epochs']

    iterator = BalancedBatchSizeIterator(batch_size=batch_size)
    val_acc, val_loss = [], []
    
    for activation in hyp_params['activation']:
        for lr in hyp_params['lr']:
            for n_epochs in hyp_params['epochs']:
                model = None
                model = call_model('deep', activation)
                
                set_random_seeds(seed=20190629, cuda=cuda)

                if cuda:
                    model.cuda()
                    torch.backends.cudnn.deterministic = True
                
                log.info("%s model: ".format(str(model)))
                optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0, eps=1e-8, amsgrad=False)
                stop_criterion = Or([MaxEpochs(n_epochs),
                         NoDecrease('valid_misclass', max_increase_epochs)])
                
                loss_function = F.cross_entropy
                model_loss_function = None
               
                #####Setup to run the selected model#####
                model_test = Experiment(model, train_set, val_set, test_set=None, iterator=iterator,
                                        loss_function=loss_function, optimizer=optimizer,
                                        model_constraint=model_constraint, monitors=monitors,
                                        stop_criterion=stop_criterion, remember_best_column='valid_misclass',
                                        run_after_early_stop=True, model_loss_function=model_loss_function, cuda=cuda)
             
                model_test.run()
                model_acc = model_test.epochs_df['valid_misclass'].astype('float')
                model_loss = model_test.epochs_df['valid_loss'].astype('float')
                current_val_acc = 1 - current_acc(model_acc)
                current_val_loss = current_loss(model_loss)
                
#                 accuracy = 1 - np.min(np.array(optimized_model.class_acc))
# 		            cv_scores.append(accuracy) # k accuracy scores for this param set. 
		
                
                val_acc.append(current_val_acc)
                val_loss.append(current_val_loss)
    
    return val_loss, val_acc

In [0]:
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=10)
out_fold_num = 0 # outer-fold number
trainsetlist, testsetlist = [],[]
cv_scores = []
BestParamsList = []


inner_fold_acc,inner_fold_loss = [],[]
val_acc = 1
start = time.time()
#####Outer=Fold#####
for inner_ind, outer_index in skf.split(X, y):
    inner_fold, outer_fold     = X[inner_ind], X[outer_index]
    inner_labels, outer_labels = y[inner_ind], y[outer_index]
    out_fold_num += 1
    loss_with_params = dict()# for storing param values and losses
    in_fold_num = 0 # inner-fold number
    
    trainsetlist.append(SignalAndTarget(inner_fold, inner_labels))
    testsetlist.append(SignalAndTarget(outer_fold, outer_labels))
   
    #####Inner-Fold#####
    for train_idx, valid_idx in skf.split(inner_fold, inner_labels):
        X_Train, X_val = inner_fold[train_idx], inner_fold[valid_idx]
        y_train, y_val = inner_labels[train_idx], inner_labels[valid_idx]
        train_set = SignalAndTarget(X_Train, y_train)
        val_set = SignalAndTarget(X_val, y_val)
        in_fold_num += 1
        hyp_param_acc, hyp_param_loss = [], []
        
        hyp_param_loss, hyp_param_acc = train_inner(train_set, val_set,hyp_params,parameters)
        
        inner_fold_loss.append(hyp_param_loss)
        inner_fold_acc.append(hyp_param_acc)
       
print(f"run time: {time.time()-start} seconds")
#####Assigns each fold to DataFrame and computes mean####
for i,j in enumerate(inner_fold_loss):
    lossdf.iloc[i] = j
    lossdf.head(6)
lossdf.loc["Mean"].iloc[0] = lossdf.iloc[1:16].mean(axis=0).values
lossdf.loc["Std."].iloc[0] = lossdf.iloc[1:16].std(axis=0).values
lossdf.to_excel(f"..//results/S{'01'}/HP_acc.xlsx")

for i,j in enumerate(inner_fold_acc):
    accdf.iloc[i] = j
    accdf.head(6)
accdf.loc["Mean"].iloc[0] = accdf.iloc[1:16].mean(axis=0).values
accdf.loc["Std."].iloc[0] = accdf.iloc[1:16].std(axis=0).values

#####Finds best hyper-parameter set for subject#####
BestParams = lossdf.columns[df.loc["Mean"].values.argmin()]
BestParamsList.append(list(BestParams))

for i,j in enumerate(BestParamsList):
    paramsdf.iloc[i] = j

##### Means of each hyper-parameter#####
columns_list = get_col_list(hyp_params)
hyp_param_means_list = []
hyp_param_means = []
for x in columns_list:
    for y in x:
        sub_df = accdf[[i for i in accdf.columns if i[0] == y or i[1] == y or i[2] == y]]
        hyp_param_means.append(sub_df.loc["Mean"].values.mean())
hyp_param_means_list.append(hyp_param_means)
    
for i,j in enumerate(hyp_param_means_list):
    paramscoresdf.iloc[i] = j

df.tail()

run time: 1884.9905281066895 seconds


activation,elu,elu,elu,elu,elu,elu,elu,elu,elu,elu,elu,elu,elu,elu,elu,elu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,leaky_relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu6,relu6,relu6,relu6,relu6,relu6,relu6,relu6,relu6,relu6,relu6,relu6,relu6,relu6,relu6,relu6
lr,0.001,0.001,0.001,0.001,0.010,0.010,0.010,0.010,0.100,0.100,0.100,0.100,1.000,1.000,1.000,1.000,0.001,0.001,0.001,0.001,0.010,0.010,0.010,0.010,0.100,0.100,0.100,0.100,1.000,1.000,1.000,1.000,0.001,0.001,0.001,0.001,0.010,0.010,0.010,0.010,0.100,0.100,0.100,0.100,1.000,1.000,1.000,1.000,0.001,0.001,0.001,0.001,0.010,0.010,0.010,0.010,0.100,0.100,0.100,0.100,1.000,1.000,1.000,1.000
epochs,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80,20,40,60,80
Fold,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,Unnamed: 42_level_3,Unnamed: 43_level_3,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3,Unnamed: 47_level_3,Unnamed: 48_level_3,Unnamed: 49_level_3,Unnamed: 50_level_3,Unnamed: 51_level_3,Unnamed: 52_level_3,Unnamed: 53_level_3,Unnamed: 54_level_3,Unnamed: 55_level_3,Unnamed: 56_level_3,Unnamed: 57_level_3,Unnamed: 58_level_3,Unnamed: 59_level_3,Unnamed: 60_level_3,Unnamed: 61_level_3,Unnamed: 62_level_3,Unnamed: 63_level_3,Unnamed: 64_level_3
14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
15,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
16,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Std.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [0]:
paramscoresdf

Unnamed: 0_level_0,activation,activation,activation,activation,lr,lr,lr,lr,epochs,epochs,epochs,epochs
Unnamed: 0_level_1,elu,relu6,leaky_relu,relu,0.001,0.01,0.1,1,20,40,60,80
01,3.6215,3.26488,4.12335,3.64576,2.62177,2.74448,3.76998,5.51926,4.53114,3.45129,3.33585,3.3372
02,,,,,,,,,,,,
Mean,,,,,,,,,,,,
Std.,,,,,,,,,,,,
