In [1]:
# Set notebook to use only one GPU
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [31]:
from braindecode.datasets.moabb import MOABBDataset
import numpy as np
import pandas as pd
from braindecode.preprocessing import create_windows_from_events
from braindecode.preprocessing import (
    exponential_moving_standardize, preprocess, Preprocessor)
from numpy import multiply
from sklearn.preprocessing import OneHotEncoder
from models_bachelors import *
from file_functions import *

# Preprocessing functions


In [32]:
def load_dataset():
    """Build the ``MOABBDataset`` for ``"BNCI2014001"``.

    ``subject_ids=None`` is passed through unchanged, i.e. no explicit
    subject subset is requested.

    Returns:
        The ``MOABBDataset`` instance created for ``"BNCI2014001"``.
    """
    return MOABBDataset(dataset_name="BNCI2014001", subject_ids=None)

def preprocess_data(dataset):
    """Run the currently active preprocessing steps on ``dataset``.

    Two steps are applied, in this order:

    1. ``pick_types`` keeping EEG channels only (no MEG, no stim).
    2. Multiplication of the signal by 1e6 (V -> uV).

    Band-pass filtering and exponential moving standardization are configured
    below but are currently disabled (commented out), so their settings are
    unused for now.

    Args:
        dataset: Dataset object to hand to braindecode's ``preprocess``.

    Returns:
        Whatever ``preprocess(dataset, preprocessors)`` returns.
    """
    # --- Settings for the disabled steps (kept so they can be re-enabled) ---
    low_cut_hz = 4.  # lower band edge for filtering
    high_cut_hz = 38.  # upper band edge for filtering
    # TODO: check whether this factor is the same as the 0.999 mentioned in
    # the articles.
    factor_new = 1e-3
    init_block_size = 1000
    iir_params = dict(order=3, ftype='butter', output='sos')

    # --- Active steps ---
    volts_to_microvolts = 1e6
    keep_eeg_only = Preprocessor('pick_types', eeg=True, meg=False, stim=False)
    rescale = Preprocessor(lambda signal: multiply(signal, volts_to_microvolts))

    # --- Disabled steps, kept for reference ---
    # Preprocessor('filter', l_freq=low_cut_hz, h_freq=high_cut_hz,
    #              iir_params=iir_params, method='iir', phase='forward'),  # Third order butterworth filter
    # # The logs say it's a causal filter but the order is 6?
    # Preprocessor(exponential_moving_standardize,  # Exponential moving standardization
    #              factor_new=factor_new, init_block_size=init_block_size)

    return preprocess(dataset, [keep_eeg_only, rescale])

def epoch_data(dataset):
    """Cut the recordings in ``dataset`` into event-based windows.

    The start offset is -0.5 s converted to samples with the recordings'
    sampling frequency; the stop offset is 0 samples. All recordings must
    share one sampling frequency (checked with an ``assert``).

    Args:
        dataset: Object exposing ``.datasets``, whose items each provide
            ``raw.info['sfreq']``.

    Returns:
        The result of ``create_windows_from_events`` called with
        ``preload=True``.
    """
    pre_trial_seconds = -0.5

    # Use the first recording's sampling rate as the reference and make sure
    # every other recording agrees with it.
    recordings = dataset.datasets
    sfreq = recordings[0].raw.info['sfreq']
    assert all(rec.raw.info['sfreq'] == sfreq for rec in recordings)

    # Window boundaries are expressed in samples, not seconds.
    window_kwargs = dict(
        trial_start_offset_samples=int(pre_trial_seconds * sfreq),
        trial_stop_offset_samples=0,
        preload=True,
    )
    return create_windows_from_events(dataset, **window_kwargs)


In [33]:
def create_dataframe_helper(dataset):
    """Gather every trial of every subject into two stacked arrays.

    Args:
        dataset: Positionally indexable collection with one entry per
            subject. Each entry must expose ``.datasets``, an iterable of
            runs; iterating a run must yield trials whose item 0 is the
            input and item 1 is the target.

    Returns:
        Tuple ``(inputs, targets)`` built with ``np.asarray``. The first axis
        indexes subjects and the second indexes that subject's trials in run
        order (9 subjects x 576 trials for the data used in this notebook).
    """
    all_inputs, all_targets = [], []
    for position in range(len(dataset)):
        # Flatten this subject's runs into one trial sequence.
        trials = [trial for run in dataset[position].datasets for trial in run]
        all_inputs.append([trial[0] for trial in trials])
        all_targets.append([trial[1] for trial in trials])

    return np.asarray(all_inputs), np.asarray(all_targets)


def create_dataframe(processed_data):
    """Split ``processed_data`` by subject and stack it into arrays.

    The split is keyed by the strings ``'1'`` .. ``'9'``; subjects are passed
    to ``create_dataframe_helper`` in that numeric order.

    Args:
        processed_data: Object whose ``split('subject')`` returns a mapping
            from subject id strings to per-subject datasets.

    Returns:
        Tuple ``(inputs, targets)`` as returned by ``create_dataframe_helper``.
        For this notebook's data the inputs have shape (9, 576, 22, 1125):
        9 subjects, 576 trials each, 22 channels, 1125 timestamps.
    """
    by_subject = processed_data.split('subject')

    ordered_subjects = []
    for subject_number in range(1, 9 + 1):
        ordered_subjects.append(by_subject[str(subject_number)])

    subject_inputs, subject_targets = create_dataframe_helper(ordered_subjects)
    return subject_inputs, subject_targets

def onehot(targets):
    """One-hot encode class labels along a new trailing axis.

    The encoding is computed with NumPy only, so it does not depend on the
    ``sparse`` / ``sparse_output`` keyword of scikit-learn's ``OneHotEncoder``,
    and the output shape is derived from the input instead of being fixed to
    (9, 576, 4).

    Columns follow the sorted unique label values found in ``targets``, which
    is the same ordering ``OneHotEncoder`` uses for automatically determined
    numeric categories.

    Args:
        targets: Array-like of class labels with any shape, e.g.
            ``(n_subjects, n_trials)``.

    Returns:
        ``float64`` array of shape ``targets.shape + (n_classes,)`` where
        ``n_classes`` is the number of distinct labels present.
    """
    labels = np.asarray(targets)
    # Flatten first so the inverse index is guaranteed to be 1-D.
    classes, class_index = np.unique(labels.reshape(-1), return_inverse=True)
    n_classes = classes.size
    # Row i of the identity matrix is the one-hot vector of class i.
    encoded = np.eye(n_classes)[class_index.reshape(-1)]
    return encoded.reshape(labels.shape + (n_classes,))
    

def get_x_y(inputs, targets):
    """Merge the subject and trial axes into one sample axis.

    Args:
        inputs: 4-D array indexed as (subject, trial, channel, timestamp).
        targets: 3-D array indexed as (subject, trial, class).

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape
        ``(subjects * trials, channels, timestamps)`` and ``Y`` has shape
        ``(subjects * trials, classes)``; samples are ordered subject by
        subject.
    """
    total_trials = inputs.shape[0] * inputs.shape[1]
    x_shape = (total_trials, inputs.shape[2], inputs.shape[3])
    y_shape = (total_trials, targets.shape[2])

    # vstack joins the per-subject blocks along the first axis.
    X = np.vstack(inputs).reshape(x_shape)
    Y = np.vstack(targets).reshape(y_shape)
    return X, Y

In [34]:
# Full pipeline: load the dataset, apply the preprocessors, then cut the
# recordings into trial windows.
processed_data = epoch_data(preprocess_data(load_dataset()))

48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
48 events found
Event IDs: [1 2 3 4]
4

  warn('Preprocessing choices with lambda functions cannot be saved.')


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).

In [35]:
# Stack the windowed data into per-subject arrays, one-hot encode the labels,
# and print both shapes as a sanity check.
# NOTE: `targets` is rebound to its one-hot version here, so re-running only
# the `onehot` line would encode already-encoded labels; re-run the whole cell.
inputs, targets = create_dataframe(processed_data)
targets = onehot(targets)
print(inputs.shape, targets.shape)

(9, 576, 22, 1125) (9, 576, 4)




# Lockbox Creation

Instead of saving a separate file containing the lockboxed data itself, the lockbox file stores, for each test subject, the indices of the trials that are to be lockboxed.

This way, the lockbox trials can be easily excluded during training.

In [36]:
from models_bachelors import *
from file_functions import *
from numpy import floor
from numpy import random
from numpy.random import default_rng
from sklearn.model_selection import KFold
loaded_inputs = inputs
loaded_targets = targets

# Fixed seed so the drawn lockbox indices are identical on every run; with an
# unseeded generator a re-run would not reproduce previously saved indices.
LOCKBOX_SEED = 42
# Fraction of each training subject's trials that goes into the lockbox.
LOCKBOX_FRACTION = 0.1

# Number of subjects, taken from the data instead of being hard-coded.
n_s = loaded_inputs.shape[0]

# One fold per subject and no shuffling: fold i holds out subject i as the
# test subject and treats all remaining subjects as training subjects.
kfold_lock = KFold(n_splits=n_s, shuffle=False)
rng = default_rng(LOCKBOX_SEED)

lockbox = []

# The test indices only identify which subject an entry of `lockbox` belongs
# to; the lockbox itself is drawn from the train indices. The test subject of
# each fold is skipped, so only the remaining subjects get lockbox indices.
for train_idx, test_idx in kfold_lock.split(loaded_inputs, loaded_targets):
    lockbox_idx = []
    # Draw the lockbox separately for every training subject and keep the
    # index arrays separate (NOT concatenated): trial indices are only
    # meaningful relative to the subject they were drawn for.
    for idx in train_idx:
        subject_inputs = loaded_inputs[idx]
        num_trials = subject_inputs.shape[0]
        # Random sample (without replacement) of this subject's trial indices.
        indexes = rng.choice(
            num_trials,
            size=int(LOCKBOX_FRACTION * num_trials),
            replace=False,
        )
        lockbox_idx.append(indexes)

    # Position in `lockbox` corresponds to the fold's test subject, which
    # relies on the folds being produced in subject order (shuffle=False).
    lockbox.append(lockbox_idx)

lockbox = np.array(lockbox)
# One entry per test subject, each holding one index array per training subject.
assert lockbox.shape[:2] == (n_s, n_s - 1)
save('lockbox', {'data': lockbox})
    