## GroupWithNoName EEG Data Processing

### Event Codes
33024 - key release (the same code is used for every key) <br>
33025 - `a` key press (start of the trial) <br>
33027 - `e` key press (Waldo found)

### Imports

In [265]:
import pandas as pd
import numpy as np
import os
from os import listdir
from os.path import isfile, join, isdir

### Known Variables

In [290]:
# Sampling rate in Hz (the raw time column is named 'Time:512Hz'); passed to generate_epoch as `fs`
sampling_rate = 512.0

### Helper Functions

In [296]:
def generate_epoch(train_data, channels, fs, eeg_filter, stimulus_times = None, baseline = True,  epoch_s = -200, epoch_e = 1300, bl_s = -400, bl_e = -300):

    """
    :description: Cut fixed-length epochs around every stimulus out of continuous data.
        The rows of train_data are treated as consecutive samples taken at `fs` Hz, so a
        stimulus is located by its row position. If stimulus_times is not passed, the frame
        must contain a 'FeedBackEvent' column whose value is 1 on the stimulus rows.
        train_data is only read; it is never modified.
    :train_data (Pandas df): data to epoch
    :channels ([String]): array of channels (column names) to epoch
    :fs (float): sampling rate in Hz
    :eeg_filter (function): the filter you want to apply to raw eeg data. It is currently
        NOT applied (the data is epoched unfiltered); the argument is kept so existing
        callers keep working.
    :stimulus_times ([float], optional): the time points, in seconds from the first row, at
        which stimuli occur; each one is converted to a sample index as round(t * fs)
    :baseline (boolean, optional): whether you want to apply baseline correction after epoching
    :epoch_s (int, optional): epoch starting time relative to stimulus in milliseconds
    :epoch_e (int, optional): epoch ending time relative to stimulus in milliseconds
    :bl_s (int, optional): baseline starting time relative to stimulus in milliseconds
    :bl_e (int, optional): baseline ending time relative to stimulus in milliseconds
    :rtype (3d-nparray): epoched data with dimension (stimulus_per_subj, number_of_channels, number_of_time_points)
    :raises ValueError: if the epoch or baseline window is empty, or if either window of
        any stimulus reaches outside the recording
    """

    samples_per_ms = fs / 1000

    # Locate every stimulus as a row position in the recording
    if stimulus_times is None:
        mark_indices = np.flatnonzero(train_data['FeedBackEvent'].values == 1)
    else:
        mark_indices = np.round(np.asarray(stimulus_times, dtype=float).flatten() * fs).astype(int)

    # Epoch window: offset of its first sample from the marker, and its length in samples.
    # The length is computed once and reused for slicing so the two can never disagree.
    e_s = int(epoch_s * samples_per_ms)
    epoch_len = int((epoch_e - epoch_s) * samples_per_ms)
    if epoch_len <= 0:
        raise ValueError("epoch_e must be later than epoch_s")

    n_samples = len(train_data)
    starts = mark_indices + e_s  # row position of the first sample of each epoch
    if mark_indices.size and (starts.min() < 0 or starts.max() + epoch_len > n_samples):
        raise ValueError("an epoch window reaches outside the recorded data")
    epoch_rows = starts[:, None] + np.arange(epoch_len)  # (n_stimuli, epoch_len)

    if baseline:
        # Baseline offsets are measured from the first sample of the epoch. They may be
        # negative (baseline before the epoch starts), so the baseline is read from the
        # continuous signal instead of from the epoch array, where a negative offset
        # would wrap around to the end of the epoch.
        b_s = int((bl_s - epoch_s) * samples_per_ms)
        b_e = int((bl_e - epoch_s) * samples_per_ms)
        if b_e <= b_s:
            raise ValueError("bl_e must be later than bl_s")
        if mark_indices.size and (starts.min() + b_s < 0 or starts.max() + b_e > n_samples):
            raise ValueError("a baseline window reaches outside the recorded data")
        baseline_rows = (starts + b_s)[:, None] + np.arange(b_e - b_s)

    # Epoch the data, one channel at a time
    final_epoch = np.empty((mark_indices.shape[0], len(channels), epoch_len), float)
    for c, channel in enumerate(channels):
        ################# You may want to apply your own filter ################
        clean_eeg = np.asarray(train_data[channel].values, dtype=float)
        ########################################################################

        epoch = clean_eeg[epoch_rows]  # samples around each stimulus onset

        # Baseline correction: subtract each stimulus' own baseline mean
        if baseline:
            epoch = epoch - clean_eeg[baseline_rows].mean(axis=1, keepdims=True)

        final_epoch[:, c, :] = epoch
    return final_epoch

In [256]:
#
# Cleans a given raw openvibe dataframe
#
def clean_trial(trial):
    """
    Return a cleaned copy of a raw recording frame.

    The result starts on the row after the first start key press (event 33025),
    has its rows renumbered from 0, its time axis shifted to begin at 0, and
    only a 0/1 event column left: 1 marks the waldo-found key press (33027),
    key releases (33024) and rows without an event are 0. The columns
    'Time:512Hz' and 'Event Id' are renamed to 'Time' and 'FeedBackEvent'.
    """
    key_release, start_press, found_press = 33024, 33025, 33027

    # bookkeeping columns that are not used afterwards
    cleaned = trial.drop(columns=['Event Duration', 'Event Date', 'Electrode'])

    # key releases and rows without any event both become 0
    event_ids = cleaned['Event Id']
    no_event = (event_ids == key_release) | event_ids.isnull()
    cleaned.loc[no_event, 'Event Id'] = 0

    # keep the rows after the first start key press, renumber them from 0,
    # and discard the old row labels together with the Epoch column
    first_start = cleaned.index[cleaned['Event Id'] == start_press][0]
    cleaned = cleaned[first_start + 1:].reset_index().drop(columns=['index', 'Epoch'])

    # mark the waldo-found key press with 1
    cleaned.loc[cleaned['Event Id'] == found_press, 'Event Id'] = 1

    # shift the time axis so the first remaining row is at 0
    time_values = cleaned['Time:512Hz']
    cleaned['Time:512Hz'] = time_values - time_values[0]

    return cleaned.rename(columns={"Time:512Hz": "Time", "Event Id": "FeedBackEvent"})

In [257]:
#
# Reads and cleans the csv file at the given filepath
#
def processcsv(filename):
    """
    Read the raw csv recording stored at `filename` and return it cleaned.

    :filename (String): path of the csv file to load
    :rtype (Pandas df): the frame returned by clean_trial for that file
    """
    # read the file the caller asked for (previously a fixed path was always loaded)
    raw = pd.read_csv(filename)
    return clean_trial(raw)

In [287]:
def getchannels(trial):
    """Return, as a list, the column names found at positions 1 through 10 of `trial`."""
    channel_columns = trial.columns[1:11]
    return channel_columns.tolist()

In [293]:
def custom_filter(trial):
    """Placeholder filter: prints a notice and returns None; `trial` is not used."""
    notice = "Filter not implemented yet"
    print(notice)

### Testing

In [263]:
# read cece's trial: load 'cece_trial.csv' and clean it with processcsv
cece_trial = processcsv('cece_trial.csv')

In [264]:
# preview the first rows of the cleaned trial
cece_trial.head()

Unnamed: 0,Time,Attention,Meditation,Delta,Theta,Low Alpha,High Alpha,Low Beta,High Beta,Low Gamma,Mid Gamma,FeedBackEvent
0,0.0,26.0,74.0,1434788.0,238670.0,45000.0,196656.0,73717.0,19313.0,16411.0,26864.0,0.0
1,0.001953,26.0,74.0,1434788.0,238670.0,45000.0,196656.0,73717.0,19313.0,16411.0,26864.0,0.0
2,0.003906,26.0,74.0,1434788.0,238670.0,45000.0,196656.0,73717.0,19313.0,16411.0,26864.0,0.0
3,0.005859,26.0,74.0,1434788.0,238670.0,45000.0,196656.0,73717.0,19313.0,16411.0,26864.0,0.0
4,0.007812,26.0,74.0,1434788.0,238670.0,45000.0,196656.0,73717.0,19313.0,16411.0,26864.0,0.0


In [295]:
# (number of rows, number of columns) of the cleaned trial
cece_trial.shape

(31519, 13)

In [262]:
# rows flagged with FeedBackEvent == 1, i.e. the waldo-found key presses set by clean_trial
cece_trial.loc[cece_trial['FeedBackEvent'] == 1]

Unnamed: 0,Time,Attention,Meditation,Delta,Theta,Low Alpha,High Alpha,Low Beta,High Beta,Low Gamma,Mid Gamma,FeedBackEvent
23135,45.185547,60.0,77.0,831923.0,61359.0,14862.0,5192.0,3921.0,4290.0,3395.0,1645.0,1.0
27167,53.060547,37.0,63.0,900981.0,63149.0,18119.0,11465.0,3773.0,2018.0,791.0,482.0,1.0


In [288]:
# channel names to epoch: the columns at positions 1 through 10 of the cleaned trial
channels = getchannels(cece_trial)

In [298]:
# epoch every channel around each FeedBackEvent == 1 row, using the default epoch/baseline windows
# NOTE: custom_filter is passed as eeg_filter, but generate_epoch does not currently call it
epoch = generate_epoch(cece_trial, channels, sampling_rate, custom_filter)

In [325]:
# time points of the second stimulus (index 1) for the first channel (index 0)
epoch[1][0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.