In [7]:
import numpy as np
import pandas as pd
import glob
import time
from scipy import signal

In [8]:
def extract_d(files):
    '''
    extract_d(files)
    Ingest data by looping through files and epoching around feedback events.

    Each file is read with pandas. Every row where FeedBackEvent == 1 marks the
    start of an epoch of 260 samples (1.3 seconds at a 200 Hz sampling rate),
    taken across all columns of the file. All epochs are stacked along axis 0.

    Dataset notes carried over from the original author's constants (not used
    by the code): 16 training subjects, 340 feedbacks per subject, 56 EEG
    columns.

    Input:
        files: sequence of strings with file names (Data_S*_Sess*.csv). Each
            file must contain a 'FeedBackEvent' column and 59 columns in total.
    Output:
        temp: array of shape (1 + number_of_epochs, 260, 59).
            Index 0 is an all-zero placeholder epoch, kept so that the output
            shape matches what earlier versions of this function returned;
            real epochs start at index 1, ordered by file and then by event.
    Raises:
        ValueError: if a file containing feedback events does not have 59
            columns, or if a feedback event is fewer than 260 samples away
            from the end of its file (the epoch would be truncated).
    '''
    start = time.time()

    freq = 200  # sampling rate (Hz)
    epoch_time = 1.3  # proposed epoching time in seconds
    epoch = int(round(freq * epoch_time))  # epoch length in samples (260)
    num_of_cols = 59  # columns expected in every file

    # The constants below are only referenced by the optional, commented-out
    # steps further down; they are kept so those steps work when uncommented.
    b_s = int(-0.4 * freq)  # index where baseline starts relative to feedback (-400ms)
    b_e = int(-0.3 * freq)  # index where baseline ends relative to feedback (-300ms)
    order = 5  # butterworth order
    low_pass = 1  # low frequency pass for butterworth filter
    high_pass = 40  # high frequency pass for butterworth filter

    # Epochs are collected in a list and concatenated once at the end; stacking
    # onto a growing array inside the loop would recopy everything each time.
    # The first entry is the zero placeholder (np.zeros rather than np.empty,
    # so its contents are deterministic instead of uninitialised memory).
    epochs = [np.zeros((1, epoch, num_of_cols), float)]

    for i, f in enumerate(files):
        # Same progress line as before: shape of everything accumulated so far.
        print(i, f, (len(epochs), epoch, num_of_cols))
        df = pd.read_csv(f)  # read each file
        index_fb = df[df['FeedBackEvent'] == 1].index.values
        df = np.array(df)

        if len(index_fb) and df.shape[1] != num_of_cols:
            raise ValueError(
                '%s has %d columns, expected %d' % (f, df.shape[1], num_of_cols)
            )

        # uncomment below for butterworth filter
        # (butter_filter is not defined in this part of the notebook)

        #eeg = df[:,1:57] #only eeg values to apply butterworth filter
        #eeg_filtered = butter_filter(order, low_pass, high_pass, freq, eeg) #butterworth filter applied
        #df[:,1:57] = eeg_filtered #replacing old eeg values with new ones

        for indx in index_fb:  # epoching 260 indexes (1.3 seconds) after each stimulus
            if indx + epoch > df.shape[0]:
                raise ValueError(
                    '%s: feedback event at row %d has fewer than %d samples after it'
                    % (f, indx, epoch)
                )
            # Leading axis of length 1 so the epochs concatenate along axis 0.
            epoch_array = df[indx:indx + epoch, :][np.newaxis, :, :]

            # uncomment below for baseline correction

            #baseline_array = df[indx+b_s:indx+b_e,:] #baseline correction of 100ms (20 indexes), 400ms to 300ms before fb
            #baseline_array = baseline_array.reshape((1,20,int(baseline_array.shape[1])))
            #baseline_mean = np.mean(baseline_array, axis = 1)
            #epoch_array = epoch_array - baseline_mean #noise subtracted from epoched data

            epochs.append(epoch_array)

    temp = np.concatenate(epochs, axis=0)

    now = time.time()
    print('Elapsed Time: ' + str(int(now-start)) + ' seconds')
    return temp

In [4]:
# glob.glob returns matches in arbitrary, OS-dependent order. Sorting makes the
# file order (and therefore slices such as train_files[:5] and the order in
# which extract_d stacks epochs) reproducible across machines and runs.
train_files = sorted(glob.glob('Data/train/Data*.csv'))
test_files = sorted(glob.glob('Data/test/Data*.csv'))
train_files[0:6]

['Data/train\\Data_S02_Sess01.csv',
 'Data/train\\Data_S02_Sess02.csv',
 'Data/train\\Data_S02_Sess03.csv',
 'Data/train\\Data_S02_Sess04.csv',
 'Data/train\\Data_S02_Sess05.csv',
 'Data/train\\Data_S06_Sess01.csv']

In [5]:
# Notice: extract_d seeds its accumulator with one placeholder epoch before any file is read
# (see the (1, 260, 59) shape printed for file 0 below), so index 0 of the returned array is NOT real data.
# Make sure you keep this in mind, e.g. skip it with train[1:] before using the epochs.
train = extract_d(train_files[:5])

0 Data/train\Data_S02_Sess01.csv (1, 260, 59)
1 Data/train\Data_S02_Sess02.csv (61, 260, 59)
2 Data/train\Data_S02_Sess03.csv (121, 260, 59)
3 Data/train\Data_S02_Sess04.csv (181, 260, 59)
4 Data/train\Data_S02_Sess05.csv (241, 260, 59)
Elapsed Time: 18 seconds


In [6]:
# Notice: extract_d seeds its accumulator with one placeholder epoch before any file is read
# (see the (1, 260, 59) shape printed for file 0 below), so index 0 of the returned array is NOT real data.
# Make sure you keep this in mind, e.g. skip it with test[1:] before using the epochs.
test = extract_d(test_files[:5])

0 Data/test\Data_S01_Sess01.csv (1, 260, 59)
1 Data/test\Data_S01_Sess02.csv (61, 260, 59)
2 Data/test\Data_S01_Sess03.csv (121, 260, 59)
3 Data/test\Data_S01_Sess04.csv (181, 260, 59)
4 Data/test\Data_S01_Sess05.csv (241, 260, 59)
Elapsed Time: 18 seconds
