In [1]:
## Erdos Bootcamp
## Josimar A. Silva, May 2021
## This script reads the EEG dataset and create features for classification.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
## reading dataframe
df = pd.read_csv('./data/FullData/dataHeader.csv')
sensorValue = np.loadtxt('./data/FullData/sensorValue.dat')

In [3]:
## Merging the header information and the sensorValue. This helps later on to filter the dataframe.
df['groupUnicode'] = df['group'] == 'a' 
df['groupUnicode'] = df['groupUnicode'].astype(int)

dfAll = pd.DataFrame(sensorValue)
dfAll.head()

dfAll.insert(0,column='groupUnicode',value=df['groupUnicode'])
dfAll.insert(0,column='chanNumber',value=df['chanNumber'])
dfAll.insert(0,column='channel',value=df['channel'])
dfAll.insert(0,column='trial',value=df['trial'])
dfAll.insert(0,column='condition',value=df['condition'])
dfAll.insert(0,column='subject',value=df['subject'])
dfAll.insert(0,column='group',value=df['group'])
idStart=7 ## defines the column number where the sensor data starts
dfAll.head()

Unnamed: 0,group,subject,condition,trial,channel,chanNumber,groupUnicode,0,1,2,...,246,247,248,249,250,251,252,253,254,255
0,a,co2a0000436,S1 obj,0,FP1,0,1,8.006,8.494,8.494,...,11.912,8.494,6.541,6.053,6.541,7.517,9.471,11.424,14.353,17.771
1,a,co2a0000436,S1 obj,0,FP2,1,1,2.096,3.072,3.072,...,-53.569,-56.986,-59.428,-61.381,-61.381,-60.893,-59.916,-57.963,-55.033,-52.592
2,a,co2a0000436,S1 obj,0,F7,2,1,8.199,7.711,6.734,...,8.687,6.246,4.781,3.316,2.34,3.316,5.758,9.664,14.058,17.965
3,a,co2a0000436,S1 obj,0,F8,3,1,-6.114,-4.649,-1.719,...,-3.672,-6.602,-9.043,-10.508,-10.02,-8.067,-6.602,-5.137,-4.649,-4.161
4,a,co2a0000436,S1 obj,0,AF1,4,1,4.812,4.812,4.323,...,-24.485,-26.438,-27.903,-28.88,-28.392,-27.903,-26.927,-25.462,-23.997,-22.044


In [4]:
## Defining window to compute feature 
dt = 3.9 ## ms
tMin = 250
tMax = 550
tData = np.arange(0,255) * dt
indexTMinSearch = int(tMin / dt) + idStart ## 200 ms
indexTMaxSearch = int(tMax / dt) + idStart ## 600 ms

print("idMin search = ", indexTMinSearch)
print("idMax search = ", indexTMaxSearch)


idMin search =  71
idMax search =  148


In [5]:
## Compute the mean sensor value for each subject for each channel

## Initializing the dataframe to hold results
dfSubject = pd.DataFrame(np.zeros([122, 67])) ## 122 subjects, 64 channels + 3 other columns (subject, group and condition)
dfSubject.rename(columns={0 : 'subject' , 1 : 'group' , 2 : 'condition' } , inplace=True)
dfSubject.head()

conditionAll = sorted(dfAll['condition'].unique())
conditionAll.remove('S2 match err')
conditionAll.remove('S2 nomatch err')

for condition in conditionAll:
    print('\n Working on condition '+condition)
    
    ##Get all channels and create list of subjects
    subjectAll = sorted(dfAll['subject'].unique())
    cNameAll = sorted(df['channel'].unique())

    countRow=0
    for subject in subjectAll:
        print('Subject number = '+str(countRow)+'\t'+subject)
        group = dfAll[ (dfAll['subject'] == subject) & (dfAll['condition'] == condition) ].iloc[:,0]    
        dfSubject.iloc[countRow,0] = subject
        dfSubject.iloc[countRow,1] = np.unique(group)
        dfSubject.iloc[countRow,2] = condition
        countCol=3
        for cName in cNameAll:
            tmpA = dfAll[ (dfAll['channel'] == cName) & (dfAll['subject'] == subject) & (dfAll['condition'] == condition) ].iloc[:,indexTMinSearch:indexTMaxSearch].to_numpy().mean()
            dfSubject.iloc[countRow,countCol] = tmpA
            countCol=countCol+1

        countRow = countRow + 1

    ## Saving dataframe to file
    fileNameToSave='./data/output/SubjectFeatureMeanAcrossIndividualChannels_'+condition.replace(" ","")+'.csv'
    print('Saving file = ', fileNameToSave)
    dfSubject.to_csv(fileNameToSave, index=False)



 Working on condition S1 obj
Subject number = 0	co2a0000364
Subject number = 1	co2a0000365
Subject number = 2	co2a0000368
Subject number = 3	co2a0000369
Subject number = 4	co2a0000370
Subject number = 5	co2a0000371
Subject number = 6	co2a0000372
Subject number = 7	co2a0000375
Subject number = 8	co2a0000377
Subject number = 9	co2a0000378
Subject number = 10	co2a0000379
Subject number = 11	co2a0000380
Subject number = 12	co2a0000381
Subject number = 13	co2a0000382
Subject number = 14	co2a0000384
Subject number = 15	co2a0000385
Subject number = 16	co2a0000386
Subject number = 17	co2a0000387
Subject number = 18	co2a0000388
Subject number = 19	co2a0000390
Subject number = 20	co2a0000392
Subject number = 21	co2a0000394
Subject number = 22	co2a0000395
Subject number = 23	co2a0000396
Subject number = 24	co2a0000398
Subject number = 25	co2a0000400
Subject number = 26	co2a0000402
Subject number = 27	co2a0000403
Subject number = 28	co2a0000404
Subject number = 29	co2a0000405
Subject number = 30	

Subject number = 4	co2a0000370
Subject number = 5	co2a0000371
Subject number = 6	co2a0000372
Subject number = 7	co2a0000375
Subject number = 8	co2a0000377
Subject number = 9	co2a0000378
Subject number = 10	co2a0000379
Subject number = 11	co2a0000380
Subject number = 12	co2a0000381
Subject number = 13	co2a0000382
Subject number = 14	co2a0000384
Subject number = 15	co2a0000385
Subject number = 16	co2a0000386
Subject number = 17	co2a0000387
Subject number = 18	co2a0000388
Subject number = 19	co2a0000390
Subject number = 20	co2a0000392
Subject number = 21	co2a0000394
Subject number = 22	co2a0000395
Subject number = 23	co2a0000396
Subject number = 24	co2a0000398
Subject number = 25	co2a0000400
Subject number = 26	co2a0000402
Subject number = 27	co2a0000403
Subject number = 28	co2a0000404
Subject number = 29	co2a0000405
Subject number = 30	co2a0000406
Subject number = 31	co2a0000407
Subject number = 32	co2a0000409
Subject number = 33	co2a0000410
Subject number = 34	co2a0000411
Subject number