In [1]:
# Import python packages for exploration

import numpy as np
import pandas as pd

In [2]:
# Load train and test dataset

train_dataset = pd.read_csv('../dataset/eeg_train_dataset.csv')
test_dataset = pd.read_csv('../dataset/eeg_train_dataset.csv')

In [3]:
# Build 'Subject-Trial' column for preprocessing
# 'Subject-Trial' is combination of subject number and trial number

train_dataset['Subject-Trial'] = train_dataset['Subject_id'].astype(str) + '-' + train_dataset['Trial_id'].astype(str)
test_dataset['Subject-Trial'] = test_dataset['Subject_id'].astype(str) + '-' + test_dataset['Trial_id'].astype(str)

In [8]:
# Check head of train and test dataset

print(len(train_dataset))
train_dataset.head()

153600


Unnamed: 0,Time-Series,FP1,FP2,F7,F8,AF1,AF2,FZ,F4,F3,...,P2,P1,CPZ,nd,Y,Alcohol,Exp,Subject_id,Trial_id,Subject-Trial
0,0,-8.921,0.834,-19.847,8.148,-2.146,1.129,-0.071,3.408,-0.092,...,-2.421,-4.313,-0.478,-8.901,-5.636,Y,S1,364,0,364-0
1,1,-8.433,3.276,-12.522,1.801,-2.146,0.641,-0.559,1.455,0.397,...,-3.886,-5.29,-0.966,-7.924,-2.706,Y,S1,364,0,364-0
2,2,-2.574,5.717,1.149,-2.594,-1.658,-0.336,-1.048,0.478,-1.068,...,-4.862,-5.29,-0.966,-3.042,1.689,Y,S1,364,0,364-0
3,3,5.239,7.67,14.821,-4.547,-0.682,-0.824,-0.559,0.966,-3.51,...,-3.886,-4.313,-0.966,4.771,5.595,Y,S1,364,0,364-0
4,4,11.587,9.623,20.681,-5.035,2.248,0.641,0.905,1.943,-5.463,...,-1.933,-2.36,-0.478,11.607,9.013,Y,S1,364,0,364-0


In [9]:
print(len(test_dataset))
test_dataset.head()

153600


Unnamed: 0,Time-Series,FP1,FP2,F7,F8,AF1,AF2,FZ,F4,F3,...,P2,P1,CPZ,nd,Y,Alcohol,Exp,Subject_id,Trial_id,Subject-Trial
0,0,-8.921,0.834,-19.847,8.148,-2.146,1.129,-0.071,3.408,-0.092,...,-2.421,-4.313,-0.478,-8.901,-5.636,Y,S1,364,0,364-0
1,1,-8.433,3.276,-12.522,1.801,-2.146,0.641,-0.559,1.455,0.397,...,-3.886,-5.29,-0.966,-7.924,-2.706,Y,S1,364,0,364-0
2,2,-2.574,5.717,1.149,-2.594,-1.658,-0.336,-1.048,0.478,-1.068,...,-4.862,-5.29,-0.966,-3.042,1.689,Y,S1,364,0,364-0
3,3,5.239,7.67,14.821,-4.547,-0.682,-0.824,-0.559,0.966,-3.51,...,-3.886,-4.313,-0.966,4.771,5.595,Y,S1,364,0,364-0
4,4,11.587,9.623,20.681,-5.035,2.248,0.641,0.905,1.943,-5.463,...,-1.933,-2.36,-0.478,11.607,9.013,Y,S1,364,0,364-0


In [18]:
# Check the number of subjects and trials, only train dataset

print('Number of subjects : ', len(train_dataset['Subject_id'].value_counts()))
subject_list = list(train_dataset['Subject_id'].value_counts().index)
print(subject_list)

Number of subjects :  20
[378, 377, 338, 339, 340, 341, 342, 344, 345, 346, 347, 364, 365, 368, 369, 370, 371, 372, 375, 337]


In [20]:
# Every subject has 30 trials, and it's made by 3 different stimulus with 10 trials
# For example, subject 378's trial is, 10 times of S1, 10 times of S2-nonmatch, 10 times of S2-match

for subject in subject_list:
    tmp_df = train_dataset[train_dataset['Subject_id'] == subject]
    num_of_trial = len(tmp_df['Trial_id'].value_counts().index)
    print(str(subject), ' has ', str(num_of_trial), 'trials')

378  has  30 trials
377  has  30 trials
338  has  30 trials
339  has  30 trials
340  has  30 trials
341  has  30 trials
342  has  30 trials
344  has  30 trials
345  has  30 trials
346  has  30 trials
347  has  30 trials
364  has  30 trials
365  has  30 trials
368  has  30 trials
369  has  30 trials
370  has  30 trials
371  has  30 trials
372  has  30 trials
375  has  30 trials
337  has  30 trials


In [28]:
# If you want to access to specific data, you have to know number of subject and trial
# And Subject-Trial is the key of single data

all_subject_trial_list = list(train_dataset['Subject-Trial'].value_counts().index)
print(all_subject_trial_list[:10])

['340-10', '364-10', '368-15', '341-15', '377-29', '338-0', '365-11', '378-37', '339-7', '338-29']


In [31]:
# For example, you can access to Subject 340, Trial 10 with this way
# We could find the time-series is 0 to 255, and it's about S1 stimulus experiment
# And also Subject 340 is N(Non-Alcoholism) Subject

train_dataset[train_dataset['Subject-Trial'] == '340-10']

Unnamed: 0,Time-Series,FP1,FP2,F7,F8,AF1,AF2,FZ,F4,F3,...,P2,P1,CPZ,nd,Y,Alcohol,Exp,Subject_id,Trial_id,Subject-Trial
101120,0,-10.173,-9.023,-12.980,-13.804,-8.738,-6.226,-4.649,-4.679,-7.050,...,-1.729,11.444,1.587,-10.061,-15.127,N,S1,340,10,340-10
101121,1,-8.219,-8.046,-10.539,-10.386,-6.785,-5.249,-3.184,-3.703,-5.096,...,-2.706,8.026,0.122,-8.596,-10.732,N,S1,340,10,340-10
101122,2,-4.801,-4.628,-6.144,-6.968,-3.367,-2.319,-1.719,-1.261,-2.167,...,-1.729,6.073,-0.366,-4.690,-6.337,N,S1,340,10,340-10
101123,3,-0.407,-1.211,-1.261,-3.550,0.539,0.610,0.234,1.180,0.275,...,0.712,5.585,0.122,-0.295,-2.431,N,S1,340,10,340-10
101124,4,3.499,2.696,2.157,-0.132,3.957,3.540,1.211,3.133,2.228,...,4.130,7.050,1.587,3.123,0.498,N,S1,340,10,340-10
101125,5,5.452,4.649,4.110,2.309,5.422,5.493,1.699,4.110,3.204,...,6.571,8.514,2.563,5.564,3.428,N,S1,340,10,340-10
101126,6,6.429,5.625,4.598,3.286,5.422,5.493,2.187,3.133,2.716,...,7.548,9.003,2.563,6.053,6.358,N,S1,340,10,340-10
101127,7,5.452,4.649,3.621,3.286,4.445,4.517,2.187,1.668,1.740,...,8.036,8.026,2.563,6.053,7.823,N,S1,340,10,340-10
101128,8,4.964,3.184,3.133,2.797,2.981,3.540,1.699,0.203,0.763,...,8.525,6.561,2.563,5.076,8.311,N,S1,340,10,340-10
101129,9,4.476,2.207,3.133,2.797,3.469,3.052,1.211,-0.285,0.763,...,8.525,5.585,2.563,5.076,6.846,N,S1,340,10,340-10
