In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [4]:
# Read the EEG recordings CSV into a DataFrame and preview the first rows.
path = 'Dataset/EEG_data.csv'
df = pd.read_csv(filepath_or_buffer=path)
df.head(n=5)

Unnamed: 0,SubjectID,VideoID,Attention,Mediation,Raw,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2,predefinedlabel,user-definedlabeln
0,0.0,0.0,56.0,43.0,278.0,301963.0,90612.0,33735.0,23991.0,27946.0,45097.0,33228.0,8293.0,0.0,0.0
1,0.0,0.0,40.0,35.0,-50.0,73787.0,28083.0,1439.0,2240.0,2746.0,3687.0,5293.0,2740.0,0.0,0.0
2,0.0,0.0,47.0,48.0,101.0,758353.0,383745.0,201999.0,62107.0,36293.0,130536.0,57243.0,25354.0,0.0,0.0
3,0.0,0.0,47.0,57.0,-5.0,2012240.0,129350.0,61236.0,17084.0,11488.0,62462.0,49960.0,33932.0,0.0,0.0
4,0.0,0.0,44.0,53.0,-8.0,1005145.0,354328.0,37102.0,88881.0,45307.0,99603.0,44790.0,29749.0,0.0,0.0


In [5]:
# Coerce every column to a numeric dtype, one column at a time.
# pd.to_numeric is called with its defaults, so nothing is silently coerced here.
df = df.apply(lambda column: pd.to_numeric(column))
df.head(n=5)

Unnamed: 0,SubjectID,VideoID,Attention,Mediation,Raw,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2,predefinedlabel,user-definedlabeln
0,0.0,0.0,56.0,43.0,278.0,301963.0,90612.0,33735.0,23991.0,27946.0,45097.0,33228.0,8293.0,0.0,0.0
1,0.0,0.0,40.0,35.0,-50.0,73787.0,28083.0,1439.0,2240.0,2746.0,3687.0,5293.0,2740.0,0.0,0.0
2,0.0,0.0,47.0,48.0,101.0,758353.0,383745.0,201999.0,62107.0,36293.0,130536.0,57243.0,25354.0,0.0,0.0
3,0.0,0.0,47.0,57.0,-5.0,2012240.0,129350.0,61236.0,17084.0,11488.0,62462.0,49960.0,33932.0,0.0,0.0
4,0.0,0.0,44.0,53.0,-8.0,1005145.0,354328.0,37102.0,88881.0,45307.0,99603.0,44790.0,29749.0,0.0,0.0


In [6]:
# Inspect the per-column dtypes after the numeric conversion.
# A bare trailing expression lets the notebook display the Series itself
# instead of routing it through print()/stdout.
df.dtypes

SubjectID             float64
VideoID               float64
Attention             float64
Mediation             float64
Raw                   float64
Delta                 float64
Theta                 float64
Alpha1                float64
Alpha2                float64
Beta1                 float64
Beta2                 float64
Gamma1                float64
Gamma2                float64
predefinedlabel       float64
user-definedlabeln    float64
dtype: object


In [None]:
# Columns selected as model inputs and passed to the scaler.
# SubjectID, VideoID, Raw and the two label columns are intentionally not listed.
features = [
    'Attention',
    'Mediation',
    'Delta',
    'Theta',
    'Alpha1',
    'Alpha2',
    'Beta1',
    'Beta2',
    'Gamma1',
    'Gamma2',
]

Unnamed: 0,Attention,Mediation,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2
0,0.634334,-0.184623,-0.476510,-0.317217,-0.105613,-0.157642,0.094523,0.087938,0.045544,-0.169923
1,-0.056750,-0.537745,-0.834378,-0.573352,-0.551518,-0.530654,-0.562100,-0.435821,-0.304417,-0.324028
2,0.245599,0.036078,0.239285,0.883532,2.217577,0.496016,0.312017,1.168583,0.346396,0.303549
3,0.245599,0.433339,2.205862,-0.158536,0.274088,-0.276091,-0.334314,0.307573,0.255157,0.541603
4,0.116021,0.256779,0.626350,0.763033,-0.059126,0.955168,0.546890,0.777338,0.190389,0.425518
...,...,...,...,...,...,...,...,...,...,...
12806,0.979876,-0.405324,-0.750019,-0.647626,-0.561597,-0.196382,-0.532760,0.020030,-0.338179,-0.373426
12807,0.850297,-0.537745,-0.443420,2.578234,1.543418,1.931368,0.404152,6.743178,0.087461,-0.122273
12808,0.807105,-0.802586,0.117948,-0.056351,-0.018175,0.101842,-0.347916,-0.141271,-0.114823,-0.343898
12809,0.807105,-0.802586,-0.375654,-0.576371,-0.413381,-0.398742,-0.583127,-0.440931,-0.216347,-0.351114


In [23]:
# Standardize the selected feature columns with StandardScaler.
# NOTE(review): the scaler is fitted on every row of df; if a train/test split
# is made later in the notebook, this leaks test-set statistics -- confirm.
scaler = StandardScaler()
scaler.fit(df[features])

# The scaled values overwrite the original columns in df, so the unscaled
# readings are no longer available from df after this cell runs.
df[features] = scaler.transform(df[features])

# Independent copy holding only the scaled feature columns.
preprocessed_data = df[features].copy()
preprocessed_data.head(n=5)


Unnamed: 0,Attention,Mediation,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2
0,0.634334,-0.184623,-0.47651,-0.317217,-0.105613,-0.157642,0.094523,0.087938,0.045544,-0.169923
1,-0.05675,-0.537745,-0.834378,-0.573352,-0.551518,-0.530654,-0.5621,-0.435821,-0.304417,-0.324028
2,0.245599,0.036078,0.239285,0.883532,2.217577,0.496016,0.312017,1.168583,0.346396,0.303549
3,0.245599,0.433339,2.205862,-0.158536,0.274088,-0.276091,-0.334314,0.307573,0.255157,0.541603
4,0.116021,0.256779,0.62635,0.763033,-0.059126,0.955168,0.54689,0.777338,0.190389,0.425518


In [24]:
# Append the two label columns from df as integers, under new column names.
# Dict order fixes the column order: user-defined label first, then predefined.
_label_sources = {
    'userdefined-Confusion': 'user-definedlabeln',
    'predefined-Confusion': 'predefinedlabel',
}
for _new_name, _source_name in _label_sources.items():
    preprocessed_data[_new_name] = df[_source_name].astype(int)

In [25]:
# Preview the first five rows of the preprocessed frame.
preprocessed_data.head(n=5)

Unnamed: 0,Attention,Mediation,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2,userdefined-Confusion,predefined-Confusion
0,0.634334,-0.184623,-0.47651,-0.317217,-0.105613,-0.157642,0.094523,0.087938,0.045544,-0.169923,0,0
1,-0.05675,-0.537745,-0.834378,-0.573352,-0.551518,-0.530654,-0.5621,-0.435821,-0.304417,-0.324028,0,0
2,0.245599,0.036078,0.239285,0.883532,2.217577,0.496016,0.312017,1.168583,0.346396,0.303549,0,0
3,0.245599,0.433339,2.205862,-0.158536,0.274088,-0.276091,-0.334314,0.307573,0.255157,0.541603,0,0
4,0.116021,0.256779,0.62635,0.763033,-0.059126,0.955168,0.54689,0.777338,0.190389,0.425518,0,0
