## Importing necessary libraries

In [None]:
import tensorflow.compat.v1 as tf
from sklearn.metrics import confusion_matrix
import numpy as np
from scipy.io import loadmat
import os
from pywt import wavedec
from functools import reduce
from scipy import signal
from scipy.stats import entropy
from scipy.fft import fft, ifft
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from tensorflow import keras as K
import matplotlib.pyplot as plt
import scipy
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold,cross_validate
from tensorflow.keras.layers import Dense, Activation, Flatten, concatenate, Input, Dropout, LSTM, Bidirectional,BatchNormalization,PReLU,ReLU,Reshape
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential, Model, load_model
import matplotlib.pyplot as plt;
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.decomposition import PCA
from tensorflow import keras
from sklearn.model_selection import cross_val_score
from tensorflow.keras.layers import Conv1D,Conv2D,Add
from tensorflow.keras.layers import MaxPool1D, MaxPooling2D
import seaborn as sns

## Reading EEG data along with Subject Demographics.

In [None]:
# Load the raw EEG recordings.
data = pd.read_csv("../input/confused-eeg/EEG_data.csv")
# DataFrame.info() writes its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
data.info()

In [None]:
# Load the per-subject demographic table and display it.
demographics_csv = '../input/confused-eeg/demographic_info.csv'
demo_data = pd.read_csv(demographics_csv)
demo_data

### Combining the 2 CSV files on SubjectID

In [None]:
# Align the key column name with the EEG table so both frames share 'SubjectID'.
demo_data = demo_data.rename(columns={'subject ID': 'SubjectID'})

# Inner join: attach each subject's demographic columns to their EEG rows.
data = pd.merge(data, demo_data, how='inner', on='SubjectID')
data.head()

### One hot encoding categorical variables

The categorical variables here are 'Ethnicity' and 'Gender'.

In [None]:
# Expand every categorical column into one-hot indicator columns.
data = pd.get_dummies(data=data)

## Understanding the Variables and relationships between them.

In [None]:
import pandas_profiling as pp

# Build the automated profiling report for the merged table and display it.
profile_report = pp.ProfileReport(data)
profile_report

In [None]:
# Pairwise correlations between all columns, drawn as an annotated heatmap.
cor_matrix = data.corr()
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(cor_matrix, annot=True, ax=ax)

The description of the dataset mentions identifier features such as 'VideoID' and 'SubjectID'. These IDs will hinder model training, because there are 10 clips for 10 students and these 1-2 min clips are divided into 0.5 sec samples. The model would most probably learn from the IDs, whereas we want it to learn from the EEG recordings together with the ethnicity, gender and age parameters.

## Defining necessary features for model training

In [None]:
# Remove the identifier columns and the predefined label from the feature table.
columns_to_drop = ['SubjectID', 'VideoID', 'predefinedlabel']
data = data.drop(columns=columns_to_drop)

In [None]:
# `x` is the same frame object as `data`; popping the label column removes it
# from that frame in place and hands it back as the target series `y`.
x = data
y = x.pop('user-definedlabeln')

Before Normalization

In [None]:
# First 1000 rows of the first 11 columns, on their raw scale.
raw_preview = x.iloc[:1000, :11]
raw_preview.plot(figsize=(15, 10))

The scales of some EEG features are very wide, so we use Z-score normalization.

In [None]:
# Z-score every feature column; `x` becomes a NumPy array from here on.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split below, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler()
x = scaler.fit_transform(x)

After Normalization

In [None]:
# Same slice as the pre-normalization plot, now on the standardized values.
scaled_preview = pd.DataFrame(x).iloc[:1000, :11]
scaled_preview.plot(figsize=(15, 10))

### Dividing into Training and Validation sets

In [None]:
# Hold out 15% of the samples for validation.
# A fixed seed makes the split (and every metric reported below) reproducible
# across kernel restarts; stratifying on `y` keeps the class ratio the same in
# both partitions.
x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    test_size=0.15,
    random_state=42,
    stratify=y,
)

In [None]:
# Shapes of the four partitions, in the order train/test features, train/test labels.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

## Reshaping the data as required by the model

In [None]:
# Add a trailing channel axis: (samples, features) -> (samples, features, 1).
# The sample axis is pinned to the current row count and the feature axis is
# inferred, so a change in the number of feature columns can no longer be
# silently folded into the sample dimension (which a hard-coded 17 would allow
# whenever the total element count happened to be divisible by 17).
x_train = np.array(x_train)
x_test = np.array(x_test)
x_train = x_train.reshape(x_train.shape[0], -1, 1)
x_test = x_test.reshape(x_test.shape[0], -1, 1)


In [None]:
# Shapes after adding the channel axis to the feature arrays.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

## Defining the Model's architecture

In [None]:
# Input: one sample is a length-17 sequence with a single channel.
inputs = tf.keras.Input(shape=(17, 1))

# L2-regularized dense projection to 64 units.
hidden = Dense(
    64,
    activation='relu',
    kernel_regularizer=keras.regularizers.l2(),
)(inputs)

# Two stacked bidirectional LSTM layers, each followed by 30% dropout.
hidden = Bidirectional(LSTM(256, return_sequences=True))(hidden)
hidden = Dropout(0.3)(hidden)
hidden = Bidirectional(LSTM(128, return_sequences=True))(hidden)
hidden = Dropout(0.3)(hidden)

# Flatten the sequence output and classify with a small dense head.
hidden = Flatten()(hidden)
hidden = Dense(128, activation='relu')(hidden)

# Single sigmoid unit for the binary label.
outputs = Dense(1, activation='sigmoid')(hidden)

model = tf.keras.Model(inputs, outputs)

model.summary()

In [None]:
# Draw the layer graph of the model and display the resulting diagram.
architecture_diagram = tf.keras.utils.plot_model(model)
architecture_diagram

The loss function used will be 'Binary CrossEntropy'. We will use callbacks such as EarlyStopping to avoid overfitting and a learning-rate scheduler to change the learning rate while the model trains.

We will be training for 100 epochs starting with learning_rate = 0.001 and batch_size = 20.

In [None]:
def train_model(model, x_train, y_train, x_test, y_test, save_to, epoch=2,
                batch_size=20, learning_rate=0.001):
    """Compile and fit `model`, then return the best checkpointed model.

    Training uses Adam with binary cross-entropy and three callbacks:
    early stopping on `val_loss` (patience 10), a checkpoint that keeps only
    the weights with the highest `val_accuracy`, and a learning-rate schedule
    that decays as ``learning_rate * exp(-epoch / 10)``.

    Args:
        model: Keras model to train (compiled inside this function).
        x_train, y_train: training features and labels.
        x_test, y_test: validation features and labels, passed to
            `validation_data`.
        save_to: string prefix for the checkpoint file; the checkpoint is
            written to ``save_to + '_best_model.h5'``.
        epoch: maximum number of epochs to train.
        batch_size: mini-batch size used by `fit`.
        learning_rate: initial learning rate for Adam and for the decay
            schedule.

    Returns:
        (best_model, history): the model reloaded from the best checkpoint
        and the Keras History object returned by `fit`.
    """
    checkpoint_path = save_to + '_best_model.h5'

    opt_adam = keras.optimizers.Adam(learning_rate=learning_rate)

    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    mc = ModelCheckpoint(checkpoint_path, monitor='val_accuracy', mode='max',
                         verbose=1, save_best_only=True)
    # Exponential decay of the learning rate, starting from `learning_rate`.
    lr_schedule = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch_index: learning_rate * np.exp(-epoch_index / 10.))

    model.compile(optimizer=opt_adam,
                  loss=['binary_crossentropy'],
                  metrics=['accuracy'])

    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epoch,
                        validation_data=(x_test, y_test),
                        callbacks=[es, mc, lr_schedule])

    # Previously the checkpoint was reloaded into an unused variable and the
    # last-epoch model was returned; hand back the best checkpoint instead so
    # downstream evaluation uses the weights that were selected.
    best_model = load_model(checkpoint_path)

    return best_model, history

In [None]:
# Train for up to 100 epochs; the checkpoint is written under the './' prefix.
model, history = train_model(
    model=model,
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
    save_to='./',
    epoch=100,
)

## Plotting the Training and Validation Accuracy along with losses

In [None]:
# One figure per tracked metric: training curve vs. validation curve.
for metric_name in ('accuracy', 'loss'):
    train_curve = history.history[metric_name]
    val_curve = history.history['val_' + metric_name]

    plt.plot(train_curve)
    plt.plot(val_curve)
    plt.title('model ' + metric_name)
    plt.ylabel(metric_name)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

## Analyzing the results.

1. Confusion Matrix

In [None]:
# Predicted probabilities for the held-out set.
y_pred = model.predict(x_test)
# Threshold at 0.5 to get hard 0/1 labels. The `np.int` alias was removed in
# NumPy 1.24 and raises AttributeError there; the builtin `int` is the
# equivalent dtype.
y_pred = np.array(y_pred >= 0.5, dtype=int)
confusion_matrix(y_test, y_pred)

In [None]:
# Display the current contents of `y_pred` as the cell output.
y_pred

2. Classification Report

In [None]:
# Text report built from the true labels and the predicted labels.
report_text = classification_report(y_test, y_pred)
print(report_text)

## Please upvote if you found it useful :)