### Final Project motor task classification using EEG Data

#### Importing necessary libraries

In [1]:
import numpy as np
import os
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
cd /content/gdrive/Shareddrives/C247

/content/gdrive/Shareddrives/C247


In [26]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten,Dropout
from tensorflow.keras.layers import Conv2D,LSTM, BatchNormalization, MaxPooling2D, Reshape
from tensorflow.keras.layers import Dense, Dropout, Reshape, Conv1D, BatchNormalization, Activation, AveragePooling1D, GlobalAveragePooling1D, Lambda, Input, Concatenate, Add, UpSampling1D, Multiply
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras import activations
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from load_data import *
from sklearn.model_selection import KFold

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
%tensorflow_version 2.x
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [5]:
# Loading full data
data_path = './project'

X_train_valid, y_train_valid, X_test, y_test, person_train_valid, person_test = load_full_data(data_path)

print ('Training/Valid data shape: {}'.format(X_train_valid.shape))
print ('Test data shape: {}'.format(X_test.shape))
print ('Training/Valid target shape: {}'.format(y_train_valid.shape))
print ('Test target shape: {}'.format(y_test.shape))
print ('Person train/valid shape: {}'.format(person_train_valid.shape))
print ('Person test shape: {}'.format(person_test.shape))

Training/Valid data shape: (2115, 22, 1000)
Test data shape: (443, 22, 1000)
Training/Valid target shape: (2115,)
Test target shape: (443,)
Person train/valid shape: (2115,)
Person test shape: (443,)


In [6]:
## Adjusting the labels to {0,1,2,3}

# left hand - 0
# right hand - 1
# feet motion - 2
# tongue motion - 3

# labels are given as {769, 770, 771, 772}
base = y_train_valid.min()
y_train_valid-= base
y_test-= base

In [7]:
# Getting data for individual subjects

subject_data_train = split_data_by_subject(X_train_valid, y_train_valid, person_train_valid)
subject_data_test = split_data_by_subject(X_test, y_test, person_test)

In [8]:
# Changing of data dimensions

print ('Shape of training set: {}'.format(X_train_valid.shape))
print ('Shape of test set: {}'.format(X_test.shape))
print ('Shape of train labels: {}'.format(y_train_valid.shape))
print ('Shape of test labels: {}'.format(y_test.shape))

y_train_cv = to_categorical(y_train_valid, 4)
y_test_cv = to_categorical(y_test, 4)
print('Shape of training labels after categorical conversion:', y_train_cv.shape)
print('Shape of test labels after categorical conversion:', y_test_cv.shape)

# Adding width of the segment to be 1
x_train_cv = X_train_valid.reshape(X_train_valid.shape[0], X_train_valid.shape[1], X_train_valid.shape[2], 1)
x_test_cv = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)
print('Shape of training set after adding width info:',x_train_cv.shape)
print('Shape of test set after adding width info:',x_test_cv.shape)


# Reshaping the training and validation dataset
x_train_cv = np.swapaxes(x_train_cv, 1,3)
x_train_cv = np.swapaxes(x_train_cv, 1,2)

x_test_cv = np.swapaxes(x_test_cv, 1,3)
x_test_cv = np.swapaxes(x_test_cv, 1,2)
print('Shape of training set after dimension reshaping:',x_train_cv.shape)

print('Shape of test set after dimension reshaping:',x_test_cv.shape)

Shape of training set: (2115, 22, 1000)
Shape of test set: (443, 22, 1000)
Shape of train labels: (2115,)
Shape of test labels: (443,)
Shape of training labels after categorical conversion: (2115, 4)
Shape of test labels after categorical conversion: (443, 4)
Shape of training set after adding width info: (2115, 22, 1000, 1)
Shape of test set after adding width info: (443, 22, 1000, 1)
Shape of training set after dimension reshaping: (2115, 1000, 1, 22)
Shape of test set after dimension reshaping: (443, 1000, 1, 22)


In [32]:
def data_prep(X,y,window_size,sub_sample,average,noise):
    # N,H,W,C ----- N,1000,1,22
    total_X = None
    total_y = None
    X = X[:,0:window_size,:,:]
    Xi = X
    X = np.transpose(X, (0,2, 3, 1))
    # Trimming the data (sample,22,1000) -> (sample,22,500)
    
    print('Shape of X after trimming:',X.shape)
    
    # Maxpooling the data (sample,22,1000) -> (sample,22,500/sub_sample)
    X_max = np.max(X.reshape(X.shape[0], X.shape[1], X.shape[2],-1, sub_sample), axis=4)   # N,W,C,H/sub_sample
    
    X_max = np.transpose(X_max, (0,3,1, 2))
    
    total_X = X_max
    total_y = y
    print('Shape of X after maxpooling:',total_X.shape,y.shape)
    
    # Averaging + noise 
    X_average = np.mean(X.reshape(X.shape[0], X.shape[1], X.shape[2],-1, sub_sample), axis=4)
    X_average = X_average + np.random.normal(0.0, 0.5, X_average.shape)
    
    X_average = np.transpose(X_average, (0,3,1, 2))

    total_X = np.vstack((total_X, X_average))
    total_y = np.vstack((total_y, y))
    print('Shape of X after averaging+noise and concatenating:',total_X.shape,y.shape)
    
    # Subsampling
    
    for i in range(sub_sample):
        
        X_subsample = Xi[:,i::sub_sample, :, :] + \
                            (np.random.normal(0.0, 0.5, Xi[:,i::sub_sample, :, :].shape) if noise else 0.0)
            
        total_X = np.vstack((total_X, X_subsample))
        total_y = np.vstack((total_y, y))
        
    
    print('Shape of X after subsampling and concatenating:',total_X.shape,y.shape)
    total_X = np.transpose(total_X, (0,3,1, 2))
    total_X = total_X.reshape(total_X.shape[0],-1)
    # total_X = np.squeeze(total_X)
    return total_X,total_y



#Building 2D CNN + LSTM

In [33]:
from project.helpers import *

In [34]:
X_train, X_val, y_train, y_val = train_test_split(x_train_cv, y_train_cv, test_size=0.2, random_state=42)

In [35]:
tf.keras.backend.clear_session()

In [36]:
window_size = 500
print(X_train.shape,y_train.shape)
X_train_dp,y_train_dp = data_prep(X_train,y_train,window_size,2,2,True)
# X_train_grv = conv2DPrepreprocessing(X_train,5)
# y_train_grv = y_train
print(X_train_dp.shape,y_train_dp.shape)

(1692, 1000, 1, 22) (1692, 4)
Shape of X after trimming: (1692, 1, 22, 500)
Shape of X after maxpooling: (1692, 250, 1, 22) (1692, 4)
Shape of X after averaging+noise and concatenating: (3384, 250, 1, 22) (1692, 4)
Shape of X after subsampling and concatenating: (6768, 250, 1, 22) (1692, 4)
(6768, 5500) (6768, 4)


In [37]:
print(X_val.shape,y_val.shape)
X_val_dp,y_val_dp = data_prep(X_val,y_val,window_size,2,2,True)
# X_val_grv = conv2DPrepreprocessing(X_val,5)
# y_val_grv = y_val
print(X_val_dp.shape,y_val_dp.shape)

(423, 1000, 1, 22) (423, 4)
Shape of X after trimming: (423, 1, 22, 500)
Shape of X after maxpooling: (423, 250, 1, 22) (423, 4)
Shape of X after averaging+noise and concatenating: (846, 250, 1, 22) (423, 4)
Shape of X after subsampling and concatenating: (1692, 250, 1, 22) (423, 4)
(1692, 5500) (1692, 4)


In [38]:
print(x_test_cv.shape,y_test_cv.shape)
x_test_dp,y_test_dp = data_prep(x_test_cv,y_test_cv,window_size,2,2,True)
# x_test_grv = conv2DPrepreprocessing(x_test_cv,5)
# y_test_grv = y_test_cv
print(x_test_dp.shape,y_test_dp.shape)

(443, 1000, 1, 22) (443, 4)
Shape of X after trimming: (443, 1, 22, 500)
Shape of X after maxpooling: (443, 250, 1, 22) (443, 4)
Shape of X after averaging+noise and concatenating: (886, 250, 1, 22) (443, 4)
Shape of X after subsampling and concatenating: (1772, 250, 1, 22) (443, 4)
(1772, 5500) (1772, 4)


In [23]:
# import tensorflow as tf
from tensorflow.keras.layers import PReLU
import tensorflow

In [57]:
def cbr(x, out_layer, kernel, stride, dilation):
    x = Conv1D(out_layer, kernel_size=kernel, dilation_rate=dilation, strides=stride, padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Dropout(0.6)(x)
    return x

def se_block(x_in, layer_n):
    x = GlobalAveragePooling1D()(x_in)
    x = Dense(layer_n//8, activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(layer_n, activation="sigmoid")(x)
    x = Dropout(0.5)(x)
    x_out=Multiply()([x_in, x])
    return x_out

def resblock(x_in, layer_n, kernel, dilation, use_se=True):
    x = cbr(x_in, layer_n, kernel, 1, dilation)
    x = cbr(x, layer_n, kernel, 1, dilation)
    if use_se:
        x = se_block(x, layer_n)
    x = Add()([x_in, x])
    return x  

def Unet(input_shape=(5500, 1)):
    layer_n = 16
    kernel_size = 7
    depth = 2

    input_layer = Input(input_shape)    
    input_layer_1 = AveragePooling1D(5)(input_layer)
    input_layer_2 = AveragePooling1D(25)(input_layer)
    
    ########## Encoder
    x = cbr(input_layer, layer_n, kernel_size, 1, 1)#1000
    for i in range(depth):
        x = resblock(x, layer_n, kernel_size, 1)
    out_0 = x

    x = cbr(x, layer_n*2, kernel_size, 5, 1)
    for i in range(depth):
        x = resblock(x, layer_n*2, kernel_size, 1)
    out_1 = x

    x = Concatenate()([x, input_layer_1])    
    x = cbr(x, layer_n*3, kernel_size, 5, 1)
    for i in range(depth):
        x = resblock(x, layer_n*3, kernel_size, 1)
    out_2 = x

    x = Concatenate()([x, input_layer_2])    
    x = cbr(x, layer_n*4, kernel_size, 5, 1)
    for i in range(depth):
        x = resblock(x, layer_n*4, kernel_size, 1)
    
    ########### Decoder
    x = UpSampling1D(5)(x)
    x = Concatenate()([x, out_2])
    x = cbr(x, layer_n*3, kernel_size, 1, 1)

    x = UpSampling1D(5)(x)
    x = Concatenate()([x, out_1])
    x = cbr(x, layer_n*2, kernel_size, 1, 1)

    x = UpSampling1D(5)(x)
    x = Concatenate()([x, out_0])
    x = cbr(x, layer_n, kernel_size, 1, 1)    

    #regressor
    #x = Conv1D(1, kernel_size=kernel_size, strides=1, padding="same")(x)
    #out = Activation("sigmoid")(x)
    #out = Lambda(lambda x: 12*x)(out)
    
    #classifier
    x = Conv1D(11, kernel_size=kernel_size, strides=1, padding="same")(x)
    x = Flatten()(x)
    out = Dense(4, activation='softmax')(x)
    # out = Activation("softmax")(x)
    
    model = Model(input_layer, out)
    
    return model

In [44]:
# Building the CNN model using sequential class
tf.keras.backend.clear_session()
hybrid_cnn_lstm_model = Sequential()

# Conv. block 1
hybrid_cnn_lstm_model.add(Conv2D(filters=16, kernel_size=(3,7), padding='same', activation='elu', input_shape=(5, 200, 22)))
hybrid_cnn_lstm_model.add(MaxPooling2D(pool_size=(2,2), padding='same')) # Read the keras documentation
hybrid_cnn_lstm_model.add(BatchNormalization())
hybrid_cnn_lstm_model.add(Dropout(0.5))

# Conv. block 2
hybrid_cnn_lstm_model.add(Conv2D(filters=32, kernel_size=(3,7), padding='same', activation='elu'))
hybrid_cnn_lstm_model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
hybrid_cnn_lstm_model.add(BatchNormalization())
hybrid_cnn_lstm_model.add(Dropout(0.5))

# Conv. block 3
hybrid_cnn_lstm_model.add(Conv2D(filters=64, kernel_size=(3,7), padding='same', activation='elu'))
hybrid_cnn_lstm_model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
hybrid_cnn_lstm_model.add(BatchNormalization())
hybrid_cnn_lstm_model.add(Dropout(0.5))

# Conv. block 4
hybrid_cnn_lstm_model.add(Conv2D(filters=128, kernel_size=(3,7), padding='same', activation='elu'))
hybrid_cnn_lstm_model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
hybrid_cnn_lstm_model.add(BatchNormalization())
hybrid_cnn_lstm_model.add(Dropout(0.5))

# # Output layer with Softmax activation
# hybrid_cnn_lstm_model.add(Flatten()) # Flattens the input
# hybrid_cnn_lstm_model.add(Dense(4, activation='softmax')) # Output FC layer with softmax activation

# FC+LSTM layers
hybrid_cnn_lstm_model.add(Flatten()) # Adding a flattening operation to the output of CNN block
hybrid_cnn_lstm_model.add(Dense((128))) # FC layer with 100 units
hybrid_cnn_lstm_model.add(Dropout(0.4))
hybrid_cnn_lstm_model.add(Reshape((128,1))) # Reshape my output of FC layer so that it's compatible
hybrid_cnn_lstm_model.add(LSTM(16, dropout=0.5, recurrent_dropout=0, input_shape=(128,1), return_sequences=False))


# Output layer with Softmax activation 
hybrid_cnn_lstm_model.add(Dense(4, activation='softmax')) # Output FC layer with softmax activation


# Printing the model summary
hybrid_cnn_lstm_model.summary()







Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 5, 200, 16)        7408      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 3, 100, 16)       0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 3, 100, 16)       64        
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 3, 100, 16)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 3, 100, 32)        10784     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 2, 50, 32)        0

In [59]:
# Model parameters
learning_rate = 1e-4
epochs = 50
cnn_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
unet1d = Unet()

In [60]:

unet1d.compile(loss='categorical_crossentropy',
                 optimizer=cnn_optimizer,
                 metrics=['accuracy'])

# Training and validating the model
unet1d_results = unet1d.fit(X_train_dp,
             y_train_dp,
             batch_size=64,
             epochs=epochs,
             validation_data=(X_val_dp, y_val_dp), verbose=True)



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [62]:
## Testing the basic CNN model

unet1d_score = unet1d.evaluate(x_test_dp, y_test_dp, verbose=0)
print('Test accuracy of the basic CNN model:',unet1d_score[1])

Test accuracy of the basic CNN model: 0.43905192613601685
