## Imports

In [4]:
import tensorflow as tf                                
from tensorflow import keras             
import numpy as np                       
from sklearn.model_selection import train_test_split   
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten,Dropout
from keras.layers import Conv2D,BatchNormalization,MaxPooling2D,Reshape, LSTM, ConvLSTM2D, Permute, TimeDistributed
from keras.utils import to_categorical
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Reshape, Conv2DTranspose, LeakyReLU, BatchNormalization, Input, concatenate
from tensorflow.keras.models import Model

## Examples:
* CNN
* CNN-LSTM

#### Preprocessing

In [None]:
# Read the six dataset arrays from the working directory:
# the train/validation split first, then the held-out test split.
X_train_valid, y_train_valid, person_train_valid = map(
    np.load,
    ("X_train_valid.npy", "y_train_valid.npy", "person_train_valid.npy"),
)
X_test, y_test, person_test = map(
    np.load,
    ("X_test.npy", "y_test.npy", "person_test.npy"),
)


In [None]:
# Report the shape of every loaded array, one line per array.
for shape_template, loaded_array in (
    ('Training/Valid data shape: {}', X_train_valid),
    ('Test data shape: {}', X_test),
    ('Training/Valid target shape: {}', y_train_valid),
    ('Test target shape: {}', y_test),
    ('Person train/valid shape: {}', person_train_valid),
    ('Person test shape: {}', person_test),
):
    print(shape_template.format(loaded_array.shape))
print("testing data:", X_test.shape)


In [10]:
def data_prep(X,y,sub_sample,average,noise):
    """Trim each trial to its first 500 time samples and build an augmented set.

    The result stacks the following copies along the trial axis, in order:

    1. a max-pooled copy (non-overlapping windows of ``sub_sample`` samples),
    2. an averaged copy (non-overlapping windows of ``average`` samples),
       with Gaussian noise (mean 0, std 0.5) added when ``noise`` is true,
    3. ``sub_sample`` strided copies ``X[:, :, i::sub_sample]``, each with
       Gaussian noise (mean 0, std 0.5) added when ``noise`` is true.

    Parameters
    ----------
    X : ndarray indexed as (trial, channel, time).
    y : ndarray with one label per trial (assumed 1-D; it is tiled with
        ``np.hstack``).
    sub_sample : int
        Window / stride used for max-pooling and strided subsampling. Must
        divide the trimmed time length.
    average : int
        Window used for averaging. Must equal ``sub_sample`` so that every
        stacked copy has the same time length.
    noise : bool
        Whether to add Gaussian noise to the averaged and strided copies.

    Returns
    -------
    (total_X, total_y)
        ``total_X`` holds ``(2 + sub_sample)`` stacked copies of the trials with
        time length ``trimmed_length // sub_sample``; ``total_y`` holds the
        labels repeated to match.

    Raises
    ------
    ValueError
        If ``sub_sample`` does not divide the trimmed time length, or if
        ``average`` differs from ``sub_sample``.
    """
    # Trimming the data (sample,22,1000) -> (sample,22,500)
    X = X[:,:,0:500]
    n_trials, n_channels, n_time = X.shape

    # Fail early with a clear message instead of deep inside reshape/vstack.
    if n_time % sub_sample != 0:
        raise ValueError(
            'sub_sample={} must divide the trimmed time length {}'.format(sub_sample, n_time))
    if average != sub_sample:
        raise ValueError(
            'average={} must equal sub_sample={} so the augmented copies can be stacked'.format(
                average, sub_sample))

    # Maxpooling the data (sample,22,500) -> (sample,22,500/sub_sample)
    X_max = np.max(X.reshape(n_trials, n_channels, -1, sub_sample), axis=3)

    # Averaging (+ noise only when requested, consistent with the subsampling step)
    X_average = np.mean(X.reshape(n_trials, n_channels, -1, average), axis=3)
    if noise:
        X_average = X_average + np.random.normal(0.0, 0.5, X_average.shape)

    pieces = [X_max, X_average]

    # Subsampling: one strided copy per phase offset i
    for i in range(sub_sample):
        X_subsample = X[:, :, i::sub_sample]
        if noise:
            X_subsample = X_subsample + np.random.normal(0.0, 0.5, X_subsample.shape)
        pieces.append(X_subsample)

    # Stack once at the end rather than re-allocating on every iteration.
    total_X = np.vstack(pieces)
    total_y = np.hstack([y] * len(pieces))
    return total_X,total_y




#### CNN

In [None]:
## Loading and visualizing the data

## Loading the dataset
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")
person_train_valid = np.load("person_train_valid.npy")
X_train_valid = np.load("X_train_valid.npy")
y_train_valid = np.load("y_train_valid.npy")
person_test = np.load("person_test.npy")

## Adjusting the labels so that the four classes are numbered 0..3
# (presumably the raw labels are event codes starting at 769 -- TODO confirm)

# Cue onset left - 0
# Cue onset right - 1
# Cue onset foot - 2
# Cue onset tongue - 3

y_train_valid -= 769
y_test -= 769

## Visualizing the data
# Plot the class-wise average of one channel over time.

ch_data = X_train_valid[:,8,:] # extracts the 9th channel from the data
# Time axis taken from the data rather than hard-coded, so the plot still
# works if the trial length changes.
time_axis = np.arange(ch_data.shape[1])


class_0_ind = np.where(y_train_valid == 0) # finds the indices where the label is 0
ch_data_class_0 = ch_data[class_0_ind] # finds the data where label is 0
avg_ch_data_class_0 = np.mean(ch_data_class_0,axis=0) # finds the average representation of the 9th channel when label is 0


class_1_ind = np.where(y_train_valid == 1)
ch_data_class_1 = ch_data[class_1_ind]
avg_ch_data_class_1 = np.mean(ch_data_class_1,axis=0)

class_2_ind = np.where(y_train_valid == 2)
ch_data_class_2 = ch_data[class_2_ind]
avg_ch_data_class_2 = np.mean(ch_data_class_2,axis=0)

class_3_ind = np.where(y_train_valid == 3)
ch_data_class_3 = ch_data[class_3_ind]
avg_ch_data_class_3 = np.mean(ch_data_class_3,axis=0)


# Each curve carries its own label, so the legend cannot be mis-ordered and
# the t=500 marker (whose label was previously dropped) is listed as well.
plt.plot(time_axis,avg_ch_data_class_0, label="Cue Onset left")
plt.plot(time_axis,avg_ch_data_class_1, label="Cue Onset right")
plt.plot(time_axis,avg_ch_data_class_2, label="Cue onset foot")
plt.plot(time_axis,avg_ch_data_class_3, label="Cue onset tongue")
plt.axvline(x=500, label='line at t=500',c='cyan')

plt.legend()





In [None]:
## Random splitting and reshaping the data
# Draw a random validation subset of 375 trials; every remaining trial is used
# for training. The trial count is read from the data instead of hard-coded.
n_trials = X_train_valid.shape[0]
ind_valid = np.random.choice(n_trials, 375, replace=False)
ind_train = np.setdiff1d(np.arange(n_trials), ind_valid)

# Creating the training and validation sets using the generated indices
(X_train, X_valid) = X_train_valid[ind_train], X_train_valid[ind_valid]
(y_train, y_valid) = y_train_valid[ind_train], y_train_valid[ind_valid]


## Preprocessing the dataset
# NOTE(review): data_prep augments (and adds noise to) the validation and test
# splits too, so the reported metrics are over augmented copies -- confirm intended.
x_train,y_train = data_prep(X_train,y_train,2,2,True)
x_valid,y_valid = data_prep(X_valid,y_valid,2,2,True)
X_test_prep,y_test_prep = data_prep(X_test,y_test,2,2,True)


print('Shape of testing set:',X_test_prep.shape)
print('Shape of testing labels:',y_test_prep.shape)

print('Shape of training set:',x_train.shape)
print('Shape of validation set:',x_valid.shape)
print('Shape of training labels:',y_train.shape)
print('Shape of validation labels:',y_valid.shape)


# Converting the labels to categorical variables for multiclass classification
# (y_test is overwritten with the one-hot labels of the augmented test set)
y_train = to_categorical(y_train, 4)
y_valid = to_categorical(y_valid, 4)
y_test = to_categorical(y_test_prep, 4)
print('Shape of training labels after categorical conversion:',y_train.shape)
print('Shape of validation labels after categorical conversion:',y_valid.shape)
print('Shape of test labels after categorical conversion:',y_test.shape)

# Adding width of the segment to be 1
# Each array is reshaped from its OWN shape (the validation set previously
# borrowed the time length of the training set).
x_train = x_train.reshape(x_train.shape + (1,))
x_valid = x_valid.reshape(x_valid.shape + (1,))
x_test = X_test_prep.reshape(X_test_prep.shape + (1,))
print('Shape of training set after adding width info:',x_train.shape)
print('Shape of validation set after adding width info:',x_valid.shape)
print('Shape of test set after adding width info:',x_test.shape)


# Reshaping the training and validation dataset
# Two axis swaps reorder (trial, channel, time, 1) -> (trial, time, 1, channel),
# the layout matching the model's input_shape=(250,1,22).
x_train = np.swapaxes(np.swapaxes(x_train, 1,3), 1,2)
x_valid = np.swapaxes(np.swapaxes(x_valid, 1,3), 1,2)
x_test = np.swapaxes(np.swapaxes(x_test, 1,3), 1,2)
print('Shape of training set after dimension reshaping:',x_train.shape)
print('Shape of validation set after dimension reshaping:',x_valid.shape)
print('Shape of test set after dimension reshaping:',x_test.shape)





    



In [None]:

# Building the CNN model using sequential class
basic_cnn_model = Sequential()

# Four conv blocks (conv -> max-pool -> batch-norm -> dropout) that differ only
# in the number of filters; the first block also declares the input shape.
for block_index, n_filters in enumerate((25, 50, 100, 200)):
    conv_options = dict(filters=n_filters, kernel_size=(10,1), padding='same', activation='elu')
    if block_index == 0:
        conv_options['input_shape'] = (250,1,22)
    basic_cnn_model.add(Conv2D(**conv_options))
    basic_cnn_model.add(MaxPooling2D(pool_size=(3,1), padding='same'))
    basic_cnn_model.add(BatchNormalization())
    basic_cnn_model.add(Dropout(0.5))

# Classifier head: flatten the feature maps, then a 4-way softmax layer
basic_cnn_model.add(Flatten())
basic_cnn_model.add(Dense(4, activation='softmax'))

# Printing the model summary
basic_cnn_model.summary()



In [None]:
# Model parameters
learning_rate = 1e-3
epochs = 50
# `learning_rate=` is the supported keyword; the old `lr=` alias is deprecated
# and rejected by newer Keras releases. This also matches the later cells.
cnn_optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

In [None]:
# Configure loss, optimizer and metric for the basic CNN
basic_cnn_model.compile(
    optimizer=cnn_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the training split, evaluating on the validation split each epoch;
# the returned history object is kept for later inspection.
basic_cnn_model_results = basic_cnn_model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_valid, y_valid),
    epochs=epochs,
    batch_size=64,
    verbose=True,
)


In [None]:
# Score the trained CNN on the prepared test arrays; entry 1 of the result is
# what this notebook reports as accuracy.
cnn_score = basic_cnn_model.evaluate(x=x_test, y=y_test, verbose=0)
cnn_test_accuracy = cnn_score[1]
print('Test accuracy of the basic CNN model:',cnn_test_accuracy)

#### CNN-LSTM

In [None]:
## Loading the dataset
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")
person_train_valid = np.load("person_train_valid.npy")
X_train_valid = np.load("X_train_valid.npy")
y_train_valid = np.load("y_train_valid.npy")
person_test = np.load("person_test.npy")

## Adjusting the labels so that the four classes are numbered 0..3

# Cue onset left - 0
# Cue onset right - 1
# Cue onset foot - 2
# Cue onset tongue - 3

y_train_valid -= 769
y_test -= 769


## Random splitting and reshaping the data
# Draw a random validation subset of 375 trials; every remaining trial is used
# for training. The trial count is read from the data instead of hard-coded.
n_trials = X_train_valid.shape[0]
ind_valid = np.random.choice(n_trials, 375, replace=False)
ind_train = np.setdiff1d(np.arange(n_trials), ind_valid)

# Creating the training and validation sets using the generated indices
(X_train, X_valid) = X_train_valid[ind_train], X_train_valid[ind_valid]
(y_train, y_valid) = y_train_valid[ind_train], y_train_valid[ind_valid]


## Preprocessing the dataset
# NOTE(review): data_prep augments (and adds noise to) the validation and test
# splits too, so the reported metrics are over augmented copies -- confirm intended.
x_train,y_train = data_prep(X_train,y_train,2,2,True)
x_valid,y_valid = data_prep(X_valid,y_valid,2,2,True)
X_test_prep,y_test_prep = data_prep(X_test,y_test,2,2,True)


print('Shape of training set:',x_train.shape)
print('Shape of validation set:',x_valid.shape)
print('Shape of training labels:',y_train.shape)
print('Shape of validation labels:',y_valid.shape)
print('Shape of testing set:',X_test_prep.shape)
print('Shape of testing labels:',y_test_prep.shape)


# Converting the labels to categorical variables for multiclass classification
# (y_test is overwritten with the one-hot labels of the augmented test set)
y_train = to_categorical(y_train, 4)
y_valid = to_categorical(y_valid, 4)
y_test = to_categorical(y_test_prep, 4)
print('Shape of training labels after categorical conversion:',y_train.shape)
print('Shape of validation labels after categorical conversion:',y_valid.shape)
print('Shape of test labels after categorical conversion:',y_test.shape)

# Adding width of the segment to be 1
# Each array is reshaped from its OWN shape (the validation set previously
# borrowed the time length of the training set).
x_train = x_train.reshape(x_train.shape + (1,))
x_valid = x_valid.reshape(x_valid.shape + (1,))
x_test = X_test_prep.reshape(X_test_prep.shape + (1,))
print('Shape of training set after adding width info:',x_train.shape)
print('Shape of validation set after adding width info:',x_valid.shape)
print('Shape of test set after adding width info:',x_test.shape)


# Reshaping the training and validation dataset
# Two axis swaps reorder (trial, channel, time, 1) -> (trial, time, 1, channel),
# the layout matching the model's input_shape=(250,1,22).
x_train = np.swapaxes(np.swapaxes(x_train, 1,3), 1,2)
x_valid = np.swapaxes(np.swapaxes(x_valid, 1,3), 1,2)
x_test = np.swapaxes(np.swapaxes(x_test, 1,3), 1,2)
print('Shape of training set after dimension reshaping:',x_train.shape)
print('Shape of validation set after dimension reshaping:',x_valid.shape)
print('Shape of test set after dimension reshaping:',x_test.shape)




In [None]:
# Building the hybrid CNN-LSTM model using sequential class
hybrid_cnn_lstm_model = Sequential()

# Four conv blocks (conv -> max-pool -> batch-norm -> dropout) that differ only
# in the number of filters; the first block also declares the input shape.
for block_index, n_filters in enumerate((25, 50, 100, 200)):
    conv_options = dict(filters=n_filters, kernel_size=(10,1), padding='same', activation='elu')
    if block_index == 0:
        conv_options['input_shape'] = (250,1,22)
    hybrid_cnn_lstm_model.add(Conv2D(**conv_options))
    hybrid_cnn_lstm_model.add(MaxPooling2D(pool_size=(3,1), padding='same'))
    hybrid_cnn_lstm_model.add(BatchNormalization())
    hybrid_cnn_lstm_model.add(Dropout(0.5))

# FC+LSTM layers: flatten the conv output, project it to 100 units, then feed
# those 100 values to the LSTM as a length-100 sequence of scalars.
hybrid_cnn_lstm_model.add(Flatten())
hybrid_cnn_lstm_model.add(Dense(100))
hybrid_cnn_lstm_model.add(Reshape((100,1)))
hybrid_cnn_lstm_model.add(
    LSTM(10, dropout=0.6, recurrent_dropout=0.1, input_shape=(100,1), return_sequences=False)
)

# Output FC layer with softmax activation over the 4 classes
hybrid_cnn_lstm_model.add(Dense(4, activation='softmax'))

# Printing the model summary
hybrid_cnn_lstm_model.summary()







In [None]:
# Model parameters
learning_rate = 1e-3
epochs = 50
# `learning_rate=` is the supported keyword; the old `lr=` alias is deprecated
# and rejected by newer Keras releases. This also matches the later cells.
hybrid_cnn_lstm_optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

In [None]:
# Configure loss, optimizer and metric for the hybrid CNN-LSTM
hybrid_cnn_lstm_model.compile(
    optimizer=hybrid_cnn_lstm_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the training split, evaluating on the validation split each epoch;
# the returned history object is kept for later inspection.
hybrid_cnn_lstm_model_results = hybrid_cnn_lstm_model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_valid, y_valid),
    epochs=epochs,
    batch_size=64,
    verbose=True,
)



In [None]:
## Testing the hybrid CNN-LSTM model
# Entry 1 of the evaluation result is what this notebook reports as accuracy.
hybrid_cnn_lstm_score = hybrid_cnn_lstm_model.evaluate(x=x_test, y=y_test, verbose=0)
hybrid_cnn_lstm_test_accuracy = hybrid_cnn_lstm_score[1]
print('Test accuracy of the hybrid CNN-LSTM model:',hybrid_cnn_lstm_test_accuracy)

# Default Project

## 1. Optimize the classification accuracy for subject 1. Does it help to train across all subjects?

#### Preprocessing

In [None]:
# Read the train/validation arrays and the held-out test arrays from disk
X_train_valid, y_train_valid, person_train_valid = map(
    np.load,
    ("X_train_valid.npy", "y_train_valid.npy", "person_train_valid.npy"),
)
X_test, y_test, person_test = map(
    np.load,
    ("X_test.npy", "y_test.npy", "person_test.npy"),
)

# Shift the labels in place so the four cue classes are numbered
#   left - 0, right - 1, foot - 2, tongue - 3
y_test -= 769
y_train_valid -= 769

# Identifier of the subject to isolate
# (presumably ids are 0-based, so 0 is "subject 1" in the heading -- TODO confirm)
subject = 0

# Row indices of the trials belonging to that subject in each split
subject_valid_idx = np.where(person_train_valid==subject)[0]
subject_test_idx = np.where(person_test==subject)[0]

# NOTE(review): the `suject_*` spelling is kept because later cells use these names.
suject_X_train_valid, suject_y_train_valid = (
    X_train_valid[subject_valid_idx],
    y_train_valid[subject_valid_idx],
)
subject_X_test, suject_y_test = X_test[subject_test_idx], y_test[subject_test_idx]

print(f'X_test Shape for Subject {subject}: {subject_X_test.shape}')
print(f'y_test Shape for Subject {subject}: {suject_y_test.shape}')
print(f'X_train_valid Shape for Subject {subject}: {suject_X_train_valid.shape}')
print(f'y_train_valid Shape for Subject {subject}: {suject_y_train_valid.shape}')




In [None]:
# Single random hold-out split: one fifth of the subject's trials is used for
# validation and the rest for training (this is not a 5-fold cross-validation).
n_subject_trials = suject_X_train_valid.shape[0]
indicies_valid = np.random.choice(n_subject_trials, n_subject_trials // 5, replace=False)
indicies_train = np.setdiff1d(np.arange(n_subject_trials), indicies_valid)

# Creating the training and validation sets using the generated indices
X_train, X_valid = suject_X_train_valid[indicies_train], suject_X_train_valid[indicies_valid]
y_train, y_valid = suject_y_train_valid[indicies_train], suject_y_train_valid[indicies_valid]


# Preprocessing the dataset
# NOTE(review): data_prep augments (and adds noise to) the validation and test
# splits too, so the reported metrics are over augmented copies -- confirm intended.
x_train,y_train = data_prep(X_train,y_train,2,2,True)
x_valid,y_valid = data_prep(X_valid,y_valid,2,2,True)
X_test_prep,y_test_prep = data_prep(subject_X_test,suject_y_test,2,2,True)


print('Shape of training set:',x_train.shape)
print('Shape of validation set:',x_valid.shape)
print('Shape of training labels:',y_train.shape)
print('Shape of validation labels:',y_valid.shape)
print('Shape of testing set:',X_test_prep.shape)
print('Shape of testing labels:',y_test_prep.shape)


# Converting the labels to categorical variables for multiclass classification
# (y_test is overwritten with the one-hot labels of the augmented test set)
y_train = to_categorical(y_train, 4)
y_valid = to_categorical(y_valid, 4)
y_test = to_categorical(y_test_prep, 4)
print('Shape of training labels after categorical conversion:',y_train.shape)
print('Shape of validation labels after categorical conversion:',y_valid.shape)
print('Shape of test labels after categorical conversion:',y_test.shape)

# Adding width of the segment to be 1
# Each array is reshaped from its OWN shape (the validation set previously
# borrowed the time length of the training set).
x_train = x_train.reshape(x_train.shape + (1,))
x_valid = x_valid.reshape(x_valid.shape + (1,))
x_test = X_test_prep.reshape(X_test_prep.shape + (1,))
print('Shape of training set after adding width info:',x_train.shape)
print('Shape of validation set after adding width info:',x_valid.shape)
print('Shape of test set after adding width info:',x_test.shape)


# Reshaping the training and validation dataset
# Two axis swaps reorder (trial, channel, time, 1) -> (trial, time, 1, channel),
# the layout matching the model's input_shape=(250,1,22).
x_train = np.swapaxes(np.swapaxes(x_train, 1,3), 1,2)
x_valid = np.swapaxes(np.swapaxes(x_valid, 1,3), 1,2)
x_test = np.swapaxes(np.swapaxes(x_test, 1,3), 1,2)
print('Shape of training set after dimension reshaping:',x_train.shape)
print('Shape of validation set after dimension reshaping:',x_valid.shape)
print('Shape of test set after dimension reshaping:',x_test.shape)



#### Model

In [None]:

# Building the CNN model using sequential class
cnn_subject_model = Sequential()

# Two conv blocks (conv -> max-pool -> batch-norm -> dropout), both with 10
# filters; the temporal kernel grows from 5 to 15. The first block also
# declares the input shape.
# (Deeper variants -- extra blocks with 25-100 filters and kernels of 10 or 50 --
# were tried here and are intentionally left out.)
for block_index, (n_filters, kernel_length) in enumerate(((10, 5), (10, 15))):
    conv_options = dict(
        filters=n_filters, kernel_size=(kernel_length,1), padding='same', activation='elu'
    )
    if block_index == 0:
        conv_options['input_shape'] = (250,1,22)
    cnn_subject_model.add(Conv2D(**conv_options))
    cnn_subject_model.add(MaxPooling2D(pool_size=(3,1), padding='same'))
    cnn_subject_model.add(BatchNormalization())
    cnn_subject_model.add(Dropout(0.5))

# Classifier head: flatten the feature maps, then a 4-way softmax layer
cnn_subject_model.add(Flatten())
cnn_subject_model.add(Dense(4, activation='softmax'))

# Printing the model summary
cnn_subject_model.summary()



#### Hyperparameters

In [None]:
# Training hyperparameters for the subject CNN: epoch budget and Adam step size
epochs = 100
learning_rate = 1e-3
cnn_subject_model_optimizer = keras.optimizers.Adam(
    learning_rate=learning_rate,
)

#### Training

In [None]:
# Configure loss, optimizer and metric for the subject CNN
cnn_subject_model.compile(
    optimizer=cnn_subject_model_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the training split, evaluating on the validation split each epoch;
# the returned history object is kept for later inspection.
cnn_subject_model_results = cnn_subject_model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_valid, y_valid),
    epochs=epochs,
    batch_size=64,
    verbose=True,
)

#### Testing

In [None]:
# Score the subject CNN on the prepared test arrays; entry 1 of the result is
# what this notebook reports as accuracy.
cnn_subject_model_score = cnn_subject_model.evaluate(x=x_test, y=y_test, verbose=0)
cnn_subject_test_accuracy = cnn_subject_model_score[1]
print(f'Test accuracy of the CNN model for subject {subject}:',cnn_subject_test_accuracy)

### Now Training across all subjects

#### Preprocessing

In [None]:
# Load the dataset
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")
person_train_valid = np.load("person_train_valid.npy")
X_train_valid = np.load("X_train_valid.npy")
y_train_valid = np.load("y_train_valid.npy")
person_test = np.load("person_test.npy")

## Adjusting the labels so that the four classes are numbered 0..3

# Cue onset left - 0
# Cue onset right - 1
# Cue onset foot - 2
# Cue onset tongue - 3

y_train_valid -= 769
y_test -= 769

# The model is trained on every subject below but still tested on this one
# subject only, so the result is comparable with the single-subject run.
subject = 0
subject_test_idx = np.where(person_test==subject)[0]
subject_valid_idx = np.where(person_train_valid==subject)[0]


# NOTE(review): the `suject_*` spelling is kept for consistency with other cells.
subject_X_test = X_test[subject_test_idx]
suject_y_test = y_test[subject_test_idx]
suject_X_train_valid = X_train_valid[subject_valid_idx]
suject_y_train_valid = y_train_valid[subject_valid_idx]

print(f'X_test Shape for Subject {subject}: {subject_X_test.shape}')
print(f'y_test Shape for Subject {subject}: {suject_y_test.shape}')
print(f'X_train_valid Shape for Subject {subject}: {suject_X_train_valid.shape}')
print(f'y_train_valid Shape for Subject {subject}: {suject_y_train_valid.shape}')

# Single random hold-out split over ALL subjects: one fifth of the trials is
# used for validation and the rest for training (not a 5-fold cross-validation).
n_trials = X_train_valid.shape[0]
indicies_valid = np.random.choice(n_trials, n_trials // 5, replace=False)
indicies_train = np.setdiff1d(np.arange(n_trials), indicies_valid)

# Creating the training and validation sets using the generated indices
X_train, X_valid = X_train_valid[indicies_train], X_train_valid[indicies_valid]
y_train, y_valid = y_train_valid[indicies_train], y_train_valid[indicies_valid]


# Preprocessing the dataset
# NOTE(review): data_prep augments (and adds noise to) the validation and test
# splits too, so the reported metrics are over augmented copies -- confirm intended.
x_train,y_train = data_prep(X_train,y_train,2,2,True)
x_valid,y_valid = data_prep(X_valid,y_valid,2,2,True)
X_test_prep,y_test_prep = data_prep(subject_X_test,suject_y_test,2,2,True)


print('Shape of training set:',x_train.shape)
print('Shape of validation set:',x_valid.shape)
print('Shape of training labels:',y_train.shape)
print('Shape of validation labels:',y_valid.shape)
print('Shape of testing set:',X_test_prep.shape)
print('Shape of testing labels:',y_test_prep.shape)


# Converting the labels to categorical variables for multiclass classification
# (y_test is overwritten with the one-hot labels of the augmented test set)
y_train = to_categorical(y_train, 4)
y_valid = to_categorical(y_valid, 4)
y_test = to_categorical(y_test_prep, 4)
print('Shape of training labels after categorical conversion:',y_train.shape)
print('Shape of validation labels after categorical conversion:',y_valid.shape)
print('Shape of test labels after categorical conversion:',y_test.shape)

# Adding width of the segment to be 1
# Each array is reshaped from its OWN shape (the validation set previously
# borrowed the time length of the training set).
x_train = x_train.reshape(x_train.shape + (1,))
x_valid = x_valid.reshape(x_valid.shape + (1,))
x_test = X_test_prep.reshape(X_test_prep.shape + (1,))
print('Shape of training set after adding width info:',x_train.shape)
print('Shape of validation set after adding width info:',x_valid.shape)
print('Shape of test set after adding width info:',x_test.shape)


# Reshaping the training and validation dataset
# Two axis swaps reorder (trial, channel, time, 1) -> (trial, time, 1, channel),
# the layout matching the model's input_shape=(250,1,22).
x_train = np.swapaxes(np.swapaxes(x_train, 1,3), 1,2)
x_valid = np.swapaxes(np.swapaxes(x_valid, 1,3), 1,2)
x_test = np.swapaxes(np.swapaxes(x_test, 1,3), 1,2)
print('Shape of training set after dimension reshaping:',x_train.shape)
print('Shape of validation set after dimension reshaping:',x_valid.shape)
print('Shape of test set after dimension reshaping:',x_test.shape)



#### Model

In [None]:

# Building the CNN model using sequential class
# (rebinds `cnn_subject_model`, replacing any model previously held under that name)
cnn_subject_model = Sequential()

# Two conv blocks (conv -> max-pool -> batch-norm -> dropout), both with 10
# filters; the temporal kernel grows from 5 to 15. The first block also
# declares the input shape.
# (Deeper variants -- extra blocks with 25-100 filters and kernels of 10 or 50 --
# were tried here and are intentionally left out.)
for block_index, (n_filters, kernel_length) in enumerate(((10, 5), (10, 15))):
    conv_options = dict(
        filters=n_filters, kernel_size=(kernel_length,1), padding='same', activation='elu'
    )
    if block_index == 0:
        conv_options['input_shape'] = (250,1,22)
    cnn_subject_model.add(Conv2D(**conv_options))
    cnn_subject_model.add(MaxPooling2D(pool_size=(3,1), padding='same'))
    cnn_subject_model.add(BatchNormalization())
    cnn_subject_model.add(Dropout(0.5))

# Classifier head: flatten the feature maps, then a 4-way softmax layer
cnn_subject_model.add(Flatten())
cnn_subject_model.add(Dense(4, activation='softmax'))

# Printing the model summary
cnn_subject_model.summary()



#### Hyperparameters

In [None]:
# Training hyperparameters for the all-subjects run: epoch budget and Adam step size
epochs = 100
learning_rate = 1e-3
cnn_subject_model_optimizer = keras.optimizers.Adam(
    learning_rate=learning_rate,
)

#### Training

In [None]:
# Configure loss, optimizer and metric for the CNN trained on all subjects
cnn_subject_model.compile(
    optimizer=cnn_subject_model_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the training split, evaluating on the validation split each epoch;
# the returned history object is kept for later inspection.
cnn_subject_model_results = cnn_subject_model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_valid, y_valid),
    epochs=epochs,
    batch_size=64,
    verbose=True,
)


#### Testing

In [None]:
# Score the all-subjects CNN on the selected subject's prepared test arrays;
# entry 1 of the result is what this notebook reports as accuracy.
cnn_subject_model_score = cnn_subject_model.evaluate(x=x_test, y=y_test, verbose=False)
cnn_subject_test_accuracy = cnn_subject_model_score[1]
print(f'Test accuracy of the CNN model for subject {subject}:',cnn_subject_test_accuracy)


## 2. Optimize the classification accuracy across all subjects. How does the classifier do? Do you notice any interesting trends?

#### Preprocessing

In [5]:
def preprocess_subjects(subject):
    """Prepare CNN inputs: train/validation from all subjects, test from one subject.

    The .npy files are re-read from the working directory on every call. A
    random 80/20 split of the complete train/valid pool provides the training
    and validation sets; only the test set is restricted to the trials whose
    ``person_test`` entry equals ``subject``. Every set is passed through
    ``data_prep`` (sub_sample=2, average=2, noise=True), labels are one-hot
    encoded over 4 classes, and inputs are rearranged from
    (samples, 22, T) to (samples, T, 1, 22) to match the CNN input shape.

    Args:
        subject: Subject id compared against the entries of ``person_test.npy``.

    Returns:
        Tuple ``(x_train, y_train, x_valid, y_valid, x_test, y_test)``.
    """
    X_test = np.load("X_test.npy")
    y_test = np.load("y_test.npy")
    X_train_valid = np.load("X_train_valid.npy")
    y_train_valid = np.load("y_train_valid.npy")
    person_test = np.load("person_test.npy")

    ## Adjusting the labels so that

    # Cue onset left - 0
    # Cue onset right - 1
    # Cue onset foot - 2
    # Cue onset tongue - 3

    y_train_valid -= 769
    y_test -= 769

    # Only the test set is filtered by subject; training uses every subject.
    subject_test_idx = np.where(person_test == subject)[0]
    subject_X_test = X_test[subject_test_idx]
    subject_y_test = y_test[subject_test_idx]

    # Random hold-out: one fifth of the pool for validation, the rest for training.
    # Sorting makes the training order independent of set iteration order.
    n_trials = X_train_valid.shape[0]
    indicies_valid = np.random.choice(n_trials, n_trials // 5, replace=False)
    indicies_train = np.array(sorted(set(range(n_trials)) - set(indicies_valid)))

    X_train, X_valid = X_train_valid[indicies_train], X_train_valid[indicies_valid]
    y_train, y_valid = y_train_valid[indicies_train], y_train_valid[indicies_valid]

    # Trimming / pooling / augmentation shared with the other experiments.
    x_train, y_train = data_prep(X_train, y_train, 2, 2, True)
    x_valid, y_valid = data_prep(X_valid, y_valid, 2, 2, True)
    X_test_prep, y_test_prep = data_prep(subject_X_test, subject_y_test, 2, 2, True)

    # Converting the labels to categorical variables for multiclass classification
    y_train = to_categorical(y_train, 4)
    y_valid = to_categorical(y_valid, 4)
    y_test = to_categorical(y_test_prep, 4)

    def _to_model_layout(samples):
        # (n, channels, time) -> (n, channels, time, 1) -> (n, time, 1, channels).
        # Each array is reshaped with its OWN dimensions; the original code
        # reshaped the validation set with the training set's time length.
        samples = samples.reshape(samples.shape[0], samples.shape[1], samples.shape[2], 1)
        return np.swapaxes(np.swapaxes(samples, 1, 3), 1, 2)

    x_train = _to_model_layout(x_train)
    x_valid = _to_model_layout(x_valid)
    x_test = _to_model_layout(X_test_prep)

    return (x_train, y_train, x_valid, y_valid, x_test, y_test)




#### Model

In [6]:
# Training configuration for the all-subjects CNN.
epochs = 100
learning_rate = 1e-3
cnn_subject_model_optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

# Only the train/validation parts are kept here; the test part returned for
# subject 2 is discarded.
x_train, y_train, x_valid, y_valid, _, _ = preprocess_subjects(subject=2)

# Three conv blocks (conv -> max-pool -> batch-norm -> dropout) followed by a
# softmax classifier over the 4 classes.
cnn_subject_model = Sequential([
    # Conv. block 1
    Conv2D(filters=20, kernel_size=(5,1), padding='same', activation='elu', input_shape=(250,1,22)),
    MaxPooling2D(pool_size=(3,1), padding='same'),
    BatchNormalization(),
    Dropout(0.5),

    # Conv. block 2
    Conv2D(filters=20, kernel_size=(15,1), padding='same', activation='elu'),
    MaxPooling2D(pool_size=(3,1), padding='same'),
    BatchNormalization(),
    Dropout(0.5),

    # Conv. block 3
    Conv2D(filters=10, kernel_size=(10,1), padding='same', activation='elu'),
    MaxPooling2D(pool_size=(3,1), padding='same'),
    BatchNormalization(),
    Dropout(0.5),

    # Classifier head
    Flatten(),
    Dense(4, activation='softmax'),
])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

#### Training


In [None]:
# Uncomment to print the layer-by-layer summary before training.
# cnn_subject_model.summary()

# One-hot labels + softmax output -> categorical cross-entropy.
cnn_subject_model.compile(
    optimizer=cnn_subject_model_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the all-subjects split; the History object is kept for later inspection.
cnn_subject_model_results = cnn_subject_model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_valid, y_valid),
    epochs=epochs,
    batch_size=64,
    verbose=True,
)

#### Testing

In [7]:
# Score the single all-subjects model on each subject's test trials in turn.
subjects = 9
for subject in range(subjects):
    # tf.keras.backend.clear_session()
    # Only the subject-specific test split is needed; the first four outputs are ignored.
    _, _, _, _, x_test, y_test = preprocess_subjects(subject=subject)
    cnn_subject_model_score = cnn_subject_model.evaluate(x=x_test, y=y_test, verbose=True)
    # Index 1 is the accuracy metric (index 0 is the loss).
    print(f'Test accuracy of the CNN model for subject {subject}:', cnn_subject_model_score[1])



Test accuracy of the CNN model for subject 0: 0.6399999856948853
Test accuracy of the CNN model for subject 1: 0.5600000023841858
Test accuracy of the CNN model for subject 2: 0.7799999713897705
Test accuracy of the CNN model for subject 3: 0.6800000071525574
Test accuracy of the CNN model for subject 4: 0.813829779624939
Test accuracy of the CNN model for subject 5: 0.7244898080825806
Test accuracy of the CNN model for subject 6: 0.675000011920929
Test accuracy of the CNN model for subject 7: 0.7300000190734863
Test accuracy of the CNN model for subject 8: 0.7659574747085571


## 3. Evaluate the classification accuracy as a function of time (e.g., does it increase as you have data over longer periods of time? how much time is required to get a reasonable classification accuracy?)

#### Preprocessing

In [20]:
# Raw trials, labels and subject ids, read from the working directory.
X_train_valid = np.load("X_train_valid.npy")
y_train_valid = np.load("y_train_valid.npy")
person_train_valid = np.load("person_train_valid.npy")
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")
person_test = np.load("person_test.npy")

# Shift the cue codes in place so the classes become:
#   0 - cue onset left
#   1 - cue onset right
#   2 - cue onset foot
#   3 - cue onset tongue
np.subtract(y_train_valid, 769, out=y_train_valid)
np.subtract(y_test, 769, out=y_test)

# Random hold-out: one fifth of the trials for validation, the remainder for training.
indicies_valid = np.random.choice(X_train_valid.shape[0], size=X_train_valid.shape[0] // 5, replace=False)
indicies_train = np.array(list(set(range(X_train_valid.shape[0])) - set(indicies_valid)))

# Materialise both splits from the index arrays.
X_train, y_train = X_train_valid[indicies_train], y_train_valid[indicies_train]
X_valid, y_valid = X_train_valid[indicies_valid], y_train_valid[indicies_valid]



In [29]:
def data_prep_modular(X,y,sub_sample,average,noise, trim_ratio=0.5):
    """Trim trials in time and build an augmented, down-sampled data set.

    The time axis (axis 2) is cut to its first ``trim_ratio`` fraction and then
    shortened, if necessary, to the nearest length divisible by both
    ``sub_sample`` and ``average`` (the original implementation crashed in
    ``reshape`` for lengths that were not a multiple of the window size).
    Four kinds of views are stacked along axis 0, in this order:

    1. max-pooling over windows of ``sub_sample`` samples,
    2. mean over windows of ``average`` samples plus N(0, 0.5) noise
       (this noise is always added, independent of ``noise``),
    3. ``sub_sample`` strided sub-samplings ``X[:, :, i::sub_sample]``, each
       with N(0, 0.5) noise when ``noise`` is truthy.

    The views can only be stacked when they share a time length, which in
    practice means ``sub_sample == average``.

    Args:
        X: Array indexed as (trial, channel, time).
        y: 1-D label array with one entry per trial.
        sub_sample: Window size for max-pooling and stride for sub-sampling.
        average: Window size for the averaging view.
        noise: Whether to add Gaussian noise to the strided sub-samples.
        trim_ratio: Fraction of the time axis to keep (from the start).

    Returns:
        ``(total_X, total_y)``: stacked views and correspondingly repeated labels.

    Raises:
        ValueError: If no time samples remain after trimming.
    """
    # Trimming the data, e.g. (sample,22,1000) -> (sample,22,500) for trim_ratio=0.5
    n_keep = int(X.shape[2] * trim_ratio)
    # Drop trailing samples until the windows divide the length evenly.
    while n_keep % sub_sample or n_keep % average:
        n_keep -= 1
    if n_keep <= 0:
        raise ValueError(
            'trim_ratio={} leaves no time samples to process'.format(trim_ratio))
    X = X[:, :, 0:n_keep]
    print('Shape of X after trimming:',X.shape)

    n_trials, n_channels = X.shape[0], X.shape[1]

    # Maxpooling the data (sample,22,T) -> (sample,22,T/sub_sample)
    X_max = np.max(X.reshape(n_trials, n_channels, -1, sub_sample), axis=3)

    # Averaging + noise
    X_average = np.mean(X.reshape(n_trials, n_channels, -1, average), axis=3)
    X_average = X_average + np.random.normal(0.0, 0.5, X_average.shape)

    # Collect the views and stack once at the end instead of re-allocating
    # the growing array on every step.
    views = [X_max, X_average]

    # Subsampling
    for offset in range(sub_sample):
        X_subsample = X[:, :, offset::sub_sample]
        if noise:
            X_subsample = X_subsample + np.random.normal(0.0, 0.5, X_subsample.shape)
        views.append(X_subsample)

    total_X = np.vstack(views)
    total_y = np.hstack([y] * len(views))
    return total_X,total_y




In [30]:

def preprocess_time(trim_ratio):
    """Load the data and build CNN inputs restricted to a leading time window.

    The .npy files are re-read from the working directory on every call. A
    random 80/20 split of the train/valid pool is taken, every set is passed
    through ``data_prep_modular`` (sub_sample=2, average=2, noise=True) with
    the given ``trim_ratio``, labels are one-hot encoded over 4 classes and
    inputs are rearranged from (samples, 22, T) to (samples, T, 1, 22).
    Intermediate shapes are printed after each stage.

    Args:
        trim_ratio: Fraction of the time axis kept by ``data_prep_modular``.

    Returns:
        Tuple ``(x_train, y_train, x_valid, y_valid, x_test, y_test)``.
    """
    X_test = np.load("X_test.npy")
    y_test = np.load("y_test.npy")
    X_train_valid = np.load("X_train_valid.npy")
    y_train_valid = np.load("y_train_valid.npy")

    ## Adjusting the labels so that

    # Cue onset left - 0
    # Cue onset right - 1
    # Cue onset foot - 2
    # Cue onset tongue - 3

    y_train_valid -= 769
    y_test -= 769

    # Random hold-out: one fifth of the pool for validation, the rest for training.
    # Sorting makes the training order independent of set iteration order.
    n_trials = X_train_valid.shape[0]
    indicies_valid = np.random.choice(n_trials, n_trials // 5, replace=False)
    indicies_train = np.array(sorted(set(range(n_trials)) - set(indicies_valid)))

    # Creating the training and validation sets using the generated indices
    X_train, X_valid = X_train_valid[indicies_train], X_train_valid[indicies_valid]
    y_train, y_valid = y_train_valid[indicies_train], y_train_valid[indicies_valid]

    # Preprocessing the dataset
    x_train,y_train = data_prep_modular(X_train,y_train,2,2,True, trim_ratio=trim_ratio)
    x_valid,y_valid = data_prep_modular(X_valid,y_valid,2,2,True, trim_ratio=trim_ratio)
    X_test_prep,y_test_prep = data_prep_modular(X_test,y_test,2,2,True, trim_ratio=trim_ratio)

    print('Shape of training set:',x_train.shape)
    print('Shape of validation set:',x_valid.shape)
    print('Shape of training labels:',y_train.shape)
    print('Shape of validation labels:',y_valid.shape)
    print('Shape of testing set:',X_test_prep.shape)
    print('Shape of testing labels:',y_test_prep.shape)

    # Converting the labels to categorical variables for multiclass classification
    y_train = to_categorical(y_train, 4)
    y_valid = to_categorical(y_valid, 4)
    y_test = to_categorical(y_test_prep, 4)
    print('Shape of training labels after categorical conversion:',y_train.shape)
    print('Shape of validation labels after categorical conversion:',y_valid.shape)
    print('Shape of test labels after categorical conversion:',y_test.shape)

    # Adding width of the segment to be 1. Each array is reshaped with its own
    # dimensions (the original used x_train's time length for x_valid).
    x_train = x_train.reshape(x_train.shape + (1,))
    x_valid = x_valid.reshape(x_valid.shape + (1,))
    x_test = X_test_prep.reshape(X_test_prep.shape + (1,))
    print('Shape of training set after adding width info:',x_train.shape)
    print('Shape of validation set after adding width info:',x_valid.shape)
    print('Shape of test set after adding width info:',x_test.shape)

    # (n, channels, time, 1) -> (n, time, 1, channels)
    x_train = np.swapaxes(np.swapaxes(x_train, 1,3), 1,2)
    x_valid = np.swapaxes(np.swapaxes(x_valid, 1,3), 1,2)
    x_test = np.swapaxes(np.swapaxes(x_test, 1,3), 1,2)
    print('Shape of training set after dimension reshaping:',x_train.shape)
    print('Shape of validation set after dimension reshaping:',x_valid.shape)
    print('Shape of test set after dimension reshaping:',x_test.shape)

    return (x_train, y_train, x_valid, y_valid, x_test, y_test)





#### Model / Hyperparameters

In [31]:
def cnn_model(trim_ratio, learning_rate=1e-3, epochs=100, batch_size=64):
    """Train and evaluate the 3-block CNN on data trimmed to ``trim_ratio``.

    The data is produced by ``preprocess_time(trim_ratio)``; the first
    convolution takes its time dimension from the prepared training set, so
    the network adapts to the chosen window length.

    NOTE(review): a later cell defines another ``cnn_model()`` without
    parameters; once that cell has run it shadows this function.

    Args:
        trim_ratio: Fraction of each trial's time axis to use.
        learning_rate: Adam learning rate (default matches the original 1e-3).
        epochs: Number of training epochs (default matches the original 100).
        batch_size: Mini-batch size for ``fit`` (default matches the original 64).

    Returns:
        Tuple ``(cnn, cnn_training_results, cnn_test_score)``: the trained
        model, the History object returned by ``fit`` and the
        ``[loss, accuracy]`` list returned by ``evaluate`` on the test set.
    """
    cnn_subject_model_optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    x_train, y_train, x_valid, y_valid, x_test, y_test = preprocess_time(trim_ratio=trim_ratio)

    # Three conv blocks (conv -> max-pool -> batch-norm -> dropout) followed
    # by a softmax classifier over the 4 classes.
    cnn = Sequential([
        # Conv. block 1 - the time length comes from the prepared data
        Conv2D(filters=20, kernel_size=(5,1), padding='same', activation='elu', input_shape=(x_train.shape[1],1,22)),
        MaxPooling2D(pool_size=(3,1), padding='same'),
        BatchNormalization(),
        Dropout(0.5),

        # Conv. block 2
        Conv2D(filters=20, kernel_size=(15,1), padding='same', activation='elu'),
        MaxPooling2D(pool_size=(3,1), padding='same'),
        BatchNormalization(),
        Dropout(0.5),

        # Conv. block 3
        Conv2D(filters=10, kernel_size=(10,1), padding='same', activation='elu'),
        MaxPooling2D(pool_size=(3,1), padding='same'),
        BatchNormalization(),
        Dropout(0.5),

        # Output layer with Softmax activation
        Flatten(),
        Dense(4, activation='softmax'),
    ])

    cnn.compile(loss='categorical_crossentropy',
                optimizer=cnn_subject_model_optimizer,
                metrics=['accuracy'])

    cnn_training_results = cnn.fit(x_train,
                                   y_train,
                                   batch_size=batch_size,
                                   epochs=epochs,
                                   validation_data=(x_valid, y_valid), verbose=True)

    cnn_test_score = cnn.evaluate(x_test, y_test, verbose=True)
    print(f'Test Accuracy: {cnn_test_score[1]}')
    return cnn, cnn_training_results, cnn_test_score
    

#### Training / Testing

In [32]:
# Train one CNN per time window. The leading 0.0 entry of the grid is dropped
# because an empty window cannot be trained on.
# Each stored entry is the full evaluate() result, i.e. [loss, accuracy].
accuracies = []
for trim_ratio in np.arange(0, 1.1, step=.1)[1:]:
    cnn, cnn_training_results, cnn_test_score = cnn_model(trim_ratio=trim_ratio)
    accuracies.append(cnn_test_score)
    

Shape of X after trimming: (1692, 22, 100)
Shape of X after trimming: (423, 22, 100)
Shape of X after trimming: (443, 22, 100)
Shape of training set: (6768, 22, 50)
Shape of validation set: (1692, 22, 50)
Shape of training labels: (6768,)
Shape of validation labels: (1692,)
Shape of testing set: (1772, 22, 50)
Shape of testing labels: (1772,)
Shape of training labels after categorical conversion: (6768, 4)
Shape of validation labels after categorical conversion: (1692, 4)
Shape of test labels after categorical conversion: (1772, 4)
Shape of training set after adding width info: (6768, 22, 50, 1)
Shape of validation set after adding width info: (1692, 22, 50, 1)
Shape of test set after adding width info: (1772, 22, 50, 1)
Shape of training set after dimension reshaping: (6768, 50, 1, 22)
Shape of validation set after dimension reshaping: (1692, 50, 1, 22)
Shape of test set after dimension reshaping: (1772, 50, 1, 22)
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/10

# Deeper exploration and analysis into other architectures

## Task 2, repeated with a CRNN (CNN-LSTM)

#### Preprocessing

In [104]:
def preprocess():
    """Load the data and build train/validation/test tensors over all subjects.

    The .npy files are re-read from the working directory on every call. A
    random 80/20 split of the train/valid pool is taken, every set is passed
    through ``data_prep`` (sub_sample=2, average=2, noise=True), labels are
    one-hot encoded over 4 classes and inputs are rearranged from
    (samples, 22, T) to (samples, T, 1, 22) to match the model input shape.

    Returns:
        Tuple ``(x_train, y_train, x_valid, y_valid, x_test, y_test)``.
    """
    X_test = np.load("X_test.npy")
    y_test = np.load("y_test.npy")
    X_train_valid = np.load("X_train_valid.npy")
    y_train_valid = np.load("y_train_valid.npy")

    ## Adjusting the labels so that

    # Cue onset left - 0
    # Cue onset right - 1
    # Cue onset foot - 2
    # Cue onset tongue - 3

    y_train_valid -= 769
    y_test -= 769

    # Random hold-out: one fifth of the pool for validation, the rest for training.
    # Sorting makes the training order independent of set iteration order.
    n_trials = X_train_valid.shape[0]
    indicies_valid = np.random.choice(n_trials, n_trials // 5, replace=False)
    indicies_train = np.array(sorted(set(range(n_trials)) - set(indicies_valid)))

    # Creating the training and validation sets using the generated indices
    X_train, X_valid = X_train_valid[indicies_train], X_train_valid[indicies_valid]
    y_train, y_valid = y_train_valid[indicies_train], y_train_valid[indicies_valid]

    # Preprocessing the dataset
    x_train, y_train = data_prep(X_train, y_train, 2, 2, True)
    x_valid, y_valid = data_prep(X_valid, y_valid, 2, 2, True)
    X_test_prep, y_test_prep = data_prep(X_test, y_test, 2, 2, True)

    # Converting the labels to categorical variables for multiclass classification
    y_train = to_categorical(y_train, 4)
    y_valid = to_categorical(y_valid, 4)
    y_test = to_categorical(y_test_prep, 4)

    def _to_model_layout(samples):
        # (n, channels, time) -> (n, channels, time, 1) -> (n, time, 1, channels).
        # Each array is reshaped with its OWN dimensions; the original code
        # reshaped the validation set with the training set's time length.
        samples = samples.reshape(samples.shape[0], samples.shape[1], samples.shape[2], 1)
        return np.swapaxes(np.swapaxes(samples, 1, 3), 1, 2)

    x_train = _to_model_layout(x_train)
    x_valid = _to_model_layout(x_valid)
    x_test = _to_model_layout(X_test_prep)

    return (x_train, y_train, x_valid, y_valid, x_test, y_test)




#### Model

In [131]:
def cnn_lstm_model():
    """Build the (uncompiled) CNN + stacked-LSTM classifier.

    Three conv blocks (conv -> max-pool -> batch-norm -> dropout) reduce the
    250-step input to a short feature sequence, which is then fed step by
    step to three LSTM layers and a 4-way softmax.

    The original version applied ``Permute((2, 3, 1))`` before
    ``TimeDistributed(Flatten())``. That moved the width axis (size 1) into
    the time position, so the LSTMs received a sequence of length 1 with all
    time steps flattened into the feature vector. The permutation is removed
    here so the pooled time axis is the LSTM sequence axis.

    Returns:
        A ``Sequential`` model with input shape (250, 1, 22) and 4 softmax outputs.
    """
    model = Sequential()

    # Conv. block 1
    model.add(Conv2D(filters=20, kernel_size=(5,1), padding='same', activation='elu', input_shape=(250,1,22)))
    model.add(MaxPooling2D(pool_size=(3,1), padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # Conv. block 2
    model.add(Conv2D(filters=20, kernel_size=(15,1), padding='same', activation='elu'))
    model.add(MaxPooling2D(pool_size=(3,1), padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # Conv. block 3
    model.add(Conv2D(filters=10, kernel_size=(10,1), padding='same', activation='elu'))
    model.add(MaxPooling2D(pool_size=(3,1), padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # After three 'same'-padded pools of size 3 the time axis is
    # 250 -> 84 -> 28 -> 10, i.e. the tensor is (batch, 10, 1, 10).
    # Flattening each time step separately gives (batch, 10 steps, 10 features).
    model.add(TimeDistributed(Flatten()))

    # Stacked LSTMs; only the last one collapses the sequence to a vector.
    model.add(LSTM(250, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(100, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(50))

    # Output layer with Softmax activation
    model.add(Flatten()) # Input is already a flat vector here; kept from the original
    model.add(Dense(4, activation='softmax')) # Output FC layer with softmax activation

    return model

In [156]:
def cnn_model():
    """Build the (uncompiled) two-block baseline CNN.

    Each block is conv -> max-pool -> batch-norm -> dropout with 10 filters;
    the head flattens the features and applies a 4-way softmax. A third
    block (10 filters, kernel (10,1)) was tried and is currently disabled.

    NOTE(review): this definition reuses the name of the earlier
    ``cnn_model(trim_ratio)`` and replaces it once this cell has run.

    Returns:
        A ``Sequential`` model with input shape (250, 1, 22) and 4 softmax outputs.
    """
    conv_block_1 = [
        Conv2D(filters=10, kernel_size=(5,1), padding='same', activation='elu', input_shape=(250,1,22)),
        MaxPooling2D(pool_size=(3,1), padding='same'),
        BatchNormalization(),
        Dropout(0.5),
    ]
    conv_block_2 = [
        Conv2D(filters=10, kernel_size=(15,1), padding='same', activation='elu'),
        MaxPooling2D(pool_size=(3,1), padding='same'),
        BatchNormalization(),
        Dropout(0.5),
    ]
    classifier_head = [
        Flatten(),                       # collapse the feature maps to a vector
        Dense(4, activation='softmax'),  # one probability per class
    ]

    return Sequential(conv_block_1 + conv_block_2 + classifier_head)

In [99]:

def make_generator_model():
    """Build the GAN generator: 100-d latent vector -> (250, 1, 22) sample.

    A dense projection is reshaped to (125, 1, 64), up-sampled once along the
    first axis with a strided transposed convolution, and then narrowed to 1
    and finally widened to 22 channels. Both of the last two transposed
    convolutions use tanh. The running output shape is asserted after every
    shape-changing layer.

    Returns:
        An uncompiled ``Sequential`` generator.
    """
    generator = Sequential()

    def _expect(shape):
        # Fail fast if the running output shape drifts (None is the batch size).
        assert generator.output_shape == shape

    # Latent vector -> dense feature map
    generator.add(Dense(64 * 125 * 1, use_bias=False, input_shape=(100,)))
    generator.add(BatchNormalization())
    generator.add(LeakyReLU())

    generator.add(Reshape((125, 1, 64)))
    _expect((None, 125, 1, 64))

    # Stride 2 on the first axis doubles its length: 125 -> 250
    generator.add(Conv2DTranspose(32, (3, 3), strides=(2, 1), padding='same', use_bias=False))
    _expect((None, 250, 1, 32))
    generator.add(BatchNormalization())
    generator.add(LeakyReLU())

    generator.add(Conv2DTranspose(16, (3, 3), strides=(1, 1), padding='same', use_bias=False))
    _expect((None, 250, 1, 16))
    generator.add(BatchNormalization())
    generator.add(LeakyReLU())

    # Collapse to a single channel ...
    generator.add(Conv2DTranspose(1, (3, 3), strides=(1, 1), padding='same', use_bias=False, activation='tanh'))
    _expect((None, 250, 1, 1))

    generator.add(Reshape((250, 1, 1)))
    _expect((None, 250, 1, 1))

    # ... then expand to the 22 output channels
    generator.add(Conv2DTranspose(22, (3, 3), strides=(1, 1), padding='same', use_bias=False, activation='tanh'))
    _expect((None, 250, 1, 22))

    return generator


def make_discriminator_model():
    """Build the GAN discriminator for (250, 1, 22) samples.

    Three convolution stages (16, 32 and 64 filters, each followed by
    LeakyReLU and 30% dropout) feed a single linear unit. No activation is
    applied to that unit, so the model emits a raw logit.

    Returns:
        An uncompiled ``Sequential`` discriminator.
    """
    stages = [
        Conv2D(16, (3, 3), strides=(1, 1), padding='same',
               input_shape=[250, 1, 22]),
        LeakyReLU(),
        Dropout(0.3),
    ]

    # The two deeper stages halve the first axis via stride 2.
    for n_filters in (32, 64):
        stages += [
            Conv2D(n_filters, (3, 3), strides=(2, 1), padding='same'),
            LeakyReLU(),
            Dropout(0.3),
        ]

    # Real/fake score
    stages += [Flatten(), Dense(1)]

    return Sequential(stages)

def discriminator_loss(real_output, fake_output):
    """Binary cross-entropy loss for the discriminator.

    Real samples are scored against a target of 1 and generated samples
    against a target of 0; both inputs are treated as logits.

    Args:
        real_output: Discriminator outputs for real samples.
        fake_output: Discriminator outputs for generated samples.

    Returns:
        Sum of the real and fake cross-entropy terms.
    """
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    loss_on_real = bce(tf.ones_like(real_output), real_output)
    loss_on_fake = bce(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake

def generator_loss(fake_output):
    """Binary cross-entropy loss for the generator.

    The generator is rewarded when the discriminator scores its samples as
    real, so the logits in ``fake_output`` are compared against a target of 1.

    Args:
        fake_output: Discriminator outputs (logits) for generated samples.

    Returns:
        The cross-entropy between an all-ones target and ``fake_output``.
    """
    wanted_verdict = tf.ones_like(fake_output)
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    return bce(wanted_verdict, fake_output)



#### Hyperparameters

In [149]:
# Shared settings for the experiments in this section.
learning_rate = 1e-3  # step size handed to the Adam optimizer below
batch_size = 64  # also read as a global by train_step and data_prep_gan (number of generated samples)
epochs = 100
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
noise_dim = 100  # latent size; matches the generator's input_shape=(100,)
num_classes = 4  # matches the 4-way softmax output of the classifiers

#### Training


In [97]:

# One generator/discriminator pair per class label (0..3).
generators = [make_generator_model() for _ in range(4)]
discriminators = [make_discriminator_model() for _ in range(4)]

# Every model gets its own optimizer. Adam keeps per-variable state that is
# created for the first set of variables it updates; sharing one instance
# across several models mixes that state and is rejected by recent Keras
# releases (variables the optimizer was not built with are not recognised).
generator_optimizers = [keras.optimizers.Adam(1e-4) for _ in generators]
discriminator_optimizers = [keras.optimizers.Adam(1e-4) for _ in discriminators]

# Kept so that code referring to the original single-optimizer names still resolves.
generator_optimizer = generator_optimizers[0]
discriminator_optimizer = discriminator_optimizers[0]

def train_step(images, class_label):
    """Run one adversarial update for the GAN belonging to ``class_label``.

    ``batch_size`` latent vectors of length ``noise_dim`` (both module-level
    settings) are drawn, the class's generator and discriminator are run in
    training mode, and each network is updated with its own Adam optimizer.

    Args:
        images: Real samples of the given class, shaped like the
            discriminator input.
        class_label: Index (0..3) selecting the generator/discriminator pair.
    """
    generator = generators[class_label]
    discriminator = discriminators[class_label]

    noise = tf.random.normal([batch_size, noise_dim])

    # Two tapes: the generator and discriminator losses are differentiated
    # with respect to different variable sets.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizers[class_label].apply_gradients(
        zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizers[class_label].apply_gradients(
        zip(gradients_of_discriminator, discriminator.trainable_variables))


In [96]:
def train_gan(dataset, labels, epochs):
    """Train the four per-class GANs, one ``train_step`` per class and epoch.

    Args:
        dataset: Samples indexed along axis 0.
        labels: One-hot label matrix; column k marks the samples of class k.
        epochs: Number of passes; each pass performs one update per class
            using all samples of that class at once.
    """
    # Split the samples by class once, before the training loop.
    samples_by_class = [
        dataset[np.where(labels[:, class_label] == 1)[0]]
        for class_label in range(4)
    ]

    for epoch in range(epochs):
        for class_label, class_samples in enumerate(samples_by_class):
            print(f'Epoch: {epoch}, Label: {class_label}')
            train_step(class_samples, class_label)



In [142]:
def data_prep_gan(X,y,sub_sample,average,noise, generators, trim_ratio=0.5):
    """Augment trials as in ``data_prep_modular`` and append GAN-generated samples.

    The real data is trimmed to the first ``trim_ratio`` fraction of the time
    axis and expanded into a max-pooled view, a noisy averaged view and
    ``sub_sample`` strided sub-samplings. Then ``batch_size`` samples are
    drawn from each generator (using the module-level ``batch_size`` and
    ``noise_dim``) and appended with the generator's list index as label.

    Generator output is (n, time, 1, channels) while the real data is
    (n, channels, time). The original code converted between the two with
    ``swapaxes(...).reshape(-1, 22, 250)``, which reinterprets the buffer
    instead of transposing it and therefore mixed time steps and channels.
    The conversion is now a true transpose.

    Args:
        X: Array indexed as (trial, channel, time).
        y: 1-D label array with one entry per trial.
        sub_sample: Window size for max-pooling and stride for sub-sampling.
        average: Window size for the averaging view.
        noise: Whether to add Gaussian noise to the strided sub-samples.
        generators: Sequence of generator models; entry k produces class k.
        trim_ratio: Fraction of the time axis to keep (from the start).

    Returns:
        ``(total_X, total_y)``: stacked real views plus generated samples, and
        the matching labels.

    Raises:
        ValueError: If the generated samples do not have the same
            (channels, time) shape as the prepared real data.
    """
    # Trimming the data (sample,22,1000) -> (sample,22,500)
    X = X[:,:, 0:(int(X.shape[2] * trim_ratio))]
    print('Shape of X after trimming:',X.shape)

    # Maxpooling the data (sample,22,T) -> (sample,22,T/sub_sample)
    X_max = np.max(X.reshape(X.shape[0], X.shape[1], -1, sub_sample), axis=3)

    # Averaging + noise (this noise is always added, independent of `noise`)
    X_average = np.mean(X.reshape(X.shape[0], X.shape[1], -1, average), axis=3)
    X_average = X_average + np.random.normal(0.0, 0.5, X_average.shape)

    parts_X = [X_max, X_average]
    parts_y = [y, y]

    # Subsampling
    for offset in range(sub_sample):
        X_subsample = X[:, :, offset::sub_sample]
        if noise:
            X_subsample = X_subsample + np.random.normal(0.0, 0.5, X_subsample.shape)
        parts_X.append(X_subsample)
        parts_y.append(y)

    # GAN: the same latent batch is fed to every class generator. A separate
    # name is used so the boolean `noise` argument is not overwritten.
    latent = tf.random.normal([batch_size, noise_dim])
    expected_shape = X_max.shape[1:]

    for class_label, generator in enumerate(generators):
        generated_eeg = np.asarray(generator(latent, training=False))
        # (n, time, 1, channels) -> (n, time, channels) -> (n, channels, time)
        generated_eeg = np.swapaxes(generated_eeg[:, :, 0, :], 1, 2)
        if generated_eeg.shape[1:] != expected_shape:
            raise ValueError(
                'Generated samples have shape {} per trial but the prepared data '
                'has {}'.format(generated_eeg.shape[1:], expected_shape))
        parts_X.append(generated_eeg)
        parts_y.append(np.full(shape=(generated_eeg.shape[0],), fill_value=class_label))

    total_X = np.vstack(parts_X)
    total_y = np.hstack(parts_y)

    print('Shape of X after GAN:',total_X.shape)

    return total_X,total_y




In [106]:
def preprocess_gan():
    """Load the EEG arrays and build GAN-augmented train/valid/test sets.

    Reads ``X_train_valid.npy``, ``y_train_valid.npy``, ``X_test.npy`` and
    ``y_test.npy`` from the working directory, shifts the labels by 769 so the
    four cue classes become 0..3, holds out a random fifth of the train/valid
    trials for validation, and passes every split through ``data_prep_gan``
    together with the notebook-level ``generators``.

    Returns:
        Tuple ``(x_train, y_train, x_valid, y_valid, x_test, y_test)``.
        Each ``x_*`` is the 3-D array returned by ``data_prep_gan`` with a
        trailing unit axis added and its axes reordered from ``(n, a, b, 1)``
        to ``(n, b, 1, a)``. Each ``y_*`` is one-hot encoded over 4 classes.
    """

    def _to_cnn_layout(x):
        # Add a trailing unit axis, then reorder (n, a, b, 1) -> (n, b, 1, a).
        # Every dimension comes from the array itself, so a split can never be
        # reshaped with another split's shape (the original used x_train's
        # last dimension when reshaping x_valid).
        x = x.reshape(x.shape[0], x.shape[1], x.shape[2], 1)
        x = np.swapaxes(x, 1, 3)
        return np.swapaxes(x, 1, 2)

    X_train_valid = np.load("X_train_valid.npy")
    X_test = np.load("X_test.npy")

    # Adjusting the labels so that
    #   Cue onset left   - 0
    #   Cue onset right  - 1
    #   Cue onset foot   - 2
    #   Cue onset tongue - 3
    y_train_valid = np.load("y_train_valid.npy") - 769
    y_test = np.load("y_test.npy") - 769

    # Random hold-out: one fifth of the trials go to validation, the rest to
    # training. np.setdiff1d returns the remaining indices sorted and unique.
    n_trials = X_train_valid.shape[0]
    indicies_valid = np.random.choice(n_trials, n_trials // 5, replace=False)
    indicies_train = np.setdiff1d(np.arange(n_trials), indicies_valid)

    # Creating the training and validation sets using the generated indices
    X_train, X_valid = X_train_valid[indicies_train], X_train_valid[indicies_valid]
    y_train, y_valid = y_train_valid[indicies_train], y_train_valid[indicies_valid]

    # Preprocessing the dataset.
    # NOTE(review): the validation and test splits go through the same
    # data_prep_gan call as the training split, so scores computed on them
    # are not measured on the raw held-out trials alone - confirm this is
    # intended.
    x_train, y_train = data_prep_gan(X_train, y_train, 2, 2, True, generators)
    x_valid, y_valid = data_prep_gan(X_valid, y_valid, 2, 2, True, generators)
    X_test_prep, y_test_prep = data_prep_gan(X_test, y_test, 2, 2, True, generators)

    # Converting the labels to categorical variables for multiclass classification
    y_train = to_categorical(y_train, 4)
    y_valid = to_categorical(y_valid, 4)
    y_test = to_categorical(y_test_prep, 4)

    # Adding a unit width axis and moving the axes into the model's input order
    x_train = _to_cnn_layout(x_train)
    x_valid = _to_cnn_layout(x_valid)
    x_test = _to_cnn_layout(X_test_prep)

    return (x_train, y_train, x_valid, y_valid, x_test, y_test)




In [115]:
# Unpack the six arrays returned by preprocess() and pass the training pair to train_gan.
# `epochs` is not defined in this cell; it is expected to exist already in the notebook namespace.
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt, i.e. this run was stopped before it finished.
x_train, y_train, x_valid, y_valid, x_test, y_test = preprocess()
train_gan(x_train, y_train, epochs)


Shape of X after trimming: (1692, 22, 500)


KeyboardInterrupt: 

In [129]:
# Rebuild the splits with preprocess_gan(); this rebinds the notebook-level x_*/y_* names used by later cells.
x_train, y_train, x_valid, y_valid, x_test, y_test = preprocess_gan()


Shape of X after trimming: (1692, 22, 500)
Shape of X after GAN: (7024, 22, 250)
Shape of X after trimming: (423, 22, 500)
Shape of X after GAN: (1948, 22, 250)
Shape of X after trimming: (443, 22, 500)
Shape of X after GAN: (2028, 22, 250)


In [157]:
# Printing the model summary
x_train, y_train, x_valid, y_valid, x_test, y_test = preprocess_gan()
model = cnn_model()
model.summary()
model.compile(loss='categorical_crossentropy',
                optimizer=optimizer,
                metrics=['accuracy'])

model_results = model.fit(x_train,
            y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_valid, y_valid), verbose=True)

Shape of X after trimming: (1692, 22, 500)
Shape of X after GAN: (7024, 22, 250)
Shape of X after trimming: (423, 22, 500)
Shape of X after GAN: (1948, 22, 250)
Shape of X after trimming: (443, 22, 500)
Shape of X after GAN: (2028, 22, 250)
Model: "sequential_91"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_141 (Conv2D)         (None, 250, 1, 10)        1110      
                                                                 
 max_pooling2d_12 (MaxPoolin  (None, 84, 1, 10)        0         
 g2D)                                                            
                                                                 
 batch_normalization_138 (Ba  (None, 84, 1, 10)        40        
 tchNormalization)                                               
                                                                 
 dropout_143 (Dropout)       (None, 84, 1, 10)         0         
          

#### Testing

In [158]:
# Evaluate the trained model on the test arrays returned by preprocess_gan().
# NOTE(review): preprocess_gan() also runs the test split through data_prep_gan
# (recorded output: 443 trimmed trials -> 2028 samples "after GAN"), so this score
# is not computed on the raw held-out trials alone - confirm that is intended.
model_name = 'GAN-CNN'
model_score = model.evaluate(x_test, y_test, verbose=True)
# Index 1 is reported as the accuracy (the model was compiled with metrics=['accuracy']);
# presumably index 0 is the loss - verify.
print(f'Test accuracy of the {model_name} model:',model_score[1])

Test accuracy of the GAN-CNN model: 0.6400394439697266
