In [1]:
import numpy as np
import pandas as pd
import pywt
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Dropout, Input
from keras.layers import Conv2D, LSTM, GRU, RNN, BatchNormalization, MaxPooling2D, Reshape, Concatenate
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from sklearn.model_selection import train_test_split

In [2]:
## Loading and visualizing the data

## Loading the dataset
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")
person_train_valid = np.load("person_train_valid.npy")
X_train_valid = np.load("X_train_valid.npy")
print(X_train_valid.shape)
y_train_valid = np.load("y_train_valid.npy")
person_test = np.load("person_test.npy")

## Adjusting the labels so that 

# Cue onset left - 0
# Cue onset right - 1
# Cue onset foot - 2
# Cue onset tongue - 3

y_train_valid -= 769
y_test -= 769

(2115, 22, 1000)


In [3]:
def train_data_prep(X,y,sub_sample,average,noise):
    
    total_X = None
    total_y = None
    
    # Trimming the data (sample,22,1000) -> (sample,22,800)
    X = X[:,:,0:800]
    print('Shape of X after trimming:',X.shape)
    
    # Maxpooling the data (sample,22,800) -> (sample,22,800/sub_sample)
    X_max = np.max(X.reshape(X.shape[0], X.shape[1], -1, sub_sample), axis=3)
    
    total_X = X_max
    total_y = y
    print('Shape of X after maxpooling:',total_X.shape)
    
    # Averaging + noise 
    X_average = np.mean(X.reshape(X.shape[0], X.shape[1], -1, average),axis=3)
    X_average = X_average + np.random.normal(0.0, 0.5, X_average.shape)
    
    total_X = np.vstack((total_X, X_average))
    total_y = np.hstack((total_y, y))
    print('Shape of X after averaging + noise and concatenating:',total_X.shape)
    
    # Subsampling
    for i in range(sub_sample):
        X_subsample = X[:, :, i::sub_sample] + \
                            (np.random.normal(0.0, 0.5, X[:, :,i::sub_sample].shape) if noise else 0.0)
            
        total_X = np.vstack((total_X, X_subsample))
        total_y = np.hstack((total_y, y))
    
    print('Shape of X after subsampling and concatenating:', total_X.shape)
    print('Shape of Y:', total_y.shape)
    return total_X, total_y

def test_data_prep(X):
    total_X = None
    
    # Trimming the data (sample,22,1000) -> (sample,22,800)
    X = X[:,:,0:800]
    print('Shape of X after trimming:',X.shape)
    
    # Maxpooling the data (sample,22,800) -> (sample,22,800/sub_sample)
    X_max = np.max(X.reshape(X.shape[0], X.shape[1], -1, 2), axis=3)
    
    total_X = X_max
    print('Shape of X after maxpooling:', total_X.shape)
    return total_X

In [4]:
## Preprocessing the dataset

# Define the model, loss function, and optimizer
x_train, x_valid, y_train, y_valid = train_test_split(X_train_valid, y_train_valid, test_size=0.1)

x_train, y_train = train_data_prep(x_train, y_train, 2, 2, True)
x_valid, y_valid = train_data_prep(x_valid, y_valid, 2, 2, True)
x_test = test_data_prep(X_test)
print('Shape of training set:',x_train.shape)
print('Shape of validation set:',x_valid.shape)
print('Shape of training labels:',y_train.shape)
print('Shape of validation labels:',y_valid.shape)

# Converting the labels to categorical variables for multiclass classification
y_train = to_categorical(y_train, 4)
y_valid = to_categorical(y_valid, 4)
y_test = to_categorical(y_test, 4)
print('Shape of training labels after categorical conversion:',y_train.shape)
print('Shape of validation labels after categorical conversion:',y_valid.shape)
print('Shape of test labels after categorical conversion:',y_test.shape)

Shape of X after trimming: (1903, 22, 800)
Shape of X after maxpooling: (1903, 22, 400)
Shape of X after averaging + noise and concatenating: (3806, 22, 400)
Shape of X after subsampling and concatenating: (7612, 22, 400)
Shape of Y: (7612,)
Shape of X after trimming: (212, 22, 800)
Shape of X after maxpooling: (212, 22, 400)
Shape of X after averaging + noise and concatenating: (424, 22, 400)
Shape of X after subsampling and concatenating: (848, 22, 400)
Shape of Y: (848,)
Shape of X after trimming: (443, 22, 800)
Shape of X after maxpooling: (443, 22, 400)
Shape of training set: (7612, 22, 400)
Shape of validation set: (848, 22, 400)
Shape of training labels: (7612,)
Shape of validation labels: (848,)
Shape of training labels after categorical conversion: (7612, 4)
Shape of validation labels after categorical conversion: (848, 4)
Shape of test labels after categorical conversion: (443, 4)


In [14]:
# Adding width of the segment to be 1
x_train_cnn = x_train.reshape(x_train.shape[0], x_train.shape[1], x_train.shape[2], 1)
x_valid_cnn = x_valid.reshape(x_valid.shape[0], x_valid.shape[1], x_train.shape[2], 1)
x_test_cnn = x_test.reshape(x_test.shape[0], x_test.shape[1], x_test.shape[2], 1)
print('Shape of training set after adding width info:',x_train.shape)
print('Shape of validation set after adding width info:',x_valid.shape)
print('Shape of test set after adding width info:',x_test.shape)

# Reshaping the training and validation dataset
x_train = np.swapaxes(x_train, 1,3)
x_train = np.swapaxes(x_train, 1,2)
x_valid = np.swapaxes(x_valid, 1,3)
x_valid = np.swapaxes(x_valid, 1,2)
x_test = np.swapaxes(x_test, 1,3)
x_test = np.swapaxes(x_test, 1,2)
print('Shape of training set after dimension reshaping:',x_train_cnn.shape)
print('Shape of validation set after dimension reshaping:',x_valid_cnn.shape)
print('Shape of test set after dimension reshaping:',x_test_cnn.shape)

Shape of training set after adding width info: (7612, 400, 22)
Shape of validation set after adding width info: (848, 400, 22)
Shape of test set after adding width info: (443, 400, 22)
Shape of training set after dimension reshaping: (7612, 400, 1, 22)
Shape of validation set after dimension reshaping: (848, 400, 1, 22)
Shape of test set after dimension reshaping: (443, 400, 1, 22)


In [7]:
# Reshaping the training and validation dataset
x_train = np.swapaxes(x_train, 1,2)
x_valid = np.swapaxes(x_valid, 1,2)
x_test = np.swapaxes(x_test, 1,2)
print('Shape of training set after dimension reshaping:',x_train.shape)
print('Shape of validation set after dimension reshaping:',x_valid.shape)
print('Shape of test set after dimension reshaping:',x_test.shape)

Shape of training set after dimension reshaping: (7612, 400, 22)
Shape of validation set after dimension reshaping: (848, 400, 22)
Shape of test set after dimension reshaping: (443, 400, 22)


In [17]:
cnn_layers = 5
gru_layers = 3
gru_size= 60

def create_model(cnn_layers, gru_layers, gru_size):
    filters = 25
    kernel_size = (7,7)
    dropout = 0.6
    l2_lambda = 0.005
    
    gru_model = Sequential()
    for i in range(gru_layers - 1):
        gru_model.add(GRU(gru_size, dropout=0.4, return_sequences=True, kernel_regularizer=l2(l2_lambda)))
    gru_model.add(GRU(gru_size, dropout=0.4, kernel_regularizer=l2(l2_lambda)))
    gru_model.add(Flatten())
    
    cnn_model = Sequential()
    cnn_model.add(Conv2D(filters=filters, kernel_size=kernel_size, padding='same', activation='elu', input_shape=(400,1,22), kernel_regularizer=l2(l2_lambda)))
    cnn_model.add(MaxPooling2D(pool_size=(3,1), padding='same')) 
    cnn_model.add(BatchNormalization())
    cnn_model.add(Dropout(dropout))
    filters *= 2
    
    for i in range(cnn_layers - 1):
        cnn_model.add(Conv2D(filters=filters, kernel_size=kernel_size, padding='same', activation='elu', kernel_regularizer=l2(l2_lambda)))
        cnn_model.add(MaxPooling2D(pool_size=(3,1), padding='same'))
        cnn_model.add(BatchNormalization())
        cnn_model.add(Dropout(dropout))
        filters *= 2
    
    cnn_model.add(Flatten())
    
    gru_input = Input(shape=(400, 22))
    cnn_input = Input(shape=(400,1,22))
    gru_output = gru_model(gru_input)
    cnn_output = cnn_model(cnn_input)
    
    concatenated = Concatenate()([cnn_output, gru_output])
    
    x = Dense(128, activation='relu')(concatenated)
    x = Dropout(0.3)(x)
    output = Dense(4, activation='softmax')(x)
    
    # Create a new model with the concatenated output
    cnn_gru_model = Model(inputs=[cnn_input, gru_input], outputs=output)

    return cnn_gru_model

In [18]:
# Model parameters
learning_rate = 1e-3
epochs = 100
cnn_gru_optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

# Define early stopping criteria
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
# Add early stopping callback to the list of callbacks
callbacks = [early_stopping]

cnn_gru_model = create_model(cnn_layers, gru_layers, gru_size)

cnn_gru_model.compile(loss='categorical_crossentropy',
                 optimizer=cnn_gru_optimizer,
                 metrics=['accuracy'])

# Training and validating the model
cnn_gru_model_results = cnn_gru_model.fit([x_train_cnn, x_train],
             y_train,
             batch_size=64,
             epochs=epochs,
             validation_data=([x_valid_cnn, x_valid], y_valid),
             callbacks=callbacks, verbose=True)

## Testing the hybrid CNN-GRU model
cnn_gru_score = cnn_gru_model.evaluate([x_test_cnn, x_test], y_test, verbose=0)
print('Test accuracy of the hybrid CNN-GRU model:', cnn_gru_score[1])

Epoch 1/100
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 510ms/step - accuracy: 0.2511 - loss: 4.0881 - val_accuracy: 0.2795 - val_loss: 2.2736
Epoch 2/100
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 509ms/step - accuracy: 0.2868 - loss: 2.3133 - val_accuracy: 0.3443 - val_loss: 2.0554
Epoch 3/100
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 511ms/step - accuracy: 0.3154 - loss: 2.0525 - val_accuracy: 0.3644 - val_loss: 1.9248
Epoch 4/100
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 508ms/step - accuracy: 0.3407 - loss: 1.9336 - val_accuracy: 0.3656 - val_loss: 1.8416
Epoch 5/100
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 508ms/step - accuracy: 0.3895 - loss: 1.8286 - val_accuracy: 0.3715 - val_loss: 1.8431
Epoch 6/100
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 511ms/step - accuracy: 0.4104 - loss: 1.7484 - val_accuracy: 0.3608 - val_loss: 1.7754
Epoc