In [86]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.layers import Dropout, Dense, BatchNormalization, LSTM
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, KFold
from keras import backend as K
import pandas as pd
from tensorflow.python.keras.models import Sequential

import bnci_utils as utils

In [87]:
# Archive file for every subject group this notebook can be run on:
#   entire_dataset - data of all subjects
#   female_dataset - data of the female subjects only
#   male_dataset   - data of the male subjects only
datasets = dict(
    entire_dataset='entire_dataset.npz',
    female_dataset='dataset_female_gender.npz',
    male_dataset='dataset_male_gender.npz',
)

# The archives are looked up inside the dataset_result folder; this run uses the male subset.
dataset_path = os.path.join('dataset_result', datasets['male_dataset'])

# Folder that receives the statistics produced by the simulation.
# NOTE(review): the folder is named after the entire dataset although the male subset
# is loaded above - confirm that this is intended.
data_output_folder = 'entire_dataset_output'
# Name of the Excel file holding the metrics of each iteration.
iteration_data_file_name = 'lstm_male_subj.xlsx'
# Name of the Excel file holding the aggregated statistics of the simulation
# (e.g. max and average accuracy, max and average recall, ...).
iteration_stats_file_name = 'lstm_male_subj_stats.xlsx'

In [88]:
# Load the feature array and the label array from the configured dataset archive
features, labels = utils.load_dataset(dataset_path)

# Last expression of the cell: shows the shapes of both arrays as a sanity check
f'Features shape: {features.shape}, labels shape: {labels.shape}'

'Features shape: (1296, 14, 36, 10), labels shape: (1296,)'

In [89]:
# Reshape the dataset for TensorFlow only.
# Every axis after the second one is flattened, so each sample becomes a 2-D
# (features.shape[1], -1) matrix. Applying this to already flattened features is a
# no-op, which keeps the cell safe to re-run.
features = features.reshape((features.shape[0], features.shape[1], -1))

# One-hot encode the labels only while they are still a flat vector / single column.
# Without this guard a second run of the cell would flatten the (n, 2) one-hot matrix
# into 2n "labels" and re-encode them, corrupting the targets.
if labels.ndim == 1 or labels.shape[1] == 1:
    labels = OneHotEncoder().fit_transform(labels.reshape((-1, 1))).toarray()

f'features shape: {features.shape}, labels_shape: {labels.shape}'

'features shape: (1296, 14, 360), labels_shape: (1296, 2)'

In [90]:
# Pin the random seed of both TensorFlow and NumPy so that repeated runs give a consistent result
seed = 1
tf.random.set_seed(seed)
np.random.seed(seed)

In [91]:
# Function to create the LSTM model
def lstm_model(input_shape=(14, 360), num_classes=2):
    """Build the (uncompiled) stacked LSTM classifier.

    The network consists of two 124-unit LSTM layers, each followed by dropout and
    batch normalization, a 64-unit dense layer with dropout, and a softmax output
    layer named ``output_layer``.

    Args:
        input_shape: Shape of a single sample without the batch axis. The default
            ``(14, 360)`` matches the reshaped features used in this notebook.
        num_classes: Number of units of the softmax output layer. The default of 2
            matches the two-column one-hot labels used in this notebook.

    Returns:
        A ``Sequential`` model; compiling and fitting it is left to the caller.
    """
    model = Sequential([
        # First recurrent layer returns the full sequence so that it can feed the second LSTM
        LSTM(124, input_shape=input_shape, activation=tf.nn.relu, return_sequences=True),
        Dropout(0.4),
        BatchNormalization(),
        # Second recurrent layer returns only its last output
        LSTM(124, activation=tf.nn.relu),
        Dropout(0.3),
        BatchNormalization(),
        Dense(64, activation=tf.nn.relu),
        Dropout(0.2),
        Dense(num_classes, activation=tf.nn.softmax, name='output_layer')
    ])

    return model

In [92]:
def run_network(model, train, valid, test, iteration, epochs=30):
    """Compile and fit ``model``, then compute its metrics on the test data.

    Args:
        model: Uncompiled Keras model to train.
        train: Sequence whose first two items are the training inputs and targets.
        valid: Sequence whose first two items are the validation inputs and targets.
        test: Sequence whose first two items are the test inputs and targets.
        iteration: Run number; used as prefix of the label handed to the metrics helper.
        epochs: Upper bound on the number of training epochs.

    Returns:
        Dict with the keys 'accuracy', 'precision', 'recall', 'f1' and
        'confusion_matrix', filled with the values returned by
        ``utils.get_metrics_keras``.
    """
    train_inputs, train_targets = train[0], train[1]
    val_inputs, val_targets = valid[0], valid[1]
    test_inputs, test_targets = test[0], test[1]

    # NOTE(review): lstm_model ends in a two-unit softmax fed with one-hot targets;
    # categorical cross-entropy would be the conventional pairing - confirm that
    # binary cross-entropy is intended here.
    optimizer = tf.keras.optimizers.Adam()
    loss_fn = tf.losses.BinaryCrossentropy()
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # Stop training once the callback's default monitored quantity has not improved
    # for 8 epochs and roll the model back to the best weights seen so far.
    early_stopping = EarlyStopping(patience=8, verbose=1, restore_best_weights=True)

    # Fit on the training data, validating on the validation data after each epoch
    model.fit(
        train_inputs,
        train_targets,
        epochs=epochs,
        validation_data=(val_inputs, val_targets),
        callbacks=[early_stopping],
    )

    # Compute the statistics on the test data
    acc, prec, rec, f1_score, conf_mat = utils.get_metrics_keras(
        model, test_inputs, test_targets, f'{iteration}. LSTM '
    )

    return dict(
        accuracy=acc,
        precision=prec,
        recall=rec,
        f1=f1_score,
        confusion_matrix=conf_mat,
    )

In [93]:
results = []
num_splits = 10

# Run 10-fold CV in the same manner as in the case of the CNN:
# 25 % of the data is held out once as the test set and every fold is scored on it,
# while the remaining 75 % is split into training / validation parts by KFold.
# random_state pins the split, so re-running only this cell reproduces it instead of
# depending on how far the global NumPy generator has advanced.
x_train, x_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.25, shuffle=True, random_state=seed
)
for iteration, (train_idx, val_idx) in enumerate(KFold(n_splits=num_splits).split(x_train), start=1):
    x_train_curr, y_train_curr = x_train[train_idx], y_train[train_idx]  # get the current training data
    x_val, y_val = x_train[val_idx], y_train[val_idx]  # get the current validation data

    model = lstm_model()  # create a fresh, untrained LSTM model for this fold

    # Run the network and save the results
    result = run_network(model, (x_train_curr, y_train_curr), (x_val, y_val), (x_test, y_test), iteration)
    results.append(result)

    # Release the Keras state held by the finished model before the next fold.
    # The model is built from TensorFlow's bundled Keras, so the session is cleared
    # through tf.keras.backend rather than through the standalone keras package.
    tf.keras.backend.clear_session()
    del model

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Restoring model weights from the end of the best epoch.
Epoch 00016: early stopping
1. LSTM : accuracy = 46.2962962962963%, precision = 0.5, recall = 0.5344827586206896, f1 = 0.5166666666666667
Confusion matrix:
[[57 93]
 [81 93]]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Restoring model weights from the end of the best epoch.
Epoch 00009: early stopping
2. LSTM : accuracy = 50.92592592592593%, precision = 0.5496688741721855, recall = 0.47701149425287354, f1 = 0.5107692307692308
Confusion matrix:
[[82 68]
 [91 83]]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21

In [94]:
# Gather the metrics of every iteration into one pandas dataframe (one row per iteration)
df = pd.DataFrame({
    'iterations': list(range(1, num_splits + 1)),
    **{
        metric: [fold_result[metric] for fold_result in results]
        for metric in ('accuracy', 'precision', 'recall', 'f1')
    },
})

df

Unnamed: 0,iterations,accuracy,precision,recall,f1
0,1,0.462963,0.5,0.534483,0.516667
1,2,0.509259,0.549669,0.477011,0.510769
2,3,0.515432,0.53527,0.741379,0.621687
3,4,0.493827,0.580645,0.206897,0.305085
4,5,0.530864,0.58209,0.448276,0.506494
5,6,0.444444,0.474576,0.321839,0.383562
6,7,0.484568,0.52,0.522989,0.52149
7,8,0.540123,0.552743,0.752874,0.63747
8,9,0.472222,0.514563,0.304598,0.382671
9,10,0.469136,0.505814,0.5,0.50289


In [95]:
# Make sure the output folder exists; an already existing folder is not an error
os.makedirs(data_output_folder, exist_ok=True)

# Save the per-iteration dataframe as an Excel file inside the output folder
df.to_excel(os.path.join(data_output_folder, iteration_data_file_name))

In [96]:
# Summarise the per-iteration metrics in a single-row dataframe.
# Each entry reads: output column, source column of df, Series reduction to apply.
df_stats = pd.DataFrame({
    column: [getattr(df[metric], reducer)()]
    for column, metric, reducer in (
        ('average_accuracy', 'accuracy', 'mean'),
        ('max_accuracy', 'accuracy', 'max'),
        ('accuracy_std', 'accuracy', 'std'),
        ('average_precision', 'precision', 'mean'),
        ('max_precision', 'precision', 'max'),
        ('average_recall', 'recall', 'mean'),
        ('max_recall', 'recall', 'max'),
        ('average_f1', 'f1', 'mean'),
        ('max_f1', 'f1', 'max'),
    )
})

df_stats

Unnamed: 0,average_accuracy,max_accuracy,accuracy_std,average_precision,max_precision,average_recall,max_recall,average_f1,max_f1
0,0.492284,0.540123,0.03118,0.531537,0.58209,0.481034,0.752874,0.488878,0.63747


In [97]:
# Save the aggregated statistics dataframe as an Excel file inside the output folder.
# The folder itself is created by the cell that saves the per-iteration dataframe.
df_stats.to_excel(os.path.join(data_output_folder, iteration_stats_file_name))

In [98]:
# Print the confusion matrix of every CV iteration.
# NOTE(review): the results are passed through the helper's `ann` keyword, so the printed
# heading reads "ANN" although the model is an LSTM - confirm whether the helper
# offers a better-suited keyword.
utils.print_confusion_matrices(ann=results)

Confusion matrices for the ANN:
[[57 93]
 [81 93]] 

[[82 68]
 [91 83]] 

[[ 38 112]
 [ 45 129]] 

[[124  26]
 [138  36]] 

[[94 56]
 [96 78]] 

[[ 88  62]
 [118  56]] 

[[66 84]
 [83 91]] 

[[ 44 106]
 [ 43 131]] 

[[100  50]
 [121  53]] 

[[65 85]
 [87 87]] 

