In [8]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.keras import  Sequential
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.layers import Dropout, Dense, BatchNormalization, LSTM
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, KFold
from keras import backend as K
import bnci_utils as utils

In [9]:
# Number of participant files to process (P01 - P18)
num_participants = 18

# Folder with one .npz file per participant, named P01.npz, P02.npz, ...
dataset_path = 'dataset_result'
files = [
    os.path.join(dataset_path, f'P{participant:02d}.npz')
    for participant in range(1, num_participants + 1)
]

# Seed numpy and tensorflow with the same value to produce consistent results;
# the same seed is reused later for the train/test split
seed = 2
np.random.seed(seed)
tf.random.set_seed(seed)

In [10]:
# Function to transform the dataset to be usable for the neural network
def transform_dataset(features, labels):
    """One hot encode the labels and reshape the features for the network.

    The labels are turned into a single column and one hot encoded with a
    ``OneHotEncoder`` fitted on exactly these labels, so the number of output
    columns equals the number of distinct label values passed in.
    The features keep their first axis, get a second axis of fixed size 14
    and a last axis whose size is inferred from the remaining elements.

    :param features: numpy array whose first axis indexes the samples
    :param labels: numpy array with the class labels
    :return: tuple ``(features, labels)`` - the reshaped features and the
        dense one hot encoded labels
    """
    # OneHotEncoder works on 2D input, hence the (n, 1) column
    label_column = labels.reshape((-1, 1))
    encoder = OneHotEncoder()
    encoded_labels = encoder.fit_transform(label_column).toarray()

    sample_count = features.shape[0]
    reshaped_features = features.reshape((sample_count, 14, -1))

    return reshaped_features, encoded_labels

In [11]:
# Definition of the lstm model
def lstm_model():
    """Build the LSTM classifier used for a single participant.

    The network stacks two LSTM layers (124 units each, ReLU activation), each
    followed by dropout and batch normalisation, then a 64 unit dense layer with
    dropout and a 2 unit softmax layer named ``output_layer``.
    The first layer fixes the shape of one input sample to ``(14, 360)``.

    :return: the assembled ``Sequential`` model; it is not compiled here
    """
    model = Sequential()

    # First recurrent block; it returns the whole sequence so the second LSTM can consume it
    model.add(LSTM(124, input_shape=(14, 360), activation=tf.nn.relu, return_sequences=True))
    model.add(Dropout(0.4))
    model.add(BatchNormalization())

    # Second recurrent block; only its last output is passed on
    model.add(LSTM(124, activation=tf.nn.relu))
    model.add(Dropout(0.3))
    model.add(BatchNormalization())

    # Fully connected head ending in a two-way softmax
    model.add(Dense(64, activation=tf.nn.relu))
    model.add(Dropout(0.2))
    model.add(Dense(2, activation=tf.nn.softmax, name='output_layer'))

    return model

In [12]:
# Function to run the network with training data and testing data
def run_network(model, x_train, y_train, x_test, y_test, iteration, epochs=30):
    """Compile and train ``model``, then evaluate it on the test data.

    :param model: Keras model to compile and fit (modified in place)
    :param x_train: training features
    :param y_train: training labels
    :param x_test: testing features
    :param y_test: testing labels
    :param iteration: number put in front of the ``'. LSTM'`` label handed to
        ``utils.get_metrics_keras``
    :param epochs: maximum number of training epochs
    :return: dictionary with the keys ``accuracy``, ``precision``, ``recall``,
        ``f1`` and ``confusion_matrix``
    """
    # No validation data is passed to fit(), so early stopping watches the
    # training loss and restores the weights of the epoch where it was lowest
    early_stopping = EarlyStopping(monitor='loss', patience=8, restore_best_weights=True, verbose=1)

    # NOTE(review): binary cross-entropy is used on a 2 unit softmax output with
    # one hot labels - confirm this is intended rather than categorical cross-entropy
    model.compile(
        loss=tf.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=['accuracy'],
    )

    # Train the model
    model.fit(x_train, y_train, epochs=epochs, callbacks=[early_stopping])

    # Get the results from the project helper
    run_label = f'{iteration}. LSTM'
    accuracy, precision, recall, f1, confusion_matrix = utils.get_metrics_keras(model, x_test, y_test, run_label)

    # Return the results as a dictionary
    return dict(
        accuracy=accuracy,
        precision=precision,
        recall=recall,
        f1=f1,
        confusion_matrix=confusion_matrix,
    )

In [13]:
def run_individual(file, particip_num, model, test_size=0.25, epochs=30):
    """Train and evaluate ``model`` on the data of a single participant.

    :param file: path to the participant's ``.npz`` file; it must contain the
        arrays ``features`` and ``labels``
    :param particip_num: participant number, forwarded to ``run_network`` as
        the iteration number
    :param model: Keras model to train (see ``lstm_model``)
    :param test_size: fraction of the samples held out for testing
    :param epochs: maximum number of training epochs
    :return: the metrics dictionary produced by ``run_network``
    """
    print('Running ANN for file:', file)

    # np.load returns an NpzFile for .npz archives, which keeps the underlying file
    # open until it is closed; read both arrays inside the context manager so the
    # handle is released right away instead of leaking one per participant
    with np.load(file) as dataset:
        features, labels = dataset['features'], dataset['labels']

    # transform numpy arrays
    features, labels = transform_dataset(features, labels)
    # split to training and testing data (shuffled, reproducible through the global seed)
    x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=test_size, random_state=seed,
                                                        shuffle=True)
    print('X (train) shape:', x_train.shape, 'Y (train) shape:', y_train.shape)
    print('X (test) shape:', x_test.shape, 'Y (test) shape:', y_test.shape)

    return run_network(model, x_train, y_train, x_test, y_test, particip_num, epochs)

In [14]:
# Train and evaluate a freshly built LSTM for every participant file.
# `results` is rebuilt from scratch here, so re-running the cell does not
# accumulate entries from a previous run.
results = []
for particip_num, file in enumerate(files, start=1):  # participants are numbered from 1
    results.append(run_individual(file=file, particip_num=particip_num, model=lstm_model()))
    # Reset the Keras backend state before the next participant's model is built
    K.clear_session()

Running ANN for file: dataset_result\P01.npz
X (train) shape: (90, 14, 360) Y (train) shape: (90, 2)
X (test) shape: (30, 14, 360) Y (test) shape: (30, 2)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
1. LSTM: accuracy = 63.33333333333333%, precision = 0.6842105263157895, recall = 0.7222222222222222, f1 = 0.7027027027027027
Confusion matrix:
[[ 6  6]
 [ 5 13]]
Running ANN for file: dataset_result\P02.npz
X (train) shape: (126, 14, 360) Y (train) shape: (126, 2)
X (test) shape: (42, 14, 360) Y (test) shape: (42, 2)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoc

In [15]:
# Create a pandas dataframe with the metrics of each participant.
# The participant numbers are derived from the number of collected results
# (results are appended in file order P01, P02, ...), so the columns always
# have matching lengths even when only part of the participants was processed.
metric_names = ['accuracy', 'precision', 'recall', 'f1']
df = pd.DataFrame({
    'participant': list(range(1, len(results) + 1)),
    **{name: [result[name] for result in results] for name in metric_names},
})

df

Unnamed: 0,participant,accuracy,precision,recall,f1
0,1,0.633333,0.684211,0.722222,0.702703
1,2,0.452381,0.454545,0.75,0.566038
2,3,0.595238,0.666667,0.380952,0.484848
3,4,0.404762,0.391304,0.45,0.418605
4,5,0.404762,0.368421,0.35,0.358974
5,6,0.52381,0.565217,0.565217,0.565217
6,7,0.47619,0.48,0.571429,0.521739
7,8,0.547619,0.52,0.65,0.577778
8,9,0.52381,0.52381,0.52381,0.52381
9,10,0.571429,0.555556,0.714286,0.625


In [22]:
# Output folder (created when missing) and file name for the per-participant metrics
data_output_folder = 'lstm_individuals'
iteration_data_file_name = 'lstm_individuals_data.xlsx'
os.makedirs(data_output_folder, exist_ok=True)

# Write the per-participant dataframe to an Excel file inside the output folder
iteration_data_path = os.path.join(data_output_folder, iteration_data_file_name)
df.to_excel(iteration_data_path)

'Data from individuals successfully saved.'

'Data from individuals successfully saved.'

In [23]:
# Create a single-row dataframe with summary statistics over all participants
accuracy_values = df['accuracy']
precision_values = df['precision']
recall_values = df['recall']
f1_values = df['f1']

# One record -> one row; the key order defines the column order
df_stats = pd.DataFrame([{
    'average_accuracy': accuracy_values.mean(),
    'max_accuracy': accuracy_values.max(),
    'accuracy_std': accuracy_values.std(),
    'average_precision': precision_values.mean(),
    'max_precision': precision_values.max(),
    'average_recall': recall_values.mean(),
    'max_recall': recall_values.max(),
    'average_f1': f1_values.mean(),
    'max_f1': f1_values.max(),
}])

df_stats


Unnamed: 0,average_accuracy,max_accuracy,accuracy_std,average_precision,max_precision,average_recall,max_recall,average_f1,max_f1
0,0.500794,0.633333,0.074028,0.508948,0.684211,0.527154,0.75,0.509401,0.702703


In [24]:
# Save the summary statistics next to the per-participant data
stats_file_name = 'lstm_individuals_stats.xlsx'
stats_path = os.path.join(data_output_folder, stats_file_name)
df_stats.to_excel(stats_path)

'Stats successfully saved.'

'Stats successfully saved.'

In [25]:
# Print the confusion matrices collected in `results` (one entry per participant)
# using the project helper from bnci_utils
utils.print_confusion_matrices(results)

Confusion matrices for the ANN:
[[ 6  6]
 [ 5 13]] 

[[ 4 18]
 [ 5 15]] 

[[17  4]
 [13  8]] 

[[ 8 14]
 [11  9]] 

[[10 12]
 [13  7]] 

[[ 9 10]
 [10 13]] 

[[ 8 13]
 [ 9 12]] 

[[10 12]
 [ 7 13]] 

[[11 10]
 [10 11]] 

[[ 9 12]
 [ 6 15]] 

[[15  7]
 [12  8]] 

[[15  5]
 [13  9]] 

[[13 10]
 [ 8 11]] 

[[ 9 11]
 [13  9]] 

[[ 9 12]
 [10 11]] 

[[ 9 10]
 [13 10]] 

[[ 6 15]
 [11 10]] 

[[ 8 15]
 [ 8 11]] 

