
The notebook requires a preprocessed dataset. The dataset can be produced either by executing the notebook
**data_preprocessing.ipynb** or by running the **data_preprocessing.py** Python script, which can be executed like so:
```
python data_preprocessing.py
```

Also, make sure that the **bnci_utils.py** file is in the same directory as this notebook, as it contains part of the
functionality. The resulting preprocessed data files should not be renamed unless you also change their names in the
notebook.

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.keras import  Sequential
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.layers import Dropout, Dense, BatchNormalization, LSTM
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, KFold
from keras import backend as K
import bnci_utils as utils

The names of the output files can be changed - change the value of the **iteration_data_file_name** variable to
rename the output file for each iteration, and the value of the **iteration_stats_file_name** variable to rename the
output file for statistics from the entire simulation. Note that both names **need** to have the .xlsx extension. E.g.:

```
iteration_data_file_name = 'simulation_output.xlsx'
```

In [2]:
data_output_folder = 'bnci_horizon_output' # output folder for the statistics from the simulation
iteration_data_file_name = 'lstm_individuals_data.xlsx' # file name of the Excel file with data from each iteration (participant)
iteration_stats_file_name = 'lstm_individuals_stats.xlsx'  # file name of the Excel file with statistics from the whole
                                                                    # simulation (i.e., max and average accuracy, max and average recall, ...)

# Create the output folder if it does not exist yet; exist_ok=True keeps re-runs of this cell from failing.
os.makedirs(data_output_folder, exist_ok=True)

In [3]:
# Preprocessed input: one .npz archive per participant, named P01.npz - P18.npz.
num_participants = 18
dataset_path = 'dataset_result'
files = [
    os.path.join(dataset_path, f'P{participant:02d}.npz')
    for participant in range(1, num_participants + 1)
]

# Fix the NumPy and TensorFlow seeds so that repeated runs produce consistent results.
seed = 2
np.random.seed(seed)
tf.random.set_seed(seed)

In [4]:
# Function to transform the dataset to be usable for the neural network - i.e one hot encode and reshape the dataset
def transform_dataset(features, labels):
    """Prepare one participant's arrays for the neural network.

    ``labels`` is turned into a single column and one-hot encoded (one output
    column per distinct value the encoder finds in it). ``features`` keeps its
    first axis, gets a second axis of length 14, and the remaining values are
    folded into the last axis.

    Returns the tuple ``(features, labels)`` in their transformed form.
    """
    label_column = labels.reshape((-1, 1))  # the encoder is fed a 2-D column
    encoder = OneHotEncoder()
    one_hot_labels = encoder.fit_transform(label_column).toarray()  # dense array

    n_samples = features.shape[0]
    reshaped_features = features.reshape((n_samples, 14, -1))

    return reshaped_features, one_hot_labels

In [5]:
# Definition of the lstm model
def lstm_model():
    """Build the (not yet compiled) LSTM classifier.

    Architecture, in order: two ReLU-activated LSTM layers of 124 units (the
    first one returns the full sequence so the second can consume it), a
    64-unit ReLU dense layer and a 2-unit softmax layer named
    ``output_layer``; each of the first three is followed by dropout
    (0.4, 0.3 and 0.2 respectively). The expected per-sample input shape is
    ``(14, 360)``.

    Returns the assembled ``Sequential`` model.
    """
    model = Sequential()

    # Recurrent part.
    model.add(LSTM(124, input_shape=(14, 360), activation=tf.nn.relu, return_sequences=True))
    model.add(Dropout(0.4))
    model.add(LSTM(124, activation=tf.nn.relu))
    model.add(Dropout(0.3))

    # Classification head.
    model.add(Dense(64, activation=tf.nn.relu))
    model.add(Dropout(0.2))
    model.add(Dense(2, activation=tf.nn.softmax, name='output_layer'))

    return model

In [6]:
# Function to run the network with training data and testing data
def run_network(model, x_train, y_train, x_test, y_test, iteration, epochs=30):
    """Compile, train and evaluate ``model``.

    The model is compiled with the Adam optimizer, binary cross-entropy loss
    and the accuracy metric, then fitted on the training data for at most
    ``epochs`` epochs. Training stops early when the training loss has not
    improved for 8 epochs, and the best weights are restored. The test data is
    then passed to ``utils.get_metrics_keras`` together with the label
    ``'<iteration>. LSTM'``.

    Returns a dictionary with the keys ``accuracy``, ``precision``,
    ``recall``, ``f1`` and ``confusion_matrix``.
    """
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )

    # Early stopping watches the *training* loss; no validation split is used here.
    early_stopping = EarlyStopping(monitor='loss', patience=8, restore_best_weights=True, verbose=1)
    model.fit(x_train, y_train, epochs=epochs, callbacks=[early_stopping])

    run_label = f'{iteration}. LSTM'
    accuracy, precision, recall, f1, confusion_matrix = utils.get_metrics_keras(
        model, x_test, y_test, run_label
    )

    return dict(
        accuracy=accuracy,
        precision=precision,
        recall=recall,
        f1=f1,
        confusion_matrix=confusion_matrix,
    )

In [7]:
def run_individual(file, particip_num, model, test_size=0.25, epochs=30):
    """Train and evaluate ``model`` on the preprocessed data of one participant.

    Parameters
    ----------
    file
        Path of the participant's ``.npz`` archive; it must contain the arrays
        ``features`` and ``labels``.
    particip_num
        Participant number, forwarded to ``run_network`` to label the results.
    model
        Keras model; it is compiled and trained by ``run_network``.
    test_size
        Share of the samples held out for testing (passed to
        ``train_test_split``).
    epochs
        Maximum number of training epochs (passed to ``run_network``).

    Returns
    -------
    dict
        The metrics dictionary produced by ``run_network``.
    """
    print('Running ANN for file:', file)

    # np.load on an .npz returns a lazily-read archive that holds the file open;
    # the context manager closes the handle as soon as both arrays are in memory.
    with np.load(file) as dataset:
        features, labels = dataset['features'], dataset['labels']

    # one-hot encode the labels and reshape the features for the network
    features, labels = transform_dataset(features, labels)

    # shuffled split into training and testing data, reproducible through the global seed
    x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=test_size, random_state=seed,
                                                        shuffle=True)
    print('X (train) shape:', x_train.shape, 'Y (train) shape:', y_train.shape)
    print('X (test) shape:', x_test.shape, 'Y (test) shape:', y_test.shape)

    return run_network(model, x_train, y_train, x_test, y_test, particip_num, epochs)

In [8]:
# Train and evaluate a freshly built model for every participant file.
# enumerate(..., start=1) supplies the 1-based participant number, so no hand-maintained counter is needed.
results = []
for particip_num, file in enumerate(files, start=1):
    results.append(run_individual(file=file, particip_num=particip_num, model=lstm_model()))
    # Reset the Keras global state before the next participant's model is built.
    K.clear_session()

Running ANN for file: dataset_result\P01.npz
X (train) shape: (90, 14, 360) Y (train) shape: (90, 2)
X (test) shape: (30, 14, 360) Y (test) shape: (30, 2)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Restoring model weights from the end of the best epoch.
Epoch 00028: early stopping
1. LSTM: accuracy = 60.0%, precision = 0.75, recall = 0.5, f1 = 0.6
Confusion matrix:
[[9 3]
 [9 9]]
Running ANN for file: dataset_result\P02.npz
X (train) shape: (126, 14, 360) Y (train) shape: (126, 2)
X (test) shape: (42, 14, 360) Y (test) shape: (42, 2)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch

In [9]:
# Create a pandas dataframe with the metrics from each iteration (participant).
# Participant numbers are derived from the results that were actually collected, so the frame
# can still be built when `results` holds fewer entries than `num_participants`
# (column lengths always match).
metric_columns = ['accuracy', 'precision', 'recall', 'f1']
df = pd.DataFrame(
    [
        {'participant': participant, **{metric: result[metric] for metric in metric_columns}}
        for participant, result in enumerate(results, start=1)
    ],
    # explicit columns keep the layout stable even when `results` is empty
    columns=['participant'] + metric_columns,
)

df

Unnamed: 0,participant,accuracy,precision,recall,f1
0,1,0.6,0.75,0.5,0.6
1,2,0.547619,0.517241,0.75,0.612245
2,3,0.52381,0.526316,0.47619,0.5
3,4,0.428571,0.333333,0.2,0.25
4,5,0.47619,0.444444,0.4,0.421053
5,6,0.452381,0.5,0.391304,0.439024
6,7,0.47619,0.473684,0.428571,0.45
7,8,0.571429,0.55,0.55,0.55
8,9,0.47619,0.481481,0.619048,0.541667
9,10,0.357143,0.333333,0.285714,0.307692


In [10]:
# Write the per-participant metrics to the configured Excel file in the output folder.
iteration_data_path = os.path.join(data_output_folder, iteration_data_file_name)
df.to_excel(iteration_data_path)
'Data from individuals successfully saved.'

'Data from individuals successfully saved.'

In [11]:
# Summarise the per-participant metrics in a single-row dataframe:
# mean and maximum of every metric, plus the standard deviation of the accuracy.
stats_row = {
    'average_accuracy': df['accuracy'].mean(),
    'max_accuracy': df['accuracy'].max(),
    'accuracy_std': df['accuracy'].std(),
    'average_precision': df['precision'].mean(),
    'max_precision': df['precision'].max(),
    'average_recall': df['recall'].mean(),
    'max_recall': df['recall'].max(),
    'average_f1': df['f1'].mean(),
    'max_f1': df['f1'].max(),
}
df_stats = pd.DataFrame([stats_row])

df_stats


Unnamed: 0,average_accuracy,max_accuracy,accuracy_std,average_precision,max_precision,average_recall,max_recall,average_f1,max_f1
0,0.489683,0.6,0.061184,0.488838,0.75,0.469007,0.75,0.473096,0.612245


In [12]:
# Write the summary statistics to the configured Excel file in the output folder.
iteration_stats_path = os.path.join(data_output_folder, iteration_stats_file_name)
df_stats.to_excel(iteration_stats_path)
'Stats successfully saved.'

NameError: name 'stats_file_name' is not defined

In [None]:
# Hand the collected per-participant results to the bnci_utils helper. Judging by its name it prints the
# confusion matrix stored in each result under 'confusion_matrix' — confirm against bnci_utils.py.
utils.print_confusion_matrices(results)