This notebook tests the CNN model from bnci_utils.py (`cnn_model()`) on the entire dataset. It was also used to test the
model from the P300 dataset (simply by changing the model selected via `model_fn` in the configuration cell).

Note that the results presented in the thesis might not be reproducible, since the network computation was done on a GPU,
which does not rely on the specified seed (i.e. the network might get initialized with different random weights on each run,
and only the splitting of the dataset is consistent).

To run this notebook, bnci_utils.py is necessary

In [57]:
import os
import numpy as np
import tensorflow as tf
import keras
import nengo_dl
from tensorflow.python.keras import Input, Model
import nengo
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.layers import Conv2D, Dropout, AveragePooling2D, Flatten, Dense, BatchNormalization, \
    Conv3D, MaxPooling2D, Conv1D, MaxPooling1D
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, KFold
from keras import backend as K
import pandas as pd
from sklearn import metrics

import bnci_utils as utils

In [58]:
# Datasets this notebook can be run with, keyed by a short name:
#   entire_dataset  - all data
#   female_subjects - data from female subjects
#   male_subjects   - data from male subjects
datasets = dict(
    entire_dataset='entire_dataset.npz',
    female_subjects='dataset_female_gender.npz',
    male_subjects='dataset_male_gender.npz',
)

# Dataset file used by the simulation
dataset_path = os.path.join('dataset_result', datasets['male_subjects'])

# Output path for statistics from the simulation
data_output_folder = 'entire_dataset_output_cnn'

# File name of the Excel file with data from each iteration
iteration_data_file_name = 'cnn_exp_10_fold_male_subj.xlsx'

# File name for statistics from the simulation
# (i.e. max and average accuracy, max and average recall...)
iteration_stats_file_name = 'cnn_exp_10_fold_male_subj_stats.xlsx'



# Networks that can be tested: 'default_cnn' is the originally tested network,
# 'p300_exp_cnn' is the CNN from the P300 experiment
models = dict(
    default_cnn=utils.cnn_model,
    p300_exp_cnn=utils.original_p300_model,
)

# Function that creates the model for the simulation
model_fn = models['default_cnn']

In [59]:
# Load the features and labels from the dataset file selected by dataset_path
features, labels = utils.load_dataset(dataset_path)

# Show the shapes of both as the cell output
f'Features shape: {features.shape}, labels shape: {labels.shape}'

'Features shape: (1296, 14, 36, 10), labels shape: (1296,)'

In [60]:
# Check the class balance: pick out the 'yes' and the 'no' labels separately
yes_mask = labels == 'yes'
no_mask = labels == 'no'
yes = labels[yes_mask]
no = labels[no_mask]

# Report the count and the percentage share of each class
f'yes: {yes.shape} ({(yes.shape[0]/labels.shape[0]) * 100}%), no: {no.shape} ({(no.shape[0]/labels.shape[0]) * 100}%)'

'yes: (672,) (51.85185185185185%), no: (624,) (48.148148148148145%)'

In [61]:
# Reshape the dataset with the helper from bnci_utils.
# NOTE: features and labels are overwritten here, so re-running this cell on its
# own passes the already reshaped data to reshape_dataset again - re-run the
# loading cell first.
features, labels = utils.reshape_dataset(features, labels)
f'Features shape: {features.shape}, labels shape: {labels.shape}'

'Features shape: (1296, 1, 5040), labels shape: (1296, 1, 2)'

In [62]:
# Seed the NumPy and the TensorFlow random generators with the same value for consistency
seed = 1
for seed_generator in (np.random.seed, tf.random.set_seed):
    seed_generator(seed)

In [63]:
# Hold out 25 % of the shuffled samples as the test set; the split is seeded
split = train_test_split(features, labels, test_size=0.25, random_state=seed, shuffle=True)
x_train, x_test, y_train, y_test = split

# Print the labels of the training and the test subset
for subset_labels in (y_train, y_test):
    print(subset_labels)

# Show the shapes of all four arrays as the cell output
(f'x_train shape: {x_train.shape}, y_train shape: {y_train.shape}, '
 f'x_test shape: {x_test.shape}, y_test shape: {y_test.shape}')

[[[1. 0.]]

 [[1. 0.]]

 [[0. 1.]]

 ...

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]]
[[[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[0. 1.]]

 [[1. 0.]]

 [[1. 0.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[0. 1.]]

 [[1. 0.]]

 [[1. 0.]]

 [[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

 [[1. 0.]]

 [[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[1. 0.]]

 [[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

'x_train shape: (972, 1, 5040), y_train shape: (972, 1, 2), x_test shape: (324, 1, 5040), y_test shape: (324, 1, 2)'

In [64]:
# Folder for the network parameters saved in each cross-validation iteration
params_output_path = 'cnn_all_samples_nengo_params'
os.makedirs(params_output_path, exist_ok=True)

# Print the model that is actually simulated. It is built with model_fn so the
# summary follows the model selected in the configuration; the previously
# hard-coded utils.cnn_model printed the default CNN even when the P300 model
# was selected.
model_fn(seed=seed).summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     [(None, 14, 360, 1)]      0         
_________________________________________________________________
conv2d (Conv2D)              (None, 14, 360, 32)       832       
_________________________________________________________________
dropout (Dropout)            (None, 14, 360, 32)       0         
_________________________________________________________________
average_pooling2d (AveragePo (None, 7, 180, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 5, 178, 64)        18496     
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 178, 64)        0         
_________________________________________________________________
average_pooling2d_1 (Average (None, 2, 89, 64)         0     

In [65]:
ann, snn = [], []  # data from each iteration for the analog and the spiking network

num_iterations = 10  # number of iterations (folds) in the cross-validation

# enumerate() supplies the 1-based iteration number together with each fold, so
# the number used for the parameter path and the logs cannot drift from the fold
folds = KFold(n_splits=num_iterations).split(x_train)
for iteration, (train, valid) in enumerate(folds, start=1):  # perform K-Fold CV
    print('Current iteration: ', iteration)
    x_train_curr, y_train_curr = x_train[train], y_train[train]  # get current training data
    x_val_curr, y_val_curr = x_train[valid], y_train[valid]  # get current validation data

    # parameters of this iteration get their own path; run_ann saves to it and
    # run_snn loads from it
    params_path = os.path.join(params_output_path, f'params_{iteration}')

    model = model_fn(seed=seed)  # create a fresh model for every fold

    # run ann: train on the current fold and evaluate on the held-out test set
    ann_result = utils.run_ann(model=model,
                               train=(x_train_curr, y_train_curr),
                               valid=(x_val_curr, y_val_curr),
                               test=(x_test, y_test),
                               optimizer=keras.optimizers.Adam(),
                               loss=keras.losses.BinaryCrossentropy(),
                               params_save_path=params_path,
                               iteration=iteration,
                               callbacks=[EarlyStopping(patience=8, restore_best_weights=True, verbose=1)],
                               num_epochs=30
                               )

    # run snn: evaluate the same model on the test set using the saved parameters
    snn_result = utils.run_snn(model,
                               x_test, y_test,
                               params_load_path=params_path,
                               iteration=iteration
                               )

    ann.append(ann_result)
    snn.append(snn_result)

    K.clear_session()  # clear session and delete model since it sometimes causes memory leaks
    del model

Current iteration:  1
Build finished in 0:00:01                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 00011: early stopping
1. CNN: accuracy = 53.125%, precision = 0.5412186379928315, recall = 0.8728323699421965, f1 = 0.668141592920354
Confusion matrix:
[[ 19 128]
 [ 22 151]]
Build finished in 0:00:01                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               
1. CNN (SNN conversion): accuracy = 48.125%, precision = 0.5321100917431193, recall = 0.3352601156069364, f1 = 0.41134751773049644
Confusion matrix:
[[ 96  51]
 [115  58]]
Current iteration:  2




In [66]:
# Build the dataframe with the results of each iteration from the collected
# ANN and SNN results
df = utils.create_data_df(ann, snn, num_iterations)

# Show the dataframe as the cell output
df

Unnamed: 0,iterations,ann_accuracy,ann_precision,ann_recall,ann_f1,snn_accuracy,snn_precision,snn_recall,snn_f1
0,1,0.53125,0.541219,0.872832,0.668142,0.48125,0.53211,0.33526,0.411348
1,2,0.45,0.487805,0.346821,0.405405,0.4875,0.535433,0.393064,0.453333
2,3,0.484375,0.528571,0.427746,0.472843,0.45,0.4,0.034682,0.06383
3,4,0.428125,0.455357,0.294798,0.357895,0.434375,0.454545,0.231214,0.306513
4,5,0.4625,0.503356,0.433526,0.465839,0.4875,0.526946,0.508671,0.517647
5,6,0.425,0.377778,0.098266,0.155963,0.48125,0.517073,0.612717,0.560847
6,7,0.484375,0.518692,0.641618,0.573643,0.45625,0.497238,0.520231,0.508475
7,8,0.540625,0.540625,1.0,0.701826,0.521875,0.533113,0.930636,0.677895
8,9,0.5375,0.539432,0.988439,0.697959,0.5625,0.560886,0.878613,0.684685
9,10,0.4625,0.50303,0.479769,0.491124,0.48125,0.517766,0.589595,0.551351


In [67]:
# Make sure the output folder exists before writing into it
os.makedirs(data_output_folder, exist_ok=True)

# Save the dataframe with the per-iteration data to excel
iteration_data_path = os.path.join(data_output_folder, iteration_data_file_name)
df.to_excel(iteration_data_path)

# Save path for the P300 model
# df.to_excel(os.path.join(data_output_folder, 'cnn_p300_model_10_fold_entire_dataset.xlsx'))

'Statistics for iterations successfully saved.'

'Statistics for iterations successfully saved.'

In [68]:
# Create statistics such as maximums and averages for each metric from the
# per-iteration dataframe
df_stats = utils.create_stats_df(df)

# Show the statistics as the cell output
df_stats

Unnamed: 0,models,average_accuracy,max_accuracy,accuracy_std,average_precision,max_precision,average_recall,max_recall,average_f1,max_f1
0,ann,0.480625,0.540625,0.043296,0.499586,0.541219,0.558382,1.0,0.499064,0.701826
1,snn,0.484375,0.5625,0.036562,0.507511,0.560886,0.503468,0.930636,0.473592,0.684685


In [69]:
# Save the dataframe with the statistics as an excel file in the output folder
iteration_stats_path = os.path.join(data_output_folder, iteration_stats_file_name)
df_stats.to_excel(iteration_stats_path)

# Save path for the P300 model
# df_stats.to_excel(os.path.join(data_output_folder, 'cnn_p300_model_10_fold_entire_dataset_stats.xlsx'))

'File with statistics successfully saved.'

'File with statistics successfully saved.'

In [70]:
# Print the confusion matrices of the ANN and the SNN for each iteration,
# using the results collected in the cross-validation loop
utils.print_confusion_matrices(ann, snn)


Confusion matrices for the ANN:
[[ 19 128]
 [ 22 151]] 

[[ 84  63]
 [113  60]] 

[[81 66]
 [99 74]] 

[[ 86  61]
 [122  51]] 

[[73 74]
 [98 75]] 

[[119  28]
 [156  17]] 

[[ 44 103]
 [ 62 111]] 

[[  0 147]
 [  0 173]] 

[[  1 146]
 [  2 171]] 

[[65 82]
 [90 83]] 

Confusion matrices for the SNN
[[ 96  51]
 [115  58]] 

[[ 88  59]
 [105  68]] 

[[138   9]
 [167   6]] 

[[ 99  48]
 [133  40]] 

[[68 79]
 [85 88]] 

[[ 48  99]
 [ 67 106]] 

[[56 91]
 [83 90]] 

[[  6 141]
 [ 12 161]] 

[[ 28 119]
 [ 21 152]] 

[[ 52  95]
 [ 71 102]] 

