In [250]:
import os
import nengo
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import nengo_dl
from tensorflow.python.keras import Input, Model
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.layers import Conv2D, Dropout, AveragePooling2D, Flatten, Dense
from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics
from sklearn.model_selection import train_test_split, KFold
from keras import backend as K

In [251]:
# Dataset path is by default saved in dataset_result/bci_dataset.npz
dataset_path = os.path.join('dataset_result', 'bci_dataset.npz')

# Load the numpy archive containing the dataset. The archive is opened in a
# context manager so its file handle is closed as soon as both arrays have
# been read into memory.
with np.load(dataset_path) as dataset:
    features, labels = dataset['features'], dataset['labels'] # get features and labels

f'Features shape: {features.shape}, labels shape: {labels.shape}'

'Features shape: (2976, 14, 36, 10), labels shape: (2976,)'

In [252]:
# Split the raw string labels by class to inspect the class balance
is_yes = labels == 'yes'
is_no = labels == 'no'
yes, no = labels[is_yes], labels[is_no]

# Share of each class in the whole dataset, in percent
n_labels = labels.shape[0]
yes_percent = (yes.shape[0] / n_labels) * 100
no_percent = (no.shape[0] / n_labels) * 100

f'yes: {yes.shape} ({yes_percent}%), no: {no.shape} ({no_percent}%)'

'yes: (1521,) (51.108870967741936%), no: (1455,) (48.891129032258064%)'

In [253]:
# Fix the random seed of both TensorFlow and NumPy so that runs are repeatable
seed = 1
tf.random.set_seed(seed)
np.random.seed(seed)

In [254]:
# One-hot encode the labels (the encoder expects a 2-D column) and then
# insert a time axis of length 1: (samples, 1, classes)
labels_column = labels.reshape(-1, 1)
labels_one_hot = OneHotEncoder().fit_transform(labels_column).toarray()
labels = labels_one_hot.reshape((labels_one_hot.shape[0], 1, -1))

# Flatten every sample for the NN: first to channels x data, then to a
# single vector behind a time axis of length 1
n_samples = features.shape[0]
features = features.reshape((n_samples, 14, -1)).reshape((n_samples, 1, -1))

f'Features and labels reshaped. Features shape: {features.shape}, labels shape: {labels.shape}'

'Features and labels reshaped. Features shape: (2976, 1, 5040), labels shape: (2976, 1, 2)'

In [255]:
def cnn_model():
    """
    Build the convolutional network used for both the analog and the spiking runs.

    The network takes a (14, 360, 1) input named 'input_layer', applies two
    convolution -> dropout -> average pooling stages followed by two dense
    layers, and ends in a 2-unit softmax layer named 'output_layer'.
    Dropout layers are seeded with the module-level ``seed``.

    :return: the (uncompiled) Keras model
    """
    relu = tf.nn.relu
    inputs = Input(shape=(14, 360, 1), name='input_layer')

    # First convolutional stage
    x = Conv2D(filters=32, kernel_size=(3, 3), activation=relu)(inputs)
    x = Dropout(0.2, seed=seed)(x)
    x = AveragePooling2D(pool_size=(2, 2))(x)

    # Second convolutional stage
    x = Conv2D(filters=64, kernel_size=(3, 3), activation=relu)(x)
    x = Dropout(0.2, seed=seed)(x)
    x = AveragePooling2D(pool_size=(2, 2))(x)

    # Fully connected classifier head
    x = Flatten()(x)
    x = Dense(512, activation=relu)(x)
    x = Dropout(0.2, seed=seed)(x)
    x = Dense(256, activation=relu)(x)
    predictions = Dense(2, activation=tf.nn.softmax, name='output_layer')(x)

    return Model(inputs=inputs, outputs=predictions)

In [256]:
def get_metrics(simulator, output_layer, x_test, y_test, minibatch_size, network_name):
    """
    Evaluate a simulator on the testing subset and report binary classification metrics.

    :param simulator: simulator instance; ``simulator.predict(x)`` must return a mapping
        that can be indexed with ``output_layer``
    :param output_layer: output layer reference used to select the predictions
    :param x_test: features of the testing subset
    :param y_test: one-hot labels of the testing subset with a time axis of length 1 at axis 1
    :param minibatch_size: minibatch size; the testing data is truncated to a multiple of it
    :param network_name: name of the network, used as prefix of the logged line
    :return: tuple (accuracy, precision, recall, f1, confusion_matrix)
    :raises ValueError: if the testing subset holds fewer samples than one minibatch
    """

    # Truncate the remaining number of samples since the predict function does use minibatch
    samples = (x_test.shape[0] // minibatch_size) * minibatch_size
    if samples == 0:
        raise ValueError(
            f'{network_name}: at least {minibatch_size} testing samples are required, '
            f'got {x_test.shape[0]}'
        )
    x_test, y_test = x_test[:samples], y_test[:samples]

    predictions = simulator.predict(x_test)[output_layer] # get result from output layer when predicting on x_test
    predictions = predictions[:, -1, :] # get the last timestep
    predictions_argm = np.argmax(predictions, axis=-1) # get predicted label

    y_test = np.squeeze(y_test, axis=1) # remove time dimension
    y_test_argm = np.argmax(y_test, axis=-1) # get labels

    # zero_division=0 keeps the value sklearn already falls back to when a score is
    # undefined (e.g. the network predicted a single class only) but states it
    # explicitly instead of relying on the warning path.
    precision = metrics.precision_score(y_true=y_test_argm, y_pred=predictions_argm, average='binary',
                                        zero_division=0) # get precision score
    recall = metrics.recall_score(y_true=y_test_argm, y_pred=predictions_argm, average='binary',
                                  zero_division=0) # get recall
    f1 = metrics.f1_score(y_true=y_test_argm, y_pred=predictions_argm, average='binary',
                          zero_division=0)
    accuracy = metrics.accuracy_score(y_true=y_test_argm, y_pred=predictions_argm) # get accuracy
    # Fix the label set so the matrix is always 2x2, even if one class never occurs
    confusion_matrix = metrics.confusion_matrix(y_true=y_test_argm, y_pred=predictions_argm, labels=[0, 1])

    # Log the statistics
    print(f'{network_name}: accuracy = {accuracy * 100}%, precision = {precision}, '
          f'recall = {recall}, f1 = {f1}')
    print('Confusion matrix:')
    print(confusion_matrix)

    return accuracy, precision, recall, f1, confusion_matrix

def run_ann(model, train, valid, test, params_save_path, iteration, shuffle_training=True, num_epochs=30,
            minibatch_size=16):
    """
    Train the analog (non-spiking) network on one cross-validation fold and evaluate it on the testing data.

    :param model: reference to the tensorflow model; it must contain layers named
        'input_layer' and 'output_layer'
    :param train: pair of training data (x_train, y_train)
    :param valid: pair of validation data (x_val, y_val)
    :param test: pair of testing data (x_test, y_test)
    :param params_save_path: output path to save weights of the network
    :param iteration: number of the iteration in CV (used in the logged network name)
    :param shuffle_training: shuffle samples
    :param num_epochs: number of epochs to train for
    :param minibatch_size: minibatch size of the simulator; the same value is used
        to truncate the testing data when the metrics are computed
    :return: dictionary with the keys 'accuracy', 'precision', 'recall', 'f1' and
        'confusion_matrix' computed on the testing data
    """
    x_train, y_train = train[0], train[1]
    x_valid, y_valid = valid[0], valid[1]
    x_test, y_test = test[0], test[1]

    converter = nengo_dl.Converter(model)

    with nengo_dl.Simulator(converter.net, minibatch_size=minibatch_size) as simulator:
        simulator.compile(optimizer=keras.optimizers.Adam(),
                          loss=keras.losses.BinaryCrossentropy(),
                          metrics=['accuracy'])

        input_layer = converter.inputs[model.get_layer('input_layer')] # get the input layer reference
        output_layer = converter.outputs[model.get_layer('output_layer')] # get the output layer reference

        # fit the model with the training data
        simulator.fit(
            x={ input_layer: x_train }, y={ output_layer: y_train },
            validation_data=(
                { input_layer: x_valid }, { output_layer: y_valid }
            ),
            epochs=num_epochs,
            shuffle=shuffle_training,
            callbacks=[EarlyStopping(patience=8, verbose=1, restore_best_weights=True)] # early stop to avoid overfitting
        )

        simulator.save_params(params_save_path) # save weights to the file

        # Get statistics; the truncation size must match the simulator's minibatch size
        accuracy, precision, recall, f1, confusion_matrix = get_metrics(simulator, output_layer, x_test, y_test,
                                                                        minibatch_size, f'{iteration}. CNN')
        return {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'confusion_matrix': confusion_matrix
        }


def run_snn(model, x_test, y_test, params_load_path, iteration, timesteps=50, scale_firing_rates=1000, synapse=0.01,
            minibatch_size=16):
    """
    Run model in spiking setting
    :param model: model reference; it must contain a layer named 'output_layer'
    :param x_test: testing features
    :param y_test: testing labels
    :param params_load_path: path to load parameters
    :param iteration: number of current iteration (used in the logged network name)
    :param timesteps: number of timesteps the testing features are repeated for
    :param scale_firing_rates: firing rate scaling
    :param synapse: synaptic smoothing
    :param minibatch_size: minibatch size of the simulator; the same value is used
        to truncate the testing data when the metrics are computed
    :return: dictionary with the keys 'accuracy', 'precision', 'recall', 'f1' and
        'confusion_matrix' computed on the testing data
    """
    converter = nengo_dl.Converter(
        model,
        swap_activations={ tf.nn.relu: nengo.SpikingRectifiedLinear() },
        scale_firing_rates=scale_firing_rates,
        synapse=synapse
    ) # create a Nengo converter object and swap all relu activations with spiking relu

    with converter.net:
        nengo_dl.configure_settings(stateful=False)

    output_layer = converter.outputs[model.get_layer('output_layer')] # output layer for simulator

    x_test_tiled = np.tile(x_test, (1, timesteps, 1)) # tile test data to timesteps (repeats along axis 1)

    # Build the simulator with the same minibatch size that is used to truncate
    # the testing data below, so both always agree.
    with nengo_dl.Simulator(converter.net, minibatch_size=minibatch_size) as simulator:
        simulator.load_params(params_load_path)

        # Get statistics
        accuracy, precision, recall, f1, confusion_matrix = get_metrics(simulator, output_layer, x_test_tiled, y_test,
                                                                        minibatch_size,
                                                                        f'{iteration}. CNN (SNN conversion)')
        return {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'confusion_matrix': confusion_matrix
        }

In [257]:
# Hold out a quarter of the samples for testing; the split is shuffled with the fixed seed
x_train, x_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.25,
    shuffle=True,
    random_state=seed,
)
print(y_train)
print(y_test)
(f'x_train shape: {x_train.shape}, y_train shape: {y_train.shape}, '
 f'x_test shape: {x_test.shape}, y_test shape: {y_test.shape}')

[[[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]

 ...

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]]
[[[0. 1.]]

 [[0. 1.]]

 [[1. 0.]]

 ...

 [[0. 1.]]

 [[1. 0.]]

 [[0. 1.]]]


'x_train shape: (2232, 1, 5040), y_train shape: (2232, 1, 2), x_test shape: (744, 1, 5040), y_test shape: (744, 1, 2)'

In [258]:
# Folder that receives the saved network parameters of every fold;
# it is created on first use and reused on later runs
params_output_path = 'cnn_all_samples_nengo_params'
os.makedirs(name=params_output_path, exist_ok=True)

In [259]:
ann, snn = [], [] # arrays that will contain data from each iteration for the analog and spiking network

num_iterations = 10 # number of iterations in the cross-validation (10)

# KFold yields index arrays into x_train; folds are numbered from 1
folds = KFold(n_splits=num_iterations).split(x_train)
for iteration, (train_idx, valid_idx) in enumerate(folds, start=1): # perform K-Fold CV
    print('Current iteration: ', iteration)
    x_train_curr, y_train_curr = x_train[train_idx], y_train[train_idx] # get current training data
    x_valid_curr, y_valid_curr = x_train[valid_idx], y_train[valid_idx] # get current validation data

    params_path = os.path.join(params_output_path, f'params_{iteration}') # configure path for parameters
    model = cnn_model() # create the model

    try:
        # run ann
        ann_result = run_ann(model=model,
                             train=(x_train_curr, y_train_curr),
                             valid=(x_valid_curr, y_valid_curr),
                             test=(x_test, y_test),
                             params_save_path=params_path,
                             iteration=iteration,
                             num_epochs=30
                             )

        # run snn with the parameters the ann run has just saved
        snn_result = run_snn(model=model,
                             x_test=x_test,
                             y_test=y_test,
                             params_load_path=params_path,
                             iteration=iteration)

        # append both results together so the two lists always have equal length
        ann.append(ann_result)
        snn.append(snn_result)
    finally:
        # clear session and delete model since it sometimes causes memory leaks;
        # done in `finally` so the cleanup also happens when a fold fails
        K.clear_session()
        del model

Current iteration:  1
Build finished in 0:00:01                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Restoring model weights from the end of the best epoch.
Epoch 00016: early stopping
1. CNN: accuracy = 52.17391304347826%, precision = 0.5217391304347826, recall = 1.0, f1 = 0.6857142857142856
Confusion matrix:
[[  0 352]
 [  0 384]]
Build finished in 0:00:01                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               
1. CNN (SNN conversion): accuracy = 51.766304347826086%, precision = 0.5203938115

  _warn_prf(average, modifier, msg_start, len(result))


In [260]:
# Create data dictionary for pandas dataframe
data = {
    'iterations': [x for x in range(1, num_iterations + 1)],
    'ann_accuracy': [x['accuracy'] for x in ann],
    'ann_precision': [x['precision'] for x in ann],
    'ann_recall': [x['recall'] for x in ann],
    'ann_f1': [x['f1'] for x in ann],
    'snn_accuracy': [x['accuracy'] for x in snn],
    'snn_precision': [x['precision'] for x in snn],
    'snn_recall': [x['recall'] for x in snn],
    'snn_f1': [x['f1'] for x in snn]
}

data

{'iterations': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 'ann_accuracy': [0.5217391304347826,
  0.5176630434782609,
  0.4782608695652174,
  0.5054347826086957,
  0.5067934782608695,
  0.5217391304347826,
  0.5176630434782609,
  0.5067934782608695,
  0.4796195652173913,
  0.5203804347826086],
 'ann_precision': [0.5217391304347826,
  0.5202797202797202,
  0.0,
  0.5276243093922652,
  0.541501976284585,
  0.5217391304347826,
  0.520863309352518,
  0.5353535353535354,
  1.0,
  0.5210884353741496],
 'ann_recall': [1.0,
  0.96875,
  0.0,
  0.4973958333333333,
  0.3567708333333333,
  1.0,
  0.9427083333333334,
  0.4140625,
  0.0026041666666666665,
  0.9973958333333334],
 'ann_f1': [0.6857142857142856,
  0.6769790718835305,
  0.0,
  0.5120643431635389,
  0.43014128728414447,
  0.6857142857142856,
  0.6709916589434662,
  0.4669603524229075,
  0.005194805194805194,
  0.6845397676496873],
 'snn_accuracy': [0.5176630434782609,
  0.5163043478260869,
  0.5108695652173914,
  0.4782608695652174,
  0.527173913

In [261]:
# Output folder for the data from each iteration
data_output_folder = 'results'
os.makedirs(data_output_folder, exist_ok=True)

# Turn the collected metrics into a dataframe and store it as a spreadsheet
df = pd.DataFrame(data)
iterations_xlsx = os.path.join(data_output_folder, 'cnn_10_fold_entire_dataset.xlsx')
df.to_excel(iterations_xlsx)

'Statistics for iterations successfully saved.'

'Statistics for iterations successfully saved.'

In [262]:
# Create statistics such as maximums and averages for each metric
data_stats = {
    'models': ['ann', 'snn'],
    'average_accuracy': [],
    'max_accuracy': [],
    'accuracy_std': [],
    'average_precision': [],
    'max_precision': [],
    'average_recall': [],
    'max_recall': [],
    'average_f1': [],
    'max_f1': []
}

# slightly less code if we iterate over snn_{metric_name} in dictionary
for model in ['ann', 'snn']:
    data_stats['average_accuracy'].append(df[f'{model}_accuracy'].mean())
    data_stats['accuracy_std'].append(df[f'{model}_accuracy'].std())
    data_stats['average_precision'].append(df[f'{model}_precision'].mean())
    data_stats['average_recall'].append(df[f'{model}_recall'].mean())
    data_stats['average_f1'].append(df[f'{model}_f1'].mean())
    data_stats['max_accuracy'].append(df[f'{model}_accuracy'].max())
    data_stats['max_f1'].append(df[f'{model}_f1'].max())
    data_stats['max_precision'].append(df[f'{model}_precision'].max())
    data_stats['max_recall'].append(df[f'{model}_recall'].max())

data_stats

{'models': ['ann', 'snn'],
 'average_accuracy': [0.5076086956521738, 0.507608695652174],
 'max_accuracy': [0.5217391304347826, 0.529891304347826],
 'average_precision': [0.5210189546906339, 0.517530986827837],
 'max_precision': [1.0, 0.5297805642633229],
 'average_recall': [0.61796875, 0.7354166666666666],
 'max_recall': [1.0, 1.0],
 'average_f1': [0.48182998579706515, 0.5833298140870984],
 'max_f1': [0.6857142857142856, 0.6857142857142856]}

In [263]:
# create dataframe for statistics and save it
df_stats = pd.DataFrame(data_stats)
df_stats.to_excel(os.path.join(data_output_folder, 'cnn_10_fold_entire_dataset_stats.xlsx'))

'File with statistics successfully saved.'

'File with statistics successfully saved.'

In [264]:
# Print confusion matrices for ANN
conf_matrices_ann = [x['confusion_matrix'] for x in ann]
print('Confusion matrices for the ANN:')
for confusion_matrix in conf_matrices_ann:
    print(confusion_matrix)

# Print confusion matrices for SNN
conf_matrices_snn = [x['confusion_matrix'] for x in snn]
print('\nConfusion matrices for the SNN')
for confusion_matrix in conf_matrices_snn:
    print(confusion_matrix)



Confusion matrices for the ANN:
[[  0 352]
 [  0 384]]
[[  9 343]
 [ 12 372]]
[[352   0]
 [384   0]]
[[181 171]
 [193 191]]
[[236 116]
 [247 137]]
[[  0 352]
 [  0 384]]
[[ 19 333]
 [ 22 362]]
[[214 138]
 [225 159]]
[[352   0]
 [383   1]]
[[  0 352]
 [  1 383]]

Confusion matrices for the SNN
[[ 11 341]
 [ 14 370]]
[[ 27 325]
 [ 31 353]]
[[ 31 321]
 [ 39 345]]
[[289  63]
 [321  63]]
[[ 51 301]
 [ 47 337]]
[[  0 352]
 [  0 384]]
[[104 248]
 [128 256]]
[[164 188]
 [185 199]]
[[183 169]
 [205 179]]
[[ 52 300]
 [ 46 338]]
