In [1]:
# Necessary imports

import os
import nengo
import keras
import pandas as pd
from sklearn import metrics
import tensorflow as tf
import nengo_dl
import numpy as np
from scipy.io import loadmat
from sklearn.model_selection import train_test_split, ShuffleSplit
from tensorflow.python.keras import Input, Model
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.layers import Conv2D, BatchNormalization, Dropout, AveragePooling2D, Flatten, Dense
from keras import backend as K

In [2]:
# First load the dataset and set seeds for consistency
# The dataset should be by default saved in ../datasets/VarekaGTNEpochs.mat
# The file can be downloaded here https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/G9RRLN
dataset_path = os.path.join('dataset', 'VarekaGTNEpochs.mat')

params_folder_path = 'nengo_network_params' # path to saved parameters
os.makedirs('nengo_network_params', exist_ok=True)

seed = 0 # constant seed
np.random.seed(seed)
tf.random.set_seed(seed)

In [3]:
mat = loadmat(dataset_path)
target_data, non_target_data = mat['allTargetData'], mat['allNonTargetData'] # get target and non-target data
features = np.concatenate((target_data, non_target_data))

# Target labels are represented as (1, 0) vector, non target labels are represented as (0, 1) vector
target_labels = np.tile(np.array([1, 0]), (target_data.shape[0], 1)) # set 'target' as (1, 0) vector
non_target_labels = np.tile(np.array([0, 1]), (non_target_data.shape[0], 1)) # set 'non target' as (0, 1) vector
labels = np.vstack((target_labels, non_target_labels)) # concatenate target and non target labels

# Filter noise above 100 uV
threshold = 100.0
x_result, y_result = [], []
for i in range(features.shape[0]):
    if not np.max(np.abs(features[i])) > threshold:
        x_result.append(features[i])
        y_result.append(labels[i])

# Save data to numpy array
features, labels = np.array(x_result), np.array(y_result)
features = features.reshape((features.shape[0], 1, -1))
labels = labels.reshape((labels.shape[0], 1, -1))

# Print shapes
f'Features shape: {features.shape}, Labels shape: {labels.shape}'

'Features shape: (8036, 1, 3600), Labels shape: (8036, 1, 2)'

In [4]:
# Define function to create the CNN model
# Slightly modified version from https://www.sciencedirect.com/science/article/pii/S1746809419304185
def create_model():
    """
    Function to create tensorflow model
    """
    inp = Input(shape=(3, 1200, 1), name='input_layer')
    conv2d = Conv2D(filters=6, kernel_size=(3, 3), activation=tf.nn.relu)(inp)
    dropout1 = Dropout(0.5, seed=seed)(conv2d)
    avg_pooling = AveragePooling2D(pool_size=(1, 8), padding='same')(dropout1)
    flatten = Flatten()(avg_pooling)
    dense1 = Dense(100, activation=tf.nn.relu)(flatten)
    batch_norm = BatchNormalization()(dense1)
    dropout2 = Dropout(0.5, seed=seed)(batch_norm)
    output = Dense(2, activation=tf.nn.softmax, name='output_layer')(dropout2)

    return Model(inputs=inp, outputs=output)

In [5]:
# Since we used [1,0] as true (target) we also need to reference the correct index during metrics evaluation.
# In sklearn this means that we set the pos_label to 0 instead of 1
true_ref_idx = np.argmax(np.array([1, 0]))

def get_metrics(simulator, output_layer, x_test, y_test, minibatch_size, network_name):
    """
    Function for calculating metrics
    :param simulator: simulator instance
    :param input_layer: input layer reference
    :param output_layer: output layer reference
    :param x_test: features of the testing subset
    :param y_test: labels of the testing subset
    :param network_name: name of the network
    :return: accuracy, recall and precision metrics
    """

    # Truncate the remaining number of samples since the predict function uses minibatch_size as well (i.e.
    samples = (x_test.shape[0] // minibatch_size ) * minibatch_size
    x_test, y_test = x_test[:samples], y_test[:samples]

    predictions = simulator.predict(x_test)[output_layer] # get result from output layer when predicting on x_test
    predictions = predictions[:,-1,:] # get the last timestep
    predictions_argm = np.argmax(predictions, axis=-1) # get predicted label

    y_test = np.squeeze(y_test, axis=1) # remove time dimension
    y_test_argm = np.argmax(y_test, axis=-1) # get labels, due to one-hot encoding 0 = target, 1 = non-target

    precision = metrics.precision_score(y_true=y_test_argm, y_pred=predictions_argm, pos_label=true_ref_idx) # get precision score
    recall = metrics.recall_score(y_true=y_test_argm, y_pred=predictions_argm, pos_label=true_ref_idx) # get recall
    f1 = metrics.f1_score(y_true=y_test_argm, y_pred=predictions_argm, pos_label=true_ref_idx)
    accuracy = metrics.accuracy_score(y_true=y_test_argm, y_pred=predictions_argm) # get accuracy

    # First column - "0" = target samples, second column "1" = non-target samples
    confusion_matrix = metrics.confusion_matrix(y_true=y_test_argm, y_pred=predictions_argm)

    # Log the statistics
    print(f'{network_name}: accuracy = {accuracy * 100}%, precision = {precision}, '
          f'recall = {recall}, f1 = {f1}')
    print('Confusion matrix:')
    print(confusion_matrix)

    return accuracy, precision, recall, f1, confusion_matrix

# Define functions to run the analog and spiking networks

def run_ann(model, train, valid, test, params_save_path, iteration, shuffle_training=True):
    """
    Run ann via Nengo simulator. This fits the given model with the training data (train) and validates it using validation
    data (valid). Then accuracy is calculated using the test data (test) and weights are saved to params_save_path
    :param shuffle_training: whether to shuffle data (default true)
    :param model: tensorflow model created from create_model() function
    :param train: pair of features and labels from training data
    :param valid: pair of features and labels from validation data
    :param test: pair of features and labels from test data
    :param params_save_path: output path to save weights of the network for SNN testing
    :return accuracy, precision, recall, f1 and confusion matrix from the testing data
    """

    # unwrap into training and testing data for each subset
    x_train, y_train = train[0], train[1]
    x_valid, y_valid = valid[0], valid[1]
    x_test, y_test = test[0], test[1]

    converter = nengo_dl.Converter(model)
    with nengo_dl.Simulator(converter.net, minibatch_size=64) as simulator:
        # Compile the model with binary cross-entropy and Adam optimizer
        simulator.compile(
            optimizer=keras.optimizers.Adam(),
            loss=keras.losses.BinaryCrossentropy(),
            metrics=['accuracy']
        )

        input_layer = converter.inputs[model.get_layer('input_layer')] # get nengo input layer
        output_layer = converter.outputs[model.get_layer('output_layer')] # get nengo output layer

        simulator.fit(
            x={ input_layer: x_train }, y={ output_layer: y_train },
            validation_data=({ input_layer: x_valid }, { output_layer: y_valid }),
            epochs=30,
            shuffle=shuffle_training,
            callbacks=[EarlyStopping(patience=5, verbose=1, restore_best_weights=True)] # early stop to avoid overfitting
        ) # train model

        simulator.save_params(params_save_path) # save params for SNN

        accuracy, precision, recall, f1, confusion_matrix = get_metrics(simulator, output_layer, x_test, y_test,
                                                  minibatch_size=simulator.minibatch_size,
                                                  network_name=f'{iteration}. ANN')

        return {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'confusion_matrix': confusion_matrix
        }


def run_snn(model, test, params_load_path, timesteps, scale_firing_rates, synapse, iteration):
    """
    Runs SNN on test data. Loads pre-trained weights from params_load path and uses timesteps, scale_firing_rates and synapse
    parameters for simulator.
    :param model: reference to the tensorflow model
    :param test: reference to the test features and labels
    :param params_load_path: path to the saved weights of the ANN
    :param timesteps: number of timesteps - i.e. how long is the input streamed to the network
    :param scale_firing_rates: firing rate scaling - amplifies spikes
    :param synapse: synaptic smoothing
    :param iteration: iteration to print the result
    :return: accuracy, precision, recall, f1 and confusion matrix from the testing data
    """

    # Conversion of the TensorFlow model to a spiking Nengo model
    converter = nengo_dl.Converter(
        model=model,
        swap_activations={ tf.nn.relu: nengo.SpikingRectifiedLinear() },
        scale_firing_rates=scale_firing_rates,
        synapse=synapse
    )

    x_test, y_test = test[0], test[1] # get test features and labels

    with converter.net:
        nengo_dl.configure_settings(stateful=False)

    output_layer = converter.outputs[model.get_layer('output_layer')] # output layer for simulator
    x_test_time_tiled = np.tile(x_test, (1, timesteps, 1)) # tile x_test to match desired timesteps for simulator

    with nengo_dl.Simulator(converter.net, minibatch_size=41, progress_bar=False) as simulator:
        simulator.load_params(params_load_path)

        # Name of the network for print in get_metrics function
        name = f'{iteration}. SNN [timesteps={timesteps}, scale_firing_rates={scale_firing_rates}, synapse={synapse}]'
        accuracy, precision, recall, f1, confusion_matrix = get_metrics(simulator, output_layer, x_test_time_tiled, y_test,
                                                  minibatch_size=simulator.minibatch_size,
                                                  network_name=name)

        return {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'confusion_matrix': confusion_matrix
        }

In [6]:
# Split the dataset into training and testing data
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.25, random_state=seed, shuffle=True)

print(f'Train features shape: {x_train.shape}, train labels shape: {y_train.shape}')
print(f'Test features shape: {x_test.shape}, test labels shape: {y_test.shape}')

# Arrays to store results from the ANN and the SNN
ann, snn = [], {}

# Configuration for the spiking network with format: timesteps, scale_firing_rates, synapse
snn_config = [
    [50, 1000, 0.01], # best performing parameters for simulator
    [50, 1000, None], # synaptic smoothing turned off
    [50, 1, 0.01], # spike scaling turned off
    [50, 1, None] # everything turned off, only RELU is swapped for spiking RELU
]

variants = []
for variant in snn_config:
        # name of the configuration
        name = f'snn [timesteps={variant[0]}, scaling={variant[1]}, synapse=None]' if variant[2] is None \
            else 'snn [timesteps={}, scaling={}, synapse={:3f}]'.format(variant[0], int(variant[1]), variant[2])
        variants.append(name)
        snn[name] = []


Train features shape: (6027, 1, 3600), train labels shape: (6027, 1, 2)
Test features shape: (2009, 1, 3600), test labels shape: (2009, 1, 2)


In [7]:
num_iterations = 30 # 30 iterations of CV
iteration = 1 # number of the current iteration
val_size = 0.25 # 25% of the data is used as validation data
for train_idx, val_idx in ShuffleSplit(n_splits=num_iterations, test_size=val_size, random_state=seed).split(x_train):
    print(f'Iteration: {iteration}')

    # Split all training data into current training data and validation data
    x_train_curr, y_train_curr = x_train[train_idx], y_train[train_idx]
    x_val_curr, y_val_curr = x_train[val_idx], y_train[val_idx]

    # Set params path
    params_path = os.path.join(params_folder_path, f'params_iter_{iteration}')

    # Create an untrained model
    model = create_model()

    # Run the analog network - train and evaluate
     # run ann
    ann_result = run_ann(model=model,
                         train=(x_train_curr, y_train_curr),
                         valid=(x_val_curr, y_val_curr),
                         test=(x_test, y_test),
                         params_save_path=params_path,
                         iteration=iteration
                         )
    K.clear_session() # clear session
    ann.append(ann_result) # append the result to the ANN array

    # Create an array where results for each configuration of the spiking network will be stored
    snn_config_results = []
    for i, variant_conf in enumerate(snn_config):
        snn_result = run_snn(model=model,
                          test=(x_test, y_test),
                          params_load_path=params_path,
                          timesteps=variant_conf[0],
                          scale_firing_rates=variant_conf[1],
                          synapse=variant_conf[2],
                          iteration=iteration
                          )
        K.clear_session() # clear session
        snn[variants[i]].append(snn_result) # save results

    del model # delete the model (if this is not called it "may" create a memory leak - depends on the simulation machine)
    iteration += 1

Iteration: 1
Build finished in 0:00:00                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Restoring model weights from the end of the best epoch.
Epoch 00013: early stopping
1. ANN: accuracy = 63.91129032258065%, precision = 0.6464323748668797, recall = 0.6125126135216953, f1 = 0.6290155440414507
Confusion matrix:
[[607 384]
 [332 661]]
1. SNN [timesteps=50, scale_firing_rates=1000, synapse=0.01]: accuracy = 64.06172224987556%, precision = 0.6487341772151899, recall = 0.6125498007968128, f1 = 0.6301229508196722
Confusion matrix:
[[615 389]
 [333 672]]
1. SNN [timesteps=50, scale_firing_rates=1000, synapse=None]: accuracy = 63.663514186162274%, precision = 0.6439075630252101, recall



In [12]:
# Create dictionary with the data for pandas dataframe
data = {
    'iterations': [x for x in range(1, num_iterations + 1)],
    'ann_accuracy': [x['accuracy'] for x in ann],
    'ann_precision': [x['precision'] for x in ann],
    'ann_recall': [x['recall'] for x in ann],
    'ann_f1': [x['f1'] for x in ann]
}

for variant_name in variants:
    data[f'{variant_name}_accuracy'] = [x['accuracy'] for x in snn[variant_name]]
    data[f'{variant_name}_precision'] = [x['precision'] for x in snn[variant_name]]
    data[f'{variant_name}_recall'] = [x['recall'] for x in snn[variant_name]]
    data[f'{variant_name}_f1'] = [x['f1'] for x in snn[variant_name]]

In [13]:
# Create pandas dataframe and save it to xlsx file
output_folder_path = 'p300_exp_output'
os.makedirs(output_folder_path, exist_ok=True)

df = pd.DataFrame(data)
df

Unnamed: 0,iterations,ann_accuracy,ann_precision,ann_recall,ann_f1,"snn [timesteps=50, scaling=1000, synapse=0.010000]_accuracy","snn [timesteps=50, scaling=1000, synapse=0.010000]_precision","snn [timesteps=50, scaling=1000, synapse=0.010000]_recall","snn [timesteps=50, scaling=1000, synapse=0.010000]_f1","snn [timesteps=50, scaling=1000, synapse=None]_accuracy",...,"snn [timesteps=50, scaling=1000, synapse=None]_recall","snn [timesteps=50, scaling=1000, synapse=None]_f1","snn [timesteps=50, scaling=1, synapse=0.010000]_accuracy","snn [timesteps=50, scaling=1, synapse=0.010000]_precision","snn [timesteps=50, scaling=1, synapse=0.010000]_recall","snn [timesteps=50, scaling=1, synapse=0.010000]_f1","snn [timesteps=50, scaling=1, synapse=None]_accuracy","snn [timesteps=50, scaling=1, synapse=None]_precision","snn [timesteps=50, scaling=1, synapse=None]_recall","snn [timesteps=50, scaling=1, synapse=None]_f1"
0,1,0.639113,0.646432,0.612513,0.629016,0.640617,0.648734,0.61255,0.630123,0.636635,...,0.610558,0.626789,0.520159,0.527548,0.381474,0.442775,0.525137,0.531566,0.419323,0.46882
1,2,0.636089,0.655852,0.57114,0.610572,0.634644,0.653759,0.571713,0.609989,0.634644,...,0.567729,0.608324,0.528123,0.536842,0.406375,0.462585,0.512195,0.514963,0.411355,0.457364
2,3,0.631048,0.626341,0.64783,0.636905,0.636137,0.631884,0.651394,0.641491,0.630662,...,0.64243,0.634843,0.539074,0.556686,0.381474,0.452719,0.500747,0.500715,0.348606,0.411039
3,4,0.637601,0.64978,0.595358,0.62138,0.636137,0.64918,0.591633,0.619072,0.637133,...,0.594622,0.620905,0.527128,0.536,0.400398,0.458381,0.532603,0.542707,0.411355,0.467989
4,5,0.627016,0.618062,0.662967,0.639727,0.627178,0.618826,0.661355,0.639384,0.624191,...,0.660359,0.637194,0.504231,0.505479,0.36753,0.425606,0.52663,0.538686,0.36753,0.436945
5,6,0.634073,0.644809,0.595358,0.619098,0.634146,0.646037,0.592629,0.618182,0.636635,...,0.594622,0.620582,0.526132,0.541139,0.340637,0.418093,0.521155,0.529006,0.381474,0.443287
6,7,0.640121,0.67205,0.545913,0.60245,0.640119,0.673243,0.543825,0.601653,0.641115,...,0.545817,0.603192,0.527626,0.532391,0.450199,0.487858,0.543056,0.553616,0.442231,0.491694
7,8,0.633065,0.643716,0.594349,0.618048,0.636137,0.648855,0.592629,0.619469,0.639124,...,0.594622,0.622199,0.527626,0.537415,0.393426,0.454284,0.524141,0.535191,0.363546,0.432977
8,9,0.641633,0.644033,0.631685,0.637799,0.644599,0.647959,0.63247,0.640121,0.640617,...,0.62749,0.635721,0.530612,0.539457,0.415339,0.46933,0.532603,0.545076,0.391434,0.455652
9,10,0.640121,0.648765,0.609485,0.628512,0.642111,0.651113,0.611554,0.630714,0.639622,...,0.608566,0.627955,0.514684,0.518057,0.414343,0.460432,0.523644,0.529785,0.416335,0.466258


In [14]:
df.to_excel(os.path.join(output_folder_path, 'data_iterations.xlsx'))

'File with iteration data successfully saved.'

'File with iteration data successfully saved.'

In [15]:
network_names = ['ann'] + variants # names of each network in the pandas data frame
data_stats = {
    'models': network_names,
    'average_acc': [],
    'max_acc': [],
    'std_acc': [],
    'average_precision': [],
    'max_precision': [],
    'average_recall': [],
    'max_recall': [],
    'average_f1': [],
    'max_f1': []
} # statistics from the experiment

average_acc, max_acc, std_acc = [], [], []
average_precision, max_precision = [], []
average_recall, max_recall = [], []
for network_name in network_names:
    data_stats['average_acc'].append(df[f'{network_name}_accuracy'].mean())
    data_stats['max_acc'].append(df[f'{network_name}_accuracy'].max())
    data_stats['std_acc'].append(df[f'{network_name}_accuracy'].std())

    data_stats['average_precision'].append(df[f'{network_name}_precision'].mean())
    data_stats['max_precision'].append(df[f'{network_name}_precision'].max())

    data_stats['average_recall'].append(df[f'{network_name}_recall'].mean())
    data_stats['max_recall'].append(df[f'{network_name}_recall'].max())

    data_stats['average_f1'].append(df[f'{network_name}_f1'].mean())
    data_stats['max_f1'].append(df[f'{network_name}_f1'].max())

In [16]:
df_stats = pd.DataFrame(data_stats)

df_stats

Unnamed: 0,models,average_acc,max_acc,std_acc,average_precision,max_precision,average_recall,max_recall,average_f1,max_f1
0,ann,0.633417,0.647177,0.010733,0.643327,0.67205,0.600437,0.67003,0.620362,0.646229
1,"snn [timesteps=50, scaling=1000, synapse=0.010...",0.634296,0.648581,0.010596,0.644719,0.673243,0.600764,0.668327,0.621174,0.644573
2,"snn [timesteps=50, scaling=1000, synapse=None]",0.633549,0.649577,0.010726,0.644069,0.674047,0.599568,0.667331,0.620242,0.644851
3,"snn [timesteps=50, scaling=1, synapse=0.010000]",0.519711,0.543056,0.013647,0.526274,0.556686,0.391799,0.48008,0.447964,0.509245
4,"snn [timesteps=50, scaling=1, synapse=None]",0.521885,0.545545,0.012559,0.528771,0.560811,0.390538,0.467131,0.448391,0.49551


In [17]:
df_stats.to_excel(os.path.join(output_folder_path, 'statistics.xlsx'))

'File with statistics successfully saved.'

'File with statistics successfully saved.'