# Train the Neural Network

In [1]:
import numpy as np
import pandas as pd
import keras
import sklearn
import keras.utils
import sys
from keras import Sequential, regularizers
from keras.layers import Dense
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import KFold, train_test_split
from numpy import argmax

Using TensorFlow backend.


Useful functions

In [0]:
def from_categorical(one_hot):
    """Collapse one-hot (or per-class score) rows into class indices.

    Args:
      one_hot: iterable of rows; each row is handed to ``numpy.argmax``.

    Returns:
      list holding, for every row, the index of its largest entry.
    """
    return list(map(argmax, one_hot))

In [0]:
def binarize(labels):
    """Turn labels into a 0/1 list: 1 where the label equals 1, else 0.

    Args:
      labels: iterable of class labels.

    Returns:
      list of ints with the same length as ``labels``.
    """
    flags = []
    for label in labels:
        if label == 1:
            flags.append(1)
        else:
            flags.append(0)
    return flags

Upload files (Colaboratory)

In [0]:
!pip install -U -q PyDrive ## you will have to install this for every Colab session

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
# The Colab user is authenticated first; the application-default credentials
# are then attached to the GoogleAuth object that backs the `drive` client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
from google.colab import files
# Look up the dataset on Google Drive by its file id and save its content
# locally as 'Seizure.csv' (the file the "Read in data" cell loads).
epilepsy_import = drive.CreateFile({'id':'1D7t70Eou3vtbzAY5op2dwFToUgdwCf5s'})
epilepsy_import.GetContentFile('Seizure.csv')

Read in data

In [0]:
# Load the CSV and discard its "Unnamed: 0" column.
df = pd.read_csv('Seizure.csv').drop(columns=["Unnamed: 0"])

In [0]:
# Column "y" is used as the label; every other column is an input feature.
y_all = df["y"]
x_all = df.drop(columns=["y"])

Normalise

In [0]:
# Standardise the feature columns with scikit-learn's `scale`.
# NOTE(review): this runs on the full data set before the train/test split,
# so held-out samples contribute to the scaling statistics — confirm intended.
x_array = sklearn.preprocessing.scale(x_all)

Split data into train/test sets

In [0]:
# Hold out 33% of the samples for testing; the fixed random_state keeps the split repeatable.
x_train_split, x_test_split, y_train_split, y_test_split = train_test_split(x_array, y_all, test_size=0.33, random_state=42)

Index class labels from zero

In [0]:
# y_train_split -= 1
# y_test_split -= 1

Create the feedforward neural network

## Multi-class classification (unused)

In [0]:
# model = Sequential()
# model.add(Dense(88, 
#                 activation='sigmoid', 
#                 input_dim=178))
# model.add(Dense(88, activation='sigmoid'))
# model.add(Dense(5, activation='softmax'))
# model.compile(loss='categorical_crossentropy',
#                 optimizer=keras.optimizers.Adadelta(),
#                 metrics=['accuracy'])

In [0]:
# y_train_split_one_hot = keras.utils.to_categorical(y_train_split, num_classes=5)
# y_test_split_one_hot = keras.utils.to_categorical(y_test_split, num_classes=5)

In [0]:
# model.fit(x_train_split, y_train_split_one_hot, epochs=32, batch_size=16)

In [0]:
# y_test_split_predict = from_categorical(model.predict(x_test_split))
# acc = accuracy_score(y_test_split_predict, y_test_split)

In [0]:
# acc

## Binary Classification

In [0]:
# Start below the 0.85 acceptance threshold so the training loop runs at least once.
acc = 0

Retrain from a fresh random initialisation until the test accuracy reaches 0.85

In [102]:
# The label encodings do not depend on the model, so build them once instead
# of on every restart. Training targets are one-hot (2 columns); test labels
# stay as a plain 0/1 list for the sklearn metrics.
y_train_split_binary = keras.utils.to_categorical(binarize(y_train_split), num_classes=2)
y_test_split_binary = binarize(y_test_split)

# Re-initialise and retrain the network until accuracy on the held-out split
# reaches 0.85.
# NOTE(review): the stopping criterion uses the test split, so the printed
# accuracy is an optimistic estimate, and the loop has no upper bound on the
# number of restarts.
while acc < 0.85:
    model_binary = Sequential()
    model_binary.add(Dense(88, 
                    activation='sigmoid', 
                    input_dim=178,
                    use_bias=False))
    model_binary.add(Dense(2, activation='sigmoid', use_bias=False))
    model_binary.compile(loss='categorical_crossentropy',
                    optimizer=keras.optimizers.Adadelta(),
                    metrics=['accuracy'])
    
    model_binary.fit(x_train_split, y_train_split_binary, epochs=31, verbose=0)
    
    y_test_split_predict_binary = from_categorical(model_binary.predict(x_test_split))
    # sklearn metrics take (y_true, y_pred). Accuracy is symmetric, but the
    # confusion matrix is not: with this order rows are true classes and
    # columns are predicted classes.
    acc = accuracy_score(y_test_split_binary, y_test_split_predict_binary)
    print(acc)
    cm = confusion_matrix(y_test_split_binary, y_test_split_predict_binary)

0.8685111989459815


In [0]:
# Write the weights of the accepted model to an HDF5 file.
model_binary.save_weights('model_binary.hdf5')

# Generate C code

### Helper functions

In [0]:
from __future__ import print_function

import h5py

def print_structure(weight_file_path):
    """
    Prints out the structure of HDF5 file.

    The file is opened read-only: h5py releases before 3.0 default to append
    mode, which would create a missing file or open an existing one for
    writing just to inspect it.

    Args:
      weight_file_path (str) : Path to the file to analyze
    """
    with h5py.File(weight_file_path, 'r') as f:
        root_attrs = list(f.attrs.items())
        if root_attrs:
            print("{} contains: ".format(weight_file_path))
            print("Root attributes:")
        for key, value in root_attrs:
            print("  {}: {}".format(key, value))

        # One top-level group per layer; nothing more is printed for a file
        # without groups.
        for layer, g in f.items():
            print("  {}".format(layer))
            print("    Attributes:")
            for key, value in g.attrs.items():
                print("      {}: {}".format(key, value))

            print("    Dataset:")
            for p_name in g.keys():
                param = g[p_name]
                for k_name in param.keys():
                    # [:] reads the whole dataset into memory for printing.
                    print("      {}/{}: {}".format(p_name, k_name, param.get(k_name)[:]))

In [0]:
def save_structure(weight_file_path):
    """
    Collects every dataset stored two group levels deep in an HDF5 file.

    The previous version returned from inside ``finally``, which silently
    discarded any exception raised while reading and handed back a partial
    list. Errors now propagate, and the file is opened read-only.

    Args:
      weight_file_path (str) : Path to the HDF5 file to read

    Returns:
      list of arrays, one per dataset, in file iteration order; an empty list
      when the file has no top-level groups.
    """
    weights = []
    with h5py.File(weight_file_path, 'r') as f:
        for layer, g in f.items():
            for p_name in g.keys():
                param = g[p_name]
                for k_name in param.keys():
                    # [:] copies the dataset out before the file is closed.
                    weights.append(param.get(k_name)[:])
    return weights

In [0]:
def neurons(model):
    """Return the first-axis length of every array in ``model.get_weights()``.

    NOTE(review): for a Dense kernel the first axis is presumably the layer's
    input width, so the width of the last layer would never appear in the
    result, and bias vectors (if enabled) would add extra entries — confirm
    this matches what the generated C tables need.
    """
    return [weight_matrix.shape[0] for weight_matrix in model.get_weights()]

In [0]:
def layers(model):
    """Return how many entries ``neurons(model)`` reports, used as the layer count."""
    per_layer_counts = neurons(model)
    return len(per_layer_counts)

In [0]:
def weights(model):
    """Return the total number of scalar weights across all weight arrays of ``model``."""
    total = 0
    for array in model.get_weights():
        # Element count of one array is the product of its dimensions.
        total += np.prod(array.shape)
    return total

In [0]:
def neuron_string(first_connection, last_connection, 
                 activation_steepness=0, activation_function=6):
    """Render one neuron as a brace-delimited C initializer.

    The four values appear in the order first_connection, last_connection,
    activation_steepness, activation_function, each converted with ``str``
    and separated by ", ".
    """
    fields = (first_connection, last_connection,
              activation_steepness, activation_function)
    return "{" + ", ".join(str(field) for field in fields) + "}"

In [0]:
def gen_neuron_array_string(model):
    """Build the C definition of the ``fann_neurons`` array for ``model``.

    Neurons of the first layer are emitted as ``{0, 0, 0, 0}``. Every neuron
    of a later layer gets a connection range as wide as the previous layer,
    steepness 1.0 and activation function 6; ranges are handed out
    consecutively.

    Entries are joined instead of appended with a trailing ", " that is later
    stripped, so a model without weights yields a well-formed empty
    initializer rather than a truncated one.

    NOTE(review): the first connection index handed out equals the size of
    the first layer rather than 0 — confirm this is what the C runtime expects.

    Args:
      model: object accepted by ``neurons`` (its weight arrays define the sizes)

    Returns:
      str: a single C statement ending in ';'
    """
    num_neurons = neurons(model)
    entries = []
    total_neurons = 0
    for layer_index, layer_size in enumerate(num_neurons):
        if layer_index == 0:
            # Input layer: no incoming connections, no activation.
            entries.extend([neuron_string(0, 0, 0, 0)] * layer_size)
            total_neurons = layer_size
        else:
            fan_in = num_neurons[layer_index - 1]
            for _ in range(layer_size):
                start_neuron = total_neurons
                end_neuron = start_neuron + fan_in
                entries.append(neuron_string(start_neuron, end_neuron, 1.00, 6))
                total_neurons = end_neuron
    header = 'fann_neuron fann_neurons[{}] = '.format(sum(num_neurons))
    return header + '{' + ', '.join(entries) + '};'

In [0]:
def gen_layer_array_string(model):
    """Build the C definition of the ``fann_layers`` array for ``model``.

    Each layer becomes a ``{start, end}`` pair of running neuron indices,
    where ``end - start`` is that layer's entry from ``neurons(model)``.

    Entries are joined rather than stripped of a trailing separator, so an
    empty layer list produces ``{}`` instead of a truncated statement.

    Args:
      model: object accepted by ``neurons``

    Returns:
      str: a single C statement ending in ';'
    """
    num_neurons = neurons(model)
    entries = []
    total_neurons = 0
    for layer_size in num_neurons:
        start, end = total_neurons, total_neurons + layer_size
        entries.append('{' + '{}, {}'.format(start, end) + '}')
        total_neurons = end
    header = 'fann_layer fann_layers[{}] = '.format(len(num_neurons))
    return header + '{' + ', '.join(entries) + '};'

In [0]:
def gen_weight_array_string(model):
    """Build the C definition of the ``fann_weights`` array for ``model``.

    All weight arrays are written one after another; a 2-D array is flattened
    row by row (first index outermost), a 1-D array is written as is.

    Values are collected and joined once instead of growing a string with
    ``+=`` per weight and stripping a trailing separator, which also makes
    the empty case produce a well-formed ``{}``.

    NOTE(review): row-major flattening of a Dense kernel groups the values by
    input unit; verify that this ordering matches the connection ranges the
    C runtime reads per neuron.

    Args:
      model: object exposing ``get_weights()`` returning numpy arrays

    Returns:
      str: a single C statement ending in ';'

    Raises:
      ValueError: if an array is neither 1-D nor 2-D
    """
    weight_arrays = model.get_weights()
    total_weights = sum(int(np.prod(array.shape)) for array in weight_arrays)
    values = []
    for weight_matrix in weight_arrays:
        if len(weight_matrix.shape) not in (1, 2):
            raise ValueError('Weight matrix shape is incorrect')
        # ravel() walks the array in C (row-major) order, i.e. [i][j] with i outermost.
        values.extend(str(value) for value in weight_matrix.ravel())
    header = 'fann_type fann_weights[{}] = '.format(total_weights)
    return header + '{' + ', '.join(values) + '};'

## Generate fann_net.h

In [0]:
def gen_fann_net_h(model):
    """Write 'fann_net.h', the header declaring the generated network tables.

    The array sizes are computed before the file is opened and the file is
    managed by a ``with`` block. Previously a failure while computing the
    sizes reached ``f.close()`` in ``finally`` with ``f`` still unbound, so
    the real error was replaced by an UnboundLocalError.

    Args:
      model: object accepted by ``neurons``, ``weights`` and ``layers``
    """
    num_neurons = sum(neurons(model))
    num_weights = weights(model)
    num_layers = layers(model)
    with open('fann_net.h', 'w') as f:
        f.write('#ifndef FANN_FANN_NET_H_\n')
        f.write('#define FANN_FANN_NET_H_\n\n')
        f.write('#include "fann.h"\n')
        f.write('#include "fann_structs.h"\n\n')
        f.write('extern const enum fann_nettype_enum network_type;\n\n')
        f.write('extern fann_neuron fann_neurons[' + str(num_neurons) + '];\n\n')
        f.write('extern fann_type fann_weights[' + str(num_weights) + '];\n\n')
        f.write('extern fann_layer fann_layers[' + str(num_layers) + '];\n\n')
        f.write('#endif // FANN_FANN_NET_H')

## Generate fann_net.c

In [0]:
def gen_fann_net_c(model):
    """Write 'fann_net.c', the definitions of the generated network tables.

    All three array definitions are generated before the file is opened, so a
    failure in a generator no longer leaves a truncated 'fann_net.c' behind,
    and the ``with`` block closes the file without relying on a possibly
    unbound handle in ``finally``.

    Args:
      model: object accepted by the ``gen_*_array_string`` helpers
    """
    neuron_array_string = gen_neuron_array_string(model)
    weight_array_string = gen_weight_array_string(model)
    layer_array_string = gen_layer_array_string(model)
    with open('fann_net.c', 'w') as f:
        f.write('#include "fann_net.h"\n\n')
        f.write('const enum fann_nettype_enum network_type = 0;\n\n')
        f.write(neuron_array_string + "\n\n")
        f.write(weight_array_string + "\n\n")
        f.write(layer_array_string + "\n\n")

## Run Generate Functions

In [0]:
# Emit fann_net.h for the trained binary model.
gen_fann_net_h(model_binary)

In [0]:
# Emit fann_net.c for the trained binary model.
gen_fann_net_c(model_binary)

# Generate Test Data

In [0]:
def gen_fann_data_h(features, classes, num_samples):
    """Write 'fann_data.h', declaring the test input and output arrays.

    Only the first ``num_samples`` rows/labels are considered. Inputs are
    validated before the file is opened: previously the mismatch error was
    raised while ``f`` was still unbound, so ``f.close()`` in ``finally``
    replaced it with an UnboundLocalError. ``np.prod`` replaces the
    ``np.product`` alias, which NumPy 2.0 removed.

    NOTE(review): the header uses ``fann_type`` without including anything
    that declares it — confirm the including translation unit provides it.

    Args:
      features: 2-D numpy array, one row per sample
      classes: sequence of labels, one per sample
      num_samples (int): maximum number of samples to declare

    Raises:
      ValueError: if the truncated features and classes differ in length
    """
    features = features[:num_samples, :]
    classes = classes[:num_samples]
    if features.shape[0] != len(classes):
        raise ValueError("Number of samples do not match")

    num_inputs = np.prod(features.shape)
    num_outputs = len(classes)
    input_array_string = 'extern fann_type test_data_input[{}];\n\n'.format(num_inputs)
    output_array_string = 'extern const int test_data_output[{}];\n\n'.format(num_outputs)
    with open('fann_data.h', 'w') as f:
        f.write('#ifndef FANN_FANN_TEST_DATA_H_\n')
        f.write('#define FANN_FANN_TEST_DATA_H_\n\n')
        f.write(input_array_string)
        f.write(output_array_string)
        f.write('#endif // FANN_FANN_TEST_DATA_H_')

In [0]:
def gen_fann_data_c(features, classes, num_samples):
    """Write 'fann_data.c', defining the test input and output arrays.

    Only the first ``num_samples`` rows/labels are written. Compared with the
    previous version:
      * values are taken from the truncated data itself rather than from
        ``range(num_samples)``, so asking for more samples than exist no
        longer raises IndexError (the declared sizes already used the
        truncated length);
      * validation happens before the file is opened and the file is managed
        by ``with``, so errors are no longer masked by an unbound handle in
        ``finally``;
      * ``np.prod`` replaces the ``np.product`` alias removed in NumPy 2.0;
      * values are joined, so empty input yields ``{}``.

    Args:
      features: 2-D numpy array, one row per sample
      classes: sequence of labels, one per sample
      num_samples (int): maximum number of samples to write

    Raises:
      ValueError: if the truncated features and classes differ in length
    """
    features = features[:num_samples, :]
    classes = classes[:num_samples]
    if features.shape[0] != len(classes):
        raise ValueError("Number of samples do not match")

    num_inputs = np.prod(features.shape)
    num_outputs = len(classes)

    # Inputs are flattened sample by sample, feature by feature.
    input_values = ', '.join('{}'.format(value) for row in features for value in row)
    output_values = ', '.join('{}'.format(label) for label in classes)

    input_array_string = ('fann_type test_data_input[{}] = '.format(num_inputs)
                          + '{' + input_values + '};\n\n')
    output_array_string = ('const int test_data_output[{}] = '.format(num_outputs)
                           + '{' + output_values + '};')

    with open('fann_data.c', 'w') as f:
        f.write('#include "fann_data.h"\n\n')
        f.write(input_array_string)
        f.write(output_array_string)

## Run Generate Functions

In [0]:
# Export the first 100 test samples and their 0/1 labels as C test data.
gen_fann_data_h(x_test_split, y_test_split_binary, 100)
gen_fann_data_c(x_test_split, y_test_split_binary, 100)

# Download files

In [0]:
# Download the generated network sources from the Colab runtime.
files.download('fann_net.h')
files.download('fann_net.c')

In [0]:
# Download the generated test-data sources from the Colab runtime.
files.download('fann_data.h')
files.download('fann_data.c')

# Data Format for FANN tool

In [0]:
def to_fann_dat(features, classes, output_path):
    """Write samples to ``output_path`` in a line-oriented text format.

    The first line holds "<num_samples> <num_features> <num_classes>". Each
    sample then contributes two lines: its feature values and its class
    values, every value followed by a single space.

    The function no longer reads the notebook-global ``df`` (the result was
    never used, but it made the call fail whenever ``df`` was not defined),
    and the file is managed by ``with`` so a failing ``open`` is not masked
    by closing an unbound handle.

    Args:
      features: 2-D numpy array, one row per sample
      classes: 2-D numpy array, one row per sample (e.g. one-hot targets)
      output_path (str): destination file, overwritten if it exists

    Raises:
      ValueError: if features and classes hold a different number of samples
    """
    if len(features) != len(classes):
        raise ValueError("Sample lengths not the same")
    num_samples = len(features)
    num_features = features.shape[1]
    num_classes = classes.shape[1]

    with open(output_path, 'w') as f:
        # Write first line
        f.write('{} {} {}\n'.format(num_samples, num_features, num_classes))

        # Write remaining lines: inputs of a sample, then its outputs
        for sample, target in zip(features, classes):
            f.write(''.join('{} '.format(value) for value in sample) + '\n')
            f.write(''.join('{} '.format(value) for value in target) + '\n')

In [0]:
# Inputs are reused unchanged; targets are the 0/1 labels expanded to two one-hot columns.
x_train_fann_tool = x_train_split
y_train_fann_tool = keras.utils.to_categorical(binarize(y_train_split), num_classes=2)

x_test_fann_tool = x_test_split
y_test_fann_tool = keras.utils.to_categorical(binarize(y_test_split), num_classes=2)

In [0]:
# Write the train and test splits to separate .dat files.
to_fann_dat(x_train_fann_tool, y_train_fann_tool, 'fann_tool_train.dat')
to_fann_dat(x_test_fann_tool, y_test_fann_tool, 'fann_tool_test.dat')

In [0]:
# Download both .dat files from the Colab runtime.
files.download('fann_tool_train.dat')
files.download('fann_tool_test.dat')

# Test

In [125]:
# Inspect the raw two-column network outputs for the test split.
model_binary.predict(x_test_split)

array([[0.18113823, 0.0026138 ],
       [0.37766486, 0.00513022],
       [0.99862564, 0.01753486],
       ...,
       [0.4665092 , 0.00333333],
       [0.9896712 , 0.09140327],
       [0.97911775, 0.05582176]], dtype=float32)

In [136]:
# Sanity check: label 1 maps to 1.
binarize([1])

[1]

In [137]:
# Inspect the one-hot training targets built from the 0/1 labels.
keras.utils.to_categorical(binarize(y_train_split), num_classes=2)

array([[1., 0.],
       [1., 0.],
       [0., 1.],
       ...,
       [1., 0.],
       [0., 1.],
       [1., 0.]], dtype=float32)