# Test/Dev Neural Network Architecture

In [1]:
from __future__ import print_function

import sys
import os
import time
import chb

import numpy as np
import theano
import theano.tensor as T

import lasagne
import lasagne.objectives as lobj
import lasagne.nonlinearities as lnon
import lasagne.layers as llay
import lasagne.init as linit

## Load Subject Data

In [2]:
# TODO: organize this differently - this should just load the subject and
#       data, so that we can loop through all the LOOs in main
# Load data for subject
subject = chb.CHBsubj()
subject.load_meta('chb01')
subject.load_data(exthd=False)

Loading: /Users/adamcellon/Drive/senior/thesis/data/chb01.npz
Done: 68.598263 seconds elapsed.


In [15]:
# Load and read training and test set images and labels.
x_train, y_train, x_test, y_test = subject.leaveOneOut(1, 1000, 100)

# We reserve the last 100 training examples for validation.
x_train, x_val = x_train[:-500], x_train[-500:]
y_train, y_val = y_train[:-500], y_train[-500:]

----

## ConvNet Based On [This Site](https://github.com/luizgh/lasagne_basics/blob/master/comparing-optimization-algs.ipynb)

In [114]:
data_size=(None,1,23,256) # Batch size x Img Channels x Height (electrodes) x Width (samples)
output_size=1             # Binary classification of seizure (1) vs non-seizure (0)

def build_model():
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    net = {}

    #Input layer:
    net['data'] = lasagne.layers.InputLayer(data_size, input_var=input_var)

    #Convolution + Pooling
    net['conv1'] = lasagne.layers.Conv2DLayer(net['data'], num_filters=6, filter_size=(1,16), stride=(1,2), nonlinearity=lnon.sigmoid)
    net['pool1'] = lasagne.layers.Pool2DLayer(net['conv1'], pool_size=(1,2))

    net['conv2'] = lasagne.layers.Conv2DLayer(net['pool1'], num_filters=10, filter_size=(1,32), stride=(1,2), nonlinearity=lnon.sigmoid)
    net['pool2'] = lasagne.layers.Pool2DLayer(net['conv2'], pool_size=(1,2))


    #Fully-connected + dropout
    net['fc1'] = lasagne.layers.DenseLayer(net['pool2'], num_units=100)

    #Output layer:
    net['out'] = lasagne.layers.DenseLayer(net['fc1'], num_units=output_size, 
                                           nonlinearity=lasagne.nonlinearities.softmax)

    weight_decay = 1e-5

    #Loss function: mean cross-entropy
    prediction = lasagne.layers.get_output(net['out'])
    loss = lasagne.objectives.binary_crossentropy(prediction, target_var)
    loss = loss.mean()

    #Also add weight decay to the cost function
    weightsl2 = lasagne.regularization.regularize_network_params(net['out'], lasagne.regularization.l2)
    loss += weight_decay * weightsl2

    #Get the update rule
    params = lasagne.layers.get_all_params(net['out'], trainable=True)
    updates = lasagne.updates.sgd(loss, params, learning_rate=1e-2)

    test_prediction = lasagne.layers.get_output(net['out'], deterministic=True)
    test_loss = lasagne.objectives.binary_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates, name='train')
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], name='validation')
    get_preds = theano.function([input_var], test_prediction, name='get_preds')

    return (train_fn, val_fn, get_preds)

In [115]:
epochs = 10
batch_size=10

#Run the training function per mini-batches
n_examples = x_train.shape[0]
n_batches = int(n_examples / batch_size)

def train(train_fn):
    cost_history = []
    for epoch in range(epochs):
        st = time.time()
        batch_cost_history = []
        for batch in range(n_batches):
            x_batch = x_train[batch*batch_size: (batch+1) * batch_size]
            y_batch = y_train[batch*batch_size: (batch+1) * batch_size]

            this_cost = train_fn(x_batch, y_batch) # This is where the model gets updated

            batch_cost_history.append(this_cost)
        epoch_cost = np.mean(batch_cost_history)
        cost_history.append(epoch_cost)
        en = time.time()
        print('Epoch %d/%d, train error: %f. Elapsed time: %.2f seconds' % (epoch+1, epochs, epoch_cost, en-st))
    return cost_history

In [117]:
sgd_functions = build_model()
for key, value in network.items():
    print('%s: %s' % (key, llay.get_output_shape(value)))
print ("Training with SGD")
sgd_cost_history = train(sgd_functions[0])

Training with SGD
Epoch 1/10, train error: nan. Elapsed time: 9.71 seconds
Epoch 2/10, train error: nan. Elapsed time: 9.26 seconds
Epoch 3/10, train error: nan. Elapsed time: 10.07 seconds
Epoch 4/10, train error: nan. Elapsed time: 9.46 seconds
Epoch 5/10, train error: nan. Elapsed time: 9.43 seconds
Epoch 6/10, train error: nan. Elapsed time: 10.35 seconds
Epoch 7/10, train error: nan. Elapsed time: 10.35 seconds
Epoch 8/10, train error: nan. Elapsed time: 13.33 seconds
Epoch 9/10, train error: nan. Elapsed time: 11.77 seconds
Epoch 10/10, train error: nan. Elapsed time: 9.93 seconds


-----

## ConvNet Based On mnist.py 

In [29]:
# ##################### Build the neural network model #######################
def build_cnn(input_var=None):
    data_size = (None,1,23,256)
    output_size = 1
    # The most simple, straightforward CNN I can come up with.
    network = {}
    
    network['data'] = llay.InputLayer(shape=data_size, input_var=input_var)

    network['conv1'] = llay.Conv2DLayer(network['data'], num_filters=8, filter_size=(1, 15),
                                        stride=(1,1), nonlinearity=lnon.sigmoid,pad='same')

    network['pool1'] = llay.MaxPool2DLayer(network['conv1'], pool_size=(1,2))

    network['conv2'] = llay.Conv2DLayer(network['pool1'], num_filters=8, filter_size=(1, 15), 
                                        stride=(1,2), nonlinearity=lnon.rectify)
    
    network['dense1'] = llay.DenseLayer(network['conv2'],num_units=256, nonlinearity=lnon.rectify)

    network['out'] = llay.DenseLayer(network['dense1'], num_units=output_size, nonlinearity=lnon.rectify)

    return network

In [30]:
# ############################# Batch iterator ###############################
# This is just a simple helper function iterating over training data in
# mini-batches of a particular size, optionally in random order. It assumes
# data is available as numpy arrays. For big datasets, you could load numpy
# arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your
# own custom data iteration function. For small datasets, you can also copy
# them to GPU at once for slightly improved performance. This would involve
# several changes in the main program, though, and is not demonstrated here.
# Notice that this function returns only mini-batches of size `batchsize`.
# If the size of the data is not a multiple of `batchsize`, it will not
# return the last (remaining) mini-batch.

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

In [31]:
# Prepare Theano variables for inputs and targets
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')

In [32]:
# Create neural network model (depending on first command line parameter)
print("Building model and compiling functions...")
network = build_cnn(input_var)

Building model and compiling functions...


In [33]:
# Create a loss expression for training, i.e., a scalar objective we want
# to minimize (for our 2-class problem, it is the cross-entropy loss):
prediction = llay.get_output(network['out'])

loss = lobj.binary_crossentropy(prediction, target_var)
loss = loss.mean()

# Update expressions for training
params = llay.get_all_params(network['out'], trainable=True)
updates = lasagne.updates.rmsprop(loss, params, learning_rate=0.001)

# Create a loss expression for validation/testing.
test_prediction = llay.get_output(network['out'], deterministic=True)

test_loss = lobj.binary_crossentropy(test_prediction,target_var)
test_loss = lobj.aggregate(test_loss, mode='mean')

test_acc = T.mean(lobj.binary_accuracy(test_prediction, target_var),
                  dtype=theano.config.floatX)

# Compile a function performing a training step on a mini-batch (by giving
# the updates dictionary) and returning the corresponding training loss:
train_fn = theano.function([input_var, target_var], loss, updates=updates)

# Compile a second function computing the validation loss and accuracy:
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

In [34]:
for key, value in network.items():
    print('%s: %s' % (key, llay.get_output_shape(value)))

data: (None, 1, 23, 256)
conv1: (None, 8, 23, 256)
pool1: (None, 8, 23, 128)
conv2: (None, 8, 23, 57)
dense1: (None, 256)
out: (None, 1)


In [37]:
num_epochs=10

# Finally, launch the training loop.
print("Starting training...")
# We iterate over epochs:
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(x_train, y_train, 10):
        inputs, targets = batch
        #print(inputs.shape, targets.shape)
        train_err += train_fn(inputs, targets)
        train_batches += 1

    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(x_val, y_val, 10):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training error:\t\t{:.6f}".format(train_err)) #
    print("  train batches:\t\t{:d}".format(train_batches)) #
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation error:\t\t{:.6f}".format(val_err)) #
    print("  validation batches:\t\t{:d}".format(val_batches)) #
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))

Starting training...
Epoch 1 of 10 took 2.877s
  training error:		nan
  train batches:		38
  training loss:		nan
  validation error:		nan
  validation batches:		38
  validation loss:		nan
  validation accuracy:		62.55 %
Epoch 2 of 10 took 2.683s
  training error:		nan
  train batches:		38
  training loss:		nan
  validation error:		nan
  validation batches:		38
  validation loss:		nan
  validation accuracy:		62.55 %
Epoch 3 of 10 took 2.685s
  training error:		nan
  train batches:		38
  training loss:		nan
  validation error:		nan
  validation batches:		38
  validation loss:		nan
  validation accuracy:		62.55 %
Epoch 4 of 10 took 2.694s
  training error:		nan
  train batches:		38
  training loss:		nan
  validation error:		nan
  validation batches:		38
  validation loss:		nan
  validation accuracy:		62.55 %
Epoch 5 of 10 took 2.687s
  training error:		nan
  train batches:		38
  training loss:		nan
  validation error:		nan
  validation batches:		38
  validation loss:		nan
  validation acc

In [36]:
# After training, we compute and print the test error:
test_err = 0
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(x_test, y_test, 2, shuffle=False):
    inputs, targets = batch
    err, acc = val_fn(inputs, targets)
    test_err += err
    test_acc += acc
    test_batches += 1
print("Final results:")
print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

Final results:
  test loss:			nan
  test accuracy:		59.00 %
