# lasagne
* lasagne is a library for neural network building and training
* it's a low-level library with almost seamless integration with theano

For a demo we shall solve the same digit recognition problem, but at a different scale
* images are now 28x28
* 10 different digits
* 50k samples

In [1]:
import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt
import sys
import os

%matplotlib inline

In [2]:
if sys.version_info[0] == 2:
    from urllib import urlretrieve
else:
    from urllib.request import urlretrieve

def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    print("Downloading %s" % filename)
    urlretrieve(source + filename, filename)

# We then define functions for loading MNIST images and labels.
# For convenience, they also download the requested files if needed.
import gzip

def load_mnist_images(filename):
    if not os.path.exists(filename):
        download(filename)
    # Read the inputs in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    # The inputs are vectors now, we reshape them to monochrome 2D images,
    # following the shape convention: (examples, channels, rows, columns)
    data = data.reshape(-1, 1, 28, 28)
    # The inputs come as bytes, we convert them to float32 in range [0,1].
    # (Actually to range [0, 255/256], for compatibility to the version
    # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
    return data / np.float32(256)

def load_mnist_labels(filename):
    if not os.path.exists(filename):
        download(filename)
    # Read the labels in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=8)
    # The labels are vectors of integers now, that's exactly what we want.
    return data

# We can now download and read the training and test set images and labels.
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

# We reserve the last 10000 training examples for validation.
X_train, X_val = X_train[:-10000], X_train[-10000:]
y_train, y_val = y_train[:-10000], y_train[-10000:]


print(X_train.shape,y_train.shape)

(50000, 1, 28, 28) (50000,)


In [3]:
input_X = T.tensor4("X")

#input dimention (None means "Arbitrary" and only works at  the first axes [samples])
input_shape = [None,1,28,28]

target_y = T.vector("target Y integer",dtype='int32')

Defining network architecture

In [4]:
import lasagne
from lasagne.layers import *

l_inp = InputLayer(shape = input_shape,input_var=input_X)
l1 = Conv2DLayer(l_inp, 16, (5,5))
l2 = Pool2DLayer(l1, (2,2))
l3 = Conv2DLayer(l2, 32, (3,3))
l4 = Conv2DLayer(l3, 32, (3,3))
l5 = DenseLayer(l4,num_units=100,nonlinearity = lasagne.nonlinearities.elu)
l6 = DropoutLayer(l5,p=.5)
l7 = DenseLayer(l6,num_units=150,nonlinearity = lasagne.nonlinearities.tanh)
l8 = DropoutLayer(l7,p=.5)
l_out = DenseLayer(l8,num_units = 10,nonlinearity=lasagne.nonlinearities.softmax)

In [5]:
#network prediction (theano-transformation)
y_predicted = lasagne.layers.get_output(l_out)

In [6]:
#all network weights (shared variables)
all_weights = lasagne.layers.get_all_params(l_out)
print(all_weights)

[W, b, W, b, W, b, W, b, W, b, W, b]


### Than you could simply
* define loss function manually
* compute error gradient over all weights
* define updates
* But that's a whole lot of work and life's short
  * not to mention life's too short to wait for SGD to converge

Instead, we shall use Lasagne builtins

In [7]:
#Mean categorical crossentropy as a loss function - similar to logistic loss but for multiclass targets
loss = lasagne.objectives.categorical_crossentropy(y_predicted,target_y).mean()

#prediction accuracy
accuracy = lasagne.objectives.categorical_accuracy(y_predicted,target_y).mean()

#This function computes gradient AND composes weight updates just like you did earlier
updates_sgd = lasagne.updates.momentum(loss, all_weights,learning_rate=0.01,momentum=0.9)

In [8]:
#function that computes loss and updates weights
train_fun = theano.function([input_X,target_y],[loss,accuracy],updates= updates_sgd)

#function that just computes accuracy
accuracy_fun = theano.function([input_X,target_y],accuracy)

### That's all, now let's train it!
* We got a lot of data, so it's recommended that you use SGD
* So let's implement a function that splits the training sample into minibatches

In [9]:
# An auxilary function that returns mini-batches for neural network training

#Parameters
# inputs - a tensor of images with shape (many, 1, 28, 28), e.g. X_train
# outputs - a vector of answers for corresponding images e.g. Y_train
#batch_size - a single number - the intended size of each batches

def iterate_minibatches(inputs, targets, batchsize):
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        yield inputs[excerpt], targets[excerpt]

# Training loop

In [10]:
import time

num_epochs = 1000 #amount of passes through the data

batch_size = 500 #number of samples processed at each function call

for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_acc = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train,batch_size):
        inputs, targets = batch
        train_err_batch, train_acc_batch= train_fun(inputs, targets)
        train_err += train_err_batch
        train_acc += train_acc_batch
        train_batches += 1

    # And a full pass over the validation data:
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, batch_size):
        inputs, targets = batch
        val_acc += accuracy_fun(inputs, targets)
        val_batches += 1

    
    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))

    print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
    print("  train accuracy:\t\t{:.2f} %".format(
        train_acc / train_batches * 100))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))

Epoch 1 of 1000 took 56.073s
  training loss (in-iteration):		1.175829
  train accuracy:		60.90 %
  validation accuracy:		86.68 %
Epoch 2 of 1000 took 56.549s
  training loss (in-iteration):		0.330097
  train accuracy:		89.97 %
  validation accuracy:		93.16 %
Epoch 3 of 1000 took 59.482s
  training loss (in-iteration):		0.209299
  train accuracy:		93.75 %
  validation accuracy:		95.06 %
Epoch 4 of 1000 took 61.319s
  training loss (in-iteration):		0.163782
  train accuracy:		95.12 %
  validation accuracy:		96.51 %
Epoch 5 of 1000 took 53.885s
  training loss (in-iteration):		0.137390
  train accuracy:		95.93 %
  validation accuracy:		96.68 %
Epoch 6 of 1000 took 57.361s
  training loss (in-iteration):		0.120215
  train accuracy:		96.44 %
  validation accuracy:		97.17 %
Epoch 7 of 1000 took 55.489s
  training loss (in-iteration):		0.106614
  train accuracy:		96.88 %
  validation accuracy:		97.06 %
Epoch 8 of 1000 took 59.009s
  training loss (in-iteration):		0.096161
  train accuracy:		

KeyboardInterrupt: 

In [11]:
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500):
    inputs, targets = batch
    acc = accuracy_fun(inputs, targets)
    test_acc += acc
    test_batches += 1
print("Final results:")
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

if test_acc / test_batches * 100 > 99:
    print ("Achievement unlocked: 80lvl Warlock!")
else:
    print ("We need more magic!")
    
# Unfortunately I can't train on GPU (I have ATI RADEON graphic card), and training takes a lot of time, the convolution
# net architecture I used above is a prototype of LeNet, which after 500 iterations is said to give an accuracy around
# 99.2%

Final results:
  test accuracy:		99.03 %
Achievement unlocked: 80lvl Warlock!


# Now improve it!

* Moar layers!
* Moar units!
* Different nonlinearities!