In [1]:
import time
import numpy as np
import theano
import lasagne
import theano.tensor as T
from helpers.mnist_dataset import load_mnist_rot, test_mnist_rot
from Models import build_st_network_MNIST as network_builder
from helpers.utils import iterate_minibatches

Using lasagne.layers (slower)


In [2]:
# Hyper-parameters and model configuration
BATCH_SIZE = 100  # samples per minibatch handed to iterate_minibatches
EPOCH = 100       # number of full passes over the training set
# NOTE(review): DISCRETE is not referenced by the visible cells; the model cell
# passes withdisc=False literally -- confirm which setting is intended.
DISCRETE = True
# Both tuples are forwarded to the network builder. Presumably they hold the
# initial values of the discretization layer and one "trainable" flag per
# value -- TODO confirm against Models.build_st_network_MNIST.
DISCINITS = (-3.0, 3.0, 50.0)
DISCTRAINABLES = (False,) * 3

In [3]:
# Load the rotated-MNIST splits; both calls read from the same archive.
mnist_rot_archive = 'mnist_rotation_new.zip'
# which='train' returns four values: training and validation inputs/labels.
X_train, y_train, X_val, y_val = load_mnist_rot(
    path=mnist_rot_archive,
    which='train',
)
# which='test' returns two values: held-out inputs/labels.
Xtst, Ytst = load_mnist_rot(
    path=mnist_rot_archive,
    which='test',
)

['mnist_all_rotation_normalized_float_train_valid.amat', 'mnist_all_rotation_normalized_float_test.amat']
['mnist_all_rotation_normalized_float_train_valid.amat', 'mnist_all_rotation_normalized_float_test.amat']


### Two options
* The network adds a discretization layer when `withdisc=True`.
* Otherwise, the exact regressed parameters are used as the affine transformation matrix.

In [4]:
# Symbolic placeholders fed to the compiled Theano functions:
# a 4-D input tensor in Theano's configured float type and an int32 label vector.
X = T.tensor4('inputs', dtype=theano.config.floatX)
Y = T.ivector('targets')

# Build the network from the training-set shape and the discretization settings.
# withdisc=False: no discretization layer is requested here.
network = network_builder(
    X_train.shape,
    DISCINITS,
    DISCTRAINABLES,
    withdisc=False,
)

In [5]:
# Training objective: mean categorical cross-entropy over the minibatch,
# computed from the non-deterministic forward pass (deterministic=False).
# The original note calls this the "continuous" loss.
prediction = lasagne.layers.get_output(network, X, deterministic=False)
train_xent_per_sample = lasagne.objectives.categorical_crossentropy(prediction, Y)
loss = T.mean(train_xent_per_sample)

In [6]:
# --- Training side -----------------------------------------------------------
# Collect every trainable parameter and derive SGD-with-Nesterov-momentum
# updates from the training loss.
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
    loss,
    params,
    learning_rate=0.01,
    momentum=0.9,
)

# --- Evaluation side ---------------------------------------------------------
# Deterministic forward pass used for validation and testing.
# NOTE(review): the original comment said the test prediction is "on discrete
# values", but the network is built with withdisc=False -- confirm.
test_prediction = lasagne.layers.get_output(network, X, deterministic=True)
eval_xent_per_sample = lasagne.objectives.categorical_crossentropy(test_prediction, Y)
test_loss = T.mean(eval_xent_per_sample)
# Accuracy = fraction of samples whose arg-max class equals the target label.
predicted_labels = T.argmax(test_prediction, axis=1)
test_acc = T.mean(T.eq(predicted_labels, Y))

# Returns [mean loss, accuracy] for a batch; performs no parameter updates.
val_fn = theano.function(
    [X, Y],
    [test_loss, test_acc],
    allow_input_downcast=True,
)

# Returns the batch training loss and applies `updates` on every call.
train_fn = theano.function(
    [X, Y],
    loss,
    updates=updates,
    allow_input_downcast=True,
)

In [None]:
# Training driver: EPOCH passes over the training set, each followed by a
# full validation pass and a short per-epoch report.
print("Starting training...")
for epoch in range(EPOCH):
    start_time = time.time()

    # Training pass: minibatches are reshuffled (shuffle=True); each train_fn
    # call applies one parameter update and returns that batch's loss.
    train_err = 0
    train_batches = 0
    for inputs, targets in iterate_minibatches(X_train, y_train, BATCH_SIZE, shuffle=True):
        train_err += train_fn(inputs, targets)
        train_batches += 1

    # Validation pass: fixed order, no updates; accumulate loss and accuracy.
    val_err = 0
    val_acc = 0
    val_batches = 0
    for inputs, targets in iterate_minibatches(X_val, y_val, BATCH_SIZE, shuffle=False):
        batch_err, batch_acc = val_fn(inputs, targets)
        val_err += batch_err
        val_acc += batch_acc
        val_batches += 1

    # Per-epoch report; every figure is an average over the batches seen.
    elapsed = time.time() - start_time
    print("Epoch {} of {} took {:.3f}s".format(epoch + 1, EPOCH, elapsed))
    mean_train_loss = train_err / train_batches
    print("  training loss:\t\t{:.6f}".format(mean_train_loss))
    mean_val_loss = val_err / val_batches
    print("  validation loss:\t\t{:.6f}".format(mean_val_loss))
    mean_val_acc_pct = val_acc / val_batches * 100
    print("  validation accuracy:\t\t{:.2f} %".format(mean_val_acc_pct))

Starting training...
Epoch 1 of 100 took 66.143s
  training loss:		2.212398
  validation loss:		1.890532
  validation accuracy:		35.00 %
Epoch 2 of 100 took 59.708s
  training loss:		1.740009
  validation loss:		1.295692
  validation accuracy:		58.20 %


In [None]:
# Final evaluation on the held-out test split, using the same compiled
# evaluation function as validation (no parameter updates).
# NOTE(review): `test_acc` was bound to the symbolic accuracy expression when
# val_fn was compiled; rebinding it to a number here does not affect the
# already-compiled function, but the name now means something different.
test_err = 0
test_acc = 0
test_batches = 0
for inputs, targets in iterate_minibatches(Xtst, Ytst, BATCH_SIZE, shuffle=False):
    batch_err, batch_acc = val_fn(inputs, targets)
    test_err += batch_err
    test_acc += batch_acc
    test_batches += 1

# Report batch-averaged loss and accuracy.
print("Final results:")
mean_test_loss = test_err / test_batches
print("  test loss:\t\t\t{:.6f}".format(mean_test_loss))
mean_test_acc_pct = test_acc / test_batches * 100
print("  test accuracy:\t\t{:.2f} %".format(mean_test_acc_pct))