In [1]:
import time
import numpy as np
import theano
import lasagne
import theano.tensor as T
from helpers.mnist_dataset import load_mnist_rot, test_mnist_rot
from Models import build_st_network_MNIST as network_builder
from helpers.utils import iterate_minibatches

Using gpu device 0: Tesla K40c (CNMeM is enabled with initial size: 30.0% of memory, cuDNN 5105)


Using cuda_convnet (faster)


In [2]:
# Parameters
BATCH_SIZE = 100   # minibatch size used for training, validation and test passes
EPOCH = 100        # number of full passes over the training set
# NOTE(review): DISCRETE is not referenced by any cell visible in this notebook;
# the model-building cell passes a literal `withdisc=False` instead.
DISCRETE = True

# Seed numpy's global RNG so that a fresh "Restart & Run All" is repeatable.
# Lasagne draws from numpy's global RNG by default (see lasagne.random), so this
# also fixes weight initialisation. Minibatch shuffling presumably uses
# np.random as well -- TODO confirm in helpers.utils.iterate_minibatches.
# GPU kernels may still introduce small run-to-run differences.
SEED = 42
np.random.seed(SEED)

In [3]:
# Load Dataset
# Both requests read from the same archive: which='train' is unpacked into
# training and validation arrays, which='test' into the held-out test arrays.
mnist_rot_archive = 'mnist_rotation_new.zip'
X_train, y_train, X_val, y_val = load_mnist_rot(path=mnist_rot_archive, which='train')
Xtst, Ytst = load_mnist_rot(path=mnist_rot_archive, which='test')

['mnist_all_rotation_normalized_float_train_valid.amat', 'mnist_all_rotation_normalized_float_test.amat']
['mnist_all_rotation_normalized_float_train_valid.amat', 'mnist_all_rotation_normalized_float_test.amat']


### Two options
* The network adds a discretization layer if `withdisc=True`.
* Otherwise, the exact regressed parameters are used as the affine transformation matrix.

In [11]:
# Symbolic placeholders: a 4-D input tensor in Theano's configured float type
# and an int32 vector of target class indices.
X = T.tensor4('inputs', dtype=theano.config.floatX)
Y = T.ivector('targets')

# Build the model from the batch size and the shape of the training array.
# NOTE(review): `withdisc` is hard-coded to False here even though the
# parameters cell defines DISCRETE = True -- confirm which variant is intended.
network = network_builder(BATCH_SIZE, X_train.shape, withdisc=False)

In [12]:
# Training objective: mean categorical cross-entropy over the minibatch.
# deterministic=False requests the training-mode forward pass, so any
# stochastic layers the network contains stay active.
# (Original note: this loss is "continuous" -- presumably meaning it is taken
# on the non-discretized output; confirm against the model definition.)
prediction = lasagne.layers.get_output(network, X, deterministic=False)
per_example_loss = lasagne.objectives.categorical_crossentropy(prediction, Y)
loss = T.mean(per_example_loss)

In [13]:
# --- Training graph ---------------------------------------------------------
# Gather every trainable parameter of the network and derive Nesterov-momentum
# updates from the training `loss` expression defined earlier.
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
    loss, params, learning_rate=0.01, momentum=0.9)

# One call to train_fn applies the updates and returns the minibatch loss.
train_fn = theano.function(
    [X, Y], loss, updates=updates, allow_input_downcast=True)

# --- Evaluation graph -------------------------------------------------------
# deterministic=True requests the inference-mode forward pass.
# (Original note: the test prediction is "on discrete values" -- presumably
# only when the network is built with withdisc=True; confirm.)
test_prediction = lasagne.layers.get_output(network, X, deterministic=True)
test_per_example_loss = lasagne.objectives.categorical_crossentropy(test_prediction, Y)
test_loss = T.mean(test_per_example_loss)

# Accuracy: fraction of examples whose arg-max class equals the target.
predicted_labels = T.argmax(test_prediction, axis=1)
test_acc = T.mean(T.eq(predicted_labels, Y))

# val_fn returns [loss, accuracy] for a minibatch and performs no updates.
val_fn = theano.function(
    [X, Y], [test_loss, test_acc], allow_input_downcast=True)

In [None]:
# Training driver: EPOCH passes over the training set, each followed by a
# validation pass and a short progress report.
print("Starting training...")
for epoch in range(EPOCH):
    start_time = time.time()

    # Training pass: minibatches are reshuffled; train_fn applies the updates
    # and returns the loss of the minibatch it was given.
    train_err, train_batches = 0, 0
    for inputs, targets in iterate_minibatches(X_train, y_train, BATCH_SIZE, shuffle=True):
        train_err += train_fn(inputs, targets)
        train_batches += 1

    # Validation pass: fixed order, no parameter updates.
    val_err, val_acc, val_batches = 0, 0, 0
    for inputs, targets in iterate_minibatches(X_val, y_val, BATCH_SIZE, shuffle=False):
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1

    # Report per-batch averages; the elapsed time covers both passes.
    elapsed = time.time() - start_time
    print("Epoch {} of {} took {:.3f}s".format(epoch + 1, EPOCH, elapsed))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))

Starting training...
Epoch 1 of 100 took 2.109s
  training loss:		2.226181
  validation loss:		1.956056
  validation accuracy:		38.05 %
Epoch 2 of 100 took 2.100s
  training loss:		1.780257
  validation loss:		1.392961
  validation accuracy:		52.40 %
Epoch 3 of 100 took 2.097s
  training loss:		1.417059
  validation loss:		1.115826
  validation accuracy:		64.55 %
Epoch 4 of 100 took 2.093s
  training loss:		1.167867
  validation loss:		0.815576
  validation accuracy:		74.60 %
Epoch 5 of 100 took 2.093s
  training loss:		0.977413
  validation loss:		0.713046
  validation accuracy:		78.05 %
Epoch 6 of 100 took 2.093s
  training loss:		0.857039
  validation loss:		0.587980
  validation accuracy:		82.65 %
Epoch 7 of 100 took 2.091s
  training loss:		0.758988
  validation loss:		0.513833
  validation accuracy:		85.60 %
Epoch 8 of 100 took 2.090s
  training loss:		0.684342
  validation loss:		0.511156
  validation accuracy:		85.00 %
Epoch 9 of 100 took 2.091s
  training loss:		0.636994
  val

In [10]:
# After training, compute and print the loss and accuracy on the test set.
#
# The running totals use a `tst_` prefix (matching Xtst / Ytst) on purpose:
# `test_acc` is already the name of the symbolic Theano accuracy expression
# used to compile val_fn, and overwriting it with a number would break any
# cell that later rebuilds a function from it.
tst_err = 0
tst_acc = 0
tst_batches = 0
for inputs, targets in iterate_minibatches(Xtst, Ytst, BATCH_SIZE, shuffle=False):
    # val_fn returns the loss and accuracy of this minibatch; no updates happen.
    err, acc = val_fn(inputs, targets)
    tst_err += err
    tst_acc += acc
    tst_batches += 1

# Report per-batch averages.
print("Final results:")
print("  test loss:\t\t\t{:.6f}".format(tst_err / tst_batches))
print("  test accuracy:\t\t{:.2f} %".format(
    tst_acc / tst_batches * 100))

Final results:
  test loss:			0.377329
  test accuracy:		91.30 %
