In this notebook, I experiment with different learning-rate schedulers to see which of them improve on the standard coursework 1 setup.

In [2]:
import numpy
import logging
from mlp.dataset import MNISTDataProvider

# Use the root logger at INFO level so progress messages show up in the notebook.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

# Training provider: randomised, batch_size=10, max_num_batches=100.
train_dp = MNISTDataProvider(
    dset='train',
    batch_size=10,
    max_num_batches=100,
    randomize=True,
)

# Validation and test providers share every setting except the dataset name,
# so they are built (in that order) from a single call template.
# NOTE(review): max_num_batches=-10 is presumably a "no limit" sentinel —
# confirm against MNISTDataProvider.
valid_dp, test_dp = (
    MNISTDataProvider(dset=dset_name, batch_size=10000, max_num_batches=-10, randomize=False)
    for dset_name in ('valid', 'eval')
)

INFO:root:Initialising data providers...


In [4]:
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential

# Baseline run: a sigmoid MLP trained with SGD whose learning rate is driven
# by the LearningRateExponential scheduler.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Seeded RandomState handed to every layer (presumably for weight
# initialisation) so repeated runs start from the same state.
rng = numpy.random.RandomState([2015,10,10])

# some hyper-parameters
nhid = 800            # output size of each hidden layer
learning_rate = 0.5   # passed to the scheduler as start_rate
max_epochs = 30
cost = CECost()

# One entry per trained model: (tr_stats, valid_stats, (tst_cost, tst_accuracy)).
stats = []
# `layer` is the number of hidden layers. range(1, 2) yields only 1, so a
# single one-hidden-layer model is trained; widen the range to compare depths.
for layer in range(1, 2):

    # Reset all data providers before training a new model.
    train_dp.reset()
    valid_dp.reset()
    test_dp.reset()

    # define the model: 784 inputs -> `layer` sigmoid hidden layers -> 10 softmax outputs
    model = MLP(cost=cost)
    model.add_layer(Sigmoid(idim=784, odim=nhid, irange=0.2, rng=rng))
    for i in range(1, layer):
        # Arguments are passed to the logger so formatting happens only if the record is emitted.
        logger.info("Stacking hidden layer (%s)", i + 1)
        model.add_layer(Sigmoid(idim=nhid, odim=nhid, irange=0.2, rng=rng))
    model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))

    # define the optimiser, here stochastic gradient descent whose learning
    # rate comes from an exponential scheduler starting at learning_rate
    # NOTE(review): confirm training_size=10 is the value LearningRateExponential expects.
    lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=10)
    optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

    logger.info('Training started...')
    tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

    logger.info('Testing the model on test set:')
    tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
    logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f',
                tst_accuracy*100., cost.get_name(), tst_cost)

    stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

INFO:root:Training started...
INFO:mlp.optimisers:Epoch 1: Training cost (ce) for initial model is 2.624. Accuracy is 8.60%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) for initial model is 2.554. Accuracy is 9.84%
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 2.888. Accuracy is 57.30%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.656. Accuracy is 79.20%
INFO:mlp.optimisers:Epoch 2: Took 4 seconds. Training speed 470 pps. Validation speed 5833 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.421. Accuracy is 88.30%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.428. Accuracy is 87.70%
INFO:mlp.optimisers:Epoch 3: Took 4 seconds. Training speed 476 pps. Validation speed 5935 pps.
INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.332. Accuracy is 90.90%
INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.392. Accuracy is 88.35%
INFO:mlp.optimisers:Epoch 4: Took 4 seconds. Training speed 477 pps. Validation speed 5918 pps.
INFO:mlp.optimisers:Epoc