In this notebook, I will experiment with different learning rate schedulers to see which of them can improve on the standard coursework 1 setup.

In [None]:
# Shared setup: configure the root logger and build the three MNIST data
# providers (train_dp, valid_dp, test_dp) that the experiment cells reuse.
import numpy
import logging
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

# Training provider: batches of 100, at most 1000 batches, randomize=True.
train_dp = MNISTDataProvider(
    dset='train',
    batch_size=100,
    max_num_batches=1000,
    randomize=True,
)

# Validation / evaluation providers: batches of 10000, randomize=False.
# NOTE(review): a negative max_num_batches presumably means "no limit" --
# confirm against MNISTDataProvider.
valid_dp = MNISTDataProvider(
    dset='valid',
    batch_size=10000,
    max_num_batches=-10,
    randomize=False,
)
test_dp = MNISTDataProvider(
    dset='eval',
    batch_size=10000,
    max_num_batches=-10,
    randomize=False,
)

In [None]:
# Exponential-schedule experiment: train a sigmoid MLP with SGD whose learning
# rate is driven by LearningRateExponential, then score it on the test set.
# Relies on train_dp / valid_dp / test_dp, numpy and logging from the setup cell.

from mlp.layers import MLP, Linear, Sigmoid, Softmax  # layer types
from mlp.optimisers import SGDOptimiser  # the optimiser

from mlp.costs import CECost  # cost optimised during training
from mlp.schedulers import LearningRateExponential

logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015, 10, 10])

# hyper-parameters
nhid = 800
learning_rate = 0.5
max_epochs = 30
cost = CECost()

# one (training stats, validation stats, (test cost, test accuracy)) per run
stats = []
for layer in xrange(1, 2):  # xrange(1, 2) -> only the 1-hidden-layer model runs

    # rewind every provider before the run
    for provider in (train_dp, valid_dp, test_dp):
        provider.reset()

    # model: input sigmoid layer, (layer - 1) extra hidden layers, softmax output
    model = MLP(cost=cost)
    model.add_layer(Sigmoid(idim=784, odim=nhid, irange=0.2, rng=rng))
    for i in xrange(1, layer):
        logger.info("Stacking hidden layer (%s)", i + 1)
        model.add_layer(Sigmoid(idim=nhid, odim=nhid, irange=0.2, rng=rng))
    model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))

    # SGD with an exponential learning rate schedule.
    # training_size is set to the mini-batch size (100).
    # NOTE(review): confirm this is the value LearningRateExponential expects.
    lr_scheduler = LearningRateExponential(
        start_rate=learning_rate,
        max_epochs=max_epochs,
        training_size=100,
    )
    optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

    logger.info('Training started...')
    tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

    logger.info('Testing the model on test set:')
    tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
    logger.info(
        'MNIST test set accuracy is %.2f %%, cost (%s) is %.3f',
        tst_accuracy*100., cost.get_name(), tst_cost,
    )

    stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

In [None]:
# NewBob-schedule experiment: train a sigmoid MLP with SGD whose learning rate
# is driven by LearningRateNewBob, then score it on the test set.
# Relies on train_dp / valid_dp / test_dp, numpy and logging from the setup cell.

from mlp.layers import MLP, Linear, Sigmoid, Softmax  # layer types
from mlp.optimisers import SGDOptimiser  # the optimiser

from mlp.costs import CECost  # cost optimised during training
from mlp.schedulers import LearningRateNewBob

logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015, 10, 10])

# hyper-parameters
nhid = 800
learning_rate = 0.8
max_epochs = 30
cost = CECost()

# one (training stats, validation stats, (test cost, test accuracy)) per run
stats = []
for layer in xrange(1, 2):  # xrange(1, 2) -> only the 1-hidden-layer model runs

    # rewind every provider before the run
    for provider in (train_dp, valid_dp, test_dp):
        provider.reset()

    # model: input sigmoid layer, (layer - 1) extra hidden layers, softmax output
    model = MLP(cost=cost)
    model.add_layer(Sigmoid(idim=784, odim=nhid, irange=0.2, rng=rng))
    for i in xrange(1, layer):
        logger.info("Stacking hidden layer (%s)", i + 1)
        model.add_layer(Sigmoid(idim=nhid, odim=nhid, irange=0.2, rng=rng))
    model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))

    # SGD with the NewBob learning rate schedule
    lr_scheduler = LearningRateNewBob(
        start_rate=learning_rate,
        max_epochs=max_epochs,
        min_derror_stop=.05,
        scale_by=0.05,
        zero_rate=0.5,
        patience=10,
    )
    optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

    logger.info('Training started...')
    tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

    logger.info('Testing the model on test set:')
    tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
    logger.info(
        'MNIST test set accuracy is %.2f %%, cost (%s) is %.3f',
        tst_accuracy*100., cost.get_name(), tst_cost,
    )

    stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

In [None]:
#Baseline experiment
%autoreload
import numpy
import logging
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)
from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateNewBob, LearningRateFixed

logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 600
learning_rate = 0.05
max_epochs = 10
cost = CECost()
    
stats = []
layer=2

train_dp.reset()

#define the model
model = MLP(cost=cost)
model.add_layer(Sigmoid(idim=784, odim=600, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=600, odim=500, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=500, odim=300, irange=0.2, rng=rng))
model.add_layer(Softmax(idim=300, odim=10, rng=rng))

lr_scheduler = LearningRateFixed(learning_rate=0.05, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

logger.info('Pre-Training started...')
tr_stats, valid_stats = optimiser.pretrain(model, train_dp, None, 0)
logger.info('Training started...')

train_dp.reset()

tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

In [None]:
#Baseline experiment
%autoreload
import numpy
import logging
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)
from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateNewBob, LearningRateFixed

logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 600
learning_rate = 0.05
max_epochs = 10
cost = CECost()
    
stats = []
layer=2

train_dp.reset()

#define the model
model = MLP(cost=cost)
model.add_layer(Sigmoid(idim=784, odim=600, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=600, odim=500, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=500, odim=300, irange=0.2, rng=rng))
model.add_layer(Softmax(idim=300, odim=10, rng=rng))

lr_scheduler = LearningRateFixed(learning_rate=0.05, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

logger.info('Training started...')

train_dp.reset()

tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

In [None]:
# Sanity check of the training provider: iterate over it once, keep the first
# element of every yielded (x, t) pair, then show one batch's shape and the
# number of batches produced.
import numpy
import logging
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(
    dset='train',
    batch_size=10,
    max_num_batches=100,
    randomize=True,
)
i = 0

# Python 2 list comprehension: x and t stay bound afterwards, as with a for loop
inputs = [x for x, t in train_dp]

print(inputs[0].shape)
print(len(inputs))

In [None]:
# Number of batches collected into `inputs` by the preceding cell.
print(len(inputs))

In [1]:
#Baseline experiment
%autoreload
import numpy
import logging
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)
from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateNewBob, LearningRateFixed

logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 600
learning_rate = 0.05
max_epochs = 10
cost = CECost()
    
stats = []
layer=2

train_dp.reset()

#define the model
model = MLP(cost=cost)
model.add_layer(Sigmoid(idim=784, odim=600, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=600, odim=500, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=500, odim=300, irange=0.2, rng=rng))
model.add_layer(Softmax(idim=300, odim=10, rng=rng))

lr_scheduler = LearningRateFixed(learning_rate=0.05, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

logger.info('Pre-Training started...')
tr_stats, valid_stats = optimiser.pretrain_discriminative(model, train_dp, None)
logger.info('Training started...')

train_dp.reset()


tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

ERROR: Line magic function `%autoreload` not found.
INFO:root:Initialising data providers...
INFO:root:Pre-Training started...
INFO:mlp.optimisers:Max epochs 10
INFO:mlp.optimisers:epochs 0
INFO:mlp.optimisers:Running
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 2.194. Accuracy is 23.90%
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 1.386. Accuracy is 56.30%
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 1.002. Accuracy is 71.20%
INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.803. Accuracy is 78.60%
INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.684. Accuracy is 82.20%
INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.603. Accuracy is 84.20%
INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.544. Accuracy is 86.40%
INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.498. Accuracy is 87.10%
INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.461. Accuracy is 87.80%
INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.430. Accuracy is 88.50%
INFO:mlp.op

In [None]:
# Pull the source of the `Linear` symbol from layers.py into this cell.
# NOTE(review): the path is relative to the kernel's working directory --
# confirm that layers.py resolves from there.
%load -s Linear layers.py




In [34]:
#Run experiments with the fixed, list, NewBob and exponential schedulers, using a different scheduler on each loop iteration

UsageError: option -s requires argument ( allowed: "yns:r:" )

In [41]:
# Scheduler comparison: train the same single-hidden-layer MLP once per
# learning rate scheduler (exponential, fixed, NewBob, list), evaluate each run
# on the test set and persist its statistics in a shelf keyed by scheduler name.

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

# NOTE(review): rng is created once and shared by all four runs, so each model
# presumably starts from different initial weights; re-seed inside the loop if
# identical initialisations are wanted for a like-for-like comparison.
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 800
max_epochs = 5
cost = CECost()

#Open file to save to
shelve_p = shelve.open("learningRateExperiments")

stats = []
try:
    #Go through each learning rate scheduler in turn
    for rate in xrange(1, 5):

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        #define the model
        model = MLP(cost=cost)
        model.add_layer(Sigmoid(idim=784, odim=nhid, irange=0.2, rng=rng))
        model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))

        #Pick the scheduler for this run together with the shelf key its
        #results are stored under, so the two cannot drift apart
        if rate == 1:
            result_key = 'exponential'
            lr_scheduler = LearningRateExponential(start_rate=0.5, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            result_key = 'fixed'
            lr_scheduler = LearningRateFixed(learning_rate=0.5, max_epochs=max_epochs)
        elif rate == 3:
            result_key = 'newbob'
            lr_scheduler = LearningRateNewBob(start_rate=0.5, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=0.5, patience=10)
        elif rate == 4:
            result_key = 'list'
            # NOTE(review): the list holds 11 rates but max_epochs is 5, so
            # presumably only the first few are ever used -- confirm.
            lr_scheduler = LearningRateList([0.5,0.45,0.4,0.35,0.3,0.25,0.2,0.15,0.1,0.05,0.005],max_epochs)

        # define the optimiser: stochastic gradient descent driven by the
        # scheduler chosen above
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Keep the stats in memory and write them to the shelf straight away,
        #so completed runs survive a failure in a later run
        run_stats = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(run_stats)
        shelve_p[result_key] = run_stats
finally:
    #Always release the shelf, even if a run raises part-way through
    shelve_p.close()

INFO:root:Initialising data providers...
INFO:root:Training started...
INFO:mlp.optimisers:Epoch 1: Training cost (ce) for initial model is 2.570. Accuracy is 9.28%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) for initial model is 2.554. Accuracy is 9.84%
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.761. Accuracy is 85.84%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.262. Accuracy is 92.61%
INFO:mlp.schedulers:0.495024916875
INFO:mlp.optimisers:Epoch 2: Took 30 seconds. Training speed 1770 pps. Validation speed 5826 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.269. Accuracy is 92.06%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.228. Accuracy is 93.41%
INFO:mlp.schedulers:0.490099336653
INFO:mlp.optimisers:Epoch 3: Took 30 seconds. Training speed 1792 pps. Validation speed 5960 pps.
INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.222. Accuracy is 93.56%
INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.192. Accuracy is 94.68%
INFO:

In [42]:
# The experiment cell closes its shelf handle when it finishes, and operations
# on a closed shelf raise ValueError, so reopen the shelf (read-only) to
# inspect the stored results for the exponential scheduler.
import shelve

shelve_p = shelve.open("learningRateExperiments", flag='r')
try:
    print(shelve_p['exponential'])
finally:
    shelve_p.close()

([(2.5698940660905487, 0.092780000000000057), (0.76107186078361544, 0.85839999999999927), (0.26941209019168572, 0.92061999999999999), (0.22168318644900226, 0.93561999999999956), (0.18934968575919447, 0.94454000000000027), (0.16270716969798743, 0.95259999999999989)], [(2.5535497258761031, 0.098400000000000001), (0.26169362217099285, 0.92610000000000003), (0.22757469120098908, 0.93410000000000004), (0.19216553403288314, 0.94679999999999997), (0.17132681558478102, 0.95399999999999996), (0.15021251853962736, 0.95979999999999999)], (0.15078776552708043, 0.95620000000000005))


/Users/josephyearsley/Documents/Edinburgh/Machine Learning Practical/mlpractical/repo-mlp
