L1 Experiments

In [24]:
# %load Experiments/l1Experiment.py
# %load Experiments/scheduler.py
#L1 regularisation experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Use the root logger and make sure INFO messages are emitted
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST data providers: randomised batches of 100 for training,
#non-randomised batches of 10000 for the validation and evaluation sets
train_dp = MNISTDataProvider(
    dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(
    dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(
    dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed for the generator handed to the layers
rng = numpy.random.RandomState([2015, 10, 10])

#Hyper-parameters shared by every run in this cell
nhid, max_epochs = 800, 50
cost = CECost()
learning_rate = 0.5

#Regulariser settings for this experiment: only l1_weight is non-zero, no dropout scheduler
l1_weight, l2_weight = 0.001, 0.000
dp_scheduler = None

#Linearly decreasing list of max_epochs rates, starting at the initial rate and
#shrinking by learning_rate/max_epochs per entry.
#NOTE: the loop decrements learning_rate itself, so it is no longer 0.5 afterwards.
decrement = learning_rate / max_epochs
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate -= decrement

#Shelf that stores the results of the regularisation experiments
shelve_r = shelve.open("regExperiments")

#In-memory copy of the results, and the scheduler selector
#(1 = exponential, 2 = fixed, 3 = NewBob)
stats, rate = [], 2

#For each architecture (layer = 0, 1): build a fresh model, train it with the L1 penalty,
#evaluate it on the test set and store the statistics under the key 'l1F<layer>'.
#The loop runs inside try/finally so the shelf is always closed (and the results of
#every completed run reach the disk) even if training is interrupted or fails.
try:
    for layer in xrange(0, 2):
        #Restore the start rate: the list-building loop above decrements learning_rate
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            #One hidden sigmoid layer with 800 units
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            #Four hidden sigmoid layers with 300 units each
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Select the learning rate scheduler
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience=10)
        else:
            #Fail loudly instead of silently reusing a scheduler left over from another cell
            raise ValueError('Unknown learning rate scheduler id: %r' % (rate,))

        #define the optimiser, here stochastic gradient descent with the regulariser settings
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Keep the statistics in memory and in the shelf
        results = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(results)
        shelve_r['l1F'+str(layer)] = results
        #Push this run to disk straight away so a later interruption cannot lose it
        shelve_r.sync()
finally:
    logger.info('Saving Data')
    shelve_r.close()

INFO:root:Initialising data providers...


KeyboardInterrupt: 

L2 Experiments

In [7]:
# %load Experiments/l2Experiment.py
# %load Experiments/scheduler.py
#L2 regularisation experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Use the root logger and make sure INFO messages are emitted
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST data providers: randomised batches of 100 for training,
#non-randomised batches of 10000 for the validation and evaluation sets
train_dp = MNISTDataProvider(
    dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(
    dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(
    dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed for the generator handed to the layers
rng = numpy.random.RandomState([2015, 10, 10])

#Hyper-parameters shared by every run in this cell
nhid, max_epochs = 800, 50
cost = CECost()
learning_rate = 0.5

#Regulariser settings for this experiment: only l2_weight is non-zero, no dropout scheduler
l1_weight, l2_weight = 0.000, 0.001
dp_scheduler = None

#Linearly decreasing list of max_epochs rates, starting at the initial rate and
#shrinking by learning_rate/max_epochs per entry.
#NOTE: the loop decrements learning_rate itself, so it is no longer 0.5 afterwards.
decrement = learning_rate / max_epochs
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate -= decrement

#Shelf that stores the results of the regularisation experiments
shelve_r = shelve.open("regExperiments")

#In-memory copy of the results, and the scheduler selector
#(1 = exponential, 2 = fixed, 3 = NewBob)
stats, rate = [], 2

#For each architecture (layer = 0, 1): build a fresh model, train it with the L2 penalty,
#evaluate it on the test set and store the statistics under the key 'l2F<layer>'.
#The loop runs inside try/finally so the shelf is always closed (and the results of
#every completed run reach the disk) even if training is interrupted or fails.
try:
    for layer in xrange(0, 2):
        #Restore the start rate: the list-building loop above decrements learning_rate
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            #One hidden sigmoid layer with 800 units
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            #Four hidden sigmoid layers with 300 units each
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Select the learning rate scheduler
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience=10)
        else:
            #Fail loudly instead of silently reusing a scheduler left over from another cell
            raise ValueError('Unknown learning rate scheduler id: %r' % (rate,))

        #define the optimiser, here stochastic gradient descent with the regulariser settings
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Keep the statistics in memory and in the shelf; the key is unique to this
        #experiment, so sharing one shelf file with the other experiments is safe
        results = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(results)
        shelve_r['l2F'+str(layer)] = results
        #Push this run to disk straight away so a later interruption cannot lose it
        shelve_r.sync()
finally:
    logger.info('Saving Data')
    shelve_r.close()

INFO:root:Initialising data providers...
INFO:root:Starting 
INFO:root:Training started...
INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 6.759. Accuracy is 9.28%
INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 6.743. Accuracy is 9.84%
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 5.062. Accuracy is 85.84%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 4.562. Accuracy is 92.61%
INFO:mlp.optimisers:Epoch 1: Took 63 seconds. Training speed 876 pps. Validation speed 1661 pps.
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 4.605. Accuracy is 92.07%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 4.563. Accuracy is 93.39%
INFO:mlp.optimisers:Epoch 2: Took 63 seconds. Training speed 869 pps. Validation speed 1818 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 4.592. Accuracy is 93.55%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 4.562. Accuracy is 94.69%
INFO:mlp.optimisers:Epoch 3: Took 61 seconds. Training sp

Dropout fixed Experiments

In [8]:
# %load Experiments/dropNExperiment.py
# %load Experiments/scheduler.py
#Fixed dropout experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutFixed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Use the root logger and make sure INFO messages are emitted
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST data providers: randomised batches of 100 for training,
#non-randomised batches of 10000 for the validation and evaluation sets
train_dp = MNISTDataProvider(
    dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(
    dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(
    dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed for the generator handed to the layers
rng = numpy.random.RandomState([2015, 10, 10])

#Hyper-parameters shared by every run in this cell
nhid, max_epochs = 800, 50
cost = CECost()
learning_rate = 0.5

#Regulariser settings for this experiment: both weight penalties are zero and a
#single DropoutFixed(0.5, 0.5) scheduler is shared by every run in this cell
l1_weight, l2_weight = 0.000, 0.000
dp_scheduler = DropoutFixed(0.5, 0.5)

#Linearly decreasing list of max_epochs rates, starting at the initial rate and
#shrinking by learning_rate/max_epochs per entry.
#NOTE: the loop decrements learning_rate itself, so it is no longer 0.5 afterwards.
decrement = learning_rate / max_epochs
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate -= decrement

#Shelf that stores the results of the regularisation experiments
shelve_r = shelve.open("regExperiments")

#In-memory copy of the results, and the scheduler selector
#(1 = exponential, 2 = fixed, 3 = NewBob)
stats, rate = [], 2

#For each architecture (layer = 0, 1): build a fresh model, train it with fixed dropout,
#evaluate it on the test set and store the statistics under the key 'dropNF<layer>'.
#The loop runs inside try/finally so the shelf is always closed (and the results of
#every completed run reach the disk) even if training is interrupted or fails.
try:
    for layer in xrange(0, 2):
        #Restore the start rate: the list-building loop above decrements learning_rate
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            #One hidden sigmoid layer with 800 units
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            #Four hidden sigmoid layers with 300 units each
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Select the learning rate scheduler
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience=10)
        else:
            #Fail loudly instead of silently reusing a scheduler left over from another cell
            raise ValueError('Unknown learning rate scheduler id: %r' % (rate,))

        #define the optimiser, here stochastic gradient descent with the regulariser settings
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Keep the statistics in memory and in the shelf; the key is unique to this
        #experiment, so sharing one shelf file with the other experiments is safe
        results = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(results)
        shelve_r['dropNF'+str(layer)] = results
        #Push this run to disk straight away so a later interruption cannot lose it
        shelve_r.sync()
finally:
    logger.info('Saving Data')
    shelve_r.close()

INFO:root:Initialising data providers...
INFO:root:Starting 
INFO:root:Training started...
INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.570. Accuracy is 9.28%
INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.554. Accuracy is 9.84%
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 0.969. Accuracy is 73.68%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.329. Accuracy is 90.79%
INFO:mlp.optimisers:Epoch 1: Took 66 seconds. Training speed 824 pps. Validation speed 1851 pps.
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.543. Accuracy is 82.75%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.291. Accuracy is 91.78%
INFO:mlp.optimisers:Epoch 2: Took 68 seconds. Training speed 808 pps. Validation speed 1754 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.487. Accuracy is 84.79%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.255. Accuracy is 93.42%
INFO:mlp.optimisers:Epoch 3: Took 68 seconds. Training sp

Dropout Annealed Experiments

In [9]:
# %load Experiments/dropAExperiment.py
# %load Experiments/scheduler.py
#Annealed dropout experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutAnnealed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Use the root logger and make sure INFO messages are emitted
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST data providers: randomised batches of 100 for training,
#non-randomised batches of 10000 for the validation and evaluation sets
train_dp = MNISTDataProvider(
    dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(
    dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(
    dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed for the generator handed to the layers
rng = numpy.random.RandomState([2015, 10, 10])

#Hyper-parameters shared by every run in this cell
nhid, max_epochs = 800, 50
cost = CECost()
learning_rate = 0.5

#Both weight penalties are zero; the dropout scheduler is created per run inside the loop
l1_weight, l2_weight = 0.000, 0.000

#Linearly decreasing list of max_epochs rates, starting at the initial rate and
#shrinking by learning_rate/max_epochs per entry.
#NOTE: the loop decrements learning_rate itself, so it is no longer 0.5 afterwards.
decrement = learning_rate / max_epochs
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate -= decrement

#Shelf that stores the results of the regularisation experiments
shelve_r = shelve.open("regExperiments")

#In-memory copy of the results, and the scheduler selector
#(1 = exponential, 2 = fixed, 3 = NewBob)
stats, rate = [], 2

#For each architecture (layer = 0, 1): build a fresh model, train it with annealed dropout,
#evaluate it on the test set and store the statistics under the key 'dropAF<layer>'.
#The loop runs inside try/finally so the shelf is always closed (and the results of
#every completed run reach the disk) even if training is interrupted or fails.
try:
    for layer in xrange(0, 2):
        #New scheduler for every model (presumably DropoutAnnealed carries state
        #from epoch to epoch - TODO confirm against mlp.schedulers)
        dp_scheduler = DropoutAnnealed(0.5, 0.5, 0.005)

        #Restore the start rate: the list-building loop above decrements learning_rate
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            #One hidden sigmoid layer with 800 units
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            #Four hidden sigmoid layers with 300 units each
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Select the learning rate scheduler
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience=10)
        else:
            #Fail loudly instead of silently reusing a scheduler left over from another cell
            raise ValueError('Unknown learning rate scheduler id: %r' % (rate,))

        #define the optimiser, here stochastic gradient descent with the regulariser settings
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Keep the statistics in memory and in the shelf.
        #The key used to be 'dropNF', which is the key of the fixed dropout experiment,
        #so this run overwrote those results in the shared shelf; 'dropAF' keeps them apart.
        #NOTE(review): any reader that expected the annealed results under 'dropNF' must use 'dropAF'.
        results = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(results)
        shelve_r['dropAF'+str(layer)] = results
        #Push this run to disk straight away so a later interruption cannot lose it
        shelve_r.sync()
finally:
    logger.info('Saving Data')
    shelve_r.close()

INFO:root:Initialising data providers...
INFO:root:Starting 
INFO:root:Training started...
INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.570. Accuracy is 9.28%
INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.554. Accuracy is 9.84%
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 0.943. Accuracy is 73.85%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.322. Accuracy is 91.16%
INFO:mlp.optimisers:Epoch 1: Took 67 seconds. Training speed 807 pps. Validation speed 1817 pps.
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.535. Accuracy is 83.08%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.290. Accuracy is 91.65%
INFO:mlp.optimisers:Epoch 2: Took 66 seconds. Training speed 820 pps. Validation speed 1844 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.470. Accuracy is 85.38%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.255. Accuracy is 93.08%
INFO:mlp.optimisers:Epoch 3: Took 67 seconds. Training sp

Normal Experiment

In [11]:
# %load Experiments/noDropExp.py
# %load Experiments/scheduler.py
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutAnnealed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Use the root logger and make sure INFO messages are emitted
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST data providers: randomised batches of 100 for training,
#non-randomised batches of 10000 for the validation and evaluation sets
train_dp = MNISTDataProvider(
    dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(
    dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(
    dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed for the generator handed to the layers
rng = numpy.random.RandomState([2015, 10, 10])

#Hyper-parameters shared by every run in this cell
nhid, max_epochs = 800, 50
cost = CECost()
learning_rate = 0.5

#Baseline: both weight penalties are zero and there is no dropout scheduler
l1_weight, l2_weight = 0.000, 0.000
dp_scheduler = None

#Linearly decreasing list of max_epochs rates, starting at the initial rate and
#shrinking by learning_rate/max_epochs per entry.
#NOTE: the loop decrements learning_rate itself, so it is no longer 0.5 afterwards.
decrement = learning_rate / max_epochs
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate -= decrement

#Shelf that stores the results of the regularisation experiments
shelve_r = shelve.open("regExperiments")

#In-memory copy of the results, and the scheduler selector
#(1 = exponential, 2 = fixed, 3 = NewBob)
stats, rate = [], 2

#For each architecture (layer = 0, 1): build a fresh model, train it without any regulariser,
#evaluate it on the test set and store the statistics under the key 'noDLF<layer>'.
#The loop runs inside try/finally so the shelf is always closed (and the results of
#every completed run reach the disk) even if training is interrupted or fails.
try:
    for layer in xrange(0, 2):
        #Restore the start rate: the list-building loop above decrements learning_rate
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            #One hidden sigmoid layer with 800 units
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            #Four hidden sigmoid layers with 300 units each
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Select the learning rate scheduler
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience=10)
        else:
            #Fail loudly instead of silently reusing a scheduler left over from another cell
            raise ValueError('Unknown learning rate scheduler id: %r' % (rate,))

        #define the optimiser, here stochastic gradient descent with the regulariser settings
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Keep the statistics in memory and in the shelf; the key is unique to this
        #experiment, so sharing one shelf file with the other experiments is safe
        results = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(results)
        shelve_r['noDLF'+str(layer)] = results
        #Push this run to disk straight away so a later interruption cannot lose it
        shelve_r.sync()
finally:
    logger.info('Saving Data')
    shelve_r.close()

INFO:root:Initialising data providers...
INFO:root:Starting 
INFO:root:Training started...


KeyboardInterrupt: 

In [6]:
import shelve
#Open file to save to
shelve_r = shelve.open("regExperiments")

print shelve_r['noDLF1'][2][1]

0.9803


Reg Exp Annealed Dropout for 10 Epochs

In [17]:
# %load Experiments/dropAExperiment10E.py
# %load Experiments/scheduler.py
#Annealed dropout experiment (annealing step 0.05)

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutAnnealed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Use the root logger and make sure INFO messages are emitted
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST data providers: randomised batches of 100 for training,
#non-randomised batches of 10000 for the validation and evaluation sets
train_dp = MNISTDataProvider(
    dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(
    dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(
    dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed for the generator handed to the layers
rng = numpy.random.RandomState([2015, 10, 10])

#Hyper-parameters shared by every run in this cell
nhid, max_epochs = 800, 50
cost = CECost()
learning_rate = 0.5

#Both weight penalties are zero; the dropout scheduler is created per run inside the loop
l1_weight, l2_weight = 0.000, 0.000

#Linearly decreasing list of max_epochs rates, starting at the initial rate and
#shrinking by learning_rate/max_epochs per entry.
#NOTE: the loop decrements learning_rate itself, so it is no longer 0.5 afterwards.
decrement = learning_rate / max_epochs
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate -= decrement

#Shelf that stores the results of the regularisation experiments
shelve_r = shelve.open("regExperiments")

#In-memory copy of the results, and the scheduler selector
#(1 = exponential, 2 = fixed, 3 = NewBob)
stats, rate = [], 2

#For each architecture (layer = 0, 1): build a fresh model, train it with annealed dropout
#(annealing step 0.05), evaluate it on the test set and store the statistics under the
#key 'dropA10EF<layer>'.
#The loop runs inside try/finally so the shelf is always closed (and the results of
#every completed run reach the disk) even if training is interrupted or fails.
try:
    for layer in xrange(0, 2):
        #New scheduler for every model (presumably DropoutAnnealed carries state
        #from epoch to epoch - TODO confirm against mlp.schedulers)
        dp_scheduler = DropoutAnnealed(0.5, 0.5, 0.05)

        #Restore the start rate: the list-building loop above decrements learning_rate
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            #One hidden sigmoid layer with 800 units
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            #Four hidden sigmoid layers with 300 units each
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Select the learning rate scheduler
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience=10)
        else:
            #Fail loudly instead of silently reusing a scheduler left over from another cell
            raise ValueError('Unknown learning rate scheduler id: %r' % (rate,))

        #define the optimiser, here stochastic gradient descent with the regulariser settings
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Keep the statistics in memory and in the shelf; the key is unique to this
        #experiment, so sharing one shelf file with the other experiments is safe
        results = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(results)
        shelve_r['dropA10EF'+str(layer)] = results
        #Push this run to disk straight away so a later interruption cannot lose it
        shelve_r.sync()
finally:
    logger.info('Saving Data')
    shelve_r.close()

INFO:root:Initialising data providers...
INFO:root:Starting 
INFO:root:Training started...
INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.570. Accuracy is 9.28%
INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.554. Accuracy is 9.84%
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 0.910. Accuracy is 75.77%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.310. Accuracy is 91.36%
INFO:mlp.optimisers:Epoch 1: Took 70 seconds. Training speed 779 pps. Validation speed 1784 pps.
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.464. Accuracy is 85.64%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.273. Accuracy is 92.02%
INFO:mlp.optimisers:Epoch 2: Took 70 seconds. Training speed 772 pps. Validation speed 1834 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.379. Accuracy is 88.45%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.229. Accuracy is 93.75%
INFO:mlp.optimisers:Epoch 3: Took 71 seconds. Training sp

KeyboardInterrupt: 

Reg Exp Annealed Dropout for 25 Epochs

In [None]:
# %load Experiments/dropAExperiment25E.py
# %load Experiments/scheduler.py
#Annealed dropout experiment (annealing step 0.02)

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutAnnealed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Use the root logger and make sure INFO messages are emitted
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST data providers: randomised batches of 100 for training,
#non-randomised batches of 10000 for the validation and evaluation sets
train_dp = MNISTDataProvider(
    dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(
    dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(
    dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed for the generator handed to the layers
rng = numpy.random.RandomState([2015, 10, 10])

#Hyper-parameters shared by every run in this cell
nhid, max_epochs = 800, 50
cost = CECost()
learning_rate = 0.5

#Both weight penalties are zero; the dropout scheduler is created per run inside the loop
l1_weight, l2_weight = 0.000, 0.000

#Linearly decreasing list of max_epochs rates, starting at the initial rate and
#shrinking by learning_rate/max_epochs per entry.
#NOTE: the loop decrements learning_rate itself, so it is no longer 0.5 afterwards.
decrement = learning_rate / max_epochs
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate -= decrement

#Shelf that stores the results of the regularisation experiments
shelve_r = shelve.open("regExperiments")

#In-memory copy of the results, and the scheduler selector
#(1 = exponential, 2 = fixed, 3 = NewBob)
stats, rate = [], 2

#For each architecture (layer = 0, 1): build a fresh model, train it with annealed dropout
#(annealing step 0.02), evaluate it on the test set and store the statistics under the
#key 'dropA25EF<layer>'.
#The loop runs inside try/finally so the shelf is always closed (and the results of
#every completed run reach the disk) even if training is interrupted or fails.
try:
    for layer in xrange(0, 2):
        #New scheduler for every model (presumably DropoutAnnealed carries state
        #from epoch to epoch - TODO confirm against mlp.schedulers)
        dp_scheduler = DropoutAnnealed(0.5, 0.5, 0.02)

        #Restore the start rate: the list-building loop above decrements learning_rate
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            #One hidden sigmoid layer with 800 units
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            #Four hidden sigmoid layers with 300 units each
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Select the learning rate scheduler
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience=10)
        else:
            #Fail loudly instead of silently reusing a scheduler left over from another cell
            raise ValueError('Unknown learning rate scheduler id: %r' % (rate,))

        #define the optimiser, here stochastic gradient descent with the regulariser settings
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Keep the statistics in memory and in the shelf; the key is unique to this
        #experiment, so sharing one shelf file with the other experiments is safe
        results = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(results)
        shelve_r['dropA25EF'+str(layer)] = results
        #Push this run to disk straight away so a later interruption cannot lose it
        shelve_r.sync()
finally:
    logger.info('Saving Data')
    shelve_r.close()

Reg Exp Annealed Dropout for 40 Epochs

In [None]:
# %load Experiments/dropAExperiment40E.py
# %load Experiments/scheduler.py
#Annealed dropout experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutAnnealed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Route progress messages through the root logger at INFO level.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST providers: shuffled mini-batches of 100 for training, unshuffled
#batches of 10000 for the validation and evaluation sets.
train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed so the generator handed to the layers is reproducible.
rng = numpy.random.RandomState([2015,10,10])

#Hyper-parameters.
#NOTE(review): the cell heading says 40 epochs while max_epochs is 50 -
#presumably 40 refers to the dropout annealing schedule; confirm.
nhid = 800
max_epochs = 50
cost = CECost()
learning_rate = 0.5
decrement = learning_rate / max_epochs

#Regulariser weights (both switched off in this experiment)
l1_weight = 0.000
l2_weight = 0.000

#Linearly decaying learning rates, one entry per epoch, beginning with the
#start rate. learning_rate itself is consumed while the list is built; the
#training loop assigns it again before use.
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate = learning_rate - decrement

#Shelve file that receives the results of every run
shelve_r = shelve.open("regExperiments")

#Collected (train, valid, test) statistics of all runs
stats = []
#Scheduler selector: 1 = exponential, 2 = fixed, 3 = NewBob
rate = 2

#Train and evaluate one model per configuration:
#  layer == 0 -> a single 800-unit sigmoid hidden layer
#  layer == 1 -> four 300-unit sigmoid hidden layers
#Each run's (train stats, valid stats, (test cost, test accuracy)) tuple is
#appended to `stats` and stored in the shelve under 'dropA40EF<layer>'.
#The shelve is closed in a finally clause so results already written are
#flushed even when a later run fails.
try:
    for layer in xrange(0,2):
        #Fresh dropout scheduler per model so no annealing state is carried
        #over between runs. NOTE(review): the arguments are presumably
        #(input keep probability, hidden keep probability, per-epoch
        #increment) - confirm against mlp.schedulers.DropoutAnnealed.
        dp_scheduler = DropoutAnnealed(0.5, 0.5, 0.0125)
        #Set here in case we alter it in a layer experiment
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
        else:
            #Fail loudly instead of reusing odim from a previous iteration
            raise ValueError('No model configuration defined for layer=%r' % (layer,))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Set rate scheduler here
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience = 10)
        else:
            #Previously lr_scheduler stayed unbound and failed later with a NameError
            raise ValueError('Unknown learning rate scheduler selector rate=%r' % (rate,))

        #define the optimiser, here stochastic gradient descent driven by the
        #chosen learning rate and dropout schedulers
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Append stats for all test
        run_result = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(run_result)

        #Every experiment uses its own key prefix, so sharing one shelve
        #file between experiments does not overwrite other results
        shelve_r['dropA40EF'+str(layer)] = run_result

    logger.info('Saving Data')
finally:
    shelve_r.close()

Regularisation Experiment: Annealed Dropout for 50 Epochs

In [None]:
# %load Experiments/dropAExperiment50E.py
# %load Experiments/scheduler.py
#Annealed dropout experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutAnnealed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Route progress messages through the root logger at INFO level.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST providers: shuffled mini-batches of 100 for training, unshuffled
#batches of 10000 for the validation and evaluation sets.
train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed so the generator handed to the layers is reproducible.
rng = numpy.random.RandomState([2015,10,10])

#Hyper-parameters
nhid = 800
max_epochs = 50
cost = CECost()
learning_rate = 0.5
decrement = learning_rate / max_epochs

#Regulariser weights (both switched off in this experiment)
l1_weight = 0.000
l2_weight = 0.000

#Linearly decaying learning rates, one entry per epoch, beginning with the
#start rate. learning_rate itself is consumed while the list is built; the
#training loop assigns it again before use.
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate = learning_rate - decrement

#Shelve file that receives the results of every run
shelve_r = shelve.open("regExperiments")

#Collected (train, valid, test) statistics of all runs
stats = []
#Scheduler selector: 1 = exponential, 2 = fixed, 3 = NewBob
rate = 2

#Train and evaluate one model per configuration:
#  layer == 0 -> a single 800-unit sigmoid hidden layer
#  layer == 1 -> four 300-unit sigmoid hidden layers
#Each run's (train stats, valid stats, (test cost, test accuracy)) tuple is
#appended to `stats` and stored in the shelve under 'dropA50EF<layer>'.
#The shelve is closed in a finally clause so results already written are
#flushed even when a later run fails.
try:
    for layer in xrange(0,2):

        #Fresh dropout scheduler per model so no annealing state is carried
        #over between runs. NOTE(review): the arguments are presumably
        #(input keep probability, hidden keep probability, per-epoch
        #increment) - confirm against mlp.schedulers.DropoutAnnealed.
        dp_scheduler = DropoutAnnealed(0.5, 0.5, 0.01)

        #Set here in case we alter it in a layer experiment
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
        else:
            #Fail loudly instead of reusing odim from a previous iteration
            raise ValueError('No model configuration defined for layer=%r' % (layer,))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Set rate scheduler here
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience = 10)
        else:
            #Previously lr_scheduler stayed unbound and failed later with a NameError
            raise ValueError('Unknown learning rate scheduler selector rate=%r' % (rate,))

        #define the optimiser, here stochastic gradient descent driven by the
        #chosen learning rate and dropout schedulers
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Append stats for all test
        run_result = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(run_result)

        #Every experiment uses its own key prefix, so sharing one shelve
        #file between experiments does not overwrite other results
        shelve_r['dropA50EF'+str(layer)] = run_result

    logger.info('Saving Data')
finally:
    shelve_r.close()

Regularisation Experiment: Annealed Dropout for 100 Epochs

In [None]:
# %load Experiments/dropAExperiment100E.py
# %load Experiments/scheduler.py
#Annealed dropout experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutAnnealed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

#Route progress messages through the root logger at INFO level.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

#MNIST providers: shuffled mini-batches of 100 for training, unshuffled
#batches of 10000 for the validation and evaluation sets.
train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

#Fixed seed so the generator handed to the layers is reproducible.
rng = numpy.random.RandomState([2015,10,10])

#Hyper-parameters.
#NOTE(review): the cell heading says 100 epochs while max_epochs is 50 -
#confirm whether training is really meant to stop after 50 epochs.
nhid = 800
max_epochs = 50
cost = CECost()
learning_rate = 0.5
decrement = learning_rate / max_epochs

#Regulariser weights (both switched off in this experiment)
l1_weight = 0.000
l2_weight = 0.000

#Linearly decaying learning rates, one entry per epoch, beginning with the
#start rate. learning_rate itself is consumed while the list is built; the
#training loop assigns it again before use.
learningList = []
for i in xrange(max_epochs):
    learningList.append(learning_rate)
    learning_rate = learning_rate - decrement

#Shelve file that receives the results of every run
shelve_r = shelve.open("regExperiments")

#Collected (train, valid, test) statistics of all runs
stats = []
#Scheduler selector: 1 = exponential, 2 = fixed, 3 = NewBob
rate = 2

#Train and evaluate one model per configuration:
#  layer == 0 -> a single 800-unit sigmoid hidden layer
#  layer == 1 -> four 300-unit sigmoid hidden layers
#Each run's (train stats, valid stats, (test cost, test accuracy)) tuple is
#appended to `stats` and stored in the shelve under 'dropA100EF<layer>'.
#The shelve is closed in a finally clause so results already written are
#flushed even when a later run fails.
try:
    for layer in xrange(0,2):

        #Fresh dropout scheduler per model so no annealing state is carried
        #over between runs. NOTE(review): the arguments are presumably
        #(input keep probability, hidden keep probability, per-epoch
        #increment) - confirm against mlp.schedulers.DropoutAnnealed.
        dp_scheduler = DropoutAnnealed(0.5, 0.5, 0.005)

        #Set here in case we alter it in a layer experiment
        learning_rate = 0.5

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model
        model = MLP(cost=cost)

        if layer == 0:
            odim = 800
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
        elif layer == 1:
            #The loop only yields 0 and 1, so the former `layer == 3` test
            #never matched: the second run got no hidden layers and a softmax
            #layer sized from the stale odim of the first run.
            odim = 300
            model.add_layer(Sigmoid(idim=784, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
            model.add_layer(Sigmoid(idim=odim, odim=odim, irange=0.2, rng=rng))
        else:
            #Fail loudly instead of reusing odim from a previous iteration
            raise ValueError('No model configuration defined for layer=%r' % (layer,))

        #Add output layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Set rate scheduler here
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 2:
            lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05, zero_rate=learning_rate, patience = 10)
        else:
            #Previously lr_scheduler stayed unbound and failed later with a NameError
            raise ValueError('Unknown learning rate scheduler selector rate=%r' % (rate,))

        #define the optimiser, here stochastic gradient descent driven by the
        #chosen learning rate and dropout schedulers
        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Append stats for all test
        run_result = (tr_stats, valid_stats, (tst_cost, tst_accuracy))
        stats.append(run_result)

        #Every experiment uses its own key prefix, so sharing one shelve
        #file between experiments does not overwrite other results
        shelve_r['dropA100EF'+str(layer)] = run_result

    logger.info('Saving Data')
finally:
    shelve_r.close()