In this notebook, I will experiment with different schedulers, seeing which can improve the standard coursework 1 setup.

In [8]:
import numpy
import logging
from mlp.dataset import MNISTDataProvider

# Root logger at INFO level for the progress messages emitted below.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

# The validation and test providers are created with identical batching settings.
_eval_settings = dict(batch_size=10000, max_num_batches=-10, randomize=False)

train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', **_eval_settings)
test_dp = MNISTDataProvider(dset='eval', **_eval_settings)

INFO:root:Initialising data providers...


KeyboardInterrupt: 

In [None]:
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential

# Seeded RNG that is handed to every layer; root logger at INFO level.
rng = numpy.random.RandomState([2015,10,10])
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Hyper-parameters of this run.
nhid = 800
learning_rate = 0.5
max_epochs = 30
cost = CECost()

# One (train stats, validation stats, (test cost, test accuracy)) entry per run.
stats = []
for layer in xrange(1, 2):

    # Reset all three providers before the run.
    for provider in (train_dp, valid_dp, test_dp):
        provider.reset()

    # Model: `layer` sigmoid hidden layers of width nhid, then a softmax output.
    model = MLP(cost=cost)
    model.add_layer(Sigmoid(idim=784, odim=nhid, irange=0.2, rng=rng))
    for i in xrange(1, layer):
        logger.info("Stacking hidden layer (%s)" % str(i+1))
        model.add_layer(Sigmoid(idim=nhid, odim=nhid, irange=0.2, rng=rng))
    model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))

    # SGD driven by the exponential learning-rate scheduler.
    # training_size is passed as 100, the batch size of train_dp.
    lr_scheduler = LearningRateExponential(
        start_rate=learning_rate,
        max_epochs=max_epochs,
        training_size=100)
    optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

    logger.info('Training started...')
    tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

    logger.info('Testing the model on test set:')
    tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
    logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

    test_result = (tst_cost, tst_accuracy)
    stats.append((tr_stats, valid_stats, test_result))

In [None]:
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateNewBob

# Seeded RNG that is handed to every layer; root logger at INFO level.
rng = numpy.random.RandomState([2015,10,10])
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Hyper-parameters of this run.
nhid = 800
learning_rate = 0.8
max_epochs = 30
cost = CECost()

# One (train stats, validation stats, (test cost, test accuracy)) entry per run.
stats = []
for layer in xrange(1, 2):

    # Reset all three providers before the run.
    for provider in (train_dp, valid_dp, test_dp):
        provider.reset()

    # Model: `layer` sigmoid hidden layers of width nhid, then a softmax output.
    model = MLP(cost=cost)
    model.add_layer(Sigmoid(idim=784, odim=nhid, irange=0.2, rng=rng))
    for i in xrange(1, layer):
        logger.info("Stacking hidden layer (%s)" % str(i+1))
        model.add_layer(Sigmoid(idim=nhid, odim=nhid, irange=0.2, rng=rng))
    model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))

    # SGD driven by the NewBob learning-rate scheduler.
    lr_scheduler = LearningRateNewBob(
        start_rate=learning_rate,
        max_epochs=max_epochs,
        min_derror_stop=.05,
        scale_by=0.05,
        zero_rate=0.5,
        patience = 10)
    optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

    logger.info('Training started...')
    tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

    logger.info('Testing the model on test set:')
    tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
    logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

    test_result = (tst_cost, tst_accuracy)
    stats.append((tr_stats, valid_stats, test_result))

In [None]:
#Baseline experiment
%autoreload
import numpy
import logging
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)
from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateNewBob, LearningRateFixed

logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 600
learning_rate = 0.05
max_epochs = 10
cost = CECost()
    
stats = []
layer=2

train_dp.reset()

#define the model
model = MLP(cost=cost)
model.add_layer(Sigmoid(idim=784, odim=600, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=600, odim=500, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=500, odim=300, irange=0.2, rng=rng))
model.add_layer(Softmax(idim=300, odim=10, rng=rng))

lr_scheduler = LearningRateFixed(learning_rate=0.05, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

logger.info('Pre-Training started...')
tr_stats, valid_stats = optimiser.pretrain(model, train_dp, None, 0)
logger.info('Training started...')

train_dp.reset()

tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

In [None]:
#Baseline experiment
%autoreload
import numpy
import logging
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)
from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateNewBob, LearningRateFixed

logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 600
learning_rate = 0.05
max_epochs = 10
cost = CECost()
    
stats = []
layer=2

train_dp.reset()

#define the model
model = MLP(cost=cost)
model.add_layer(Sigmoid(idim=784, odim=600, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=600, odim=500, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=500, odim=300, irange=0.2, rng=rng))
model.add_layer(Softmax(idim=300, odim=10, rng=rng))

lr_scheduler = LearningRateFixed(learning_rate=0.05, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

logger.info('Training started...')

train_dp.reset()

tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

In [None]:
import numpy
import logging
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)

# Iterate the provider once and keep only the first element of each yielded
# pair (the second element is discarded).
inputs = [x for x, t in train_dp]

# Shape of the first collected batch, then the number of batches collected.
# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(inputs[0].shape)
print(len(inputs))

In [None]:
# Number of batches collected into `inputs` by the previous cell.
print(len(inputs))

In [None]:
#Baseline experiment
%autoreload
import numpy
import logging
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)
from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateNewBob, LearningRateFixed

logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 600
learning_rate = 0.05
max_epochs = 10
cost = CECost()
    
stats = []
layer=2

train_dp.reset()

#define the model
model = MLP(cost=cost)
model.add_layer(Sigmoid(idim=784, odim=600, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=600, odim=500, irange=0.2, rng=rng))
model.add_layer(Sigmoid(idim=500, odim=300, irange=0.2, rng=rng))
model.add_layer(Softmax(idim=300, odim=10, rng=rng))

lr_scheduler = LearningRateFixed(learning_rate=0.05, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

logger.info('Pre-Training started...')
tr_stats, valid_stats = optimiser.pretrain_discriminative(model, train_dp, None)
logger.info('Training started...')

train_dp.reset()


tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

In [None]:
#Run experiments with the Fixed, List, NewBob and Exponential schedulers, using a different scheduler on each loop iteration.

In [None]:
# %load Experiments/scheduler.py
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

# Seeded RNG shared by every layer of every model built below.
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 800
max_epochs = 20
cost = CECost()
learning_rate = 0.5

#Build the rate list once so we don't have to rebuild it every time.
#It starts at learning_rate and drops by a constant step each epoch. Each
#entry is computed directly from the start rate, so learning_rate itself is
#never modified and no reset is needed inside the loops.
decrement = (learning_rate/max_epochs)
learningList = [learning_rate - i * decrement for i in xrange(0, max_epochs)]

# Widths of the hidden layers; run `layer` stacks the first layer + 1 of them,
# giving models with 1, 2 and 3 hidden layers.
hidden_dims = [nhid, 600, 400]

# Scheduler selector -> name used in log messages and in the shelf keys.
options = {1: 'Exponential', 2: 'Fixed', 3: 'NewBob', 4: 'List'}

stats = []

#Open file to save to
shelve_p = shelve.open("learningRateExperiments")
try:
    #For each number of layers, build a new model.
    for layer in xrange(0,3):
        #Go through each learning rate scheduler
        for rate in xrange(1, 5):

            train_dp.reset()
            valid_dp.reset()
            test_dp.reset()

            logger.info("Starting " + options[rate])

            #define the model: 784 inputs -> (layer + 1) sigmoid layers -> softmax.
            #The previous if/elif chain could never reach its third-layer branch,
            #so the layer == 2 run silently repeated the two-layer model.
            model = MLP(cost=cost)
            idim = 784
            for odim in hidden_dims[:layer + 1]:
                model.add_layer(Sigmoid(idim=idim, odim=odim, irange=0.2, rng=rng))
                idim = odim

            #Add output layer, fed by the last hidden layer
            model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

            #Set rate scheduler here
            if rate == 1:
                lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
            elif rate == 2:
                lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
            elif rate == 3:
                lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                                  min_derror_stop=.05, scale_by=0.05,
                                                  zero_rate=learning_rate, patience = 10)
            elif rate == 4:
                # Pre-built linearly decaying list of rates.
                lr_scheduler = LearningRateList(learningList,max_epochs=max_epochs)
            else:
                # Never reuse a scheduler left over from a previous iteration.
                raise ValueError("Unsupported scheduler selector: %r" % (rate,))

            optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

            logger.info('Training started...')
            tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

            logger.info('Testing the model on test set:')
            tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
            logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

            #Append stats for all test
            stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

            #Save under a key made of the scheduler name and the layer index
            shelve_p[options[rate]+str(layer)] = (tr_stats, valid_stats, (tst_cost, tst_accuracy))

    logger.info('Saving Data')
finally:
    # Close the shelf even if a run raises or is interrupted, so results
    # already stored are written out.
    shelve_p.close()

Use 20 epochs; values to try: 0.01, 0.05, 0.1, 0.2, 0.5.
Build the list using a decrement of (start_rate / epochs) so that the decrease is constant; otherwise it would return a learning rate of 0.
Always start from 0.5.
Use 1, 2 and 3 hidden layers, as we are interested in how the different schedulers' effects vary with the depth of the network.
Save the exponential decline, as well as the List and NewBob rates.



In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

#Open file to save to
shelve_p = shelve.open("learningRateExperiments")
x = range(0,21)

#Go through training list and output the stats in a graph.
#Make sure you run the above cell first.
train_cont = ['Exponential', 'Fixed', 'NewBob' ,'List']

for i in xrange(0,3):
    for idx,lists in enumerate(train_cont):
        train_error = []
        for inner_list in shelve_p[lists+str(i)][0]:
            train_error.append(100-(inner_list[1]*100.))
        plt.plot(x[4:len(shelve_p[lists+str(i)][0])], train_error[4:len(shelve_p[lists+str(i)][0])], lw=2, label="Scheduler: "+ str(train_cont[idx]))


    plt.title("Error Rates v Train Epochs for different learning schedulers with "+str(i+1)+" hidden layer")
    plt.xlabel('Training Epochs')
    plt.legend
    plt.legend(bbox_to_anchor=(1.6, 0.8))
    plt.ylabel('Error Rate')
    plt.show()

Grid search for a better L1 value - how?!

0.001



In [None]:
# %load Experiments/l1Experiment.py
# %load Experiments/scheduler.py
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

# Seeded RNG shared by every layer of every model built below.
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 800
max_epochs = 20
cost = CECost()
learning_rate = 0.5

#Linearly decaying rate list. It is not consumed in this cell; it is built
#without modifying learning_rate so that no reset is needed inside the loop.
decrement = (learning_rate/max_epochs)
learningList = [learning_rate - i * decrement for i in xrange(0, max_epochs)]

#Regulariser weights: L1 only
l1_weight = 0.001
l2_weight = 0.000
dp_scheduler = None

# Widths of the hidden layers; run `layer` stacks the first layer + 1 of them,
# giving models with 1, 2 and 3 hidden layers.
hidden_dims = [nhid, 600, 400]

stats = []
# Scheduler selector: 1 = Exponential, 3 = NewBob.
rate = 1

#Open file to save to
shelve_r = shelve.open("regExperiments")
try:
    #For each number of layers, build a new model.
    for layer in xrange(0,3):

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting")

        #define the model: 784 inputs -> (layer + 1) sigmoid layers -> softmax.
        #The previous if/elif chain could never reach its third-layer branch,
        #so the layer == 2 run silently repeated the two-layer model.
        model = MLP(cost=cost)
        idim = 784
        for odim in hidden_dims[:layer + 1]:
            model.add_layer(Sigmoid(idim=idim, odim=odim, irange=0.2, rng=rng))
            idim = odim

        #Add output layer, fed by the last hidden layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Set rate scheduler here
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05,
                                              zero_rate=learning_rate, patience = 10)
        else:
            # Never fall through with a stale or undefined scheduler.
            raise ValueError("Unsupported scheduler selector: %r" % (rate,))

        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Append stats for all test
        stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

        #Save under a key made of the experiment tag and the layer index
        shelve_r['l1'+str(layer)] = (tr_stats, valid_stats, (tst_cost, tst_accuracy))

    logger.info('Saving Data')
finally:
    # Close the shelf even if a run raises or is interrupted, so results
    # already stored are written out.
    shelve_r.close()

Seemingly allows for more hidden layers to be added without the added overhead of having to run for more epochs.
If we ran for more epochs it should do better? (Test)

Grid search l2 0.001

In [None]:
# %load Experiments/l2Experiment.py
# %load Experiments/scheduler.py
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

# Seeded RNG shared by every layer of every model built below.
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 800
max_epochs = 20
cost = CECost()
learning_rate = 0.5

#Linearly decaying rate list. It is not consumed in this cell; it is built
#without modifying learning_rate so that no reset is needed inside the loop.
decrement = (learning_rate/max_epochs)
learningList = [learning_rate - i * decrement for i in xrange(0, max_epochs)]

#Regulariser weights: L2 only
l1_weight = 0.000
l2_weight = 0.001
dp_scheduler = None

# Widths of the hidden layers; run `layer` stacks the first layer + 1 of them,
# giving models with 1, 2 and 3 hidden layers.
hidden_dims = [nhid, 600, 400]

stats = []
# Scheduler selector: 1 = Exponential, 3 = NewBob.
rate = 1

#Open file to save to
shelve_r = shelve.open("regExperiments")
try:
    #For each number of layers, build a new model.
    for layer in xrange(0,3):

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model: 784 inputs -> (layer + 1) sigmoid layers -> softmax.
        #The previous if/elif chain could never reach its third-layer branch,
        #so the layer == 2 run silently repeated the two-layer model.
        model = MLP(cost=cost)
        idim = 784
        for odim in hidden_dims[:layer + 1]:
            model.add_layer(Sigmoid(idim=idim, odim=odim, irange=0.2, rng=rng))
            idim = odim

        #Add output layer, fed by the last hidden layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Set rate scheduler here
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05,
                                              zero_rate=learning_rate, patience = 10)
        else:
            # Never fall through with a stale or undefined scheduler.
            raise ValueError("Unsupported scheduler selector: %r" % (rate,))

        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Append stats for all test
        stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

        #Different key per experiment, so sharing one shelf file doesn't matter
        shelve_r['l2'+str(layer)] = (tr_stats, valid_stats, (tst_cost, tst_accuracy))

    logger.info('Saving Data')
finally:
    # Close the shelf even if a run raises or is interrupted, so results
    # already stored are written out.
    shelve_r.close()

Investigate dropout with both normal and annealed. Start at 0.5 and experiment for both.

In [None]:
# %load Experiments/dropNExperiment.py
# %load Experiments/scheduler.py
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutFixed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

# Seeded RNG shared by every layer of every model built below.
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 800
max_epochs = 20
cost = CECost()
learning_rate = 0.5

#Linearly decaying rate list. It is not consumed in this cell; it is built
#without modifying learning_rate so that no reset is needed inside the loop.
decrement = (learning_rate/max_epochs)
learningList = [learning_rate - i * decrement for i in xrange(0, max_epochs)]

#Regulariser weights: no L1/L2 penalty, fixed dropout only
l1_weight = 0.000
l2_weight = 0.000
dp_scheduler = DropoutFixed(0.5, 0.5)

# Widths of the hidden layers; run `layer` stacks the first layer + 1 of them,
# giving models with 1, 2 and 3 hidden layers.
hidden_dims = [nhid, 600, 400]

stats = []
# Scheduler selector: 1 = Exponential, 3 = NewBob.
rate = 1

#Open file to save to
shelve_r = shelve.open("regExperiments")
try:
    #For each number of layers, build a new model.
    for layer in xrange(0,3):

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model: 784 inputs -> (layer + 1) sigmoid layers -> softmax.
        #The previous if/elif chain could never reach its third-layer branch,
        #so the layer == 2 run silently repeated the two-layer model.
        model = MLP(cost=cost)
        idim = 784
        for odim in hidden_dims[:layer + 1]:
            model.add_layer(Sigmoid(idim=idim, odim=odim, irange=0.2, rng=rng))
            idim = odim

        #Add output layer, fed by the last hidden layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Set rate scheduler here
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05,
                                              zero_rate=learning_rate, patience = 10)
        else:
            # Never fall through with a stale or undefined scheduler.
            raise ValueError("Unsupported scheduler selector: %r" % (rate,))

        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Append stats for all test
        stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

        #Different key per experiment, so sharing one shelf file doesn't matter
        shelve_r['dropN'+str(layer)] = (tr_stats, valid_stats, (tst_cost, tst_accuracy))

    logger.info('Saving Data')
finally:
    # Close the shelf even if a run raises or is interrupted, so results
    # already stored are written out.
    shelve_r.close()

In [None]:
# %load Experiments/dropAExperiment.py
# %load Experiments/scheduler.py
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutAnnealed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

# Seeded RNG shared by every layer of every model built below.
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 800
max_epochs = 20
cost = CECost()
learning_rate = 0.5

#Linearly decaying rate list. It is not consumed in this cell; it is built
#without modifying learning_rate so that no reset is needed inside the loop.
decrement = (learning_rate/max_epochs)
learningList = [learning_rate - i * decrement for i in xrange(0, max_epochs)]

#Regulariser weights: no L1/L2 penalty, annealed dropout only
l1_weight = 0.000
l2_weight = 0.000

# Widths of the hidden layers; run `layer` stacks the first layer + 1 of them,
# giving models with 1, 2 and 3 hidden layers.
hidden_dims = [nhid, 600, 400]

stats = []
# Scheduler selector: 1 = Exponential, 3 = NewBob.
rate = 1

#Open file to save to
shelve_r = shelve.open("regExperiments")
try:
    #For each number of layers, build a new model.
    for layer in xrange(0,3):

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        # A fresh dropout scheduler for every model.
        # NOTE(review): presumably DropoutAnnealed keeps annealing state between
        # epochs; sharing one instance would then start later models from
        # already-annealed values - confirm against mlp.schedulers.
        dp_scheduler = DropoutAnnealed(0.5, 0.5, 0.005)

        #define the model: 784 inputs -> (layer + 1) sigmoid layers -> softmax.
        #The previous if/elif chain could never reach its third-layer branch,
        #so the layer == 2 run silently repeated the two-layer model.
        model = MLP(cost=cost)
        idim = 784
        for odim in hidden_dims[:layer + 1]:
            model.add_layer(Sigmoid(idim=idim, odim=odim, irange=0.2, rng=rng))
            idim = odim

        #Add output layer, fed by the last hidden layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Set rate scheduler here
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05,
                                              zero_rate=learning_rate, patience = 10)
        else:
            # Never fall through with a stale or undefined scheduler.
            raise ValueError("Unsupported scheduler selector: %r" % (rate,))

        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Append stats for all test
        stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

        #Different key per experiment, so sharing one shelf file doesn't matter
        shelve_r['dropA'+str(layer)] = (tr_stats, valid_stats, (tst_cost, tst_accuracy))

    logger.info('Saving Data')
finally:
    # Close the shelf even if a run raises or is interrupted, so results
    # already stored are written out.
    shelve_r.close()

In [None]:
# %load Experiments/noDropExp.py
# %load Experiments/scheduler.py
#Baseline experiment

from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateExponential, LearningRateFixed, LearningRateList, LearningRateNewBob, DropoutAnnealed

import numpy
import logging
import shelve
from mlp.dataset import MNISTDataProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MNISTDataProvider(dset='train', batch_size=100, max_num_batches=1000, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)
test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)

# Seeded RNG shared by every layer of every model built below.
rng = numpy.random.RandomState([2015,10,10])

#some hyper-parameters
nhid = 800
max_epochs = 20
cost = CECost()
learning_rate = 0.5

#Linearly decaying rate list. It is not consumed in this cell; it is built
#without modifying learning_rate so that no reset is needed inside the loop.
decrement = (learning_rate/max_epochs)
learningList = [learning_rate - i * decrement for i in xrange(0, max_epochs)]

#Regulariser weights: everything switched off (no L1, no L2, no dropout)
l1_weight = 0.000
l2_weight = 0.000
dp_scheduler = None

# Widths of the hidden layers; run `layer` stacks the first layer + 1 of them,
# giving models with 1, 2 and 3 hidden layers.
hidden_dims = [nhid, 600, 400]

stats = []
# Scheduler selector: 1 = Exponential, 3 = NewBob.
rate = 1

#Open file to save to
shelve_r = shelve.open("regExperiments")
try:
    #For each number of layers, build a new model.
    for layer in xrange(0,3):

        train_dp.reset()
        valid_dp.reset()
        test_dp.reset()

        logger.info("Starting ")

        #define the model: 784 inputs -> (layer + 1) sigmoid layers -> softmax.
        #The previous if/elif chain could never reach its third-layer branch,
        #so the layer == 2 run silently repeated the two-layer model.
        model = MLP(cost=cost)
        idim = 784
        for odim in hidden_dims[:layer + 1]:
            model.add_layer(Sigmoid(idim=idim, odim=odim, irange=0.2, rng=rng))
            idim = odim

        #Add output layer, fed by the last hidden layer
        model.add_layer(Softmax(idim=odim, odim=10, rng=rng))

        #Set rate scheduler here
        if rate == 1:
            lr_scheduler = LearningRateExponential(start_rate=learning_rate, max_epochs=max_epochs, training_size=100)
        elif rate == 3:
            lr_scheduler = LearningRateNewBob(start_rate=learning_rate, max_epochs=max_epochs,
                                              min_derror_stop=.05, scale_by=0.05,
                                              zero_rate=learning_rate, patience = 10)
        else:
            # Never fall through with a stale or undefined scheduler.
            raise ValueError("Unsupported scheduler selector: %r" % (rate,))

        optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                                 dp_scheduler=dp_scheduler,
                                 l1_weight=l1_weight,
                                 l2_weight=l2_weight)

        logger.info('Training started...')
        tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)

        logger.info('Testing the model on test set:')
        tst_cost, tst_accuracy = optimiser.validate(model, test_dp)
        logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))

        #Append stats for all test
        stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))

        #Different key per experiment, so sharing one shelf file doesn't matter
        shelve_r['noDL'+str(layer)] = (tr_stats, valid_stats, (tst_cost, tst_accuracy))

    logger.info('Saving Data')
finally:
    # Close the shelf even if a run raises or is interrupted, so results
    # already stored are written out.
    shelve_r.close()