# Machine Learning Practical: Coursework 1

**Release date: Monday 10th October 2016**  
**Due date: 16:00 Thursday 27th October 2016**

Instructions for the coursework are [available as a PDF here](http://www.inf.ed.ac.uk/teaching/courses/mlp/2016/coursework_1.pdf).

## Part 1: Learning rate schedules

In [1]:
# The code below sets up the data providers, random number
# generator and logger objects needed for training runs. Because
# loading the data from file takes a little while, you will
# generally not want to reload the data providers on every
# training run. If you wish to reset their state you should
# instead use the .reset() method of the data providers.
import numpy as np
import logging
import matplotlib.pyplot as plt
from mlp.data_providers import MNISTDataProvider
%matplotlib inline
# Send INFO-level (and above) records from the root logger through a single
# StreamHandler. The handler list is assigned outright rather than appended
# to, so re-running this cell does not stack up duplicate handlers. With no
# stream argument, StreamHandler writes to sys.stderr.
logger = logging.getLogger()
logger.handlers = [logging.StreamHandler()]
logger.setLevel(logging.INFO)

# Fixed seed for the random number generator used by the rest of the notebook.
seed = 10102016
rng = np.random.RandomState(seed)

# MNIST data providers for the training and validation sets. Both are handed
# the same generator object, in this order (train first, then valid).
train_data = MNISTDataProvider(
    'train', batch_size=50, rng=rng)
valid_data = MNISTDataProvider(
    'valid', batch_size=50, rng=rng)

In [2]:
def _plot_stat_curves(stats, keys, stats_interval, stat_names, fig_path):
    """Plot the named statistics against epoch number and save the figure.

    Args:
        stats: 2D array of recorded statistics, read as
            ``stats[1:, keys[name]]``.
        keys: Mapping from statistic name to its column index in `stats`.
        stats_interval: Factor that scales row indices of `stats` onto the
            'Epoch number' x-axis.
        stat_names: Names (keys of `keys`) of the statistics to draw.
        fig_path: Path the figure is written to with ``Figure.savefig``.

    Returns:
        Tuple ``(fig, ax)`` with the created matplotlib figure and axes.
    """
    fig = plt.figure(figsize=(8, 4))
    ax = fig.add_subplot(111)
    # Row 0 of `stats` is not plotted, so the x-axis starts at
    # `stats_interval` (presumably row 0 is the pre-training evaluation;
    # confirm against Optimiser.train).
    epochs = np.arange(1, stats.shape[0]) * stats_interval
    for name in stat_names:
        ax.plot(epochs, stats[1:, keys[name]], label=name)
    ax.legend(loc=0)
    ax.set_xlabel('Epoch number')
    fig.savefig(fig_path)
    return fig, ax


def train_model_and_plot_stats(
        schedulers, model, error, learning_rule, train_data, valid_data,
        num_epochs, stats_interval,
        error_fig_path='0.0001,m-e.pdf', acc_fig_path='0.0001,m-a.pdf'):
    """Train `model` with an Optimiser and plot error and accuracy curves.

    Args:
        schedulers: Scheduler objects forwarded to the Optimiser.
        model: Model forwarded to the Optimiser; it is trained in place.
        error: Error object forwarded to the Optimiser.
        learning_rule: Learning rule forwarded to the Optimiser.
        train_data: Training-set data provider.
        valid_data: Validation-set data provider.
        num_epochs: Number of epochs passed to ``Optimiser.train``.
        stats_interval: Interval passed to ``Optimiser.train``; also used to
            scale the x-axis of the plots.
        error_fig_path: File the error plot is saved to. The default keeps the
            file name that used to be hard-coded; pass a distinct path per
            run to avoid overwriting earlier figures.
        acc_fig_path: File the accuracy plot is saved to (same remark).

    Returns:
        Tuple ``(stats, keys, run_time, fig_1, ax_1, fig_2, ax_2)`` where the
        first three items are the values returned by ``Optimiser.train`` and
        the rest are the error figure/axes and the accuracy figure/axes.
    """
    # As well as the error, monitor classification accuracy: the proportion
    # of examples whose arg-max output equals the arg-max of the target
    # (assumes targets are one-of-k encoded along the last axis - TODO confirm).
    data_monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

    # Use the supplied objects to initialise a new Optimiser instance.
    optimiser = Optimiser(
        model, error, learning_rule, train_data,
        valid_data, data_monitors, schedulers)

    # Run the optimiser for `num_epochs` epochs, with `stats_interval`
    # controlling how often statistics are recorded.
    stats, keys, run_time = optimiser.train(
        num_epochs=num_epochs, stats_interval=stats_interval)

    # Training / validation error over the course of training.
    fig_1, ax_1 = _plot_stat_curves(
        stats, keys, stats_interval,
        ['error(train)', 'error(valid)'], error_fig_path)

    # Training / validation accuracy over the course of training.
    fig_2, ax_2 = _plot_stat_curves(
        stats, keys, stats_interval,
        ['acc(train)', 'acc(valid)'], acc_fig_path)

    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [None]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import GradientDescentLearningRule 
from mlp.schedulers import ConstantLearningRateScheduler,ReciprocalLearningRateScheduler
from mlp.optimisers import Optimiser

import copy

# Layer sizes: input, output and hidden dimensions of the network.
input_dim, output_dim, hidden_dim = 784, 10, 100

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Training-run settings shared by both experiments below.
num_epochs = 100
stats_interval = 5
learning_rate = 0.01
decay_rate = 100

# Untrained network: two sigmoid hidden layers followed by an affine output
# layer. It is kept as a pristine template; each run trains a copy of it.
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
])

error = CrossEntropySoftmaxError()

Constant_schedulers = [ConstantLearningRateScheduler(learning_rate)]
Reciprocal_schedulers = [ReciprocalLearningRateScheduler(learning_rate, decay_rate)]
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

# Each run receives its own deep copy of the untrained model. Passing the
# same `model` object to both calls would make the second run start from the
# weights already trained by the first, so the two schedules could not be
# compared like for like.
# NOTE(review): both runs still save their figures under the same default
# file names inside train_model_and_plot_stats, so the second run overwrites
# the first run's PDFs.
train_model_and_plot_stats(
    Constant_schedulers, copy.deepcopy(model), error, learning_rule,
    train_data, valid_data, num_epochs, stats_interval)

train_model_and_plot_stats(
    Reciprocal_schedulers, copy.deepcopy(model), error, learning_rule,
    train_data, valid_data, num_epochs, stats_interval)

## Part 2: Momentum learning rule

In [10]:
# The code below sets up the data providers, random number
# generator and logger objects needed for training runs. Because
# loading the data from file takes a little while, you will
# generally not want to reload the data providers on every
# training run. If you wish to reset their state you should
# instead use the .reset() method of the data providers.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider

# Root logger: INFO level, with exactly one StreamHandler (the handler list
# is replaced, not extended, so re-running the cell adds no duplicates).
# A StreamHandler created without a stream argument writes to sys.stderr.
logger = logging.getLogger()
logger.handlers = [logging.StreamHandler()]
logger.setLevel(logging.INFO)

# Seeded random number generator.
seed = 10102016
rng = np.random.RandomState(seed)

# Training and validation data providers for MNIST, created in that order
# and sharing the one generator above.
train_data = MNISTDataProvider(
    'train', batch_size=50, rng=rng)
valid_data = MNISTDataProvider(
    'valid', batch_size=50, rng=rng)

In [None]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.schedulers import ConstantLearningRateScheduler, MomentumLearningRateScheduler
from mlp.learning_rules import GradientDescentLearningRule, MomentumLearningRule

# Training-run settings.
num_epochs = 100
stats_interval = 5
learning_rate = 0.01

# Momentum settings. The original annotations gave the intended ranges as
# "[0,1]" for mom_coeff, "t>>1" for Tau and "0<<r<<t" for Gamma.
# NOTE(review): mom_coeff sits at the upper end of its annotated range and
# Tau = 1 does not match its "t>>1" annotation - confirm these are intended.
mom_coeff = 1
Tau = 1
Gamma = 0.5

# Layer sizes of the network.
input_dim = 784
output_dim = 10
hidden_dim = 100

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Two sigmoid hidden layers followed by an affine output layer.
model = MultipleLayerModel(
    [
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
        SigmoidLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        SigmoidLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
    ]
)

error = CrossEntropySoftmaxError()

Constant_schedulers = [ConstantLearningRateScheduler(learning_rate)]
# NOTE(review): Momentum_schedulers is built here but the training call below
# is given Constant_schedulers - confirm which one was meant to be used.
Momentum_schedulers = [
    MomentumLearningRateScheduler(learning_rate, mom_coeff, Tau, Gamma)
]
learning_rule = MomentumLearningRule(
    mom_coeff=mom_coeff, learning_rate=learning_rate)

# Train with the momentum learning rule and plot the resulting curves.
train_model_and_plot_stats(
    Constant_schedulers, model, error, learning_rule,
    train_data, valid_data, num_epochs, stats_interval)

## Part 3: Adaptive learning rules

In [17]:
# The code below sets up the data providers, random number
# generator and logger objects needed for training runs. Because
# loading the data from file takes a little while, you will
# generally not want to reload the data providers on every
# training run. If you wish to reset their state you should
# instead use the .reset() method of the data providers.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider

# Configure the root logger to emit INFO-level records through one
# StreamHandler. Overwriting `handlers` keeps repeated runs of this cell from
# accumulating handlers. The handler's default stream is sys.stderr.
logger = logging.getLogger()
logger.handlers = [logging.StreamHandler()]
logger.setLevel(logging.INFO)

# Random number generator with a fixed seed.
seed = 10102016
rng = np.random.RandomState(seed)

# MNIST data providers (training set first, then validation set), both given
# the same generator object.
train_data = MNISTDataProvider(
    'train', batch_size=50, rng=rng)
valid_data = MNISTDataProvider(
    'valid', batch_size=50, rng=rng)

In [26]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit

# Layer sizes of the network.
input_dim = 784
output_dim = 10
hidden_dim = 100

# Parameter initialisers: Glorot-uniform weights drawn from the shared `rng`,
# and biases set to the constant 0.
weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# Two sigmoid hidden layers followed by an affine output layer. The layers
# are constructed in network order.
model = MultipleLayerModel(
    [
        AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
        SigmoidLayer(),
        AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
        SigmoidLayer(),
        AffineLayer(hidden_dim, output_dim, weights_init, biases_init),
    ]
)

error = CrossEntropySoftmaxError()

In [None]:
from mlp.schedulers import ConstantLearningRateScheduler, MomentumLearningRateScheduler
from mlp.learning_rules import AdaGradLearningRule, MomentumLearningRule, RMSPropLearningRule
import copy

# Momentum settings. The original annotations gave the intended ranges as
# "[0,1]" for mom_coeff, "t>>1" for Tau and "0<<r<<t" for Gamma.
mom_coeff = 0
Tau = 1
Gamma = 0.5

# Training-run settings shared by both experiments below.
learning_rate = 0.01
num_epochs = 100
stats_interval = 5

# NOTE(review): Momentum_schedulers is built but not passed to either
# training call in this cell - confirm whether it is still needed.
Momentum_schedulers = [MomentumLearningRateScheduler(learning_rate, mom_coeff, Tau, Gamma)]

AdaGrad_learning_rule = AdaGradLearningRule(learning_rate=learning_rate)
RMSProp_Learning_rule = RMSPropLearningRule(learning_rate=learning_rate)

# Both runs are given an empty scheduler list.
schedulers = []

# Each run trains its own deep copy of `model`. Passing the same object to
# both calls would make the RMSProp run start from the weights already
# trained by the AdaGrad run, so the two rules could not be compared like
# for like.
# NOTE(review): both runs still save their figures under the same default
# file names inside train_model_and_plot_stats, so the second run overwrites
# the first run's PDFs.
train_model_and_plot_stats(
    schedulers, copy.deepcopy(model), error, AdaGrad_learning_rule,
    train_data, valid_data, num_epochs, stats_interval)

train_model_and_plot_stats(
    schedulers, copy.deepcopy(model), error, RMSProp_Learning_rule,
    train_data, valid_data, num_epochs, stats_interval)