In [1]:
import numpy as np
import matplotlib.pyplot as plt
import logging
import cPickle as pickle
%matplotlib inline
plt.style.use('ggplot')
%load_ext autoreload
%autoreload 2

logger = logging.getLogger()
logger.setLevel("INFO") 
logger.handlers = [logging.StreamHandler()] 
seed = 20161121
rng = np.random.RandomState(seed)

In [11]:
from mlp.models import MultipleLayerModel
from mlp.layers import AffineLayer, ConvolutionalLayer, MaxPoolingLayer, ReshapeLayer
from mlp.layers import SigmoidLayer, TanhLayer, ReluLayer, DropoutLayer
from mlp.data_providers import MNISTDataProvider, MNISTCNNAutoencoderDataProvider, MNISTCNNDataProvider
from mlp.data_providers import MNISTCNNDenoisingAutoencoderDataProvider
from mlp.errors import CNNSumOfSquaredDiffsError, CrossEntropySoftmaxError
from mlp.learning_rules import GradientDescentLearningRule, MomentumLearningRule
from mlp.schedulers import ConstantLearningRateScheduler, ExponentialLearningRateScheduler
from mlp.penalties import L1Penalty, L2Penalty
from mlp.initialisers import GlorotUniformInit, ConstantInit, UniformInit
from mlp.optimisers import Optimiser
from mlp.settings import Setting
from mlp.experiments import Experiment

In [12]:
kernels_init = UniformInit(-0.01,0.01,rng=rng)
weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

learning_rule = GradientDescentLearningRule(learning_rate=0.1)
error = CrossEntropySoftmaxError()
# ae_error = CNNSumOfSquaredDiffsError()

In [17]:
train_set = MNISTDataProvider(which_set='train', batch_size=10, max_num_batches=-1, rng=rng)
valid_set = MNISTDataProvider(which_set='valid', batch_size=10, max_num_batches=-1, rng=rng)

In [22]:

layers = [
    AffineLayer(input_dim=784, output_dim=100, weights_initialiser=weights_init, weights_penalty=L2Penalty(0.0001)),
    TanhLayer(),
    AffineLayer(input_dim=100, output_dim=100, weights_initialiser=weights_init, weights_penalty=L2Penalty(0.0001)),
    TanhLayer(),
    AffineLayer(input_dim=100, output_dim=10, weights_initialiser=weights_init, weights_penalty=L2Penalty(0.0001))
]

model = MultipleLayerModel(layers)

setting = Setting(model, error, learning_rule, train_set, valid_set)

remark = "3 layer normal nn"
settings = [setting]
experiment = Experiment(settings=settings, num_epoch=400, stats_interval=1, remark=remark)
experiment.do_experiment()

Epoch 0:
  error(train)=2.35e+00, acc(train)=1.06e-01, error(valid)=2.34e+00, acc(valid)=1.11e-01, params_penalty=1.47e-02
Epoch 1: 4.70s to complete
  error(train)=2.07e-01, acc(train)=9.36e-01, error(valid)=2.09e-01, acc(valid)=9.38e-01, params_penalty=2.12e-02
Epoch 2: 5.20s to complete
  error(train)=1.36e-01, acc(train)=9.59e-01, error(valid)=1.46e-01, acc(valid)=9.57e-01, params_penalty=2.47e-02
Epoch 3: 4.80s to complete
  error(train)=1.09e-01, acc(train)=9.67e-01, error(valid)=1.33e-01, acc(valid)=9.61e-01, params_penalty=2.73e-02
Epoch 4: 3.97s to complete
  error(train)=9.64e-02, acc(train)=9.70e-01, error(valid)=1.29e-01, acc(valid)=9.62e-01, params_penalty=2.96e-02
Epoch 5: 3.91s to complete
  error(train)=7.39e-02, acc(train)=9.78e-01, error(valid)=1.18e-01, acc(valid)=9.68e-01, params_penalty=3.15e-02
Epoch 6: 3.99s to complete
  error(train)=6.03e-02, acc(train)=9.82e-01, error(valid)=1.03e-01, acc(valid)=9.70e-01, params_penalty=3.30e-02
Epoch 7: 3.93s to complete
  er

KeyboardInterrupt: 

In [53]:
import copy

model_1 = copy.deepcopy(model)
model_2 = copy.deepcopy(model)
model_3 = copy.deepcopy(model)

learning_rule_1 = MomentumLearningRule(learning_rate=0.02, mom_coeff=0.9)
learning_rule_2 = MomentumLearningRule(learning_rate=0.02, mom_coeff=0.9)
learning_rule_3 = MomentumLearningRule(learning_rate=0.02, mom_coeff=0.9)



learning_rule_1.initialise(model_1.params)
learning_rule_2.initialise(model_2.params)
learning_rule_3.initialise(model_3.params)



for inputs_batch, targets_batch in train_set:
    
    
    activations_1 = model_1.fprop(inputs_batch)
    grads_wrt_outputs_1 = error.grad(activations_1[-1], targets_batch)
    grads_wrt_params_1 = model_1.grads_wrt_params(
        activations_1, grads_wrt_outputs_1)
    learning_rule_1.update_params(grads_wrt_params_1)
    
    inputs_batch, targets_batch = train_set.next()
    activations_2 = model_2.fprop(inputs_batch)
    grads_wrt_outputs_2 = error.grad(activations_2[-1], targets_batch)
    grads_wrt_params_2 = model_2.grads_wrt_params(
        activations_2, grads_wrt_outputs_2)
    learning_rule_2.update_params(grads_wrt_params_2)
    
    inputs_batch, targets_batch = train_set.next()
    activations_3 = model_3.fprop(inputs_batch)
    grads_wrt_outputs_3 = error.grad(activations_3[-1], targets_batch)
    grads_wrt_params_3 = model_3.grads_wrt_params(
        activations_3, grads_wrt_outputs_3)
    learning_rule_3.update_params(grads_wrt_params_3)
    
    inputs_batch, targets_batch = train_set.next()
    activations_1 = model_1.fprop(inputs_batch)
    e1 = error(activations_1[-1], targets_batch)
    activations_2 = model_2.fprop(inputs_batch)
    e2 = error(activations_2[-1], targets_batch)
    activations_3 = model_3.fprop(inputs_batch)
    e3 = error(activations_3[-1], targets_batch)
    e = [e1, e2, e3]
    print e
    idx = e.index(min(e))

    if idx == 0:
        model_2.set_params(copy.copy(model_1.params))
        model_3.set_params(copy.copy(model_1.params))
    elif idx==1:
        model_1.set_params(copy.copy(model_2.params))
        model_3.set_params(copy.copy(model_2.params))
    elif idx==2:
        model_1.set_params(copy.copy(model_3.params))
        model_2.set_params(copy.copy(model_3.params))
    
    


[2.3575114457003643, 2.3469643820466239, 2.3585207334952689]
100 #
100 #
100 #
100 #
100 #
100 #


ValueError: operands could not be broadcast together with shapes (100,) (784,) 