In [None]:
%pylab inline

# Data loading

## Loading data into RAM in expected format and visualizing

In [None]:
# The star import pulls every public name of the local `mnist` helper module
# into the notebook namespace. Names used below without a visible definition
# in this notebook (load_dataset, Model, BATCHSIZE, ...) presumably come from
# it -- TODO confirm.
from mnist import *
from pprint import pprint
# Seed the random number generator so repeated runs behave the same.
# NOTE(review): `random` is whatever the `%pylab` / `mnist` star imports left
# bound to that name (numpy.random vs. the stdlib module) -- confirm which
# generator is actually being seeded.
random.seed(12345)

# Load the dataset and list the keys it exposes.
dataset = load_dataset()
print dataset.keys()

In [None]:
# Inspect the test partition: element types, array shapes and the first label.
print dataset['X_test'].dtype, dataset['y_test'].dtype
print dataset['X_test'].shape, dataset['y_test'].shape
print dataset['y_test'][0]
# Show the first test example as a 28x28 grayscale image without interpolation
# between pixels.
imshow(dataset['X_test'][0].reshape(28,28), cmap=cm.gray, interpolation='nearest')

## Breaking data into batches in callable form

In [None]:
# define generators
# make a generator to yield a batch of data for training/validating
class iterate_minibatches(object):
    """Callable that walks one partition of a dataset in fixed-size batches.

    Every call returns a new generator that starts again from the first
    example of the partition.

    Parameters
    ----------
    dataset : mapping
        Must contain the keys ``'X_<partition>'`` and ``'y_<partition>'``;
        both values must support ``len()`` and slice indexing.
    batchsize : int
        Number of examples per batch; must be positive.
    partition : str
        Suffix selecting which pair of entries is read (default ``'train'``).

    Raises
    ------
    ValueError
        If ``batchsize`` is not positive.
    """

    def __init__(self, dataset, batchsize, partition='train'):
        # A zero step makes range() fail only once iteration starts, and a
        # negative one silently produces no batches at all; reject both here
        # so the mistake surfaces where the iterator is created.
        if batchsize <= 0:
            raise ValueError("batchsize must be positive, got %r" % (batchsize,))
        self.dataset = dataset
        self.batchsize = batchsize
        self.partition = partition

    def __call__(self):
        """Yield ``{'X': inputs, 'y': targets}`` dicts of exactly ``batchsize`` items.

        Trailing examples that do not fill a complete batch are skipped.
        """
        inputs = self.dataset['X_'+self.partition]
        targets = self.dataset['y_'+self.partition]
        # The "+ 1" lets the last full batch start exactly at
        # len(inputs) - batchsize; anything after it is an incomplete batch.
        for start_idx in range(0, len(inputs) - self.batchsize + 1, self.batchsize):
            excerpt = slice(start_idx, start_idx + self.batchsize)
            yield dict(X=inputs[excerpt], y=targets[excerpt])

In [None]:
# make a train batch iterator and get a batch from it
trainbatchit = iterate_minibatches(dataset, BATCHSIZE, 'train')
it = trainbatchit()
batch = it.next()
print batch.keys()
print batch['X'].shape, batch['X'].dtype

# Note: 'X' and 'y' are the inputs and labels that will be bound to model layers

# Model building

In [None]:
# Define an XNN model that is a container around layer graphs
m = Model("MLP")

# Dump the model's dictionary form before any layers have been added.
pprint(m.to_dict())

In [None]:
# This creates an MLP of two hidden layers of 800 units each, followed by
# a softmax output layer of 10 units. It applies 20% dropout to the input
# data and 50% dropout to the hidden layers.

# Input layer, specifying the expected input shape of the network
# (unspecified batchsize, 1 channel, 28 rows and 28 columns). No input
# variable is supplied here; the layer is tied to the 'X' batch entry in a
# later cell through m.bind_input.

l_in = m.add_layer(InputLayer((None, 1, 28, 28)))

# Apply 20% dropout to the input data:
l_in_drop = m.make_dropout_layer(l_in, p=0.2)

# Add a stack of fully-connected layers of 800 units each with dropout
# (one entry per hidden layer in both lists).
l_stacktop = m.make_dense_drop_stack(l_in_drop, [800, 800], drop_p_list=[.5, .5])

# Finally, we'll add the fully-connected output layer, of 10 softmax units.
# It is registered under the name "l_out", the key used to read predictions.
l_out = m.add_layer(DenseLayer(l_stacktop, num_units=10, nonlinearity=softmax), "l_out")

# Dump the model's dictionary form now that the layers are in place.
pprint(m.to_dict())

In [None]:
# Tie the input layer to the batch entry 'X', and attach the categorical
# cross-entropy objective to the output layer with targets read from 'y'.
m.bind_input(l_in, 'X')
m.bind_output(l_out, categorical_crossentropy, 'y')

# Dump the model's dictionary form again after binding.
pprint(m.to_dict())

In [None]:
# show the model graph
# The target path is hard-coded under /tmp (POSIX only); presumably the image
# is written there and the returned object is an image that can display
# itself -- TODO confirm against xnn.utils.draw_to_file.
modelgraphimg = xnn.utils.draw_to_file(m, '/tmp/modelgraph.png')
modelgraphimg.show()

## Predict outputs before training

In [None]:
# Run the still-untrained model on the sample batch. The result is indexed by
# key; 'l_out' is the name the output layer was registered under.
outs = m.predict(batch)
print outs.keys()
# Shape of the output-layer values, then the values for the first example.
print outs['l_out'].shape
print outs['l_out'][0]

## Saving and loading model

In [None]:
# Round-trip the model through a file and check that a freshly created model
# loaded from it produces the same output for the first example of the batch.
# NOTE(review): the '.pkl' suffix suggests pickle -- only load files you
# trust. The /tmp path is POSIX-specific.
m.save_model('/tmp/model.pkl')
m2 = Model("loaded model")
m2.load_model('/tmp/model.pkl')
outs2 = m2.predict(batch)
# Exact (not tolerance-based) comparison against the prediction made earlier
# with the original model; assuming array outputs this prints one boolean per
# output unit.
print outs['l_out'][0] == outs2['l_out'][0]

# Trainer

In [None]:
# first set up global parameters for nesterov momentum
# (learning rate 0.25, momentum 0.9)
global_update_settings = ParamUpdateSettings(
    update=nesterov_momentum, learning_rate=0.25, momentum=0.9)

# instantiate a trainer for model `m` with those settings
trainer = Trainer(m, global_update_settings)
# Dump the trainer's dictionary form.
pprint(trainer.to_dict())

## Run some training steps on a batch and modify updates

In [None]:
# Compact array printing: 3 decimals, no scientific notation for small values.
set_printoptions(precision=3, suppress=True)
# Target of the first example, for comparison with the outputs printed below.
print batch['y'][0]
# Take 5 training steps on the same batch, printing the model's output for
# the first example after each step.
for i in range(5):
    trainer.train_step(batch)
    outs = m.predict(batch)
    print outs['l_out'][0]

In [None]:
# decrease learning rate and continue training
# Only learning_rate is given in the new settings; whether the previously
# bound update rule and momentum stay in effect depends on
# bind_global_update -- TODO confirm.
trainer.bind_global_update(ParamUpdateSettings(learning_rate=0.1))
# 10 more steps on the same batch, printing the first example's output after
# each one.
for i in range(10):
    trainer.train_step(batch)
    outs = m.predict(batch)
    print outs['l_out'][0]

In [None]:
# add l2-regularization with weight .001 to weights to all layers
# (no layer or parameter selection is passed to the call)
trainer.bind_regularization(xnn.regularization.l2, .001)

# 5 more steps on the same batch with the regularizer bound, printing the
# first example's output after each one.
for i in range(5):
    trainer.train_step(batch)
    outs = m.predict(batch)
    print outs['l_out'][0]

# Training loop

In [None]:
# let's start the batch iteration from scratch and re-initialize the model
trainbatchit = iterate_minibatches(dataset, BATCHSIZE, 'train')
validbatchit = iterate_minibatches(dataset, BATCHSIZE, 'valid')

# use a convenience function defined in mnist.py to build the same mlp as above 
m = build_mlp()
# NOTE(review): the existing trainer object is reused, so settings bound to
# it in earlier cells (learning rate 0.1, l2 regularization) may still be in
# effect -- confirm whether set_model resets them.
trainer.set_model(m)

# define some metrics to keep track of performance
# Each entry is (layer name, Metric, 'min' or 'max'); the last item presumably
# tells the loop which direction counts as an improvement -- TODO confirm.
# NOTE(review): accuracy uses aggregation_type="none" here but "mean" in the
# experiment cell further down -- confirm the difference is intentional.
metrics = [
    ('l_out', Metric(computeCategoricalCrossentropy, "y", aggregation_type="mean"), 'min'),
    ('l_out', Metric(computeOneHotAccuracy, "y", aggregation_type="none"), 'max')
]

# create a training loop
loop = Loop(trainer, trainbatchit, validbatchit, metrics, plotmetricmean=False)

# iterate through 3 epochs of training on all data
loop(3)

# Experiment

## First let's create a function for building an MLP from arguments

In [None]:
# ##################### Build the neural network model #######################
# Builder for the MLP used in the experiment below. It returns the whole Model
# (not just the output layer), with its input bound to 'X' and its output
# bound to 'y'.
def build_mlp(input_var=None, numhidunits=800, hiddropout=.5, dropout_type='standard'):
    """Assemble an MLP with two hidden layers, dropout and a 10-unit softmax output.

    Parameters
    ----------
    input_var : optional
        Forwarded unchanged to ``make_bound_input_layer`` (presumably a Theano
        variable to use as the network input -- confirm against xnn).
    numhidunits : int
        Number of units in each of the two hidden dense layers.
    hiddropout : float
        Dropout parameter used for both hidden layers.
    dropout_type : str
        Dropout variant used for both hidden layers.

    Returns
    -------
    Model
        Model named "MLP" whose output layer is registered as "l_out" and
        bound to the 'y' targets with categorical cross-entropy.
    """
    model = Model("MLP")

    # Input of shape (any batchsize, 1 channel, 28 rows, 28 columns), bound to
    # the 'X' entry of a batch.
    input_layer = model.make_bound_input_layer((None, 1, 28, 28), 'X', input_var=input_var)

    # 20% dropout directly on the input data.
    dropped_input = model.make_dropout_layer(input_layer, p=0.2)

    # Two identical hidden layers: every per-layer list repeats the same
    # setting once per layer.
    n_hidden_layers = 2
    stack_top = model.make_dense_drop_stack(
        dropped_input,
        [numhidunits] * n_hidden_layers,
        drop_p_list=[hiddropout] * n_hidden_layers,
        drop_type_list=[dropout_type] * n_hidden_layers,
    )

    # Fully-connected output layer of 10 softmax units.
    output_layer = model.add_layer(
        DenseLayer(stack_top, num_units=10, nonlinearity=softmax), "l_out")

    model.bind_output(output_layer, categorical_crossentropy, 'y')
    return model

## Define the variables that are relevant for exploring hyperparameter space in this experiment

In [None]:
# Default hyperparameter values for one experiment condition. The attribute
# names are the factor names passed to expt.add_factor when the experiment is
# set up.
class Cond(xnn.experiments.ExperimentCondition):
    # NOTE(review): the base-class __init__ is not called here -- confirm that
    # ExperimentCondition does not require it.
    def __init__(self):
        # learning rate handed to set_up_trainer
        self.learning_rate = 0.1
        # units per hidden layer (build_mlp's numhidunits)
        self.hiddenunits   = 500
        # dropout variant for the hidden layers (build_mlp's dropout_type)
        self.droptype      = 'standard' 
        # dropout parameter for the hidden layers (build_mlp's hiddropout)
        self.hiddropout    = 0.5

# Show the default condition in dictionary form.
cond = Cond()
print cond.to_dict()

## Set up the experiment design (this one is a nested design with groups for std drop and gauss drop)

In [None]:
def set_up_experiment():
    """Create the 'mnist mlp' experiment with its two groups and their factors.

    Returns the configured ``xnn.experiments.Experiment``; conditions start
    from the defaults in ``Cond()``.
    """
    expt = xnn.experiments.Experiment(name='mnist mlp',default_condition=Cond())
    expt.add_group('std drop')
    expt.add_group('gauss drop')
    # Factors added without a groupname (presumably applied to every group --
    # TODO confirm).
    expt.add_factor('learning_rate',[0.001,0.1])
    expt.add_factor('hiddenunits',[20,200,500])
    # Group-specific factors. No droptype factor is added for 'std drop', so
    # that group presumably keeps the Cond default 'standard'.
    # NOTE(review): 'gauss' is passed as a bare string while every other
    # factor passes a list of levels -- confirm add_factor accepts a single
    # level in this form.
    expt.add_factor('droptype','gauss',groupname='gauss drop')
    expt.add_factor('hiddropout',[.3,1.5],groupname='gauss drop')
    expt.add_factor('hiddropout',[.3,.75],groupname='std drop')
    return expt

# Build the experiment and show its dictionary form.
expt = set_up_experiment()
print expt.to_dict()

## Train all experiment conditions for one epoch each and then report results

In [None]:
    # NOTE(review): this whole cell is indented one level; it runs only where
    # the cell's common leading indentation is stripped before execution
    # (IPython is expected to do this -- confirm for the target front end).

    # Metrics tracked per condition: cross-entropy (lower is better) and
    # one-hot accuracy (higher is better), both read from layer 'l_out'.
    metrics = [
        ('l_out', Metric(computeCategoricalCrossentropy, "y", aggregation_type="mean"), 'min'),
        ('l_out', Metric(computeOneHotAccuracy, "y", aggregation_type="mean"), 'max')
    ]

    # Fresh batch iterators over the training and validation partitions.
    trainbatchit = iterate_minibatches(dataset, BATCHSIZE, 'train')
    validbatchit = iterate_minibatches(dataset, BATCHSIZE, 'valid')

    #--------
    # Run all conditions in experiment, and store results
    #--------

    # Each item carries the condition number and the condition object holding
    # that condition's hyperparameter values.
    for conddict in expt.get_all_condition_iterator():
        print "\nRunning condition %d\n"%conddict['condition_num']
        c = conddict['condition']
        # New model and trainer for every condition.
        # set_up_trainer is not defined in this notebook; presumably it comes
        # from the `mnist` star import -- TODO confirm.
        m = build_mlp(numhidunits=c.hiddenunits,hiddropout=c.hiddropout,dropout_type=c.droptype)
        t = set_up_trainer(m,learning_rate=c.learning_rate)
        loop = xnn.training.Loop(t,trainbatchit,validbatchit,metrics,plotmetricmean=False)
        # Train for one epoch and store what the loop returns under the
        # condition number (later unpacked as a pair of values).
        metvals = loop(1)
        expt.add_results(conddict['condition_num'],metvals)


    #--------
    # Interpret results
    #--------

    #get results for standard vs gaussian dropout
    std_nums = expt.get_condition_numbers(fixed_dict={'droptype':'standard'})
    gss_nums = expt.get_condition_numbers(fixed_dict={'droptype':'gauss'})

    print "Standard dropout"
    bsp = None
    for sn in std_nums:
        cc,pc = expt.results[sn]
        bsp = pc if (bsp is None or pc > bsp) else bsp
        print "%d: %0.3f"%(sn,pc)  
    print "Gaussian dropout"
    bgp = None
    for sn in gss_nums:
        cc,pc = expt.results[sn]
        bgp = pc if (bgp is None or pc > bgp) else bgp
        print "%d: %0.3f"%(sn,pc)  

    print "Best Standard PC: %0.3f"%bsp
    print "Best Gaussian PC: %0.3f"%bgp

    #get results for hidden unit size 
    bhu = [None]*3
    for i,hu in enumerate([20,200,500]):
        print "%d hidden units"%hu
        nums = expt.get_condition_numbers(fixed_dict={'hiddenunits':hu})
        for sn in nums:
            cc,pc = expt.results[sn]
            bhu[i] = pc if (bhu[i] is None or pc > bhu[i]) else bhu[i] 
            print "%d: %0.3f"%(sn,pc)  
    for bh,hu  in zip(bhu,[20,200,500]):
        print "Best %d HU PC: %0.3f"%(hu,bh)