## imports

In [2]:
import numpy
import theano
from theano import tensor
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme
from fuel.datasets.iris import Iris
from fuel.transformers import Mapping
from blocks.bricks import Linear, Softmax, Logistic
from blocks.bricks.cost import MisclassificationRate
from blocks.initialization import Uniform, Constant
from blocks.graph import ComputationGraph
from blocks.algorithms import GradientDescent, Scale
from blocks.main_loop import MainLoop
from blocks.extensions import Timing, FinishAfter, Printing
from blocks.extensions.monitoring import TrainingDataMonitoring
from blocks.extras.extensions.plot import Plot
from sklearn import metrics
import os
# Tell Fuel where the datasets live. `setdefault` keeps a FUEL_DATA_PATH that
# is already exported in the environment, so this machine-specific path is only
# a fallback instead of silently overriding the user's own configuration.
os.environ.setdefault('FUEL_DATA_PATH', '/home/datasets/datasets1/fuel/')

## Params

In [3]:
# Problem dimensions: size of the input vector and number of target classes.
nfeatures = 4
nclasses = 3
# Width of the hidden layer; only the MLP exercise further down uses it.
nhiddens = 5

# Optimisation settings: SGD step size, mini-batch size, and training length.
learning_rate = 0.01
batch_size = 32
nepochs = 300

# Data

In [4]:
# Load the single 'all' split of Iris (no separate train/test subsets here).
dataset = Iris(which_sets=('all',))
# Iterate over every example of the dataset in mini-batches of `batch_size`.
scheme = ShuffledScheme(dataset.num_examples, batch_size)
# The stream pairs the dataset with the iteration scheme above.
stream = DataStream(dataset=dataset, iteration_scheme=scheme)

## One-hot representation

In [5]:
# Identity matrix used as a lookup table: row k is the one-hot code of class k.
# int64 is spelled out so the targets have the dtype of `tensor.lmatrix`
# regardless of what the platform's default `int` is.
I = numpy.eye(nclasses, dtype='int64')


def one_hot(data):
    """Replace the integer class labels of a batch with one-hot rows.

    Parameters
    ----------
    data : tuple
        A batch as yielded by the stream. ``data[0]`` is passed through
        unchanged; ``data[1]`` holds integer class indices and is flattened
        before the lookup, so its original shape does not matter.

    Returns
    -------
    tuple
        ``(data[0], targets)`` where ``targets`` contains one row of ``I``
        per label in ``data[1]``.
    """
    features, labels = data[0], data[1]
    return features, I[labels.flatten()]


# Wrap a freshly built base stream instead of `stream` itself: with
# `stream = Mapping(stream, one_hot)` every re-run of this cell would stack
# another Mapping and feed already one-hot targets back into `one_hot`.
stream = Mapping(DataStream(dataset, iteration_scheme=scheme), one_hot)

# Model

In [6]:
# Symbolic inputs: a float matrix of features and an int64 matrix of targets
# (the targets are one-hot rows, see the one-hot cell).
# NOTE(review): the variable names presumably have to match the data source
# names of the stream -- confirm against the dataset.
x = tensor.matrix('features')
y = tensor.lmatrix('targets')

# Single affine layer from the inputs straight to the class scores,
# with all weights and biases starting at zero.
linear = Linear(
    input_dim=nfeatures,
    output_dim=nclasses,
    weights_init=Constant(0),
    biases_init=Constant(0),
)
linear.initialize()

# Keep the pre-softmax scores around: the cost cell works on them directly.
linear_output = linear.apply(x)
softmax = Softmax()
y_hat = softmax.apply(linear_output)

## cost

In [7]:
# Cross-entropy computed from the pre-softmax scores, averaged over the batch.
cost = softmax.categorical_cross_entropy(y, linear_output).mean()
cost.name = 'cost'

# `y` is one-hot, so the column index of each non-zero entry is the true class;
# the misclassification rate compares those indices with the predictions.
error = MisclassificationRate().apply(y.nonzero()[1], y_hat)
error.name = 'error'

# Algorithm

In [8]:
# The computation graph of the cost exposes the parameters to be trained.
cg = ComputationGraph(cost)

# Gradient descent on `cost`, with each step scaled by `learning_rate`.
# `learning_rate` is read once here; changing it later has no effect until
# this cell is executed again.
algorithm = GradientDescent(
    cost=cost,
    parameters=cg.parameters,
    step_rule=Scale(learning_rate=learning_rate),
)

# Extensions

In [9]:
# Record cost and error on the training batches after every batch; the log
# entries are prefixed with 'tra' (e.g. `tra_cost`, `tra_error`).
monitor = TrainingDataMonitoring(
    variables=[cost, error],
    prefix='tra',
    after_batch=True,
)

# Monitoring, log printing, timing, and a stop condition after `nepochs` epochs.
extensions = [
    monitor,
    Printing(),
    Timing(),
    FinishAfter(after_n_epochs=nepochs),
]

# Main loop

In [10]:
# Tie the training algorithm, the one-hot data stream and the extensions
# together, then train until the FinishAfter extension stops the loop.
loop = MainLoop(
    algorithm=algorithm,
    data_stream=stream,
    extensions=extensions,
)
loop.run()


-------------------------------------------------------------------------------
BEFORE FIRST EPOCH
-------------------------------------------------------------------------------
Training status:
	 batch_interrupt_received: False
	 epoch_interrupt_received: False
	 epoch_started: True
	 epochs_done: 0
	 iterations_done: 0
	 received_first_batch: False
	 resumed_from: None
	 training_started: True
Log records from the iteration 0:


-------------------------------------------------------------------------------
AFTER ANOTHER EPOCH
-------------------------------------------------------------------------------
Training status:
	 batch_interrupt_received: False
	 epoch_interrupt_received: False
	 epoch_started: False
	 epochs_done: 1
	 iterations_done: 5
	 received_first_batch: True
	 resumed_from: None
	 training_started: True
Log records from the iteration 5:
	 tra_cost: 1.07524915068
	 tra_error: 0.727272727273


------------------------------------------------------------------------

In [11]:
# Print the main loop's profile: time spent per section (before training,
# initialization, training and the individual extensions) and its share of
# the total.
loop.profile.report()

Section                                  Time     % of total
------------------------------------------------------------
Before training                          0.00          0.00%
  TrainingDataMonitoring                 0.00          0.00%
  Printing                               0.00          0.00%
  Timing                                 0.00          0.00%
  FinishAfter                            0.00          0.00%
  Other                                  0.00          0.00%
Initialization                          12.57         91.62%
Training                                 1.15          8.38%
  Before epoch                           0.03          0.20%
    TrainingDataMonitoring               0.01          0.04%
    Printing                             0.01          0.08%
    Timing                               0.00          0.03%
    FinishAfter                          0.00          0.02%
    Other                                0.00          0.03%
  Epoch                 

In [12]:
# Compile the forward pass only: features in, class probabilities out.
predict = theano.function([x], y_hat)

# Take one batch from the stream. The built-in next() works on Python 2 and 3,
# whereas the iterator's `.next()` method exists on Python 2 only.
# NOTE(review): this batch comes from the same stream the model was trained
# on, so the score below is a training accuracy, not a held-out estimate.
x_vals, y_vals = next(stream.get_epoch_iterator())
y_pred = predict(x_vals)

# Score the arg-max class against the true class. Thresholding the
# probabilities at 0.5 and comparing whole one-hot rows would count a sample
# as wrong whenever no class reaches 0.5, even if the most probable class is
# the correct one.
metrics.accuracy_score(y_vals.argmax(axis=1), y_pred.argmax(axis=1))

0.9375

# Exercise: Building more complex models
Now we are going to train a model with one more transformation, i.e. a multi-layer perceptron (MLP) with one hidden layer. Change how `y_hat` is built as follows:

In [13]:
# Input -> hidden layer: weights drawn uniformly with width 0.01, zero biases.
linear_to_hidden = Linear(
    input_dim=nfeatures,
    output_dim=nhiddens,
    weights_init=Uniform(width=0.01),
    biases_init=Constant(0),
)

# Hidden -> output layer: weights and biases start at zero.
hidden_to_output = Linear(
    input_dim=nhiddens,
    output_dim=nclasses,
    weights_init=Constant(0),
    biases_init=Constant(0),
)

# Initialize in the same order the bricks were created.
linear_to_hidden.initialize()
hidden_to_output.initialize()

# Rebuild the graph: logistic hidden units, then class scores, then softmax.
# This deliberately rebinds `linear_output`, `softmax` and `y_hat`, so the
# cost cell has to be re-run to pick up the new model.
h = Logistic().apply(linear_to_hidden.apply(x))
linear_output = hidden_to_output.apply(h)
softmax = Softmax()
y_hat = softmax.apply(linear_output)

# Larger step size for the MLP. It only takes effect once the Algorithm cell
# is re-run, because the existing step rule was built with the old value.
learning_rate = 0.1

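Then re-run the cells from [Cost](#cost) onward (cost, algorithm, extensions and main loop) so that training uses the new `y_hat` and the new `learning_rate`.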
# Exercise: MLP with MNIST
Modify the code above to train an MLP on the MNIST dataset with the Momentum update rule. Useful notes:
 - You should use the `Flatten` transformer to preprocess the [MNIST](http://fuel.readthedocs.org/en/latest/api/dataset.html?#module-fuel.datasets.mnist) dataset
 - Replace the `Scale` update rule with the [Momentum](http://blocks.readthedocs.org/en/latest/api/algorithms.html?#blocks.algorithms.Momentum) rule and try different values for its `learning_rate` and `momentum` parameters