# Linear Regression in Blocks

In [1]:
from __future__ import division

import numpy as np

## Dataset

In [2]:
from fuel.datasets import MNIST
# Instantiate the Fuel MNIST dataset, selecting the "train" subset.
mnist = MNIST("train")

In [3]:
# Display how many examples the dataset holds.
mnist.num_examples

60000

In [4]:
# Display the names of the data sources the dataset provides.
mnist.sources

(u'features', u'targets')

In [5]:
# Open the dataset, read the examples at indices 0, 1 and 2, then release the
# handle again: it is not needed once the arrays have been fetched.
handle = mnist.open()
data_sample = mnist.get_data(handle, [0, 1, 2])  # (ndarray, ndarray)
mnist.close(handle)

In [6]:
data_sample[0].shape  # shape of the 'features' array for the 3 requested examples

(3, 1, 28, 28)

In [7]:
data_sample[1].shape  # shape of the 'targets' array for the 3 requested examples

(3, 1)

## DataStream

In [8]:
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme

# Build a stream over the whole dataset using a sequential iteration scheme
# that yields batches of 256 examples.
data_stream = DataStream.default_stream(
    mnist,
    iteration_scheme=SequentialScheme(mnist.num_examples, batch_size=256)
)

In [9]:
# Display the source names exposed by the stream.
data_stream.sources

(u'features', u'targets')

In [10]:
# Get an iterator over one epoch of the stream and pull its first batch.
epoch = data_stream.get_epoch_iterator()
batch = next(epoch)  # (ndarray, ndarray)

In [11]:
batch[0].shape  # first element of the batch (the 'features' source)

(256, 1, 28, 28)

In [12]:
batch[1].shape  # second element of the batch (the 'targets' source)

(256, 1)

## Transformers

In [13]:
from fuel.transformers import Flatten

In [14]:
# Wrap the existing stream in the Flatten transformer; the wrapped stream
# replaces the original under the same name.
data_stream = Flatten(data_stream)

In [15]:
# Pull the first batch again, this time from the flattened stream.
epoch = data_stream.get_epoch_iterator()
batch = next(epoch)  # (ndarray, ndarray)

In [16]:
batch[0].shape  # features after flattening

(256, 784)

In [17]:
batch[1].shape  # targets after flattening

(256, 1)

## Model / Bricks

In [18]:
from blocks.bricks import Linear
from blocks import initialization as init

## \# 1 Configuration

In [19]:
# Configure a Linear brick mapping 28*28 inputs to 10 outputs, with
# IsotropicGaussian(0.01) weight initialization and constant-0 biases.
linear = Linear(input_dim=28 * 28,
                output_dim=10,
                weights_init=init.IsotropicGaussian(0.01),
                biases_init=init.Constant(0))

## \# 2 Allocation (Optional)

In [20]:
# Accessing the parameters before allocation fails (see the error below).
linear.params

AttributeError: 'Linear' object does not have an attribute 'params'

In [21]:
# Allocate the brick's parameters; after this call `linear.params` exists.
linear.allocate()

In [22]:
# Display the parameters now that they have been allocated.
linear.params

[W, b]

## \# 3 Initialization

In [23]:
# Inspect the first parameter (W) after allocation but before
# initialization: its values are NaN at this point.
W = linear.params[0]
W.eval()

array([[ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       ..., 
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan]], dtype=float32)

In [24]:
# Fill the allocated parameters using the configured initialization schemes.
linear.initialize()

In [25]:
# Inspect W again: it now holds initialized values instead of NaN.
W = linear.params[0]
W.eval()

array([[ 0.00764556, -0.01124291, -0.00137316, ...,  0.00713121,
         0.01307908, -0.00076428],
       [-0.00038064,  0.00397207,  0.00106865, ..., -0.01462041,
        -0.00731604, -0.00198592],
       [ 0.01699585, -0.01712248,  0.0002806 , ...,  0.01085133,
        -0.00166492,  0.00832114],
       ..., 
       [ 0.00457955,  0.00664128,  0.00771374, ...,  0.00711473,
        -0.00163681, -0.01836808],
       [ 0.02272249,  0.01570716, -0.00236659, ...,  0.0020195 ,
        -0.00232745, -0.00386061],
       [-0.00711885, -0.01066627,  0.01543075, ...,  0.00014156,
         0.01984636, -0.00590108]], dtype=float32)

## \# 4 Application

In [26]:
from theano import tensor

# Symbolic float32 matrix named 'X' that stands in for the brick's input.
X = tensor.fmatrix('X')

In [27]:
# Apply the brick to the symbolic input; the result is itself symbolic.
y_hat = linear.apply(X)

In [28]:
# Show the type of the brick's output variable.
type(y_hat)

theano.tensor.var.TensorVariable

In [29]:
# Display the output variable (shown by its name).
y_hat

linear_apply_output

## Building your own Bricks

In [30]:
from blocks.bricks import Initializable
from blocks.bricks import Softmax

class SoftmaxLinear(Initializable):
    """Composite brick: a Linear transformation followed by a Softmax.

    Parameters
    ----------
    input_dim : int
        Passed to the child Linear brick as its ``input_dim``.
    output_dim : int
        Passed to the child Linear brick as its ``output_dim``.
    **kwargs
        Forwarded unchanged to ``Initializable.__init__``.
    """

    def __init__(self, input_dim, output_dim, **kwargs):
        super(SoftmaxLinear, self).__init__(**kwargs)
        # NOTE: the attribute is spelled ``sofmax`` (sic); the spelling is
        # kept so that existing references to it keep working.
        self.linear = Linear(input_dim=input_dim, output_dim=output_dim)
        self.sofmax = Softmax()
        # Register both sub-bricks as children of this brick.
        self.children = [self.linear, self.sofmax]

    def apply(self, input_):
        """Return the softmax of the linear transformation of ``input_``."""
        pre_activation = self.linear.apply(input_)
        return self.sofmax.apply(pre_activation)

In [31]:
# Instantiate the custom brick with 28*28 inputs and 10 outputs; the
# initialization schemes are passed through as keyword arguments.
softmax_linear = SoftmaxLinear(28 * 28, 10,
                               weights_init=init.IsotropicGaussian(0.01),
                               biases_init=init.Constant(0))

In [32]:
# Initialize the composite brick, then display the parameters of its
# Linear child.
softmax_linear.initialize()
softmax_linear.linear.params

[W, b]

## Cost

In [33]:
# Symbolic inputs for the cost, named after the data stream's sources
# ('features' and 'targets').
X = tensor.matrix('features')  # match sources in datastream
y = tensor.lmatrix('targets')  # int64 matrix of targets

In [34]:
# Confirm the source names that the variable names above are meant to match.
data_stream.sources

(u'features', u'targets')

In [35]:
# Symbolic prediction: the custom brick applied to the 'features' variable.
y_hat = softmax_linear.apply(X)

In [1]:
# Uncomment and run this cell to use an MLP instead of the single linear layer

# from blocks.bricks import MLP, Tanh, Softmax

# mlp = MLP(
#     activations=[Tanh(), None],
#     dims=[28*28, 200, 10],
#     weights_init=init.IsotropicGaussian(0.01),
#     biases_init=init.Constant(0)
# )
# mlp.initialize()

# y_hat = Softmax().apply(mlp.apply(X))


In [None]:
from blocks.bricks.cost import CategoricalCrossEntropy

# Categorical cross-entropy between the targets (flattened from a matrix to
# a vector) and the predictions.
cost = CategoricalCrossEntropy().apply(y=y.flatten(), y_hat=y_hat)
cost.name = 'cost'  # same name as the 'cost' plot channel used further down

## Computational (Annotated) Graph

In [None]:
from blocks.graph import ComputationGraph

# Build the computation graph that ends in the cost and display the
# parameters found in it.
cg = ComputationGraph(cost)
cg.parameters

In [None]:
from blocks.filter import VariableFilter
from blocks.roles import WEIGHT

# Select, among the graph's variables, those tagged with the WEIGHT role.
weights = VariableFilter(roles=[WEIGHT])(cg.variables)
weights

## Training Algorithm

In [None]:
from blocks.algorithms import GradientDescent, Scale

# Gradient descent on the cost over the graph's parameters, with each step
# scaled by a fixed learning rate of 0.1.
algorithm = GradientDescent(cost=cost,
                            params=cg.parameters,
                            step_rule=Scale(learning_rate=0.1))

In [None]:
from blocks.main_loop import MainLoop
from blocks.extensions import FinishAfter, Printing
from blocks.extensions.plot import Plot
from blocks.extensions.monitoring import TrainingDataMonitoring


# Assemble the training loop from the data stream, the training algorithm
# and a list of extensions.
main_loop = MainLoop(
    data_stream=data_stream,
    algorithm=algorithm,
    extensions=[
        # Stop training once 100 epochs have been completed.
        FinishAfter(after_n_epochs=100),
        # Monitor the cost on the training data after every epoch.
        TrainingDataMonitoring([cost], after_epoch=True),
        # Plot the 'cost' channel after every epoch.
        Plot(
            document='blocks_fuel_basics_tutorial LINEAR REG',
            channels=[['cost']],
            after_epoch=True
        ),
        # Print the training log.
        Printing()
    ]
)

In [None]:
# Start training; FinishAfter above ends the run after 100 epochs.
main_loop.run()