In [66]:
import gzip
import numpy
import pickle

import theano
from theano import tensor as T
from theano.tensor.nnet import conv2d
from theano.tensor.signal import downsample

theano.config.exception_verbosity='high'
rng = numpy.random.RandomState(23455)

In [67]:
def get_convolutionaln_function(filters_number, filter_width, filter_height, input_depth):
    input_ = T.tensor4(name='input')
    w_shp = (filters_number, input_depth, filter_width, filter_height)
    #if weights are boud
    w_bound = numpy.sqrt(input_depth * filter_width * filter_height)
    W = theano.shared( numpy.asarray(
                rng.uniform(
                    low=-1.0 / w_bound,
                    high=1.0 / w_bound,
                    size=w_shp),
                dtype=input.dtype), name ='W')
    b_shp = (2,)
    b = theano.shared(numpy.asarray(
                rng.uniform(low=-.5, high=.5, size=b_shp),
                dtype=input.dtype), name ='b')
    # build symbolic expression that computes the convolution of input with filters in w
    conv_out = conv2d(input_, W)
    return T.nnet.sigmoid(conv_out + b.dimshuffle('x', 0, 'x', 'x'))

In [68]:
def shared_dataset(data_xy, borrow=True):
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')

            

def load_data(path='/home/siak/Data/MNIST/mnist.pkl.gz'):
    # Load the dataset
    with gzip.open(path, 'rb') as f:
        train_set, valid_set, test_set = pickle.load(f)

   
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval

In [69]:
class ConvLayer(object):
    """Pool Layer of a convolutional network """

    def __init__(self, rng, input_, filter_shape, input_shape):
        """
        :filter_shape: (number of output feature maps, num input feature maps,
                              filter height, filter width)

        :input_shape: (batch size, num input feature maps,
                             image height, image width)
        """

        assert input_shape[1] == filter_shape[1]
        self.input_ = input_
        number_of_inputs = numpy.prod(filter_shape[1:])
        number_of_outputs = numpy.prod(filter_shape[0] + numpy.prod(filter_shape[2:]))

        self.W = theano.shared(
            numpy.asarray(rng.uniform(low=-1, high=1, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv2d(
            input=input_,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=input_shape
        )


        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

        # store parameters of this layer
        self.params = [self.W, self.b]

        # keep track of model input
        self.input_ = input_
        
        
#remember to add zero padding
class PoolLayer(object):
    def __init__(self, rng, input_, poolsize=(2, 2)):
        self.output = downsample.max_pool_2d(
            input=input_,
            ds=poolsize,
            ignore_border=True
        )
        self.input_ = input_
        
class ReluLayer(object):
    def __init__(self, input_):
        self.output = T.tanh(input_)
        self.input_ = input_

In [150]:
class ConvoNet(object):
    def initialize_datasets(self):
        datasets = load_data()
        self.train_set_x, self.train_set_y = datasets[0]
        self.valid_set_x, self.valid_set_y = datasets[1]
        self.test_set_x, self.test_set_y = datasets[2]

        self.n_train_batches = self.train_set_x.get_value(borrow=True).shape[0]
        self.n_valid_batches = self.valid_set_x.get_value(borrow=True).shape[0]
        self.n_test_batches = self.test_set_x.get_value(borrow=True).shape[0]
        self.n_train_batches //= self.batch_size
        self.n_valid_batches //= self.batch_size
        self.n_test_batches //= self.batch_size
    
    def setup_network(self):
        input_shape = (self.batch_size, 1, 28, 28)
        layer0_input = self.x.reshape(input_shape)
        self.layer0 = ConvLayer(self.state, input_=layer0_input, filter_shape=(16, 1, 5, 5), input_shape=input_shape)
        
        self.layer1 = ReluLayer(input_=self.layer0.output)
        self.input_data = self.test_set_x[self.index * self.batch_size: (self.index + 1) * self.batch_size]
        
       
        self.layer2 = ConvLayer(self.state, input_= self.layer1.output, filter_shape=(32, 16, 5, 5), input_shape=(self.batch_size, 16, 24, 24))
        self.layer3 = ReluLayer(input_=self.layer2.output)
        #import pdb; pdb.set_trace()
        
        self.layer4 = PoolLayer(self.state, input_=self.layer3.output)
        
        self.layer5 = HiddenLayer(self.state, input_=self.layer4.output.flatten(2), n_in= 32 * 10 * 10,
        n_out=500, activation=T.tanh)
        self.layer6 =  LogisticRegression(input_= self.layer5.output, n_in=500, n_out=10)
    
    def setup_training_functions(self):
        self.validate_model = theano.function(
            [self.index],
            self.layer6.errors(self.y),
            givens={
                self.x: self.test_set_x[self.index * self.batch_size: (self.index + 1) * self.batch_size],
                self.y: self.test_set_y[self.index * self.batch_size: (self.index + 1) * self.batch_size]
        })
        self.cost = self.layer6.negative_log_likelihood(self.y)
        self.params = self.layer6.params + self.layer5.params + self.layer2.params + self.layer0.params
        self.grads = T.grad(self.cost, self.params)
        self.updates = [(param_i, param_i - self.learning_rate * grad_i)
                   for param_i, grad_i in zip(self.params, self.grads)
                  ]
        self.train_model = theano.function(
            [self.index],
            self.cost,
            updates=self.updates,
            givens={
                self.x: self.train_set_x[self.index * self.batch_size: (self.index + 1) * self.batch_size],
                self.y: self.train_set_y[self.index * self.batch_size: (self.index + 1) * self.batch_size]
            }
        )
    def train(self):
        for epoch in xrange(self.epochs_no):
            for minibatch_index in range(self.n_train_batches):
                cost_ij = self.train_model(minibatch_index)
                validation_losses = [self.validate_model(i) for i
                                     in range(self.n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, self.n_train_batches,
                       this_validation_loss * 100.))
        print('optimization complete!')
    
    def __init__(self, batch_size=500, learning_rate=0.1, epochs_no=200):
        self.state = numpy.random.RandomState(23455)
        self.index = T.lscalar()  # index to a [mini]batch
        self.batch_size = batch_size
        self.x = T.matrix('x')   # the data is presented as rasterized images
        self.y = T.ivector('y')
        self.learning_rate = learning_rate
        self.epochs_no = epochs_no
        self.initialize_datasets()
        self.setup_network()
        self.setup_training_functions()
        

        
       
        


In [151]:
cn = ConvoNet()

In [None]:
out = cn.train()

epoch 0, minibatch 1/100, validation error 9.060000 %


In [130]:
out
#for x in range(10):
#    print(out[0][0][x])

array(0.916)

In [78]:
from matplotlib import pyplot as plt
import PIL
from PIL import Image

out = cn.test_model(2)
im = Image.fromarray(out[0][0])

for idx, (x, y) in zip(range(16), window(range(0, 32))):
    im = Image.fromarray(out[0][idx] * 256).resize((48, 48), PIL.Image.ANTIALIAS)
    fltr = Image.fromarray(cn.layer0.W.get_value()[idx][0] * 256)
    fltr.resize((50, 50), PIL.Image.ANTIALIAS)
    plt.subplot(16, 2, x+1); plt.axis('off'); plt.imshow(fltr)
    plt.subplot(16, 2, y+1); plt.axis('off'); plt.imshow(im)
    
plt.show()

In [26]:
class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The logistic regression is fully described by a weight matrix :math:`W`
    and bias vector :math:`b`. Classification is done by projecting data
    points onto a set of hyperplanes, the distance to which is used to
    determine a class membership probability.
    """

    def __init__(self, input_, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """
        # start-snippet-1
        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # symbolic expression for computing the matrix of class-membership
        # probabilities
        # Where:
        # W is a matrix where column-k represent the separation hyperplane for
        # class-k
        # x is a matrix where row-j  represents input training sample-j
        # b is a vector where element-k represent the free parameter of
        # hyperplane-k
        self.p_y_given_x = T.nnet.softmax(T.dot(input_, self.W) + self.b)

        # symbolic description of how to compute prediction as class whose
        # probability is maximal
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # end-snippet-1

        # parameters of the model
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input

    def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # start-snippet-2
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
        # end-snippet-2

    def errors(self, y):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch ; zero one
        loss over the size of the minibatch

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """

        # check if y has same dimension of y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()


In [27]:
"""
This tutorial introduces the multilayer perceptron using Theano.

 A multilayer perceptron is a logistic regressor where
instead of feeding the input to the logistic regression you insert a
intermediate layer, called the hidden layer, that has a nonlinear
activation function (usually tanh or sigmoid) . One can use many such
hidden layers making the architecture deep. The tutorial will also tackle
the problem of MNIST digit classification.

.. math::

    f(x) = G( b^{(2)} + W^{(2)}( s( b^{(1)} + W^{(1)} x))),

References:

    - textbooks: "Pattern Recognition and Machine Learning" -
                 Christopher M. Bishop, section 5

"""

from __future__ import print_function

__docformat__ = 'restructedtext en'


import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T


# start-snippet-1
class HiddenLayer(object):
    def __init__(self, rng, input_, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Typical hidden layer of a MLP: units are fully-connected and have
        sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
        and the bias vector b is of shape (n_out,).

        NOTE : The nonlinearity used here is tanh

        Hidden unit activation is given by: tanh(dot(input,W) + b)

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: Non linearity to be applied in the hidden
                           layer
        """
        self.input_ = input_
        # end-snippet-1

        # `W` is initialized with `W_values` which is uniformely sampled
        # from sqrt(-6./(n_in+n_hidden)) and sqrt(6./(n_in+n_hidden))
        # for tanh activation function
        # the output of uniform if converted using asarray to dtype
        # theano.config.floatX so that the code is runable on GPU
        # Note : optimal initialization of weights is dependent on the
        #        activation function used (among other things).
        #        For example, results presented in [Xavier10] suggest that you
        #        should use 4 times larger initial weights for sigmoid
        #        compared to tanh
        #        We have no info for other function, so we use the same as
        #        tanh.
        if W is None:
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input_, self.W) + self.b
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        # parameters of the model
        self.params = [self.W, self.b]



In [28]:
def window(l):
    it = iter(l)
    for x in it:
        yield (x, next(it))

def special_get_value(x):
    y = T.cast(x, 'int32')
    return y.eval()
    