In [1]:
# Reference: LeNet tutorial -- http://deeplearning.net/tutorial/lenet.html
import numpy as np

import theano.tensor as T
import theano
import theano.tensor.nnet as conv

from theano.tensor.signal import downsample

from logistic import load_data,LogisticRegression
from mlp import HiddenLayer

Using gpu device 0: GeForce GTX 960


In [2]:
class ConvPoolLayer(object):
    """Convolution layer followed by max-pooling, a per-filter bias and tanh.

    Parameters
    ----------
    rng : random generator exposing ``uniform(low, high, size)``; used to
        draw the initial filter weights.
    input : symbolic 4D tensor that is fed to the convolution.
    filter_shape : (n_filters, n_input_maps, filter_height, filter_width).
    image_shape : shape of ``input``; its entry 1 (number of input feature
        maps) must equal ``filter_shape[1]``.
    pool_size : downsampling factor handed to max-pooling (default (2, 2)).

    Attributes
    ----------
    input  : the symbolic input that was passed in.
    w, b   : shared variables holding the filter weights and the biases.
    output : tanh(max_pool(conv(input, w)) + b)
    params : [w, b]
    """

    def __init__(self, rng, input, filter_shape, image_shape, pool_size=(2, 2)):
        # the filters must expect exactly as many input maps as the images have
        assert image_shape[1] == filter_shape[1]
        self.input = input

        # fan_in: number of inputs feeding each hidden unit
        #   = n_input_maps * filter_height * filter_width
        fan_in = np.prod(filter_shape[1:])
        # fan_out: n_filters * filter_height * filter_width
        #   NOTE: pool_size is not taken into account in this count
        fan_out = filter_shape[0] * np.prod(filter_shape[2:])

        # initial weights are drawn uniformly between -bound and +bound
        bound = np.sqrt(6. / (fan_in + fan_out))
        initial_w = np.asarray(
            rng.uniform(low=-bound, high=bound, size=filter_shape),
            dtype=theano.config.floatX)
        self.w = theano.shared(initial_w, name='w', borrow=True)

        # one bias per filter, all starting at zero
        initial_b = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(initial_b, name='b', borrow=True)

        # convolve the input with the filters ...
        feature_maps = conv.conv2d(input, self.w,
                                   filter_shape=filter_shape,
                                   image_shape=image_shape)
        # ... then downsample every feature map by max-pooling
        pooled_maps = downsample.max_pool_2d(input=feature_maps,
                                             ds=pool_size,
                                             ignore_border=True)

        # reshape the bias from (n_filters,) to (1, n_filters, 1, 1) so it
        # broadcasts over the pooled maps, then apply the non-linearity
        bias_4d = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = T.tanh(pooled_maps + bias_4d)

        # trainable parameters of this layer
        self.params = [self.w, self.b]

In [3]:
# Load the MNIST data; entries 0, 1 and 2 of the result are unpacked below
# as the training, validation and test (inputs, labels) pairs.
datasets = load_data('mnist.pkl.gz')

# number of examples processed per minibatch
batch_size = 500

train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

# Number of complete minibatches in each split.
# Floor division (//) is used on purpose: these counts are later passed to
# xrange(), which needs an integer, and `/` would yield a float as soon as
# true division is in effect. Examples that do not fill a whole minibatch
# are left out of the count.
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

... loading data


In [4]:
# Symbolic inputs: a minibatch of flattened images and its integer labels
x = T.matrix('x')
y = T.ivector('y')

# step size applied to the gradients when the parameters are updated
learning_rate = 0.01

# seeded random generator used for weight initialisation
rng = np.random.RandomState(123455)

# view every row of x as a single-channel 28x28 image:
# resulting shape is (batch_size, 1, 28, 28)
layer0_input = x.reshape((batch_size, 1, 28, 28))

# Layer 0: 20 filters of size 5x5 over the single input channel
layer0 = ConvPoolLayer(
    rng=rng,
    input=layer0_input,
    filter_shape=(20, 1, 5, 5),
    image_shape=(batch_size, 1, 28, 28),
)
                      

In [5]:
## Layer 1: second convolution / pooling stage ##
# consumes layer0's output, declared here as 20 feature maps of 12x12 per
# example, and applies 50 filters of size 5x5 across those 20 maps
layer1 = ConvPoolLayer(
    rng=rng,
    input=layer0.output,
    filter_shape=(50, 20, 5, 5),
    image_shape=(batch_size, 20, 12, 12),
)

In [6]:
## Layer 2: fully connected hidden layer ##
# layer1 output shape : (batch_size, 50, 4, 4)
# flatten(2) keeps the first dimension and collapses the rest, giving the
# (batch_size, 50*4*4) matrix the hidden layer takes as input
layer2_h_input = layer1.output.flatten(2)

# 50*4*4 input values per example feeding 500 hidden units
layer2_h = HiddenLayer(
    rng=rng,
    input=layer2_h_input,
    n_in=50 * 4 * 4,
    n_out=500,
)

In [7]:
# Layer 3: output layer -- LogisticRegression on top of the 500 hidden
# activations, with 10 outputs
layer3_o = LogisticRegression(
    input=layer2_h.output,
    n_in=500,
    n_out=10,
)

In [8]:
# Training cost: the output layer's neg_log_likelihood for the labels y
cost = layer3_o.neg_log_likelihood(y)

# Every trainable parameter, listed from the output layer back to layer0
params = (layer3_o.params
          + layer2_h.params
          + layer1.params
          + layer0.params)

# Symbolic gradient of the cost with respect to each entry of params
# (same order as params)
gparams = T.grad(cost=cost, wrt=params)

In [13]:
## Gradient-descent updates ##
# every parameter is moved against its gradient, scaled by learning_rate
updates = [
    (p, p - g * learning_rate)
    for p, g in zip(params, gparams)
]

# symbolic minibatch number; selects which slice of the data is used
index = T.lscalar('index')

# bounds of minibatch `index`: rows [batch_start, batch_stop)
batch_start = index * batch_size
batch_stop = (index + 1) * batch_size

# compiled training step: substitutes the selected training slice for x and y,
# returns the cost and applies `updates` to the parameters
train = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[batch_start:batch_stop],
        y: train_set_y[batch_start:batch_stop],
    },
)

In [16]:
# Actual training #
# Actual training begins here
minibatch_avg_cost = 0
for j in xrange(100):
    for i in xrange(n_train_batches):
        minibatch_avg_cost = train(i)        
    print 'iteration ',j,' : cost : ', minibatch_avg_cost

iteration  0  : cost :  0.113680385053
iteration  1  : cost :  0.112653717399
iteration  2  : cost :  0.111653864384
iteration  3  : cost :  0.110670574009
iteration  4  : cost :  0.109706357121
iteration  5  : cost :  0.108763627708
iteration  6  : cost :  0.107837550342
iteration  7  : cost :  0.10692743212
iteration  8  : cost :  0.106033541262
iteration  9  : cost :  0.105159319937
iteration  10  : cost :  0.104302830994
iteration  11  : cost :  0.103467419744
iteration  12  : cost :  0.10264454782
iteration  13  : cost :  0.101835392416
iteration  14  : cost :  0.101043917239
iteration  15  : cost :  0.100261412561
iteration  16  : cost :  0.0994883701205
iteration  17  : cost :  0.0987267270684
iteration  18  : cost :  0.0979755520821
iteration  19  : cost :  0.097239561379
iteration  20  : cost :  0.0965143442154
iteration  21  : cost :  0.0957981869578
iteration  22  : cost :  0.0950925275683
iteration  23  : cost :  0.0943966805935
iteration  24  : cost :  0.0937125980854
iter

In [17]:
# testing
test = theano.function(inputs = [index],
                      outputs = layer3_o.errors(y),
                      givens = { x : test_set_x[index*batch_size : (index +1)*batch_size],
                                 y : test_set_y[index*batch_size : (index +1)*batch_size]
                               }
                      )
error_sum = 0.0
for i in xrange(n_test_batches):
    error_sum += test(i)
print 'avg_error : ',error_sum/n_test_batches


avg_error :  0.0134
