In [1]:
### >> Multilayer Perceptron << ###
## http://deeplearning.net/tutorial/mlp.html ##

import os
import sys

import theano
import theano.tensor as T
import numpy as np

from logistic import load_data,LogisticRegression

Using gpu device 0: GeForce GTX 960


In [24]:
# Define the hidden layer class
class HiddenLayer(object):
    """Fully connected layer whose output is ``tanh(dot(input, w) + b)``.

    Parameters
    ----------
    rng : object providing ``uniform(low=, high=, size=)``
        Source of randomness for the initial weights (callers in this
        notebook pass a ``numpy.random.RandomState``).
    input : symbolic variable fed to ``T.dot`` as the left operand.
    n_in : int
        Number of rows of the weight matrix.
    n_out : int
        Number of columns of the weight matrix and length of the bias.
    w : shared variable, optional
        Weight matrix to use instead of a freshly initialised one.
    b : shared variable, optional
        Bias vector to use instead of a zero-initialised one.

    Attributes
    ----------
    input, w, b : the values described above.
    output : symbolic expression ``tanh(dot(input, w) + b)``.
    params : ``[w, b]``.
    """
    def __init__(self,rng,input,n_in,n_out,w=None,b=None):
        self.input = input
        float_type = theano.config.floatX

        if w is None:
            # Draw the weights uniformly from [-bound, bound] where
            # bound = sqrt(6 / (n_in + n_out)).
            bound = np.sqrt(6. / (n_in + n_out))
            initial_w = np.asarray(
                rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                dtype=float_type
            )
            w = theano.shared(initial_w, name='w', borrow=True)

        if b is None:
            # The bias starts at zero, one entry per output unit.
            initial_b = np.zeros((n_out,), dtype=float_type)
            b = theano.shared(value=initial_b, name='b', borrow=True)

        self.w = w
        self.b = b
        self.output = T.tanh(T.dot(input, self.w) + self.b)
        self.params = [self.w, self.b]
        

In [25]:
# Load the dataset through load_data (imported from the local `logistic`
# module). The result is indexed below as three (x, y) pairs.
datasets = load_data('TamilCh.pkl.gz')

# Number of samples per minibatch.
batch_size = 20

train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

# compute number of minibatches for training, validation and testing
# Floor division is spelled out with `//` so the counts stay integers (as
# xrange requires) even when true division is in effect. Samples left over
# after the last full batch are not counted.
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

... loading data


In [26]:
### : Testing the hidden layer ###
#------------------------------##

# Smoke test: construct a single HiddenLayer on a symbolic input matrix with
# 30*30 inputs and 500 outputs. Nothing is compiled or evaluated here.
rng = np.random.RandomState(1234)
x = T.matrix('x')

hl = HiddenLayer(rng=rng, input=x, n_in=30 * 30, n_out=500)

In [30]:
### Define MLP class ###
class MLP(object):
    """Two stacked ``HiddenLayer`` objects feeding a ``LogisticRegression``.

    Parameters
    ----------
    rng : random number generator handed to both hidden layers.
    input : symbolic input of the first hidden layer.
    n_in : int
        Input size of the first hidden layer.
    n_h : int
        Output size of both hidden layers (and input size of the output layer).
    n_out : int
        Output size of the ``LogisticRegression`` layer.

    Attributes
    ----------
    hidden_layer1, hidden_layer2, output_layer : the three layers, in order.
    L1 : sum of absolute values of the three layers' ``w``.
    L2 : sum of squares of the three layers' ``w``.
    neg_log_likelihood, errors : taken directly from the output layer.
    params : concatenation of the three layers' ``params`` lists.
    input : the ``input`` argument.
    """
    def __init__(self,rng,input,n_in,n_h,n_out):
        self.input = input

        # Build the stack: input -> hidden 1 -> hidden 2 -> output layer.
        first = HiddenLayer(rng, input=input, n_in=n_in, n_out=n_h)
        second = HiddenLayer(rng, input=first.output, n_in=n_h, n_out=n_h)
        top = LogisticRegression(input=second.output, n_in=n_h, n_out=n_out)

        self.hidden_layer1 = first
        self.hidden_layer2 = second
        self.output_layer = top

        # regularization: accumulate both penalties layer by layer, in the
        # order hidden 1, hidden 2, output.
        l1_penalty = abs(first.w).sum()
        l2_penalty = (first.w ** 2).sum()
        for weights in (second.w, top.w):
            l1_penalty = l1_penalty + abs(weights).sum()
            l2_penalty = l2_penalty + (weights ** 2).sum()
        self.L1 = l1_penalty
        self.L2 = l2_penalty

        # Negative log likelihood and error count are those of the output layer.
        self.neg_log_likelihood = top.neg_log_likelihood
        self.errors = top.errors

        # params: every layer's parameters, in layer order.
        self.params = first.params + second.params + top.params

In [50]:
# Random generator used to initialise the hidden-layer weights.
rng = np.random.RandomState(1234)

# Symbolic placeholders: a scalar index, an input matrix and a label vector.
index = T.lscalar('index')
x = T.matrix('x')
y = T.ivector('y')

# instantiate MLP classifier
# 30*30 inputs, 500 units per hidden layer, 156 outputs
# (presumably one per character class -- confirm against the dataset).
cl = MLP(rng=rng, input=x, n_in=30 * 30, n_h=500, n_out=156)

In [56]:
# Hyper-parameters of the objective and of the gradient step.
l1_weight = 0.000
l2_weight = 0.001
learning_rate = 0.01

# setup cost: negative log likelihood plus the weighted L1 and L2 penalties
cost = cl.neg_log_likelihood(y) + (cl.L1 * l1_weight) + (cl.L2 * l2_weight)

# setup gradient: one symbolic gradient of the cost per model parameter
gparams = []
for param in cl.params:
    gparams.append(T.grad(cost, param))

# setup updates: plain gradient descent, param <- param - learning_rate * grad
updates = []
for param, gparam in zip(cl.params, gparams):
    updates.append((param, param - learning_rate * gparam))

In [58]:
# compile training function
# train(index) substitutes minibatch number `index` of the training set for
# x and y, applies `updates` to the parameters and returns `cost`.
train_batch = slice(index * batch_size, (index + 1) * batch_size)
train = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[train_batch],
        y: train_set_y[train_batch],
    }
)

In [59]:
# Actual training begins here
minibatch_avg_cost = 0
for j in xrange(100):
    for i in xrange(n_train_batches):
        minibatch_avg_cost = train(i)
    if j % 10 == 0:
        print 'iteration ',j,' : cost : ', minibatch_avg_cost/n_train_batches
    

iteration  0  : cost :  0.00405857391357
iteration  10  : cost :  0.00199298591614
iteration  20  : cost :  0.00124554433823
iteration  30  : cost :  0.00107453575134
iteration  40  : cost :  0.00102085027695
iteration  50  : cost :  0.00103062477112
iteration  60  : cost :  0.00107429666519
iteration  70  : cost :  0.00108952674866
iteration  80  : cost :  0.00105737953186
iteration  90  : cost :  0.00103445415497


In [60]:
# compile the test function
# test(index) returns cl.errors(y) for minibatch number `index`; note that the
# minibatch is taken from the validation split (valid_set_x / valid_set_y).
valid_batch = slice(index * batch_size, (index + 1) * batch_size)
test = theano.function(
    inputs=[index],
    outputs=cl.errors(y),
    givens={
        x: valid_set_x[valid_batch],
        y: valid_set_y[valid_batch],
    }
)

In [61]:
# testing
error_sum = 0.0
for i in xrange(n_valid_batches):
    error_sum += test(i)
print 'avg_error : ',error_sum/n_valid_batches

avg_error :  0.3794
