In [11]:
# Based on the Theano logistic-regression tutorial: http://deeplearning.net/tutorial/logreg.html

import cPickle
import gzip
import os
import sys
import timeit

import numpy as np

import theano
import theano.tensor as T
from logistic import load_data

In [12]:
# Logistic Regression Class

class LogisticReg(object):
    """Multi-class logistic regression expressed as a Theano graph.

    Holds a weight matrix ``w`` of shape (n_in, n_out) and a bias vector
    ``b`` of shape (n_out,), both zero-initialised shared variables, and
    exposes the symbolic class probabilities, predicted labels, cost and
    error rate built on top of them.
    """

    def __init__(self, input, n_in, n_out):
        """Create the parameters and the symbolic outputs.

        :param input: symbolic input that is multiplied with ``w``
            (one row per example).
        :param n_in: number of input features (rows of ``w``).
        :param n_out: number of classes (columns of ``w``, length of ``b``).
        """
        # Weights, initialised as an (n_in, n_out) matrix of zeros
        self.w = theano.shared(
            value=np.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='w',
            borrow=True
        )
        # Biases, initialised as a vector of n_out zeros
        self.b = theano.shared(
            value=np.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # Row-wise class probabilities: softmax(input . w + b)
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.w) + self.b)

        # Predicted label = most probable class in each row. p_y_given_x is a
        # symbolic expression, not an ndarray, so this must be Theano's
        # argmax; np.argmax only works here when NumPy happens to forward the
        # call to the variable's own argmax method.
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # Parameters to be updated during training
        self.params = [self.w, self.b]

        # Keep a handle on the symbolic input
        self.input = input

    def neg_log_likelihood(self, y):
        """Return the mean negative log-likelihood of the labels ``y``.

        For each row i, the log-probability in column ``y[i]`` is selected;
        the result is the negated mean of those values.

        :param y: symbolic vector of labels, used as column indices.
        """
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Return the fraction of rows whose predicted label differs from ``y``.

        :param y: symbolic vector of labels.
        :raises TypeError: if ``y`` does not have the same number of
            dimensions as ``self.y_pred``.
        :raises NotImplementedError: if the dtype of ``y`` does not start
            with ``'int'``.
        """
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        if y.dtype.startswith('int'):
            # T.neq yields 1 where the prediction is wrong, 0 otherwise
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()

In [13]:
## Symbolic inputs and model instance

# Symbolic minibatch number
index = T.lscalar('index')

# Symbolic placeholders: `x` is a matrix of inputs, `y` an integer vector of labels
x = T.matrix('x')
y = T.ivector('y')

# 28 * 28 = 784 input features, 10 output classes
classifier = LogisticReg(input=x, n_in=28 * 28, n_out=10)

# Symbolic cost: mean negative log-likelihood of `y` under the model
cost = classifier.neg_log_likelihood(y)

In [16]:
#### Compile the symbolic training function ####
# NOTE: this cell reads train_set_x, train_set_y and batch_size, which are
# defined by the "Load dataset" and minibatch-size cells (In [14] / In [15]).
# Those cells appear further down in this notebook and must be run first.

# Step size for gradient descent (single place to tune it)
learning_rate = 0.13

# Gradients of the cost with respect to the weights and the biases
gw, gb = T.grad(cost, [classifier.w, classifier.b])

# One gradient-descent step for each parameter
updates = [
    (classifier.w, classifier.w - learning_rate * gw),
    (classifier.b, classifier.b - learning_rate * gb),
]

# train(index) returns [cost] for minibatch `index` of the training set and
# applies `updates` to the shared parameters as a side effect.
train = theano.function(
    inputs=[index],
    outputs=[cost],
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size],
    },
)

In [14]:
## Load the dataset splits ##
datasets = load_data('mnist.pkl.gz')

# Entries 0, 1 and 2 are the training, validation and test splits; each one
# unpacks into an (inputs, labels) pair.
(
    (train_set_x, train_set_y),
    (valid_set_x, valid_set_y),
    (test_set_x, test_set_y),
) = datasets[0], datasets[1], datasets[2]

... loading data


In [15]:
print train_set_x.get_value().shape
print test_set_x.get_value().shape

# Set minibatch size
batch_size = 600

# compute number of minibatches for training, validation and testing
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size


(50000, 784)
(10000, 784)


In [17]:
minibatch_avg_cost = 0
for j in xrange(100):
    for i in xrange(n_train_batches):
        minibatch_avg_cost = train(i)
    print 'iteration ',j,' : cost : ',minibatch_avg_cost

iteration  0  : cost :  [array(0.6845049826453509)]
iteration  1  : cost :  [array(0.5626089911321697)]
iteration  2  : cost :  [array(0.5108841152796244)]
iteration  3  : cost :  [array(0.4803931242250776)]
iteration  4  : cost :  [array(0.45959309350944494)]
iteration  5  : cost :  [array(0.4441862477042445)]
iteration  6  : cost :  [array(0.43215854023920286)]
iteration  7  : cost :  [array(0.422421433666431)]
iteration  8  : cost :  [array(0.41432618296120477)]
iteration  9  : cost :  [array(0.40745764544886065)]
iteration  10  : cost :  [array(0.4015354475896336)]
iteration  11  : cost :  [array(0.3963620826117809)]
iteration  12  : cost :  [array(0.3917936380380423)]
iteration  13  : cost :  [array(0.387722312530715)]
iteration  14  : cost :  [array(0.38406547190152707)]
iteration  15  : cost :  [array(0.3807585253114044)]
iteration  16  : cost :  [array(0.37775013265912305)]
iteration  17  : cost :  [array(0.37499888825231265)]
iteration  18  : cost :  [array(0.3724709696232807)

In [18]:
# test(index) returns the classifier's error rate on minibatch `index` of the
# test split; no updates are attached, so the parameters are left unchanged.
test = theano.function(
    inputs=[index],
    outputs=classifier.errors(y),
    givens={
        x: test_set_x[index * batch_size: (index + 1) * batch_size],
        y: test_set_y[index * batch_size: (index + 1) * batch_size],
    },
)

In [19]:
for j in xrange(n_test_batches):
    avg_test_error = test(j)
    print 'batch ',j,' : avg_error : ',avg_test_error
    

batch  0  : avg_error :  0.085
batch  1  : avg_error :  0.105
batch  2  : avg_error :  0.115
batch  3  : avg_error :  0.105
batch  4  : avg_error :  0.0933333333333
batch  5  : avg_error :  0.0783333333333
batch  6  : avg_error :  0.115
batch  7  : avg_error :  0.095
batch  8  : avg_error :  0.06
batch  9  : avg_error :  0.0633333333333
batch  10  : avg_error :  0.0666666666667
batch  11  : avg_error :  0.04
batch  12  : avg_error :  0.025
batch  13  : avg_error :  0.0766666666667
batch  14  : avg_error :  0.015
batch  15  : avg_error :  0.0516666666667


In [20]:
test = theano.function(inputs=[index],outputs=classifier.errors(y),
                      givens={x : valid_set_x[index*batch_size : (index +1)*batch_size],
                              y : valid_set_y[index*batch_size : (index +1)*batch_size]})

for j in xrange(n_valid_batches):
    avg_valid_error = test(j)
    print 'batch ',j,' : avg_valid_error : ',avg_valid_error
    

batch  0  : avg_valid_error :  0.101666666667
batch  1  : avg_valid_error :  0.07
batch  2  : avg_valid_error :  0.07
batch  3  : avg_valid_error :  0.0933333333333
batch  4  : avg_valid_error :  0.113333333333
batch  5  : avg_valid_error :  0.0733333333333
batch  6  : avg_valid_error :  0.085
batch  7  : avg_valid_error :  0.0566666666667
batch  8  : avg_valid_error :  0.0883333333333
batch  9  : avg_valid_error :  0.06
batch  10  : avg_valid_error :  0.0716666666667
batch  11  : avg_valid_error :  0.0583333333333
batch  12  : avg_valid_error :  0.0866666666667
batch  13  : avg_valid_error :  0.0466666666667
batch  14  : avg_valid_error :  0.0466666666667
batch  15  : avg_valid_error :  0.0583333333333
