In [20]:
import theano
import theano.tensor as T
import cPickle, gzip, numpy
import time
from LogisticRegression import *

In [21]:
def shared_dataset(data_xy, borrow=True):
    """Wrap an (inputs, labels) pair in Theano shared variables.

    Parameters
    ----------
    data_xy : pair
        Two-element sequence ``(data_x, data_y)``; each element is converted
        with ``numpy.asarray`` using ``theano.config.floatX`` as dtype.
    borrow : bool
        Forwarded unchanged to ``theano.shared`` for both arrays.

    Returns
    -------
    tuple
        ``(shared_x, labels)`` where ``shared_x`` is the shared variable
        holding the inputs and ``labels`` is the label shared variable passed
        through ``T.cast(..., 'int32')``.
    """
    def _as_shared(values):
        # Both arrays are stored as floatX (presumably so they can be placed
        # on the compute device -- TODO confirm against the Theano config).
        as_float = numpy.asarray(values, dtype=theano.config.floatX)
        return theano.shared(as_float, borrow=borrow)

    features, labels = data_xy
    shared_features = _as_shared(features)
    shared_labels = _as_shared(labels)

    # The labels are stored as floats but handed back cast to int32.
    return shared_features, T.cast(shared_labels, 'int32')

In [22]:
def load_data(dataset='data/mnist.pkl.gz'):
    """Load a gzipped pickle of three data splits and wrap them as shared variables.

    Parameters
    ----------
    dataset : str
        Path to a gzip-compressed pickle that unpickles to a 3-tuple
        ``(train_set, valid_set, test_set)``. Each split is handed to
        ``shared_dataset`` and must therefore be an ``(inputs, labels)`` pair.

    Returns
    -------
    list
        ``[(train_set_x, train_set_y), (valid_set_x, valid_set_y),
        (test_set_x, test_set_y)]`` as returned by ``shared_dataset``.

    Raises
    ------
    IOError
        Propagated from ``gzip.open`` / reading when the file cannot be read.
    ValueError
        If the unpickled object does not unpack into exactly three items.
    """
    f = gzip.open(dataset, 'rb')
    try:
        # NOTE(review): unpickling can execute arbitrary code -- only point
        # this at trusted dataset files.
        train_set, valid_set, test_set = cPickle.load(f)
    finally:
        # Release the file handle even when unpickling or unpacking fails.
        f.close()

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    return [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]

In [24]:
def sgd(learning_rate=0.13, n_epoch=1000, batch_size=10):
    """Train a LogisticRegression classifier with minibatch gradient descent.

    The three splits come from ``load_data()``. Theano functions are compiled
    for training (with parameter updates), validation and testing, and the
    training loop stops early once ``patience`` iterations have elapsed
    without a sufficiently large validation improvement. Progress and a final
    summary are printed; nothing is returned.

    Parameters
    ----------
    learning_rate : float
        Factor multiplying the gradients in the updates of ``classifier.W``
        and ``classifier.b``.
    n_epoch : int
        Maximum number of passes over the training minibatches.
    batch_size : int
        Rows per minibatch. Rows left over after splitting a set into full
        minibatches are never visited.
    """
    # Local import so this cell works without touching the import cell.
    # timeit.default_timer is a wall-clock timer; time.clock() reports CPU
    # time on Unix and is deprecated, which skews the epochs/sec figure.
    import timeit

    # load data
    datasets = load_data()
    train_set_x, train_set_y = datasets[0]  # inputs noted as [50000, 784] for the default file
    valid_set_x, valid_set_y = datasets[1]  # inputs noted as [10000, 784] for the default file
    test_set_x, test_set_y = datasets[2]    # inputs noted as [10000, 784] for the default file

    def _count_batches(shared_x):
        # Explicit floor division: the count must be an int for range(),
        # independent of the interpreter's '/' semantics.
        return shared_x.get_value(borrow=True).shape[0] // batch_size

    # compute number of minibatches
    n_train_batches = _count_batches(train_set_x)
    n_valid_batches = _count_batches(valid_set_x)
    n_test_batches = _count_batches(test_set_x)

    print('training set has %i batches' % n_train_batches)
    print('validate set has %i batches' % n_valid_batches)
    print('testing set has %i batches' % n_test_batches)

    #---------------------BUILD MODEL-----------------------#
    print('Build Model...')

    minibatch_index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # construct the logistic regression class
    classifier = LogisticRegression(input=x, n_in=28*28, n_out=10)
    cost = classifier.negative_log_likelihood(y)

    def _minibatch_givens(set_x, set_y):
        # Substitute the symbolic x / y with the minibatch_index-th slice of
        # the given shared split.
        start = minibatch_index * batch_size
        stop = (minibatch_index + 1) * batch_size
        return {x: set_x[start:stop], y: set_y[start:stop]}

    # model on a minibatch; errors_nl(y) is reported below as an error rate
    test_model = theano.function(inputs=[minibatch_index],
                                 outputs=classifier.errors_nl(y),
                                 givens=_minibatch_givens(test_set_x, test_set_y))

    validate_model = theano.function(inputs=[minibatch_index],
                                     outputs=classifier.errors_nl(y),
                                     givens=_minibatch_givens(valid_set_x, valid_set_y))

    # compute gradient
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    # updates should be defined as a list of pairs of (shared-variable, new expression)
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    train_model = theano.function(inputs=[minibatch_index],
                                  outputs=cost,
                                  updates=updates,
                                  givens=_minibatch_givens(train_set_x, train_set_y))

    #---------------------Train-----------------------#
    print('Training the model...')

    # early stop parameters
    patience = 5000            # minimum number of iterations before stopping
    patience_increase = 2      # wait this much longer when a new best is found
    improvement_thres = 0.995  # relative improvement that counts as significant
    # validate at least once per epoch, and at least twice before patience runs out
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = numpy.inf
    test_score = 0
    start_time = timeit.default_timer()
    done_looping = False
    epoch = 0

    while (epoch < n_epoch) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index_train in range(n_train_batches):
            # One gradient step; the returned minibatch cost is not used.
            train_model(minibatch_index_train)
            # number of iterations (updates) performed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index_train

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                cur_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, training minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index_train + 1, n_train_batches,
                       cur_validation_loss * 100.))

                # compare with best validation loss
                if cur_validation_loss < best_validation_loss:
                    if cur_validation_loss < best_validation_loss * improvement_thres:
                        # significant improvement: allow training to continue
                        # until at least iteration * patience_increase
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = cur_validation_loss
                    # the test set is scored only when a new best model is found
                    test_loss = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_loss)
                    print(('     epoch %i, minibatch %i/%i, test error of best'
                           ' model %f %%') %
                          (epoch, minibatch_index_train + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                # patience exhausted: leave both the batch loop and the epoch loop
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization completed with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))

    print('The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))

In [25]:
# Entry point: run the full training with sgd()'s default hyper-parameters.
if __name__ == '__main__':
    sgd()

training set has 5000 batches
validate set has 1000 batches
testing set has 1000 batches
Build Model...
Training the model...
epoch 1, training minibatch 2500/5000, validation error 9.070000 %
     epoch 1, minibatch 2500/5000, test error of best model 9.170000 %
epoch 1, training minibatch 5000/5000, validation error 8.520000 %
     epoch 1, minibatch 5000/5000, test error of best model 9.110000 %
epoch 2, training minibatch 2500/5000, validation error 8.420000 %
     epoch 2, minibatch 2500/5000, test error of best model 8.480000 %
epoch 2, training minibatch 5000/5000, validation error 8.200000 %
     epoch 2, minibatch 5000/5000, test error of best model 8.800000 %
epoch 3, training minibatch 2500/5000, validation error 8.170000 %
     epoch 3, minibatch 2500/5000, test error of best model 8.350000 %
epoch 3, training minibatch 5000/5000, validation error 8.060000 %
     epoch 3, minibatch 5000/5000, test error of best model 8.620000 %
epoch 4, training minibatch 2500/5000, validat