# In [23]:
import cPickle
import gzip
import numpy
import theano

import theano.tensor as T

import ipdb

MINI_BATCH_SIZE = 500
            
# This function takes a (data, labels) pair - the images
# and their classes - and places each half into its own
# shared theano variable.
#
# We use shared variables because, when storing data
# on the GPU, it has to be stored as floats;
# therefore we store the labels as ``floatX``
# as well (``shared_y`` does exactly that). But
# during our computations we need them as ints
# (we use labels as indices, and if they are
# floats that doesn't make sense), therefore
# instead of returning ``shared_y`` we
# cast it to int. This little hack lets us
# get around this issue.
#
# You can tune the size of the mini-batches to fit
# in available GPU memory.
def shared_dataset(data_xy):
    """Wrap a ``(data, labels)`` pair in Theano shared variables.

    Both halves of the pair are converted to NumPy arrays of dtype
    ``theano.config.floatX`` and stored in their own shared variable.

    :param data_xy: a 2-element sequence ``(data, labels)``.
    :returns: a tuple ``(shared_data, labels_as_int32)`` where the first
        item is the shared variable holding the data and the second is
        the shared label variable cast to ``'int32'``, so the labels can
        be used as indices.
    """
    features, labels = data_xy
    float_dtype = theano.config.floatX

    features_shared = theano.shared(numpy.asarray(features, dtype=float_dtype))
    labels_shared = theano.shared(numpy.asarray(labels, dtype=float_dtype))

    # The labels are stored as floats but consumed as integer indices,
    # so hand back an int32 view of the shared label variable.
    return features_shared, T.cast(labels_shared, 'int32')


# Load the pickled MNIST data from the gzip-compressed archive.
#
# The file is formatted as a 3-tuple of tuples (the training,
# validation and test sets, in that order). Each inner tuple
# consists of a list of data elements (the normalized images)
# and the matching list of class labels (a number, 0-9, that
# indicates the class of the associated data element).
# NOTE(review): an earlier version of this note cited 10000
# entries per list; the splits may differ in size - confirm
# against the archive.
#
# Each data element is a normalized 28x28 MNIST
# image rendered in a single list of 784 elements
# (i.e. 28 x 28).
#
# So, to extract the image from the train_set tuple,
# you'd use something like:
#
# images = train_set[0]
# classes = train_set[1]
# first_image = images[0]
# first_image_class = classes[0]
#
# NOTE: unpickling can execute arbitrary code, so only load
# archives that come from a trusted source.
#
# The 'with' statement guarantees the archive is closed even
# if unpickling raises part-way through.
with gzip.open('data/mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = cPickle.load(f)

# Wrap each of the three splits in Theano shared variables.
test_set_x, test_set_y = shared_dataset(test_set)
valid_set_x, valid_set_y = shared_dataset(valid_set)
train_set_x, train_set_y = shared_dataset(train_set)

# Slice out the first three mini-batches of the training set.
# This is the general access pattern: mini-batch k covers rows
# k * MINI_BATCH_SIZE up to (but excluding) (k + 1) * MINI_BATCH_SIZE.
data_1 = train_set_x[0:MINI_BATCH_SIZE]
data_2 = train_set_x[MINI_BATCH_SIZE:2 * MINI_BATCH_SIZE]
data_3 = train_set_x[2 * MINI_BATCH_SIZE:3 * MINI_BATCH_SIZE]

class LogisticRegression(object):
    """Multi-class logistic regression (softmax) classifier.

    The model consists of a weight matrix ``W`` and a bias vector ``b``,
    both held in Theano shared variables and initialized to zeros. Class
    probabilities are ``softmax(input . W + b)`` and the prediction is the
    most probable class.
    """

    def __init__(self, input, datapoints_dim, label_dim):
        """Build the symbolic expressions of the classifier.

        :param input: symbolic variable holding the inputs; it is
            multiplied with ``W``, so its second dimension must be
            ``datapoints_dim`` (one row per input).
        :param datapoints_dim: number of elements in one input
            (28 * 28 = 784 for a flattened MNIST image).
        :param label_dim: number of classes (10 for MNIST).
        """
        # The weights, initialized to zeros, with one row per input
        # element and one column per class.
        #
        # In our MNIST example, this yields a 784 by 10
        # two-dimensional shared numpy array: every one of the 784
        # elements of an input value is connected to each of the
        # 10 output nodes.
        self.W = theano.shared(
            value=numpy.zeros(
                (datapoints_dim, label_dim),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )

        # The bias values, initialized to zeros, one per class
        # (10 in our MNIST example).
        self.b = theano.shared(
            value=numpy.zeros(
                (label_dim,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # An MNIST input value has 784 elements, so we can matrix
        # multiply it with W (see T.dot(.) below). This yields a 10
        # element vector per input, to which the bias values (self.b)
        # are added. Softmax then turns that vector into a distribution.
        #
        # We take these values to be the probabilities that the submitted
        # input belongs to each class, hence the name p_y_given_x: row i
        # holds the probability of every class given input i.
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # The predicted class of each submitted input - the most likely
        # class according to the probabilities obtained above.
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # The parameters of the model. These are what would usually be
        # saved (and what a training procedure would update).
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        """Return the mean negative log-likelihood of the labels ``y``.

        :param y: symbolic integer vector holding the correct class of
            each input, used as a column index into the probabilities.
        :returns: symbolic scalar; the mean over the inputs of
            ``-log P(Y = y[i] | x[i])``. It is non-negative and gets
            smaller as the model assigns more probability to the
            correct classes, which makes it suitable as a cost to
            minimize.
        """
        log_probs = T.log(self.p_y_given_x)
        # Row i, column y[i]: the log-probability of the correct class
        # for input i.
        correct_class_log_probs = log_probs[T.arange(y.shape[0]), y]
        # The negation is what makes this a *negative* log-likelihood;
        # without it, minimizing the cost would maximize the error.
        return -T.mean(correct_class_log_probs)

    def errors(self, y):
        """Return the fraction of inputs whose prediction differs from ``y``.

        :param y: symbolic integer vector holding the correct class of
            each input.
        :returns: symbolic scalar; the mean of ``y_pred != y``.
        :raises TypeError: if ``y`` does not have the same number of
            dimensions as ``self.y_pred``.
        :raises NotImplementedError: if ``y`` is not of an integer dtype.
        """
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        if y.dtype.startswith('int'):
            # T.neq yields 1 where the prediction is wrong, 0 otherwise.
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()

# Set up the logistic regression model on symbolic inputs:
# 'x' is a matrix of inputs and 'y' an integer vector of labels.
x = T.matrix('x')
y = T.ivector('y')

# One input element per pixel of a 28x28 image, ten output classes.
classifier = LogisticRegression(
    input=x,
    datapoints_dim=28 * 28,
    label_dim=10
)

# The cost expression built from the classifier for the labels 'y'.
cost = classifier.negative_log_likelihood(y)

print('finished.')

# Output: finished.
