In [76]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import timeit
import gzip,cPickle,sys,os
from PIL import Image

import theano
from theano import tensor as T
from theano import shared,function,grad,pp
from theano.tensor.shared_randomstreams import RandomStreams 

In [30]:
f = gzip.open('mnist.pkl.gz','rb')
train_set,valid_set,test_set = cPickle.load(f)
f.close()
def share_dataset(data_xy):
    data_x,data_y = data_xy
    shared_x = shared(np.array(data_x,dtype=theano.config.floatX))
    shared_y = shared(np.array(data_y,dtype=theano.config.floatX))
    return shared_x,T.cast(shared_y,'int32')

train_set_x,train_set_y = share_dataset(train_set)
valid_set_x,valid_set_y = share_dataset(valid_set)
test_set_x,test_set_y = share_dataset(test_set)

In [86]:
class LogisticRegression(object):
    def __init__(self,x,n_in,n_out):
        self.W = theano.shared(
            value=np.zeros(
                (n_in,n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        self.b = theano.shared(
            value=np.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )
        self.p_y_given_x = T.nnet.softmax(T.dot(x,self.W)+self.b)
        self.y_pred = T.argmax(self.p_y_given_x,axis=1)
        self.params = [ self.W, self.b ]
        self.x = x
    
    def negative_log_likelihood(self,y):
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
    
    def errors(self,y):
        return T.mean(T.neq(self.y_pred,y))

In [87]:
class HiddenLayer(object):
    def __init__(self,x,n_in,n_out):
        self.x = x
        W_values = np.asarray(
                np.random.uniform(
                    low=-np.sqrt(6./(n_in + n_out)),
                    high=np.sqrt(6./(n_in + n_out)),
                    size=(n_in,n_out)
                ),
                dtype=theano.config.floatX
        )
        W = shared(value=W_values, name='W',borrow=True)
        b_values = np.zeros((n_out,),dtype=theano.config.floatX)
        b = shared(value=b_values,name='b',borrow=True)
        self.W = W
        self.b = b
        self.output = T.tanh(T.dot(x,self.W) + self.b)
        self.params = [ self.W, self.b ]

In [94]:
class DA(object):
    def __init__(self,input,n_visible,n_hidden,W=None,bhid=None,bvis=None):
        self.x = input
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        
        if not W:
            _W = np.asarray(
                np.random.uniform(
                    low=-4*np.sqrt(6. / (n_hidden + n_visible)),
                    high=4*np.sqrt(6. / (n_hidden + n_visible)),
                    size=(n_visible,n_hidden)
                ),
                dtype=theano.config.floatX
            )
            self.W = shared(value=_W,name='W',borrow=True)
        else:
            self.W = W
        
        if not bhid:
            self.b = shared(
                value=np.zeros(
                    n_hidden,
                    dtype=theano.config.floatX
                ),
                name='b',
                borrow=True
            )
        else:
            self.b = bhid
            
        self.W_prime = self.W.T
        
        if not bvis:
            self.b_prime = shared(
                value=np.zeros(
                    n_visible,
                    dtype=theano.config.floatX
                )
            )
        else:
            self.b_prime = bvis
        
        self.params = [self.W, self.b, self.b_prime]
        self.theano_rng = RandomStreams(np.random.randint(2 ** 30))

    def get_hidden_values(self,input):
        return T.nnet.sigmoid(T.dot(input,self.W) + self.b)
    
    def get_reconstructed_input(self,hidden):
        return T.nnet.sigmoid(T.dot(hidden,self.W_prime) + self.b_prime)
    
    def get_cost_updates(self,corruption_level,learning_rate):
        corrupted_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(corrupted_x)
        z = self.get_reconstructed_input(y)
        L = -  T.sum(self.x * T.log(z) + (1-self.x) * T.log(1-z),axis=1)
        cost = T.mean(L)
        gparams = grad(cost,self.params)
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params,gparams)
        ]
        return (cost,updates)
    
    def get_corrupted_input(self, input, corruption_level):
        return self.theano_rng.binomial(size=input.shape, n=1,
                                        p=1 - corruption_level,
                                        dtype=theano.config.floatX) * input


In [95]:
batch_size = 600
index = T.lscalar()
x = T.matrix('x')
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

da = DA(input=x,n_visible=28*28,n_hidden=500)
cost,updates = da.get_cost_updates(
    corruption_level=0.3,
    learning_rate=0.1
)
train_da = function(
    [index],
    cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index+1)* batch_size]
    }
)

start_time = timeit.default_timer()
for epoch in range(10):
    c = []
    for batch_index in range(n_train_batches):
        c.append(train_da(batch_index))
    print('Training epoch %d, cost ' % epoch, np.mean(c))

end_time = timeit.default_timer()
training_time = (end_time - start_time)

print(('training time is %.2fm' % (training_time / 60.)))

('Training epoch 0, cost ', 126.56361769299292)
('Training epoch 1, cost ', 94.774712825267017)
('Training epoch 2, cost ', 88.526435498703464)
('Training epoch 3, cost ', 85.205810665698252)
('Training epoch 4, cost ', 83.041297199536899)
('Training epoch 5, cost ', 81.568192024923448)
('Training epoch 6, cost ', 80.44665463393379)
('Training epoch 7, cost ', 79.59331354841602)
('Training epoch 8, cost ', 78.882518895178038)
('Training epoch 9, cost ', 78.294319900514495)
training time is 1.56m


In [111]:
class SDA(object):
    def __init__(self,
                 n_in=784,
                 hidden_layers_sizes=[500,500],
                 n_outs=10,
                 corruption_levels=[0.1,0.1] ):
        self.sigmoid_layers = []
        self.da_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        
        self.theano_rng = RandomStreams(np.random.randint(2**30))
        self.x = T.matrix('x')
        self.y = T.ivector('y')
        
        for i in range(self.n_layers):
            if i == 0:
                input_size = n_in
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i-1]
                layer_input = self.sigmoid_layers[-1].output
        
            sigmoid_layer = HiddenLayer(x=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i])
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            
            dA_layer = DA(input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.da_layers.append(dA_layer)

        self.logLayer = LogisticRegression(
            x=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )
        self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
    
    def pretraining_functions(self, train_set_x, batch_size):
        index = T.lscalar('index')
        corruption_level = T.scalar('corruption')  # % of corruption to use
        learning_rate = T.scalar('lr')  # learning rate to use
        # begining of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for dA in self.da_layers:
            # get the cost and the updates list
            cost, updates = dA.get_cost_updates(corruption_level,
                                                learning_rate)
            # compile the theano function
            fn = theano.function(
                inputs=[
                    index,
                    theano.In(corruption_level, value=0.2),
                    theano.In(learning_rate, value=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin: batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
    
    def build_finetune_functions(self, batch_size, learning_rate):
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches //= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches //= batch_size
        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = [
            (param, param - gparam * learning_rate)
            for param, gparam in zip(self.params, gparams)
        ]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='train'
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='test'
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='valid'
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score, test_score

In [112]:
sda = SDA()

print('... getting the pretraining functions')
pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

print('... pre-training the model')
start_time = timeit.default_timer()
corruption_levels = [.1, .2, .3]
for i in range(sda.n_layers):
    for epoch in range(2):
        c = []
        for batch_index in range(n_train_batches):
            c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=0.1))
        print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, np.mean(c)))

end_time = timeit.default_timer()




... getting the pretraining functions
... pre-training the model
Pre-training layer 0, epoch 0, cost 144.305060
Pre-training layer 0, epoch 1, cost 101.219341
Pre-training layer 1, epoch 0, cost -184724.699653
Pre-training layer 1, epoch 1, cost -559510.676238


(<theano.compile.function_module.Function at 0x10c1b2c50>,
 <function __main__.valid_score>,
 <function __main__.test_score>)

In [113]:
train_fn, valid_score, test_score = sda.build_finetune_functions(batch_size=600,learning_rate=0.1)

In [118]:
print np.mean(test_score())

for epoch in range(10):
    costs = [] 
    for i in range(n_train_batches):
        costs.append(train_fn(i))
    print np.mean(costs)
        
print np.mean(test_score())

0.9071875
8.34176133513
8.9838537059
9.68613665453
9.93588579311
10.0647988361


KeyboardInterrupt: 