In [28]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import gzip,cPickle

import theano
from theano import tensor as T
from theano import shared,function,grad,pp
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool

In [29]:
# Open the image by path: PIL then reads it in binary mode and manages the
# file handle itself. (Passing a text-mode `open(...)` object left the handle
# unclosed and is unsafe for binary data.)
img = Image.open('3wolfmoon.jpg')
# Dividing by 256 maps 8-bit channel values into [0, 1).
img1 = np.array(img, dtype='float64') / 256.
# Move the last axis (channels) to the front and prepend a batch axis of
# length 1, giving a 4D array; the shape is taken from the image itself
# instead of being hard-coded as (1, 3, 639, 516).
img2 = img1.transpose(2, 0, 1).reshape((1, img1.shape[2]) + img1.shape[:2])

# NOTE(security): unpickling can execute arbitrary code -- only load
# mnist.pkl.gz when it comes from a trusted source.
# The `with` block closes the archive even if unpickling fails.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = cPickle.load(f)
def share_dataset(data_xy):
    """Put an (inputs, labels) pair into Theano shared variables.

    Both members of ``data_xy`` are copied into arrays of dtype
    ``theano.config.floatX`` and wrapped with ``shared``.

    Returns a 2-tuple: the shared inputs, and the shared labels seen
    through a symbolic cast to ``'int32'``.
    """
    float_type = theano.config.floatX
    inputs, labels = data_xy
    inputs_var, labels_var = [
        shared(np.array(part, dtype=float_type)) for part in (inputs, labels)
    ]
    # The labels are stored as floats but exposed as int32.
    labels_as_int = T.cast(labels_var, 'int32')
    return inputs_var, labels_as_int

# Wrap every split (train / validation / test) with share_dataset, in that
# order, and unpack each result into its inputs variable and labels variable.
(
    (train_set_x, train_set_y),
    (valid_set_x, valid_set_y),
    (test_set_x, test_set_y),
) = map(share_dataset, (train_set, valid_set, test_set))

In [30]:
class LogisticRegression(object):
    """Softmax classification layer: p(y|x) = softmax(dot(x, W) + b).

    Parameters
    ----------
    x : symbolic matrix, one example per row, ``n_in`` values each.
    n_in : number of input units.
    n_out : number of classes.

    Attributes
    ----------
    W, b : shared weight matrix (n_in, n_out) and bias vector (n_out,),
        both initialised to zeros.
    p_y_given_x : symbolic class probabilities for each row of ``x``.
    y_pred : symbolic index of the most probable class per row.
    params : ``[W, b]``.
    x : the input expression the layer was built on.
    """

    def __init__(self, x, n_in, n_out):
        float_type = theano.config.floatX

        # Weights and biases both start at zero.
        self.W = theano.shared(
            value=np.zeros((n_in, n_out), dtype=float_type),
            name='W',
            borrow=True,
        )
        self.b = theano.shared(
            value=np.zeros((n_out,), dtype=float_type),
            name='b',
            borrow=True,
        )
        self.x = x
        self.params = [self.W, self.b]

        scores = T.dot(x, self.W) + self.b
        self.p_y_given_x = T.nnet.softmax(scores)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

    def negative_log_likelihood(self, y):
        """Return the mean negative log-probability assigned to the labels ``y``."""
        log_probs = T.log(self.p_y_given_x)
        # Pick, for every row k, the log-probability of its label y[k].
        row_index = T.arange(y.shape[0])
        return -T.mean(log_probs[row_index, y])

    def errors(self, y):
        """Return the fraction of rows whose predicted class differs from ``y``."""
        mismatches = T.neq(self.y_pred, y)
        return T.mean(mismatches)

In [31]:
class HiddenLayer(object):
    """Fully connected layer with tanh activation: tanh(dot(x, W) + b).

    Parameters
    ----------
    x : symbolic input expression with ``n_in`` values per row.
    n_in : number of input units.
    n_out : number of hidden units.
    rng : optional random source exposing ``uniform(low, high, size)``,
        e.g. ``np.random.RandomState(seed)``. Pass one to make the weight
        initialisation reproducible. Defaults to the global ``np.random``
        state, which is what the layer always used before.

    Attributes
    ----------
    W : shared (n_in, n_out) matrix drawn uniformly from
        [-sqrt(6 / (n_in + n_out)), +sqrt(6 / (n_in + n_out))].
    b : shared (n_out,) bias vector, initialised to zeros.
    output : symbolic layer output.
    params : ``[W, b]``.
    x : the input expression the layer was built on.
    """

    def __init__(self, x, n_in, n_out, rng=None):
        self.x = x
        if rng is None:
            # Backward-compatible default: draw from NumPy's global state.
            rng = np.random

        bound = np.sqrt(6. / (n_in + n_out))
        W_values = np.asarray(
            rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
            dtype=theano.config.floatX
        )
        b_values = np.zeros((n_out,), dtype=theano.config.floatX)

        self.W = shared(value=W_values, name='W', borrow=True)
        self.b = shared(value=b_values, name='b', borrow=True)
        self.output = T.tanh(T.dot(x, self.W) + self.b)
        self.params = [self.W, self.b]

In [32]:
class ConvPoolLayer(object):
    """Convolution followed by max-pooling, a per-filter bias and tanh.

    Parameters
    ----------
    input : symbolic 4D tensor fed to ``conv2d``.
    filter_shape : 4-tuple; ``filter_shape[0]`` is the number of filters
        (one bias per filter), the remaining entries describe each filter
        (input feature maps, filter height, filter width).
    image_shape : 4-tuple passed to ``conv2d`` as ``input_shape``
        (callers in this notebook pass (batch size, feature maps, height,
        width)).
    pool_shape : pooling factor per spatial axis, default (2, 2).
    rng : optional random source exposing ``uniform(low, high, size)``,
        e.g. ``np.random.RandomState(seed)``. Pass one to make the filter
        initialisation reproducible. Defaults to the global ``np.random``
        state, which is what the layer always used before.

    Attributes
    ----------
    W : shared filter tensor of shape ``filter_shape``.
    b : shared bias vector of length ``filter_shape[0]``, zeros initially.
    output : symbolic layer output.
    params : ``[W, b]``.
    input : the input expression the layer was built on.
    """

    def __init__(self, input, filter_shape, image_shape, pool_shape=(2, 2),
                 rng=None):
        self.input = input
        if rng is None:
            # Backward-compatible default: draw from NumPy's global state.
            rng = np.random

        # Inputs feeding each output unit: feature maps * filter h * filter w.
        fan_in = np.prod(filter_shape[1:])
        # Units each input feeds: filters * filter h * filter w, reduced by
        # the pooling area.
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) //
                   np.prod(pool_shape))
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.W = shared(
            np.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = shared(value=b_values, borrow=True)

        conv_out = conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            input_shape=image_shape
        )

        pooled_out = pool.pool_2d(
            input=conv_out,
            ds=pool_shape,
            ignore_border=True
        )

        # dimshuffle turns the (n_filters,) bias into a (1, n_filters, 1, 1)
        # broadcastable tensor so one bias is added to every position of the
        # matching feature map.
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.W, self.b]
    

In [48]:
# --- Hyper-parameters and symbolic inputs ---------------------------------
batch_size = 600
i = T.lscalar('i')    # minibatch index
x = T.matrix('x')     # one flattened image per row
y = T.ivector('y')    # integer class labels
nkerns = [2,2]        # number of filters in the two conv layers
alpha = 0.1           # gradient-descent step size

# Geometry of the network. The feature-map sizes below are derived from
# these values instead of being typed in by hand, so changing one of them
# cannot leave the layer shapes out of sync.
image_size = 28
filter_size = 5
pool_size = 2
n_hidden = 500
n_classes = 10
# The convolution (default border mode) shrinks each side by
# filter_size - 1; pooling with ignore_border=True then floor-divides it.
map0_size = (image_size - filter_size + 1) // pool_size    # 12
map1_size = (map0_size - filter_size + 1) // pool_size     # 4

# Only whole minibatches are used; a trailing partial batch is dropped.
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

# --- Network: conv/pool -> conv/pool -> tanh hidden -> softmax ------------
layer0_input = x.reshape((batch_size, 1, image_size, image_size))
layer0 = ConvPoolLayer(
    input=layer0_input,
    image_shape=(batch_size, 1, image_size, image_size),
    filter_shape=(nkerns[0], 1, filter_size, filter_size),
    pool_shape=(pool_size, pool_size)
)

layer1 = ConvPoolLayer(
    input=layer0.output,
    image_shape=(batch_size, nkerns[0], map0_size, map0_size),
    filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
    pool_shape=(pool_size, pool_size)
)

# Collapse each example's feature maps into a single row for the dense layers.
layer2_input = layer1.output.flatten(2)
layer2 = HiddenLayer(x=layer2_input,
                     n_in=nkerns[1] * map1_size * map1_size,
                     n_out=n_hidden)
layer3 = LogisticRegression(x=layer2.output, n_in=n_hidden, n_out=n_classes)
cost = layer3.negative_log_likelihood(y)

# Error rate of the classifier on test minibatch i.
test_model = theano.function(
        [i],
        layer3.errors(y),
        givens={
            x: test_set_x[i * batch_size: (i + 1) * batch_size],
            y: test_set_y[i * batch_size: (i + 1) * batch_size]
        }
    )

# --- Training: plain gradient descent on every parameter ------------------
params = layer3.params + layer2.params + layer1.params + layer0.params
grads = grad(cost,params)

updates = [
    (param_i, param_i - alpha * grad_i)
    for param_i, grad_i in zip(params, grads)
]

# Returns the cost on training minibatch i and applies one update step.
train_model = theano.function(
        [i],
        cost,
        updates=updates,
        givens={
            x: train_set_x[i * batch_size: (i + 1) * batch_size],
            y: train_set_y[i * batch_size: (i + 1) * batch_size]
        }
    )

In [49]:
# Error rate on the first test minibatch only, before training.
print(test_model(0))
for epoch in range(1):
    costs = []  # per-minibatch training cost for this epoch
    # The loop variable is deliberately not called `i`: that name holds the
    # symbolic minibatch index used to build the Theano functions, and
    # rebinding it to a Python int here would break any later cell that
    # builds another function from it.
    for batch_index in range(n_train_batches):
        costs.append(train_model(batch_index))
# Same minibatch again, after one pass over the training data.
print(test_model(0))

0.911666666667
0.258333333333
