In [1]:
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
from load import mnist

In [2]:
import os,sys,inspect
# NOTE(review): every path in this cell is an absolute, machine-specific
# location under /Users/davidhalvorson, so the cell only runs on a machine with
# this exact directory layout. Consider a single configurable base directory.
# Work from the Theano-Tutorials-MNIST checkout (presumably where load.py
# lives -- confirm).
os.chdir ('/Users/davidhalvorson/Google Drive/General Assembly/Data Science Class/github/Theano-Tutorials-MNIST')

# Directory of the current frame's source file and its parent; the parent is
# placed at the front of sys.path so it is searched first by the import below.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir) 
import load

# Move into the datasets directory before loading.
os.chdir ('/Users/davidhalvorson/Google Drive/General Assembly/Data Science Class/github/DAT_SF_12/datasets/')

# Override the module-level datasets_dir on load (presumably read by
# load.mnist -- confirm). expanduser() leaves the path unchanged here because
# it does not start with '~'.
load.datasets_dir = os.path.expanduser("/Users/davidhalvorson/Google Drive/General Assembly/Data Science Class/github/DAT_SF_12/datasets/")

# Load the MNIST train/test images and labels, requesting one-hot labels.
trX, teX, trY, teY = load.mnist(onehot=True)

# Finally switch the working directory to the DAT_SF_12 checkout.
os.chdir ('/Users/davidhalvorson/Google Drive/General Assembly/Data Science Class/github/DAT_SF_12/')

In [3]:
# Module-level random stream; dropout() draws its binomial masks from it.
srng = RandomStreams()

def floatX(X):
    """Return ``X`` as a NumPy array whose dtype is ``theano.config.floatX``."""
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)

def init_weights(shape):
    """Create a Theano shared variable of the given shape.

    The initial values are standard-normal samples scaled by 0.01 and cast
    with floatX().
    """
    initial_values = 0.01 * np.random.randn(*shape)
    return theano.shared(floatX(initial_values))

def rectify(X):
    """Rectified linear unit: element-wise maximum of ``X`` and zero."""
    zero = 0.
    return T.maximum(X, zero)

def softmax(X):
    """Softmax along axis 1 of a 2-D symbolic tensor.

    Each row's maximum is subtracted before exponentiating; this leaves the
    result mathematically unchanged while keeping exp() from overflowing.
    """
    # dimshuffle(0, 'x') turns the per-row reduction back into a column so it
    # broadcasts against X.
    row_max = X.max(axis=1).dimshuffle(0, 'x')
    exps = T.exp(X - row_max)
    row_totals = exps.sum(axis=1).dimshuffle(0, 'x')
    return exps / row_totals

In [4]:
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Build the RMSprop update list for ``theano.function(updates=...)``.

    For every parameter a zero-initialised shared accumulator keeps a running
    average (decay ``rho``) of the squared gradient. The gradient is divided by
    ``sqrt(accumulator + epsilon)`` before the ``lr``-scaled step is taken.

    Returns a list of ``(shared_variable, new_value_expression)`` pairs: for
    each parameter, the accumulator update followed by the parameter update.
    """
    gradients = T.grad(cost=cost, wrt=params)
    updates = []
    for param, grad in zip(params, gradients):
        # Accumulator starts at zero with the same shape as the parameter.
        mean_square = theano.shared(param.get_value() * 0.)
        mean_square_next = rho * mean_square + (1 - rho) * grad ** 2
        step = lr * (grad / T.sqrt(mean_square_next + epsilon))
        updates.extend([
            (mean_square, mean_square_next),
            (param, param - step),
        ])
    return updates

In [5]:
def dropout(X, p=0.):
    """Apply dropout with drop probability ``p`` to the symbolic tensor ``X``.

    When ``p`` is not greater than zero, ``X`` is returned untouched. Otherwise
    each element is multiplied by a binomial mask drawn from ``srng`` with
    success probability ``1 - p``, and the result is divided by ``1 - p``.
    """
    if not p > 0:
        return X
    retain_prob = 1 - p
    keep_mask = srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
    return X * keep_mask / retain_prob

In [6]:
def model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
    """Build a network with two rectified hidden layers and a softmax output.

    Dropout is applied to the input (``p_drop_input``) and to the output of
    each hidden layer (``p_drop_hidden``).

    Returns ``(h, h2, py_x)``: the two hidden activations *after* dropout and
    the softmax output.
    """
    noisy_input = dropout(X, p_drop_input)
    hidden1 = dropout(rectify(T.dot(noisy_input, w_h)), p_drop_hidden)
    hidden2 = dropout(rectify(T.dot(hidden1, w_h2)), p_drop_hidden)
    py_x = softmax(T.dot(hidden2, w_o))
    return hidden1, hidden2, py_x

In [7]:
# NOTE(review): this rebinds the four arrays already loaded via load.mnist()
# in the earlier cell, using the `mnist` name imported at the top; one of the
# two loads is redundant.
trX, teX, trY, teY = mnist(onehot=True)

In [8]:
# Symbolic float matrices: X is passed to model(), Y to the cross-entropy cost.
X, Y = T.fmatrix(), T.fmatrix()

In [9]:
# Weight matrices for the 784 -> 625 -> 625 -> 10 network, created in order.
w_h, w_h2, w_o = map(init_weights, [(784, 625), (625, 625), (625, 10)])

In [10]:
# Training graph: dropout on the input (0.2) and on both hidden layers (0.5).
noise_h, noise_h2, noise_py_x = model(
    X, w_h, w_h2, w_o, p_drop_input=0.2, p_drop_hidden=0.5)
# Prediction graph: the same weights with dropout disabled.
h, h2, py_x = model(X, w_h, w_h2, w_o, p_drop_input=0., p_drop_hidden=0.)
# Predicted class is the index of the largest output along axis 1.
y_x = T.argmax(py_x, axis=1)

In [11]:
# Parameters that RMSprop will update.
params = [w_h, w_h2, w_o]
# Mean categorical cross-entropy of the dropout network's output against Y.
cost = T.mean(
    T.nnet.categorical_crossentropy(noise_py_x, Y)
)
updates = RMSprop(cost=cost, params=params, lr=0.001)

In [12]:
# train(): one RMSprop step on a batch, returning the batch cost.
train = theano.function(
    inputs=[X, Y],
    outputs=cost,
    updates=updates,
    allow_input_downcast=True,
)
# predict(): class indices from the dropout-free graph.
predict = theano.function(
    inputs=[X],
    outputs=y_x,
    allow_input_downcast=True,
)

In [23]:
# Mini-batch training: after every pass over trX, print test-set accuracy.
n_epochs = 100
batch_size = 128
for i in range(n_epochs):
    # Iterate over batch start offsets only. Pairing two ranges with zip()
    # stops at the shorter one and silently skips the final partial batch
    # (the rows after the last full multiple of batch_size); slicing past the
    # end of the array simply yields the shorter remainder.
    for start in range(0, len(trX), batch_size):
        end = start + batch_size
        # Stored under its own name so the symbolic `cost` expression used to
        # compile train() is not overwritten by a numeric value.
        batch_cost = train(trX[start:end], trY[start:end])
    print(np.mean(np.argmax(teY, axis=1) == predict(teX)))

0.9816
0.9816
0.9821
0.9826
0.9833
0.9823
0.9832
0.9836
0.9836
0.9837
0.984


KeyboardInterrupt: 

Debug

In [14]:
# Sanity check: shapes of the train/test images and labels.
print trX.shape, trY.shape, teX.shape, teY.shape

(60000, 784) (60000, 10) (10000, 784) (10000, 10)


In [15]:
import pandas as pd
# Show the first rows of the test labels as a table to check the label encoding.
pd.DataFrame(teY).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,0,0,0,0,0,0,1,0,0
1,0,0,1,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0


In [16]:
# NOTE(review): the first argument is the tuple (0, len(trX), 128), not
# range(0, len(trX), 128), so zip() pairs only those three values with the
# first three batch ends. This does not reproduce the training loop's batching.
zip((0, len(trX), 128), range(128, len(trX), 128))

[(0, 128), (60000, 256), (128, 384)]

In [17]:
# Dry run of the batching loop: prints the shape of every image/label slice
# without training. zip() stops at the shorter range, so the rows after the
# last full multiple of 128 are never visited.
for i in range(10):
    for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
#        cost = train(trX[start:end], trY[start:end])
       print trX[start:end].shape, trY[start:end].shape
#    print np.mean(np.argmax(teY, axis=1) == predict(teX))

(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (128, 10)
(128, 784) (1