In [None]:
%pylab inline
import sys

import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
from sklearn import preprocessing
# Print floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)
# Load MNIST; the result is unpacked into (images, labels) pairs for the
# training and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

def preproc(X_train, y_train):
    """Turn raw image/label arrays into network-ready inputs and targets.

    Each sample in ``X_train`` (indexed along axis 0, with two further axes)
    is flattened into a single float32 row, and the resulting matrix is passed
    through ``sklearn.preprocessing.scale``. The labels are expanded into
    10-way categorical vectors.

    Returns the pair ``(scaled_inputs, categorical_labels)``.
    """
    n_samples = X_train.shape[0]
    n_features = X_train.shape[1] * X_train.shape[2]
    flat = X_train.reshape(n_samples, n_features).astype("float32")

    # Alternative kept from the original notebook: plain rescaling, flat / 255.
    scaled = preprocessing.scale(flat)
    labels = np_utils.to_categorical(y_train, 10)
    return scaled, labels

# Flatten/scale the images and expand the labels for both splits.
# NOTE(review): each call scales with the statistics of its own input, so the
# test split is not scaled with the training-set statistics.
# NOTE(review): X is rebound to a 32x32 identity matrix in a later cell, so the
# training loops further down do not consume the MNIST array built here.
X, Y = preproc(X_train, y_train)
Xt, Yt = preproc(X_test, y_test)

In [None]:
def update(y0, weights=None, biases=None):
    """Run one forward/backward pass of the autoencoder on a single sample.

    The network is a stack of fully connected layers. Every layer except the
    last applies tanh; the last layer is linear. The reconstruction target is
    the input itself and the loss is squared error.

    Parameters
    ----------
    y0 : 1-D array
        Input vector, also used as the target.
    weights, biases : list of arrays, optional
        Per-layer weight matrices, shaped (n_out, n_in), and bias vectors.
        When omitted, the notebook-level globals ``W`` and ``B`` are used,
        which is how the training cells call this function.

    Returns
    -------
    loss : float
        Mean of the squared reconstruction error over the output units.
    gW, gB : list of arrays
        Per-layer gradients of the *summed* squared error (note the factor
        2 * err, not divided by the output size) with respect to each weight
        matrix and bias vector.
    """
    if weights is None:
        weights = W
    if biases is None:
        biases = B
    n_layers = len(weights)

    # Forward pass: acts[i] is the input to layer i, acts[-1] the output.
    acts = [y0]
    for i in range(n_layers):
        pre = np.dot(weights[i], acts[i]) + biases[i]
        # last layer is linear, all others use tanh
        acts.append(pre if i == n_layers - 1 else np.tanh(pre))

    # Squared error derivative, (computed - target).
    err = acts[-1] - acts[0]

    # Backward pass, built from the output layer down and reversed afterwards.
    # d/dx tanh(x) = 1 - tanh(x)**2, and tanh(x) is already stored in acts, so
    # it is reused here instead of being recomputed from the pre-activation.
    deltas = [2 * err]
    for i in range(n_layers - 2, -1, -1):
        back = np.dot(weights[i + 1].T, deltas[-1])
        deltas.append(back * (1 - acts[i + 1] ** 2))
    deltas.reverse()

    # Gradients: outer(delta, layer input) for weights, delta for biases.
    gW = [np.outer(deltas[i], acts[i]) for i in range(n_layers)]
    gB = [deltas[i] for i in range(n_layers)]

    return np.mean(err ** 2), gW, gB

In [None]:
# Reproducible setup for the toy problem: a small autoencoder whose layer
# widths are given by `sz`, trained to reconstruct one-hot vectors.
np.random.seed(1337)
init = 0.08  # weights and biases are drawn uniformly from [-init, init]
W, B = [], []

#sz = [784, 256, 32, 256, 784]

sz = [32, 16, 32]

# Training data: one one-hot row per input unit (an identity matrix), sized
# from sz[0] so it always matches the network's input width.
# NOTE: this rebinds X, replacing any array of that name from earlier cells.
X = np.eye(sz[0], dtype=np.float32)

# One (n_out, n_in) weight matrix and one bias vector per layer. The draw
# order (W then B, layer by layer) is kept fixed so the seed above reproduces
# the same parameters.
for n_in, n_out in zip(sz[:-1], sz[1:]):
    W.append(np.random.uniform(size=(n_out, n_in), low=-init, high=init).astype(np.float32))
    B.append(np.random.uniform(size=(n_out), low=-init, high=init).astype(np.float32))

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam, RMSprop

# Keras reference model with the same layer structure as the NumPy network.
# NOTE(review): whether this seed controls the Keras weight initialisation
# depends on the backend/version in use -- confirm.
np.random.seed(1337)

# 32 -> 16 (tanh) -> 32 (linear), assuming Dense(input_dim, output_dim) as the
# argument order. Under that reading the model has
# 32*16 + 16 + 16*32 + 32 = 1072 parameters; the earlier "208896 params" note
# does not correspond to these layer sizes.
model = Sequential()
model.add(Dense(32, 16))
model.add(Activation('tanh'))
model.add(Dense(16, 32))

# optimize: Adam with its default settings, mean squared error loss
opt = Adam()
model.compile(loss='mean_squared_error', optimizer=opt)

In [None]:
# Train the Keras model to reproduce its input (targets are the inputs), then
# evaluate it on the same data.
model.fit(X, X, batch_size=4, nb_epoch=100, verbose=0)
model.evaluate(X, X, verbose=2)

In [None]:
# Minibatch gradient descent with momentum on the NumPy network.
lr = 0.05
mom = 0.5
minibatch_size = 4
n_epochs = 1000
log_every = 100  # report the epoch's mean error every `log_every` epochs

# One momentum buffer per weight matrix / bias vector, initially zero.
momW = [np.zeros(w.shape).astype(np.float32) for w in W]
momB = [np.zeros(b.shape).astype(np.float32) for b in B]

# Per-sample error history across all epochs.
err = []
diverged = False

for ep in range(n_epochs):
    epoch_start = len(err)

    for i in range(0, X.shape[0], minibatch_size):
        # Slicing (rather than indexing X[i+k]) keeps a short final batch
        # from running past the end of X.
        batch = X[i:i + minibatch_size]

        # Sum the per-sample gradients over the minibatch.
        terr, gW, gB = update(batch[0])
        err.append(terr)
        for sample in batch[1:]:
            terr, tgW, tgB = update(sample)
            err.append(terr)
            for j in range(len(W)):
                gW[j] += tgW[j]
                gB[j] += tgB[j]

        # Average over the samples actually present in this batch.
        step = lr / len(batch)

        for j in range(len(W)):
            # Clip the summed gradients element-wise to [-1, 1].
            gW[j] = np.clip(gW[j], -1, 1)
            gB[j] = np.clip(gB[j], -1, 1)

            # Momentum: new step = mom * previous step - scaled gradient.
            updW = momW[j] * mom - gW[j] * step
            updB = momB[j] * mom - gB[j] * step

            W[j] += updW
            B[j] += updB

            momW[j] = updW
            momB[j] = updB

        if np.isnan(err[-1]):
            # Single-argument form prints the same text on Python 2 and 3.
            print("FAILED AT %s %s" % (i, err[-10:]))
            diverged = True
            break

    # A NaN error means the parameters are ruined; stop training entirely
    # instead of only leaving the inner loop.
    if diverged:
        break

    if ep % log_every == 0:
        sys.stdout.write("%6d: %f\r\n" % (ep, np.mean(err[epoch_start:])))
        sys.stdout.flush()


In [None]:
# Plain per-sample SGD (no momentum, no clipping) on the NumPy network.
lr = 0.001
n_epochs = 10

for ep in range(n_epochs):
    err = []  # per-sample errors for this epoch only
    diverged = False

    for i in range(X.shape[0]):
        terr, gW, gB = update(X[i])
        err.append(terr)

        for j in range(len(W)):
            W[j] -= gW[j] * lr
            B[j] -= gB[j] * lr

        if np.isnan(terr):
            # Single-argument form prints the same text on Python 2 and 3.
            print("FAILED AT %s %s" % (i, err[-10:]))
            diverged = True
            break

    sys.stdout.write("%6d: %f\r\n" % (ep, np.mean(err)))
    sys.stdout.flush()

    # A NaN error means the parameters are ruined; stop training entirely
    # instead of carrying on with the remaining epochs.
    if diverged:
        break

In [None]:
# Show the first and second weight matrices as images, one figure each.
# figure/imshow are not imported explicitly; presumably they come from the
# `%pylab inline` namespace.
figure(), imshow(W[0])
figure(), imshow(W[1])

In [None]:
# Display the first layer's bias vector.
B[0]