In [4]:
import numpy as np, sys
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255,
                  y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))

for i,l in enumerate(y_test):
    test_labels[i][l] = 1
    
def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

np.random.seed(1)
alpha, iterations, hidden_size = (0.02, 300, 40)
pixels_per_image, num_labels = (784, 10)
batch_size = 100

weights_0_1 = 0.02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    train_acc = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        
        for k in range(batch_size):
            train_acc += int(np.argmax(layer_2[k:k+1]) == \
                          np.argmax(labels[batch_start+k:batch_start+k+1]))
        
        layer_2_delta = \
            (labels[batch_start:batch_end] - layer_2) / batch_size
        layer_1_delta = \
            layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
        
    test_acc = 0
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        test_acc += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    if(j % 10 == 0):
        sys.stdout.write(
            "\n"+ "I:" + str(j) + \
            " Train-Acc:" + str(train_acc/float(len(images))) + \
            " Test-Acc:" + str(test_acc/float(len(test_images))))


I:0 Train-Acc:0.127 Test-Acc:0.2069
I:10 Train-Acc:0.618 Test-Acc:0.5982
I:20 Train-Acc:0.632 Test-Acc:0.6226
I:30 Train-Acc:0.658 Test-Acc:0.6447
I:40 Train-Acc:0.681 Test-Acc:0.6774
I:50 Train-Acc:0.731 Test-Acc:0.7064
I:60 Train-Acc:0.753 Test-Acc:0.7387
I:70 Train-Acc:0.781 Test-Acc:0.7622
I:80 Train-Acc:0.829 Test-Acc:0.7806
I:90 Train-Acc:0.844 Test-Acc:0.7992
I:100 Train-Acc:0.84 Test-Acc:0.8109
I:110 Train-Acc:0.855 Test-Acc:0.8216
I:120 Train-Acc:0.868 Test-Acc:0.8271
I:130 Train-Acc:0.878 Test-Acc:0.832
I:140 Train-Acc:0.889 Test-Acc:0.8367
I:150 Train-Acc:0.887 Test-Acc:0.8413
I:160 Train-Acc:0.887 Test-Acc:0.8435
I:170 Train-Acc:0.898 Test-Acc:0.8466
I:180 Train-Acc:0.898 Test-Acc:0.8478
I:190 Train-Acc:0.909 Test-Acc:0.849
I:200 Train-Acc:0.913 Test-Acc:0.8521
I:210 Train-Acc:0.911 Test-Acc:0.8518
I:220 Train-Acc:0.902 Test-Acc:0.8537
I:230 Train-Acc:0.905 Test-Acc:0.8556
I:240 Train-Acc:0.91 Test-Acc:0.857
I:250 Train-Acc:0.913 Test-Acc:0.8553
I:260 Train-Acc:0.918 Test-