In [5]:
import numpy as np, sys
np.random.seed(1)

from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28*28)/255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

def tanh(x):
    return np.tanh(x)
def tanh2deriv(output):
    return 1 - (output ** 2)
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

alpha, iterations, hidden_size = (2, 300, 100)
pixels_per_image, num_labels = (784, 10)
batch_size = 100

weights_0_1 = 0.02*np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_1 = tanh(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1,weights_1_2))

        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

        layer_2_delta = (labels[batch_start:batch_end] - layer_2) / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask

        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
    test_correct_cnt = 0

    for i in range(len(test_images)):

        layer_0 = test_images[i:i+1]
        layer_1 = tanh(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

    if(j % 10 == 0):
        sys.stdout.write("\n" + "I:" + str(j) + " Test-Acc:" + str(test_correct_cnt/float(len(test_images))) + " Train-Acc:" + str(correct_cnt/float(len(images))))


I:0 Test-Acc:0.3679 Train-Acc:0.16
I:10 Test-Acc:0.2503 Train-Acc:0.285
I:20 Test-Acc:0.1924 Train-Acc:0.22
I:30 Test-Acc:0.1296 Train-Acc:0.166
I:40 Test-Acc:0.1028 Train-Acc:0.134
I:50 Test-Acc:0.1028 Train-Acc:0.138
I:60 Test-Acc:0.1028 Train-Acc:0.135
I:70 Test-Acc:0.103 Train-Acc:0.141
I:80 Test-Acc:0.1039 Train-Acc:0.147
I:90 Test-Acc:0.1055 Train-Acc:0.159
I:100 Test-Acc:0.1245 Train-Acc:0.173
I:110 Test-Acc:0.1753 Train-Acc:0.183
I:120 Test-Acc:0.1919 Train-Acc:0.177
I:130 Test-Acc:0.2069 Train-Acc:0.196
I:140 Test-Acc:0.2104 Train-Acc:0.188
I:150 Test-Acc:0.2122 Train-Acc:0.195
I:160 Test-Acc:0.2121 Train-Acc:0.199
I:170 Test-Acc:0.212 Train-Acc:0.2
I:180 Test-Acc:0.2122 Train-Acc:0.193
I:190 Test-Acc:0.2117 Train-Acc:0.2
I:200 Test-Acc:0.212 Train-Acc:0.203
I:210 Test-Acc:0.2116 Train-Acc:0.193
I:220 Test-Acc:0.2113 Train-Acc:0.2
I:230 Test-Acc:0.2098 Train-Acc:0.21
I:240 Test-Acc:0.2108 Train-Acc:0.2
I:250 Test-Acc:0.2105 Train-Acc:0.195
I:260 Test-Acc:0.2102 Train-Acc:0.20

In [None]:
 T