In [2]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
print("x_train","\n", x_train[0])
print("y_train","\n", y_train[0])

x_train[0] 
 [[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   3  18  18  18 126 136
  175  26 166 255 247 127   0   0   0   0]
 [  0   0   0   0   0   0   0   0  30  36  94 154 170 253 253 253 253 253
  225 172 253 242 195  64   0   0   0   0]
 [  0   0   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251
   93  82  82  56  39   0   0   0   0   0]
 [  0   0   0   0   0   0   0  18 219 253 253 253 2

In [4]:
images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])

print("images","\n", images.shape)
print("labels[0] value","\n", labels[0])

one_hot_labels = np.zeros((len(labels),10))

print("labels.shape","\n", labels.shape)
print("one_hot_labels.shape","\n", one_hot_labels.shape)

for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1

labels = one_hot_labels
print("one_hot_labels","\n",labels)

images 
 (1000, 784)
labels[0] value 
 5
labels.shape 
 (1000,)
one_hot_labels.shape 
 (1000, 10)
one_hot_labels 
 [[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [5]:
test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1


In [6]:
np.random.seed(1)
relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

print("weights_0_1.shape","\n", weights_0_1.shape)
print("weights_0_1","\n", weights_0_1)
print("weights_1_2.shape","\n", weights_1_2.shape)
print("weights_1_2","\n", weights_1_2)

weights_0_1.shape 
 (784, 40)
weights_0_1 
 [[-0.0165956   0.0440649  -0.09997713 ...  0.06692513 -0.09634234
   0.05002886]
 [ 0.09777222  0.04963313 -0.0439112  ... -0.03044683  0.05016242
   0.0451996 ]
 [ 0.07666122  0.02473444  0.05018849 ...  0.08460491  0.04230495
  -0.07514581]
 ...
 [ 0.01278792 -0.09335507  0.07751823 ...  0.09348249 -0.06255046
  -0.0724688 ]
 [ 0.02621576  0.03130522  0.0567295  ... -0.02118333 -0.06391994
   0.07874013]
 [-0.01947632  0.07673669  0.09756995 ...  0.03445804  0.03748222
   0.05496171]]
weights_1_2.shape 
 (40, 10)
weights_1_2 
 [[-0.0129138  -0.09765532 -0.03772776  0.04511556  0.03713697  0.0539454
   0.03387069  0.05517784  0.03646633 -0.08577301]
 [-0.04615044 -0.08412712  0.05727073  0.03236628  0.00431314  0.02723871
   0.04244115 -0.08035061  0.08789485  0.07374967]
 [ 0.06340531  0.02385217  0.03497944 -0.00299381  0.04807886  0.019268
  -0.03119899 -0.07911008  0.01948379 -0.07104769]
 [ 0.04609973 -0.08527007 -0.00049257  0.06213223

In [16]:
for j in range(iterations):
    error, correct_cnt = (0.0,0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask

        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

        sys.stdout.write("\n" + \
                         "I:" + str(j) + \
                         " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                         " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                         " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:0.584 Test-Acc:0.6581 Train-Err:0.607 Train-Acc:0.659
I:10 Test-Err:0.558 Test-Acc:0.7238 Train-Err:0.529 Train-Acc:0.712
I:20 Test-Err:0.507 Test-Acc:0.7374 Train-Err:0.499 Train-Acc:0.713
I:30 Test-Err:0.489 Test-Acc:0.7531 Train-Err:0.469 Train-Acc:0.774
I:40 Test-Err:0.462 Test-Acc:0.7682 Train-Err:0.458 Train-Acc:0.771
I:50 Test-Err:0.444 Test-Acc:0.7723 Train-Err:0.419 Train-Acc:0.788
I:60 Test-Err:0.451 Test-Acc:0.7797 Train-Err:0.432 Train-Acc:0.791
I:70 Test-Err:0.449 Test-Acc:0.7799 Train-Err:0.427 Train-Acc:0.798
I:80 Test-Err:0.438 Test-Acc:0.7864 Train-Err:0.414 Train-Acc:0.813
I:90 Test-Err:0.439 Test-Acc:0.7865 Train-Err:0.421 Train-Acc:0.808
I:100 Test-Err:0.450 Test-Acc:0.7763 Train-Err:0.412 Train-Acc:0.818
I:110 Test-Err:0.436 Test-Acc:0.7823 Train-Err:0.402 Train-Acc:0.825
I:120 Test-Err:0.440 Test-Acc:0.776 Train-Err:0.416 Train-Acc:0.817
I:130 Test-Err:0.434 Test-Acc:0.7818 Train-Err:0.409 Train-Acc:0.804
I:140 Test-Err:0.452 Test-Acc:0.7798 Train-Er