In [5]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255,
                  y_train[0:1000])

one_hot_labels = np.zeros((len(labels), 10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images  = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

def relu(x):
    return (x > 0) * x

def relu2deriv(output):
    return output > 0

np.random.seed(1)
alpha, iterations, hidden_size = (0.005, 300, 40)
pixels_per_image, num_labels = (784, 10)

weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    train_err, train_acc = (0.0, 0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        train_err += np.sum((labels[i:i+1] - layer_2) ** 2)
        train_acc += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = \
            layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
        
    sys.stdout.write(
        "\r" + \
        " I:"+ str(j) + \
        " Train-Err:" + str(train_err/float(len(images)))[0:5] + \
        " Train-Acc:" + str(train_acc/float(len(images))))



 I:299 Train-Err:0.004 Train-Acc:0.999

In [4]:
if(j % 10 == 0 or j == iterations-1):
    test_err, test_acc = (0.0, 0)
    
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        test_err += np.sum((test_labels[i:i+1] - layer_2) ** 2)
        test_acc += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    sys.stdout.write(
        " Test-Err:" + str(test_err/float(len(test_images)))[0:5] + \
        " Test-Acc:" + str(test_acc/float(len(test_images))))

 Test-Err:0.351 Test-Acc:0.8316

In [11]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255,
                  y_train[0:1000])

one_hot_labels = np.zeros((len(labels), 10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images  = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

np.random.seed(1)
relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    train_err, train_acc = (0.0, 0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        train_err += np.sum((labels[i:i+1] - layer_2) ** 2)
        train_acc += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = \
            layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
        
    sys.stdout.write(
        "\r" + \
        " I:"+ str(j) + \
        " Train-Err:" + str(train_err/float(len(images)))[0:5] + \
        " Train-Acc:" + str(train_acc/float(len(images))))


    if(j % 10 == 0 or j == iterations-1):
        test_err, test_acc = (0.0, 0)
    
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            test_err += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_acc += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    sys.stdout.write(
        " Test-Err:" + str(test_err/float(len(test_images)))[0:5] + \
        " Test-Acc:" + str(test_acc/float(len(test_images))))
    
    print()

 I:0 Train-Err:0.722 Train-Acc:0.537 Test-Err:0.601 Test-Acc:0.6488
 I:1 Train-Err:0.512 Train-Acc:0.753 Test-Err:0.601 Test-Acc:0.6488
 I:2 Train-Err:0.448 Train-Acc:0.804 Test-Err:0.601 Test-Acc:0.6488
 I:3 Train-Err:0.411 Train-Acc:0.837 Test-Err:0.601 Test-Acc:0.6488
 I:4 Train-Err:0.385 Train-Acc:0.846 Test-Err:0.601 Test-Acc:0.6488
 I:5 Train-Err:0.367 Train-Acc:0.863 Test-Err:0.601 Test-Acc:0.6488
 I:6 Train-Err:0.352 Train-Acc:0.876 Test-Err:0.601 Test-Acc:0.6488
 I:7 Train-Err:0.340 Train-Acc:0.884 Test-Err:0.601 Test-Acc:0.6488
 I:8 Train-Err:0.329 Train-Acc:0.892 Test-Err:0.601 Test-Acc:0.6488
 I:9 Train-Err:0.320 Train-Acc:0.897 Test-Err:0.601 Test-Acc:0.6488
 I:10 Train-Err:0.312 Train-Acc:0.901 Test-Err:0.420 Test-Acc:0.8114
 I:11 Train-Err:0.304 Train-Acc:0.904 Test-Err:0.420 Test-Acc:0.8114
 I:12 Train-Err:0.297 Train-Acc:0.91 Test-Err:0.420 Test-Acc:0.8114
 I:13 Train-Err:0.291 Train-Acc:0.914 Test-Err:0.420 Test-Acc:0.8114
 I:14 Train-Err:0.286 Train-Acc:0.918 Test-Er

 I:120 Train-Err:0.157 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766
 I:121 Train-Err:0.157 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766
 I:122 Train-Err:0.156 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766
 I:123 Train-Err:0.156 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766
 I:124 Train-Err:0.155 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766
 I:125 Train-Err:0.155 Train-Acc:0.987 Test-Err:0.496 Test-Acc:0.766
 I:126 Train-Err:0.155 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766
 I:127 Train-Err:0.154 Train-Acc:0.987 Test-Err:0.496 Test-Acc:0.766
 I:128 Train-Err:0.154 Train-Acc:0.988 Test-Err:0.496 Test-Acc:0.766
 I:129 Train-Err:0.153 Train-Acc:0.989 Test-Err:0.496 Test-Acc:0.766
 I:130 Train-Err:0.153 Train-Acc:0.99 Test-Err:0.502 Test-Acc:0.7622
 I:131 Train-Err:0.153 Train-Acc:0.991 Test-Err:0.502 Test-Acc:0.7622
 I:132 Train-Err:0.152 Train-Acc:0.991 Test-Err:0.502 Test-Acc:0.7622
 I:133 Train-Err:0.152 Train-Acc:0.991 Test-Err:0.502 Test-Acc:0.7622
 I:134 Train-Err:0.151 Train-Ac

 I:238 Train-Err:0.122 Train-Acc:0.998 Test-Err:0.560 Test-Acc:0.7372
 I:239 Train-Err:0.122 Train-Acc:0.998 Test-Err:0.560 Test-Acc:0.7372
 I:240 Train-Err:0.121 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344
 I:241 Train-Err:0.121 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344
 I:242 Train-Err:0.121 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344
 I:243 Train-Err:0.121 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344
 I:244 Train-Err:0.121 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344
 I:245 Train-Err:0.121 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344
 I:246 Train-Err:0.120 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344
 I:247 Train-Err:0.120 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344
 I:248 Train-Err:0.120 Train-Acc:0.999 Test-Err:0.569 Test-Acc:0.7344
 I:249 Train-Err:0.120 Train-Acc:0.999 Test-Err:0.569 Test-Acc:0.7344
 I:250 Train-Err:0.120 Train-Acc:0.999 Test-Err:0.577 Test-Acc:0.7316
 I:251 Train-Err:0.120 Train-Acc:0.999 Test-Err:0.577 Test-Acc:0.7316
 I:252 Train-Err:0.1

In [16]:
i = 0
layer_0 = images[i:i+1]
layer_1 = relu(np.dot(layer_0, weights_0_1))
dropout_mask = np.random.randint(2, size=layer_1.shape)
layer_1 *= dropout_mask * 2
layer_2 = np.dot(layer_1, weights_1_2)

train_err += np.sum((labels[i:i+1] - layer_2) ** 2)
train_acc += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))

layer_2_delta = (labels[i:i+1] - layer_2)
layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
layer_1_delta += dropout_mask

weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)