# 3 Layer Network on MNIST

In [2]:
import sys, numpy as np
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1
    
np.random.seed(1)
relu = lambda x:(x>0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(labels[i:i+1]))

        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)\
                                    * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    sys.stdout.write("\r I:"+str(j)+ \
                     " Train-Err:" + str(error/float(len(images)))[0:5] +\
                     " Train-Acc:" + str(correct_cnt/float(len(images))))

 I:349 Train-Err:0.003 Train-Acc:0.999

In [3]:
if(j % 10 == 0 or j == iterations-1):
    error, correct_cnt = (0.0, 0)

    for i in range(len(test_images)):

        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(test_labels[i:i+1]))
    sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] +\
                     " Test-Acc:" + str(correct_cnt/float(len(test_images))) + "\n")
    print()

 Test-Err:0.355 Test-Acc:0.829



In [5]:
import sys, numpy as np
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

np.random.seed(1)
relu = lambda x:(x>0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(labels[i:i+1]))

        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)\
                                    * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    sys.stdout.write("\r I:"+str(j)+ \
                     " Train-Err:" + str(error/float(len(images)))[0:5] +\
                     " Train-Acc:" + str(correct_cnt/float(len(images))))
    
    if(j % 10 == 0 or j == iterations-1):
        error, correct_cnt = (0.0, 0)

        for i in range(len(test_images)):

            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1,weights_1_2)

            error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2) == \
                                            np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(correct_cnt/float(len(test_images))))
        print()

 I:0 Train-Err:0.729 Train-Acc:0.527 Test-Err:0.593 Test-Acc:0.6493
 I:10 Train-Err:0.198 Train-Acc:0.94 Test-Err:0.321 Test-Acc:0.8559
 I:20 Train-Err:0.126 Train-Acc:0.972 Test-Err:0.293 Test-Acc:0.8638
 I:30 Train-Err:0.091 Train-Acc:0.991 Test-Err:0.291 Test-Acc:0.8614
 I:40 Train-Err:0.071 Train-Acc:0.994 Test-Err:0.295 Test-Acc:0.86
 I:50 Train-Err:0.057 Train-Acc:0.995 Test-Err:0.299 Test-Acc:0.8584
 I:60 Train-Err:0.047 Train-Acc:0.997 Test-Err:0.304 Test-Acc:0.8571
 I:70 Train-Err:0.040 Train-Acc:0.998 Test-Err:0.307 Test-Acc:0.8555
 I:80 Train-Err:0.034 Train-Acc:0.998 Test-Err:0.311 Test-Acc:0.8524
 I:90 Train-Err:0.030 Train-Acc:0.998 Test-Err:0.314 Test-Acc:0.8513
 I:100 Train-Err:0.026 Train-Acc:0.998 Test-Err:0.318 Test-Acc:0.8505
 I:110 Train-Err:0.023 Train-Acc:0.998 Test-Err:0.321 Test-Acc:0.8491
 I:120 Train-Err:0.020 Train-Acc:0.998 Test-Err:0.323 Test-Acc:0.8467
 I:130 Train-Err:0.018 Train-Acc:0.998 Test-Err:0.326 Test-Acc:0.8445
 I:140 Train-Err:0.016 Train-Acc:0

# Dropout In Code

In [6]:
i = 0
layer_0 = images[i:i+1]
dropout_mask = np.random.randint(2,size=layer_1.shape)

layer_1 *= dropout_mask * 2
layer_2 = np.dot(layer_1, weights_1_2)

error += np.sum((labels[i:i+1] - layer_2) ** 2)

correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i+i+1]))

layer_2_delta = (labels[i:i+1] - layer_2)
layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

layer_1_delta *= dropout_mask

weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

In [7]:
import numpy, sys
np.random.seed(1)
def relu(x):
    return (x > 0) * x # returns x if x > 0
                        # returns 0 otherwise

def relu2deriv(output):
    return output > 0 #returns 1 for input > 0

alpha, iterations, hidden_size = (0.005, 300, 100)
pixels_per_image, num_labels = (784, 10)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0,0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask

        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

        sys.stdout.write("\n" + \
                         "I:" + str(j) + \
                         " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                         " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                         " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:0.638 Test-Acc:0.6325 Train-Err:0.902 Train-Acc:0.395
I:10 Test-Err:0.371 Test-Acc:0.8314 Train-Err:0.392 Train-Acc:0.819
I:20 Test-Err:0.316 Test-Acc:0.8551 Train-Err:0.320 Train-Acc:0.884
I:30 Test-Err:0.307 Test-Acc:0.8661 Train-Err:0.276 Train-Acc:0.918
I:40 Test-Err:0.293 Test-Acc:0.8721 Train-Err:0.259 Train-Acc:0.931
I:50 Test-Err:0.281 Test-Acc:0.8749 Train-Err:0.234 Train-Acc:0.94
I:60 Test-Err:0.290 Test-Acc:0.8791 Train-Err:0.229 Train-Acc:0.957
I:70 Test-Err:0.295 Test-Acc:0.8758 Train-Err:0.226 Train-Acc:0.958
I:80 Test-Err:0.290 Test-Acc:0.8782 Train-Err:0.210 Train-Acc:0.961
I:90 Test-Err:0.285 Test-Acc:0.8809 Train-Err:0.204 Train-Acc:0.95
I:100 Test-Err:0.277 Test-Acc:0.8808 Train-Err:0.192 Train-Acc:0.97
I:110 Test-Err:0.284 Test-Acc:0.8786 Train-Err:0.183 Train-Acc:0.975
I:120 Test-Err:0.277 Test-Acc:0.8809 Train-Err:0.188 Train-Acc:0.97
I:130 Test-Err:0.281 Test-Acc:0.8784 Train-Err:0.187 Train-Acc:0.963
I:140 Test-Err:0.289 Test-Acc:0.8777 Train-Err:0

# Batch Gradient Descent

In [8]:
import numpy as np
np.random.seed(1)

def relu(x):
    return (x > 0) * x # returns x if x > 0

def relu2deriv(output):
    return output > 0 # returns 1 for input > 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))

        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

            layer_2_delta = (labels[batch_start:batch_end]-layer_2)/batch_size
            layer_1_delta = layer_2_delta.dot(weights_1_2.T)* relu2deriv(layer_1)
            layer_1_delta *= dropout_mask

            weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
            
    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

        sys.stdout.write("\n" + \
                         "I:" + str(j) + \
                         " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                         " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                         " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:0.824 Test-Acc:0.3752 Train-Err:1.286 Train-Acc:0.151
I:10 Test-Err:0.543 Test-Acc:0.7264 Train-Err:0.583 Train-Acc:0.66
I:20 Test-Err:0.464 Test-Acc:0.7757 Train-Err:0.496 Train-Acc:0.748
I:30 Test-Err:0.418 Test-Acc:0.8061 Train-Err:0.444 Train-Acc:0.792
I:40 Test-Err:0.389 Test-Acc:0.8197 Train-Err:0.412 Train-Acc:0.806
I:50 Test-Err:0.366 Test-Acc:0.8317 Train-Err:0.388 Train-Acc:0.833
I:60 Test-Err:0.351 Test-Acc:0.8445 Train-Err:0.364 Train-Acc:0.843
I:70 Test-Err:0.340 Test-Acc:0.8513 Train-Err:0.345 Train-Acc:0.865
I:80 Test-Err:0.331 Test-Acc:0.8534 Train-Err:0.333 Train-Acc:0.859
I:90 Test-Err:0.323 Test-Acc:0.8585 Train-Err:0.319 Train-Acc:0.883
I:100 Test-Err:0.316 Test-Acc:0.8625 Train-Err:0.305 Train-Acc:0.89
I:110 Test-Err:0.312 Test-Acc:0.8641 Train-Err:0.297 Train-Acc:0.893
I:120 Test-Err:0.306 Test-Acc:0.8642 Train-Err:0.289 Train-Acc:0.899
I:130 Test-Err:0.302 Test-Acc:0.8667 Train-Err:0.273 Train-Acc:0.902
I:140 Test-Err:0.301 Test-Acc:0.8703 Train-Err