In [3]:
import sys
import numpy as np
from keras.datasets import mnist

In [4]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(f"x_train:{x_train.shape}, y_train:{y_train.shape}") #60k
print(f"x_test:{x_test.shape}, y_test:{y_test.shape}") #10k

x_train:(60000, 28, 28), y_train:(60000,)
x_test:(10000, 28, 28), y_test:(10000,)


In [5]:
def one_hot_labels(digit_labels):
    
    one_hot_labels = np.zeros((len(digit_labels),10))

    for i,l in enumerate(digit_labels):
        one_hot_labels[i][l] = 1
    
    return(one_hot_labels)

In [6]:
trn_images = x_train[0:1000].reshape(1000, 28*28)/255
trn_labels = one_hot_labels(y_train[0:1000])

tst_images = x_test.reshape(len(x_test), 28*28)/255
tst_labels = one_hot_labels(y_test)

In [41]:
np.random.seed(42)
relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

In [42]:
#np.random creates create numbers randomly spread between 0 and 1
# by multipley by 0.2 [0-0.2] and subtracting 0.1 = range [-0.1 to 0.1]

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(trn_images)):
        layer_0 = trn_images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((trn_labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(trn_labels[i:i+1]))

        #spread the delta
        layer_2_delta = (trn_labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)* relu2deriv(layer_1)
        
        #input* delta
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
        
    #print(f"Iteration:{j}, Trn-Err:.{error/len(trn_images):.3f},Trn-Acc:{correct_cnt/float(len(trn_images)):.3f}", end = '\r')
    
    
    if(j % 50 == 0 or j == iterations-1):
        tst_error, tst_correct_cnt = (0.0, 0)

        for i in range(len(tst_images)):

            layer_0 = tst_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1,weights_1_2)

            tst_error += np.sum((tst_labels[i:i+1] - layer_2) ** 2)
            tst_correct_cnt += int(np.argmax(layer_2) == np.argmax(tst_labels[i:i+1]))
            
        msg = (
            f"Iteration:{j}, "
            f"Trn-Err:.{error/len(trn_images):.3f}, "
            f"Tst-Err:.{tst_error/len(tst_images):.3f}, "
            f"Trn-Acc:{correct_cnt/float(len(trn_images)):.3f}, "
            f"Tst-Acc:{tst_correct_cnt/float(len(tst_images)):.3f}."
        )
        print(msg)

Iteration:0, Trn-Err:.0.717, Tst-Err:.0.607, Trn-Acc:0.545, Tst-Acc:0.667.
Iteration:50, Trn-Err:.0.187, Tst-Err:.0.454, Trn-Acc:0.964, Tst-Acc:0.795.
Iteration:100, Trn-Err:.0.156, Tst-Err:.0.499, Trn-Acc:0.989, Tst-Acc:0.773.
Iteration:150, Trn-Err:.0.139, Tst-Err:.0.524, Trn-Acc:0.993, Tst-Acc:0.756.
Iteration:200, Trn-Err:.0.126, Tst-Err:.0.554, Trn-Acc:0.998, Tst-Acc:0.737.
Iteration:250, Trn-Err:.0.117, Tst-Err:.0.584, Trn-Acc:0.998, Tst-Acc:0.723.
Iteration:300, Trn-Err:.0.111, Tst-Err:.0.618, Trn-Acc:0.998, Tst-Acc:0.709.
Iteration:349, Trn-Err:.0.107, Tst-Err:.0.642, Trn-Acc:0.999, Tst-Acc:0.701.
