In [1]:
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
import tensorflow as tf

In [2]:
# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# training data : 60000 samples
# reshape and normalize input data
x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = np_utils.to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
x_test = x_test.astype('float32')
x_test /= 255
y_test = np_utils.to_categorical(y_test)

In [3]:
x_train.shape

(60000, 1, 784)

In [4]:
def init_layer_weights(input_size, output_size):
    weights = np.random.rand(input_size, output_size) - 0.5;
    bias = np.random.rand(1, output_size) - 0.5;
    return weights, bias

def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size

In [5]:
def forward_propagation(input_data, layer):
    weights, bias = layer
    output = np.dot(input_data, weights) + bias
    return output

def backward_propagation(output_error, input_data, layer, learning_rate):
    weights, bias = layer
    input_error = np.dot(output_error, weights.T)
    weights_error = np.dot(input_data.T, output_error)
    weights_updated = weights -  (learning_rate * weights_error)
    bias_updated = bias - (learning_rate * output_error)
    updated_layer = (weights_updated, bias_updated)
    return input_error, updated_layer

def forward_propagation_activation(input_data):
    output = np.tanh(input_data)
    return output

def backward_propagation_activation(output_error, input_data):
    activation_prime = 1 - np.tanh(input_data)**2
    final_output_error = activation_prime * output_error
    return final_output_error

In [6]:
layer1 = init_layer_weights(28*28,150);
layer2 = init_layer_weights(150,50);
layer3 = init_layer_weights(50,10);

In [7]:
nsamples = 1000
learning_rate = 0.1

In [8]:
trainSampleX = x_train[0]
trainSampleY = y_train[0]

In [9]:
trainSampleX.shape

(1, 784)

In [10]:
#forward pass
def fullforward(X, layer1,layer2,layer3):
    output1 = forward_propagation(X, layer1)
    output1act = forward_propagation_activation(output1)
    output2 = forward_propagation(output1act, layer2)
    output2act = forward_propagation_activation(output2)
    output3 = forward_propagation(output2act, layer3)
    output3act = forward_propagation_activation(output3)
    return output1,output2,output3,output1act,output2act,output3act

In [11]:
#backward pass
def fullbackward(X,outputError,layer1,layer2,layer3,output1,output2,output3,output1act,output2act,output3act,learning_rate):
    back_output3act = backward_propagation_activation(outputError, output3)
    back_output3, updated_layer3 = backward_propagation(back_output3act, output2act, layer3, learning_rate)
    back_output2act = backward_propagation_activation(back_output3, output2)
    back_output2, updated_layer2 = backward_propagation(back_output2act, output1act, layer2, learning_rate)
    back_output1act = backward_propagation_activation(back_output2, output1)
    back_output1, updated_layer1 = backward_propagation(back_output1act, trainSampleX, layer1, learning_rate)
    return updated_layer1, updated_layer2, updated_layer3

In [12]:
def train(x_train, y_train, nsamples, layers, epochs, learning_rate):
    updated_layer1, updated_layer2, updated_layer3 = layers[0], layers[1], layers[2]
    for j in range(epochs):
        E = 0;
        for i in range(nsamples):
            x = x_train[i]
            y = y_train[i]
            #fwd pass
            output1,output2,output3,output1act,output2act,output3act= fullforward(x, updated_layer1, updated_layer2, updated_layer3)
            #compute loss 
            E += mse(y,output3act)
            #compute loss derivative to initiate backward pass
            dE = mse_prime(y,output3act)
            #backward pass
            updated_layer1, updated_layer2, updated_layer3 = fullbackward(x,dE,updated_layer1, updated_layer2, updated_layer3, 
                                                                          output1, output2, output3, output1act, output2act, 
                                                                          output3act, learning_rate)
            #avg error
            E /= nsamples
        print("Epoch number: ", j)
        print("Epoch error is: ", E)
        
    return (updated_layer1, updated_layer2, updated_layer3)

In [13]:
epochs = 35
layers = (layer1, layer2, layer3)
trained_layers = train(x_train, y_train, nsamples, layers, epochs, learning_rate)

Epoch number:  0
Epoch error is:  0.0001363026606131494
Epoch number:  1
Epoch error is:  0.00012472711651959642
Epoch number:  2
Epoch error is:  0.00011970439386853888
Epoch number:  3
Epoch error is:  0.00010952468146809172
Epoch number:  4
Epoch error is:  0.00010761927567505201
Epoch number:  5
Epoch error is:  0.00010264079191902732
Epoch number:  6
Epoch error is:  9.239332257087941e-05
Epoch number:  7
Epoch error is:  8.565637048794431e-05
Epoch number:  8
Epoch error is:  8.331246896279258e-05
Epoch number:  9
Epoch error is:  7.889543341573936e-05
Epoch number:  10
Epoch error is:  6.725556983263765e-05
Epoch number:  11
Epoch error is:  5.0764080174331865e-05
Epoch number:  12
Epoch error is:  4.332203449034608e-05
Epoch number:  13
Epoch error is:  3.655669967537051e-05
Epoch number:  14
Epoch error is:  2.5419988006491553e-05
Epoch number:  15
Epoch error is:  2.1068110636697942e-05
Epoch number:  16
Epoch error is:  2.2806649440407534e-05
Epoch number:  17
Epoch error is

In [14]:
x_test.shape

(10000, 1, 784)

In [26]:
#testing
idx = np.random.randint(0,10000)
print("Test Sample ID: ", idx)
testSampleX = x_test[idx]
testSampleY = y_test[idx]

_,_,_,_,_, pred = fullforward(testSampleX, trained_layers[0], trained_layers[1], trained_layers[2])
print("Prediction: ", np.argmax(pred))
print("Truth: ", np.argmax(testSampleY))

Test Sample ID:  8080
Prediction:  3
Truth:  3
