In [1]:
import os, sys
parent_dir = os.path.abspath('..')
# the parent_dir could already be there if the kernel was not restarted,
# and we run this cell again
if parent_dir not in sys.path:
    sys.path.append(parent_dir)


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import NNLib.nn as nn

In [3]:
#load dataset csv
train_data = pd.read_csv('../Datasets/MNIST/mnist_train.csv')
test_data = pd.read_csv('../Datasets/MNIST/mnist_test.csv')

#train data
x_train = train_data.drop('label',axis=1).to_numpy()
y_train = pd.get_dummies(train_data['label']).values

#test data
x_test = test_data.drop('label',axis=1).to_numpy()
y_test = pd.get_dummies(test_data['label']).values

fac = 0.99 / 255
x_train = np.asfarray(x_train) * fac + 0.01
x_test = np.asfarray(x_test) * fac + 0.01

# train_labels = np.asfarray(train_data[:, :1])
# test_labels = np.asfarray(test_data[:, :1])

#printing dimensions
print(np.shape(x_train))
print(np.shape(y_train))


print(np.shape(x_test))
print(np.shape(y_test))
print((x_train))

(60000, 784)
(60000, 10)
(10000, 784)
(10000, 10)
[[0.01 0.01 0.01 ... 0.01 0.01 0.01]
 [0.01 0.01 0.01 ... 0.01 0.01 0.01]
 [0.01 0.01 0.01 ... 0.01 0.01 0.01]
 ...
 [0.01 0.01 0.01 ... 0.01 0.01 0.01]
 [0.01 0.01 0.01 ... 0.01 0.01 0.01]
 [0.01 0.01 0.01 ... 0.01 0.01 0.01]]


In [8]:
"""1. 784 neurons in input layer
   2. 128 neurons in hidden layer 1 
   3. 64 neurons in hidden layer 2
   4. 10 neurons in output layer"""
def softmax(input):
    y = np.exp(input - input.max())
    activated = y/ np.sum(y, axis=0)
    return activated
 
def softmax_grad(x):
    exps = np.exp(x-x.max())
    return exps / np.sum(exps,axis = 0) * (1 - exps /np.sum(exps,axis = 0))

def sigmoid(input):
    activated = 1/(1 + np.exp(-input))
    return activated

def sigmoid_grad(input):
    grad = input*(1-input)
    return grad


class DenseNN:
    def __init__(self,d0,d1,d2,d3):
        # self.params = {'w1': nn.Xavier.initialize(d0, d1),
        #                'w2': nn.Xavier.initialize(d1, d2),
        #                'w3': nn.Xavier.initialize(d2, d3)}
        self.params = {
        'w1':np.random.randn(d1, d0) * np.sqrt(1. / d1),
        'w2':np.random.randn(d2, d1) * np.sqrt(1. / d2),
        'w3':np.random.randn(d3, d2) * np.sqrt(1. / d3),
        'b1':np.random.randn(d1, 1)  * np.sqrt(1. / d1),
        'b2':np.random.randn(d2, 1)  * np.sqrt(1. / d2),
        'b3':np.random.randn(d3, 1)  * np.sqrt(1. / d3),
    }
    #     data = np.load("trained_mnist.npz")
    #     self.params = {
    #     'w1':data['w1'],
    #     'w2':data['w2'],
    #     'w3':data['w3'],
    #     'b1':data['b1'],
    #     'b2':data['b2'],
    #     'b3':data['b3'],
    # }
        
    def forward(self,a0):
        params = self.params
        params['a0'] = a0
        params['z1'] = np.dot(params['w1'],params['a0']) + params['b1']
        params['a1'] = sigmoid(params['z1'])
        
        params['z2'] = np.dot(params['w2'],params['a1']) + params['b2']
        params['a2'] = sigmoid(params['z2'])
        
        params['z3'] = np.dot(params['w3'],params['a2']) + params['b3']
        params['a3'] = softmax(params['z3'])
        for param in params.values():
            print(np.shape(param))
        
        return params['a3']
        
    def backprop(self,y_true,y_pred):
        params = self.params
        w_change = {}
        b_change = {}
        error = softmax_grad(params['z3'])*((y_pred - y_true)/y_true.shape[0])
        w_change['w3'] = np.matmul(error,params['a2'].T)
        b_change['b3'] = error
        
        error = np.dot(params['w3'].T,error)*sigmoid_grad(params['a2'])
        w_change['w2'] = np.matmul(error,params['a1'].T)
        b_change['b2'] = error
        
        error = np.dot(params['w2'].T,error)*sigmoid_grad(params['a1'])
        w_change['w1'] = np.matmul(error,params['a0'].T)
        b_change['b1'] = error
        
        return w_change,b_change
        
    def update_parameters(self,learning_rate,w_change,b_change):
        self.params['w1'] -= learning_rate*w_change['w1']
        self.params['w2'] -= learning_rate*w_change['w2']
        self.params['w3'] -= learning_rate*w_change['w3']
        
        self.params['b1'] -= learning_rate*b_change['b1']
        self.params['b2'] -= learning_rate*b_change['b2']
        self.params['b3'] -= learning_rate*b_change['b3']

    def train(self,epochs,lr):
        for epoch in range(epochs):
            for i in range(60000):
                a0 = np.array([x_train[i]]).T
                o = np.array([y_train[i]]).T
                y_pred = self.forward(a0)
                w_change,b_change = self.backprop(o,y_pred)
                self.update_parameters(lr,w_change,b_change)
                # print(self.compute_accuracy()*100)
                # print(calc_mse(a3, o))
            print("epoch number :",str(epoch),", accuracy ",str((self.compute_accuracy())*100),"%")
            np.savez("trained_mnist",**self.params)
    
    def compute_accuracy(self):
        predictions = []
        for i in range(10000):
            idx = i
            a0 = x_test[idx]
            a0 = np.array([a0]).T
            #print("acc a1",np.shape(a1))
            o = y_test[idx]
            o = np.array([o]).T
            #print("acc o",np.shape(o))
            output = self.forward(a0)
            pred = np.argmax(output)
            predictions.append(pred == np.argmax(o))
        return np.mean(predictions)
    
    def test(self):
        idx = np.random.randint(0,10000,1)
        image = x_test[idx]
        label = y_test[idx]
        
        image_array = np.asfarray(image).reshape((28,28))
        plt.imshow(image_array,cmap="Greys",interpolation="None")
        print("actual label is ",np.argmax(label))
        prediction = self.forward((np.array(image)).T)
        print("prediction is ",np.argmax(prediction))
      

In [10]:
dnn = DenseNN(784, 128,64, 10)
dnn.train(40,0.0001)
#dnn.test()

(128, 784)
(64, 128)
(10, 64)
(128, 1)
(64, 1)
(10, 1)
(784, 1)
(128, 1)
(128, 1)
(64, 1)
(64, 1)
(10, 1)
(10, 1)
(128, 784)
(64, 128)
(10, 64)
(128, 1)
(64, 1)
(10, 1)
(784, 1)
(128, 1)
(128, 1)
(64, 1)
(64, 1)
(10, 1)
(10, 1)
(128, 784)
(64, 128)
(10, 64)
(128, 1)
(64, 1)
(10, 1)
(784, 1)
(128, 1)
(128, 1)
(64, 1)
(64, 1)
(10, 1)
(10, 1)
(128, 784)
(64, 128)
(10, 64)
(128, 1)
(64, 1)
(10, 1)
(784, 1)
(128, 1)
(128, 1)
(64, 1)
(64, 1)
(10, 1)
(10, 1)
(128, 784)
(64, 128)
(10, 64)
(128, 1)
(64, 1)
(10, 1)
(784, 1)
(128, 1)
(128, 1)
(64, 1)
(64, 1)
(10, 1)
(10, 1)
(128, 784)
(64, 128)
(10, 64)
(128, 1)
(64, 1)
(10, 1)
(784, 1)
(128, 1)
(128, 1)
(64, 1)
(64, 1)
(10, 1)
(10, 1)
(128, 784)
(64, 128)
(10, 64)
(128, 1)
(64, 1)
(10, 1)
(784, 1)
(128, 1)
(128, 1)
(64, 1)
(64, 1)
(10, 1)
(10, 1)
(128, 784)
(64, 128)
(10, 64)
(128, 1)
(64, 1)
(10, 1)
(784, 1)
(128, 1)
(128, 1)
(64, 1)
(64, 1)
(10, 1)
(10, 1)
(128, 784)
(64, 128)
(10, 64)
(128, 1)
(64, 1)
(10, 1)
(784, 1)
(128, 1)
(128, 1)
(64, 1

KeyboardInterrupt: 

In [None]:
input = np.array([[1,2,3,4,5]]).T
print(input)
print(input.shape)
print(softmax(input))
print(softmax(input)*(1-softmax(input)))
print(softmax_grad(input))

[[1]
 [2]
 [3]
 [4]
 [5]]
(5, 1)
[[0.01165623]
 [0.03168492]
 [0.08612854]
 [0.23412166]
 [0.63640865]]
[[0.01152036]
 [0.03068099]
 [0.07871042]
 [0.17930871]
 [0.23139268]]
[[0.01152036]
 [0.03068099]
 [0.07871042]
 [0.17930871]
 [0.23139268]]
