In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
from keras.datasets import mnist

In [3]:
# Load MNIST through keras. The call returns a (train, test) pair of 2-tuples,
# unpacked here as inputs (train_X / test_X) and labels (train_y / test_y).
# The first call downloads the archive, so it needs network access.
(train_X, train_y), (test_X, test_y) = mnist.load_data()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
# Flatten every image into a vector and store one sample per column, giving
# arrays of shape (pixels_per_image, n_samples).  A single C-order reshape
# flattens all images at once; it is equivalent to calling .flatten() on each
# image and stacking the results, with the same values and dtype.
# Note: the results are views that share memory with train_X / test_X.
X_train = train_X.reshape(train_X.shape[0], -1).T
X_test = test_X.reshape(test_X.shape[0], -1).T

In [5]:
from scipy import sparse
def convert_labels(y, C=4):
    """One-hot encode a label vector, one column per sample.

    Parameters
    ----------
    y : 1-D array of integer class labels. Each label is used as a row
        index, so labels are expected to lie in ``[0, C)``.
    C : number of classes, i.e. the number of rows of the result.

    Returns
    -------
    Dense array of shape ``(C, len(y))`` whose entry ``[y[i], i]`` is 1 and
    every other entry is 0. The element dtype follows ``np.ones_like(y)``.
    """
    n_samples = len(y)
    row_idx = y                       # class label selects the row
    col_idx = np.arange(n_samples)    # sample position selects the column
    values = np.ones_like(y)
    encoded = sparse.coo_matrix((values, (row_idx, col_idx)),
                                shape=(C, n_samples))
    return encoded.toarray()

In [6]:
# One-hot encode the training labels into 10 classes (one column per sample);
# the trailing expression displays the shape as a sanity check.
Y_train=convert_labels(train_y,10)
Y_train.shape

(10, 60000)

In [7]:
# One-hot encode the test labels the same way (10 classes, one column per
# sample); the trailing expression displays the encoded array.
Y_test=convert_labels(test_y,10)
Y_test

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [8]:
class neural_network(object):
    """Fully connected classifier: two ReLU hidden layers and a softmax output.

    Data is laid out one sample per column: inputs have shape
    ``(input_unit, N)`` and one-hot targets ``(output_class, N)``.
    Weight matrices are stored as ``(fan_in, fan_out)`` and applied as
    ``W.T @ A``; biases are column vectors.
    """

    def __init__(self, input_unit=784, hidden_units_1=300, hidden_units_2=100,
                 output_class=10, seed=None):
        """Create the layers with small Gaussian weights and zero biases.

        Parameters
        ----------
        input_unit : number of input features per sample.
        hidden_units_1, hidden_units_2 : widths of the two hidden layers.
        output_class : number of output classes.
        seed : optional seed for reproducible weights. ``None`` draws from
            NumPy's global random state.
        """
        self.input_unit = input_unit
        self.hidden_units_1 = hidden_units_1
        self.hidden_units_2 = hidden_units_2
        self.output_class = output_class
        # With seed=None the global np.random functions are used, so code
        # that relies on np.random.seed(...) keeps working unchanged.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.W1 = 0.01*rng.randn(self.input_unit, self.hidden_units_1)
        self.b1 = np.zeros((self.hidden_units_1, 1))
        self.W2 = 0.01*rng.randn(self.hidden_units_1, self.hidden_units_2)
        self.b2 = np.zeros((self.hidden_units_2, 1))
        self.W3 = 0.01*rng.randn(self.hidden_units_2, self.output_class)
        self.b3 = np.zeros((self.output_class, 1))
        # Loss history; filled by train() and read by vis_loss().
        self.lost_arr = []

    def softmax(self, Z):
        """Column-wise softmax of the logits ``Z``.

        The per-column maximum is subtracted before exponentiating. This
        leaves the result mathematically unchanged but prevents ``np.exp``
        from overflowing to ``inf`` (and the ratio becoming ``nan``) when
        logits are large.
        """
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        e_Z = np.exp(shifted)
        A = e_Z / e_Z.sum(axis=0, keepdims=True)
        return A

    def feed_for_ward(self, X):
        """Run the forward pass on ``X`` and return class probabilities.

        Pre-activations (``Z1..Z3``) and activations (``A1..A3``) are cached
        on the instance because ``back_propagation`` reads them.
        """
        # layer 1: affine + ReLU
        self.Z1 = self.W1.T@X + self.b1
        self.A1 = np.maximum(self.Z1, 0)
        # layer 2: affine + ReLU
        self.Z2 = self.W2.T@self.A1 + self.b2
        self.A2 = np.maximum(self.Z2, 0)
        # output layer: affine + softmax
        self.Z3 = self.W3.T@self.A2 + self.b3
        self.A3 = self.softmax(self.Z3)
        return self.A3

    def back_propagation(self, X, Y, eta):
        """Do one gradient-descent update of all weights and biases.

        Must be called right after ``feed_for_ward(X)`` with the same ``X``,
        since it uses the activations cached by that call.

        Parameters
        ----------
        X : inputs, one sample per column.
        Y : one-hot targets, one sample per column.
        eta : learning rate.
        """
        self.N = X.shape[1]

        # Gradient of the mean cross-entropy w.r.t. the output logits.
        self.E3 = (self.A3 - Y)/self.N
        self.dW3 = np.dot(self.A2, self.E3.T)
        self.db3 = np.sum(self.E3, axis=1, keepdims=True)
        self.E2 = np.dot(self.W3, self.E3)
        self.E2[self.Z2 <= 0] = 0  # gradient of ReLU
        self.dW2 = np.dot(self.A1, self.E2.T)
        self.db2 = np.sum(self.E2, axis=1, keepdims=True)
        self.E1 = np.dot(self.W2, self.E2)
        self.E1[self.Z1 <= 0] = 0  # gradient of ReLU
        self.dW1 = np.dot(X, self.E1.T)
        self.db1 = np.sum(self.E1, axis=1, keepdims=True)

        # Gradient Descent update
        self.W1 += -eta*self.dW1
        self.b1 += -eta*self.db1
        self.W2 += -eta*self.dW2
        self.b2 += -eta*self.db2
        self.W3 += -eta*self.dW3
        self.b3 += -eta*self.db3

    def train(self, X, Y, iteration=100, eta=0.015):
        """Train with full-batch gradient descent.

        Runs ``iteration`` forward/backward passes over all of ``X``. The
        loss before each update is appended to ``self.lost_arr`` (reset at
        the start of every call) and printed every 10th iteration.
        """
        self.lost_arr = []
        for i in range(iteration):
            y_hat = self.feed_for_ward(X)
            loss = self.cost(Y, y_hat)
            self.lost_arr.append(loss)
            self.back_propagation(X, Y, eta)
            if i % 10 == 0:
                print(f"loss after inter {i}: ", loss)

    def cost(self, Y, Yhat):
        """Mean cross-entropy between one-hot ``Y`` and probabilities ``Yhat``."""
        epsilon = 1e-5  # keeps log() finite when a probability is exactly 0
        return -np.sum(Y*np.log(Yhat + epsilon))/Y.shape[1]

    def vis_loss(self, inter=None):
        """Plot the recorded training loss against the iteration index.

        ``inter`` limits the plot to the first ``inter`` recorded losses;
        ``None`` plots the whole history. A value larger than the history
        is clamped rather than raising a length-mismatch error.
        """
        losses = self.lost_arr if inter is None else self.lost_arr[:inter]
        x = np.arange(len(losses))
        plt.plot(x, losses, color='green')

    def predict(self, X):
        """Return one-hot predictions of shape ``(output_class, N)``.

        Each column has a single 1 at the most probable class; ties go to
        the lowest class index.
        """
        y_hat = self.feed_for_ward(X)
        n_samples = y_hat.shape[1]
        one_hot = np.zeros((self.output_class, n_samples))
        one_hot[np.argmax(y_hat, axis=0), np.arange(n_samples)] = 1
        return one_hot

    def score(self, predict, y):
        """Accuracy in percent, rounded to 4 decimals.

        A sample counts as correct when its whole predicted column equals
        the corresponding column of ``y``.

        Raises
        ------
        ZeroDivisionError : if ``predict`` has no columns.
        """
        n_samples = predict.shape[1]
        matches = np.all(predict == y[:, :n_samples], axis=0)
        cnt = int(np.count_nonzero(matches))
        return round(cnt/n_samples*100, 4)

In [10]:
# Build the network and run 700 full-batch gradient-descent iterations at the
# default learning rate; train() prints the loss every 10th iteration.
# NOTE(review): despite the name `two_lay`, the model has two hidden layers
# plus an output layer (three weight matrices).
two_lay = neural_network()
two_lay.train(X_train,Y_train,700)

loss after inter 0:  2.3241011279217143
loss after inter 10:  2.301561821728704
loss after inter 20:  0.8896088219484387
loss after inter 30:  0.6148059284807955
loss after inter 40:  0.48917428398434476
loss after inter 50:  0.33409662418584907
loss after inter 60:  0.3018917879249289
loss after inter 70:  0.3070300953831267
loss after inter 80:  0.252621223502583
loss after inter 90:  0.23344825665323166
loss after inter 100:  0.22760917917627127
loss after inter 110:  0.23315378541018267
loss after inter 120:  0.19795385643259217
loss after inter 130:  0.1868091927122064
loss after inter 140:  0.17772312523170314
loss after inter 150:  0.16977320694878323
loss after inter 160:  0.16263106565387053
loss after inter 170:  0.15615390680542424
loss after inter 180:  0.15020978777839383
loss after inter 190:  0.14472374602281818
loss after inter 200:  0.13964532719981493
loss after inter 210:  0.1349097174761042
loss after inter 220:  0.13049281266572013
loss after inter 230:  0.12636292

In [13]:
# One-hot predictions for the test inputs (one column per sample).
l_pred=two_lay.predict(X_test)

In [14]:
# Test accuracy in percent: share of columns whose predicted one-hot vector
# exactly matches the encoded label.
two_lay.score(l_pred,Y_test)

97.56