### DIGIT RECOGNISER USING NEURAL NETWORK FROM SCRATCH

In [3]:
import numpy as np
import pandas as pd

##### IMPORTING DATASETS INTO MY NOTEBOOK AND CONVERTING INTO NUMPY ARRAYS TO GET THEM USED

In [4]:
x_train=pd.read_csv('train_X.csv')
x_test=pd.read_csv('test_X.csv')
y_train=pd.read_csv('train_label.csv')
y_test=pd.read_csv('test_label.csv')

In [5]:
x_train=np.array(x_train)
x_test=np.array(x_test)
y_train=np.array(y_train)
y_test=np.array(y_test)

In [6]:
x_train=x_train.T
x_test=x_test.T

In [7]:
y_train=y_train.T
y_test=y_test.T

In [8]:
y_test=y_test.argmax(axis=0)

### WRITING THE FUNCTIONS OF NEURAL NETWORK(INITIALIZE PARAMS,FORWARD PROP,BACK PROP,COST FUNC,UPDATE PARAMS )

In [9]:
def initialize_parameters(nx,nh,ny):
    w1=np.random.randn(nh,nx)*0.01
    w2=np.random.randn(ny,nh)*0.01
    b1=np.zeros(nh)
    b1=b1.reshape(nh,1)
    b2=np.zeros(ny)
    b2=b2.reshape(ny,1)
    parameters={
        'w1':w1,
        'w2':w2,
        'b1':b1,
        'b2':b2
    }
    return parameters

In [10]:
def forward_prop(parameters,x):
    w1=parameters[f'w{1}']
    w2=parameters['w2']
    b1=parameters['b1']
    b2=parameters['b2']
    z1=np.dot(w1,x)+b1
    a1=tanh(z1)
    z2=np.dot(w2,a1)+b2
    a2=softmax(z2)
    answ={
        'z1':z1,
        'z2':z2,
        'a1':a1,
        'a2':a2
    }
    return answ

##### AS IT IS A DIGIT RECOGNIZER SO USING SOFTMAX FOR MULTICLASS CLASSIFICATION

In [11]:
def softmax(z):
    z1=np.exp(z)
    sum=np.sum(z1,axis=0)
    return z1/sum

def tanh(z):
    return np.tanh(z)

In [12]:
def cost_function(a2,y):
    m=y.shape[1]
    cost=-(1/m)*np.sum(y*np.log(a2))
    return cost
    # y must be such it contains that its not 0 1 stc

In [13]:
def back_prop(x,y,parameters,answ):
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    a1 = answ['a1']
    a2 = answ['a2']
    m = x.shape[1]
    dz2 = (a2 - y)
    dw2 = (1/m)*np.dot(dz2, a1.T)
    db2 = (1/m)*np.sum(dz2, axis = 1, keepdims = True)
    
    dz1 = (1/m)*np.dot(w2.T, dz2)*derivative_tanh(a1)
    dw1 = (1/m)*np.dot(dz1, x.T)
    db1 = (1/m)*np.sum(dz1, axis = 1, keepdims = True)
    
    gradients = {
        "dw1" : dw1,
        "db1" : db1,
        "dw2" : dw2,
        "db2" : db2
    }
    
    return gradients

In [14]:
def update_parameters(parameters, gradients, learning_rate):
    
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    
    dw1 = gradients['dw1']
    db1 = gradients['db1']
    dw2 = gradients['dw2']
    db2 = gradients['db2']
    
    w1 = w1 - learning_rate*dw1
    b1 = b1 - learning_rate*db1
    w2 = w2 - learning_rate*dw2
    b2 = b2 - learning_rate*db2
    
    parameters = {
        "w1" : w1,
        "b1" : b1,
        "w2" : w2,
        "b2" : b2
    }
    
    return parameters

###  FIT FUNC OF THE MODEL

In [15]:
def model(x, y, n_h, learning_rate, iterations):
    
    n_x = x.shape[0]
    n_y = y.shape[0]
    
    cost_list = []
    
    parameters = initialize_parameters(n_x, n_h, n_y)
    
    for i in range(iterations):
        
        answ = forward_prop(parameters,x)
        
        cost = cost_function(answ['a2'], y)
        
        gradients = back_prop(x, y, parameters, answ)
        
        parameters = update_parameters(parameters, gradients, learning_rate)
        
        cost_list.append(cost)
        
        if(i%(iterations/10) == 0):
            print("Cost after", i, "iterations is :", cost)
        
    return parameters, cost_list

In [16]:
def derivative_tanh(x):
    return (1 - np.power(np.tanh(x), 2))

##### RUNNING THE MODEL

In [23]:
iterations = 1000
n_h = 100
learning_rate = 0.4
Parameters, Cost_list = model(x_train, y_train, n_h,learning_rate, iterations)

Cost after 0 iterations is : 2.3079203683716134
Cost after 100 iterations is : 0.10482350556484805
Cost after 200 iterations is : 0.03762790386852396
Cost after 300 iterations is : 0.02084668853635615
Cost after 400 iterations is : 0.013597291210972263
Cost after 500 iterations is : 0.009771846512948696
Cost after 600 iterations is : 0.007548699332805696
Cost after 700 iterations is : 0.006046968585690295
Cost after 800 iterations is : 0.005117060627427108
Cost after 900 iterations is : 0.00443115525024406


In [26]:
answ = forward_prop(Parameters,x_test)

In [27]:
np.sum(answ['a2'].argmax(axis=0)==y_test)/len(y_test)*100  #  ACCURACY ON THE TEST SET

83.66762177650429