# Neural Network from scratch

In [None]:
import numpy as np
import pandas as pd

data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

In [None]:
data

In [None]:
data = np.array(data)
m, n = data.shape
m, n

In [None]:
number_of_tests = int(m * 0.3)

np.random.shuffle(data) # shuffle before splitting into test and training sets

test_data = data[0:number_of_tests].T
Y_test = test_data[0]
X_test = test_data[1:n]
X_test = X_test / 255.0

train_data = data[number_of_tests:m].T
Y_train = train_data[0]
X_train = train_data[1:n]
X_train = X_train / 255.0

In [35]:
def init_params():
    W1 = np.random.randn(100, 784) * np.sqrt(2 / 784)
    B1 = np.zeros((100, 1))
    W2 = np.random.randn(100, 100) * np.sqrt(2 / 10) 
    B2 = np.zeros((100, 1))
    W3 = np.random.randn(10, 100) * np.sqrt(2 / 10) 
    B3 = np.zeros((10, 1))
    return W1, B1, W2, B2, W3, B3

def ReLU(Z):
    return np.maximum(Z, 0)

def softmax(Z):
    expZ = np.exp(Z - np.max(Z, axis=0, keepdims=True))
    return expZ / np.sum(expZ, axis=0, keepdims=True)
    
def forward_propagation(W1, B1, W2, B2, W3, B3, X):
    Z1 = W1.dot(X) + B1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + B2
    A2 = ReLU(Z2)
    Z3 = W3.dot(A2) + B3
    A3 = softmax(Z3)
    return Z1, A1, Z2, A2, Z3, A3

def derivative_of_ReLU(Z):
    return Z > 0

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    return one_hot_Y.T

def backward_propagation(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, Y):
    dZ3 = A3 - Y
    dW3 = 1 / m * dZ3.dot(A2.T) + W3
    dB3 = 1 / m * np.sum(dZ3)
    
    dZ2 = W3.T.dot(dZ3) * derivative_of_ReLU(Z2)
    dW2 = 1 / m * dZ2.dot(A1.T) + W2
    dB2 = 1 / m * np.sum(dZ2)
    
    dZ1 = W2.T.dot(dZ2) * derivative_of_ReLU(Z1)
    dW1 = 1 / m * dZ1.dot(X.T) + W1
    dB1 = 1 / m * np.sum(dZ1)
    
    return dW1, dB1, dW2, dB2, dW3, dB3

def update_params(W1, B1, W2, B2, W3, B3, dW1, dB1, dW2, dB2, dW3, dB3, LR):
    W1 = W1 - LR * dW1
    B1 = B1 - LR * dB1    
    W2 = W2 - LR * dW2  
    B2 = B2 - LR * dB2
    W3 = W3 - LR * dW3  
    B3 = B3 - LR * dB3
    return W1, B1, W2, B2, W3, B3

def get_predictions(A):
    return np.argmax(A, 0)

def get_accuracy(predictions, Y):
    return np.sum(predictions == Y) / Y.size

def gradient_descent(X, Y, LR, iterations):
    W1, B1, W2, B2, W3, B3 = init_params()
    one_hot_Y = one_hot(Y)
    
    for i in range(iterations + 1):
        Z1, A1, Z2, A2, Z3, A3 = forward_propagation(W1, B1, W2, B2, W3, B3, X)
        dW1, dB1, dW2, dB2, dW3, dB3 = backward_propagation(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, one_hot_Y)
        W1, B1, W2, B2, W3, B3 = update_params(W1, B1, W2, B2, W3, B3, dW1, dB1, dW2, dB2, dW3, dB3, LR)

        if i % 10 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A3)
            print(get_accuracy(predictions, Y))

    return W1, B1, W2, B2, W3, B3

In [41]:
W1, B1, W2, B2, W3, B3 = gradient_descent(X_train, Y_train, 0.1, 1000)

Iteration:  0
0.065
Iteration:  10
0.15404761904761904
Iteration:  20
0.25666666666666665
Iteration:  30
0.332108843537415
Iteration:  40
0.48741496598639455
Iteration:  50
0.5715986394557823
Iteration:  60
0.606156462585034
Iteration:  70
0.6463265306122449
Iteration:  80
0.697687074829932
Iteration:  90
0.7402380952380953
Iteration:  100
0.7624149659863946
Iteration:  110
0.7777891156462585
Iteration:  120
0.79
Iteration:  130
0.8008503401360544
Iteration:  140
0.8091496598639456
Iteration:  150
0.8182993197278912
Iteration:  160
0.8249319727891157
Iteration:  170
0.8310884353741497
Iteration:  180
0.8364965986394558
Iteration:  190
0.8407142857142857
Iteration:  200
0.8447959183673469
Iteration:  210
0.8485034013605443
Iteration:  220
0.8517687074829932
Iteration:  230
0.8549659863945578
Iteration:  240
0.8573469387755102
Iteration:  250
0.8600680272108844
Iteration:  260
0.8625850340136054
Iteration:  270
0.865204081632653
Iteration:  280
0.8680952380952381
Iteration:  290
0.869761

In [37]:
def make_predictions(X, W1, B1, W2, B2, W3, B3):
    _, _, _, _, _, A3 = forward_propagation(W1, B1, W2, B2, W3, B3, X)
    predictions = get_predictions(A3)
    return predictions

In [42]:
test_predictions = make_predictions(X_test, W1, B1, W2, B2, W3, B3)
get_accuracy(test_predictions, Y_test)

0.907063492063492