# Handwritten Digit Classification Using a Neural Network

Using only Python and NumPy for the network itself; pandas is used only to load the CSV data.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the training set from CSV into a pandas DataFrame.
data = pd.read_csv('dataset/train.csv')
# Preview the first rows (shown as the cell output when run in a notebook).
data.head()

In [None]:
# Convert the DataFrame into a plain NumPy array.
data = np.array(data)
# m = number of rows, nx = number of columns of the raw table.
m, nx = data.shape
# Show the (rows, columns) shape as the cell output.
data.shape

In [None]:
# NOTE: this cell is not idempotent -- running it a second time transposes
# `data` back again and decrements `nx` once more.

# Shuffle the rows in place, then transpose so that every original row
# becomes a column.
np.random.shuffle(data)
data = data.T

# After the transpose, row 0 holds the first CSV column and the remaining rows
# hold all the other columns.
# NOTE(review): presumably column 0 is the label and the rest are features --
# confirm against the dataset.
# NOTE(review): X is used unscaled; if these are 0-255 pixel intensities,
# consider scaling to [0, 1] before training -- confirm.
Y = data[0]
X = data[1:]

# nx now counts only the rows kept in X (one fewer than the column count).
nx -= 1
X.shape

In [None]:
# Display Y (the first row of the transposed data) as the cell output.
Y

In [None]:
# Sanity check: print the largest and smallest values found in Y.
print(Y.max(), Y.min())

In [None]:
def init(n_inputs=784, n_hidden=10, n_outputs=10):
    """Create randomly initialised parameters for the two-layer network.

    Args:
        n_inputs: Number of input features per example (default 784).
        n_hidden: Number of units in the hidden layer (default 10).
        n_outputs: Number of units in the output layer (default 10).

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_inputs)``,
        ``(n_hidden, 1)``, ``(n_outputs, n_hidden)`` and ``(n_outputs, 1)``.
        Every entry is drawn uniformly from ``[-0.5, 0.5)``.
    """
    # np.random.rand samples from [0, 1). Shifting by 0.5 centres the values
    # on zero so the initial weights are not all positive, which would push
    # every pre-activation in the same direction.
    W1 = np.random.rand(n_hidden, n_inputs) - 0.5
    b1 = np.random.rand(n_hidden, 1) - 0.5
    W2 = np.random.rand(n_outputs, n_hidden) - 0.5
    b2 = np.random.rand(n_outputs, 1) - 0.5

    return W1, b1, W2, b2

In [None]:
# Activation functions and label-encoding helpers
def ReLU(X):
    """Rectified linear unit: element-wise maximum of ``X`` and zero."""
    rectified = np.maximum(X, 0)
    return rectified

def soft_max(Z):
    """Apply a numerically stable softmax along axis 0.

    Args:
        Z: Array of scores. Each column (slice along axis 0) is normalised
            independently.

    Returns:
        Array with the same shape as ``Z`` whose entries are non-negative and
        sum to 1 along axis 0.
    """
    # Subtracting the per-column maximum does not change the softmax value,
    # but it keeps the largest exponent at 0 so np.exp cannot overflow to inf
    # (which would otherwise turn the division into inf / inf = NaN).
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

def one_hot(Y, num_classes=None):
    """One-hot encode integer labels, one column per example.

    Args:
        Y: 1-D sequence of non-negative integer class labels.
        num_classes: Number of rows (classes) in the result. When omitted it
            is inferred as ``Y.max() + 1``, which is only correct if the
            highest class actually occurs in ``Y``.

    Returns:
        Float array of shape ``(num_classes, Y.size)`` where entry
        ``[Y[i], i]`` is 1 and every other entry is 0.

    Raises:
        ValueError: If ``Y`` is empty and ``num_classes`` is not given.
        IndexError: If a label is not smaller than ``num_classes``.
    """
    labels = np.asarray(Y)
    if num_classes is None:
        num_classes = labels.max() + 1

    # Build the (classes, examples) layout directly instead of transposing.
    encoded = np.zeros((num_classes, labels.size))
    encoded[labels, np.arange(labels.size)] = 1
    return encoded

def ReLU_derivative(X):
    """Return a boolean mask that is True wherever ``X`` is strictly positive.

    When multiplied with a numeric array the mask acts as 1 for positive
    inputs and 0 otherwise.
    """
    positive_mask = X > 0
    return positive_mask

In [None]:
# Forward Pass
def forward_propagation(A0, W1, b1, W2, b2):
    """Run one forward pass through both layers.

    Layer 1 computes ``W1 . A0 + b1`` followed by ``ReLU``; layer 2 computes
    ``W2 . A1 + b2`` followed by ``soft_max``.

    Args:
        A0: Input activations fed to the first layer.
        W1, b1: Weights and bias of the first layer.
        W2, b2: Weights and bias of the second layer.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: the pre-activation and activation of the
        first layer, then of the second layer.
    """
    def affine(weights, bias, activations):
        # The bias is added into the product's own buffer, so no second array
        # of the same size is allocated.
        pre_activation = np.dot(weights, activations)
        np.add(pre_activation, bias, out=pre_activation)
        return pre_activation

    hidden_pre = affine(W1, b1, A0)
    hidden_act = ReLU(hidden_pre)

    output_pre = affine(W2, b2, hidden_act)
    output_act = soft_max(output_pre)

    return hidden_pre, hidden_act, output_pre, output_act

In [None]:
# Backward Pass
def backward_propagation(X, Y, Z1, A1, Z2, A2, W1, W2):
    """Compute parameter gradients for one forward pass.

    Args:
        X: Inputs that were fed to the forward pass, one column per example.
        Y: Integer class labels, one per example.
        Z1: Pre-activation of the first layer.
        A1: Activation of the first layer.
        Z2: Pre-activation of the second layer (not read; kept so the
            signature stays the same for callers).
        A2: Output of the network.
        W1: First-layer weights (not read; kept so the signature stays the
            same for callers).
        W2: Second-layer weights.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``. ``dW1`` and ``dW2`` have the shapes of
        the corresponding weight matrices; ``db1`` and ``db2`` are column
        vectors with one entry per unit of their layer.
    """
    # Average over the examples actually passed in, rather than relying on a
    # module-level row count that may describe a different array.
    n_examples = Y.size

    corrected = one_hot(Y)

    # Output-layer error: prediction minus the one-hot target.
    dZ2 = A2 - corrected
    dW2 = dZ2.dot(A1.T) / n_examples
    # Sum over examples only (axis 1) so every unit keeps its own bias
    # gradient; summing over all axes would give one scalar shared by all.
    db2 = np.sum(dZ2, axis=1, keepdims=True) / n_examples

    # Propagate the error back through W2 and gate it by the ReLU mask.
    dZ1 = W2.T.dot(dZ2) * ReLU_derivative(Z1)
    dW1 = dZ1.dot(X.T) / n_examples
    db1 = np.sum(dZ1, axis=1, keepdims=True) / n_examples

    return dW1, db1, dW2, db2

In [None]:
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Take one gradient-descent step on every parameter.

    Each parameter ``p`` with gradient ``dp`` becomes ``p - alpha * dp``.
    The inputs are not modified; new values are returned.

    Args:
        W1, b1, W2, b2: Current parameters.
        dW1, db1, dW2, db2: Gradients matching the parameters, in order.
        alpha: Learning rate (step size).

    Returns:
        Tuple ``(W1, b1, W2, b2)`` holding the updated parameters.
    """
    current = (W1, b1, W2, b2)
    gradients = (dW1, db1, dW2, db2)
    return tuple(
        value - alpha * grad for value, grad in zip(current, gradients)
    )

In [None]:
# Requirements for gradient descent
def get_prediction(A2):
    """Return, for every column of ``A2``, the row index of its largest entry.

    ``axis=0`` is given explicitly because ``np.argmax`` without an axis
    searches the flattened array and would return a single index.
    """
    predicted_classes = np.argmax(A2, axis=0)
    return predicted_classes

def get_accuracy_percent(predictions, Y):
    """Return the percentage (0-100) of entries where ``predictions`` equals ``Y``."""
    matches = predictions == Y
    fraction_correct = np.sum(matches) / Y.size
    return fraction_correct * 100

In [None]:
# The Learning
def gradient_descent(X, Y, alpha, iterations):
    """Train the network with full-batch gradient descent.

    Starts from the parameters returned by ``init()`` and repeats a forward
    pass, a backward pass and a parameter update on the whole of ``X``. The
    loop body executes ``iterations + 1`` times (the counter runs from 0
    through ``iterations`` inclusive). On every tenth pass it prints the
    accuracy of the predictions made before that pass's update was applied.

    Args:
        X: Training inputs, passed unchanged to ``forward_propagation``.
        Y: Training labels.
        alpha: Learning rate handed to ``update_params``.
        iterations: Index of the last pass to run.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with the parameters after the final update.
    """
    params = init()

    for step in range(iterations + 1):
        W1, _, W2, _ = params
        Z1, A1, Z2, A2 = forward_propagation(X, *params)
        grads = backward_propagation(X, Y, Z1, A1, Z2, A2, W1, W2)
        params = update_params(*params, *grads, alpha)

        if step % 10:
            continue
        accuracy = get_accuracy_percent(get_prediction(A2), Y)
        print(f"Iteration {step}: {accuracy}% Accuracy")

    return params