# Neural Network by Scratch

### Importing Numpy and Pandas 

In [1]:
import numpy as np
import pandas as pd

### Loading the Dataset as a Pandas Dataframe

In [2]:
data = pd.read_csv('train.csv')
data.head(5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Train and Validation set split
We need to first convert the dataframe into a numpy array and transpose it

In [25]:
data = np.array(data)
m , n = data.shape
print(f'data.shape = {m,n}\n')
np.random.shuffle(data)

data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data[1:1000]

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:m]

X_train = X_train / 255
print(f'X_train.shape = {X_train.shape}')
print(f'Y_train.shape = {Y_train.shape}')

data.shape = (42000, 785)

X_train.shape = (784, 41000)
Y_train.shape = (41000,)


### Initializing Parameters

In [26]:
def init_params():

    W1 = np.random.rand(10,784) - 0.5
    b1 = np.random.rand(10,1) - 0.5
    
    W2 = np.random.rand(10,10) - 0.5
    b2 = np.random.rand(10,1) - 0.5
    
    return W1, b1, W2, b2

### ReLu Function

In [28]:
def ReLu(Z):
    return np.maximum(Z,0)

### Softmax Function

In [29]:
def softmax(Z):
    return np.exp(Z)/sum(np.exp(Z))

### Forward Propagation

In [30]:
def forward_prop(W1, b1, W2, b2, X):
    
    Z1 = W1.dot(X) + b1
    A1 = ReLu(Z1)
    
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    
    return Z1, A1, Z2, A2

### Derivative of ReLu

In [15]:
def deriv_relu(Z):
    return Z>0

### One Hot Encoder Function

In [33]:
def one_hot(Y):
    
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    
    return one_hot_Y

### Backward Propagation

In [34]:
def back_prop(Z1, A1, Z2, A2, W2, X, Y):

    m = Y.shape[0]
    
    dZ2 = A2 - one_hot(Y)
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * sum(dZ2)
    
    dZ1 = W2.T.dot(dZ2) * deriv_relu(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * sum(dZ1)
    
    return dW1, db1, dW2, db2

### Update Parameters

In [36]:
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):

    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    
    return W1, b1, W2, b2

### Prediction and Accuracy functions

In [37]:
def make_prediction(A2):
    return np.argmax(A2, 0)

def get_accuracy(Y_hat, Y):
    print(Y_hat, Y)
    return np.sum(Y_hat==Y)/Y.size

### Training Function

In [38]:
def train(X, Y, iterations, alpha):
    
    W1, b1, W2, b2 = init_params()
    
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = back_prop(Z1, A1, Z2, A2, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        
        if (i%10 == 0):
            acc.append(get_accuracy(make_prediction(A2),Y))
            print(f'Iteration: {i}')
            print(f'Accuracy: {acc}')
            
    return W1, b1, W2, b2, acc.max()

In [43]:
def lr_tuning(alpha1, alpha2):
    for i in range(alpha1, alpha2):
            train(X_train, Y_train, 500, i)

Iteration: 0
[3 9 3 ... 3 5 3] [0 3 6 ... 0 1 8]
Accuracy: 0.10748780487804878
Iteration: 10
[3 9 3 ... 3 5 3] [0 3 6 ... 0 1 8]
Accuracy: 0.18226829268292682
Iteration: 20
[3 3 3 ... 0 5 3] [0 3 6 ... 0 1 8]
Accuracy: 0.25470731707317074
Iteration: 30
[2 8 2 ... 3 5 3] [0 3 6 ... 0 1 8]
Accuracy: 0.3528292682926829
Iteration: 40
[2 8 2 ... 3 5 2] [0 3 6 ... 0 1 8]
Accuracy: 0.42036585365853657
Iteration: 50
[2 8 2 ... 3 5 2] [0 3 6 ... 0 1 8]
Accuracy: 0.4516341463414634
Iteration: 60
[2 8 2 ... 3 3 2] [0 3 6 ... 0 1 8]
Accuracy: 0.4780731707317073
Iteration: 70
[2 8 2 ... 3 3 2] [0 3 6 ... 0 1 8]
Accuracy: 0.5013414634146341
Iteration: 80
[2 8 2 ... 3 3 2] [0 3 6 ... 0 1 8]
Accuracy: 0.5207073170731708
Iteration: 90
[2 8 2 ... 3 3 2] [0 3 6 ... 0 1 8]
Accuracy: 0.5390975609756098
Iteration: 100
[2 3 2 ... 3 3 2] [0 3 6 ... 0 1 8]
Accuracy: 0.5560243902439025
Iteration: 110
[2 3 2 ... 3 3 2] [0 3 6 ... 0 1 8]
Accuracy: 0.575
Iteration: 120
[2 3 2 ... 3 3 2] [0 3 6 ... 0 1 8]
Accuracy:

(array([[-0.21860741, -0.47637675, -0.14514304, ...,  0.24879068,
         -0.13401874,  0.17871622],
        [ 0.31284757,  0.42411262, -0.30450295, ...,  0.42988734,
         -0.41618624, -0.14221756],
        [ 0.15431825, -0.25645049,  0.03622124, ..., -0.2455771 ,
          0.30065674,  0.38986651],
        ...,
        [ 0.13030052,  0.24872402,  0.48809735, ...,  0.11512203,
          0.35033693, -0.34086629],
        [ 0.32330205,  0.33993954, -0.19393134, ...,  0.10892628,
         -0.11205638, -0.43374063],
        [ 0.33222917, -0.20152062, -0.35440436, ...,  0.11902055,
         -0.2283756 , -0.20472844]]),
 array([[0.33594877, 0.33617632, 0.33616968, ..., 0.33598572, 0.33594506,
         0.33608443],
        [0.26547913, 0.26570668, 0.26570004, ..., 0.26551607, 0.26547542,
         0.26561479],
        [0.19344023, 0.19366777, 0.19366114, ..., 0.19347717, 0.19343651,
         0.19357589],
        ...,
        [0.04851813, 0.04874568, 0.04873904, ..., 0.04855507, 0.04851442