<a href="https://colab.research.google.com/github/Vishnu-1203/Neural-Network-From-Scratch/blob/main/Neural_network_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [134]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


In [135]:
train=pd.read_csv("/content/mnist_train.csv")
test=pd.read_csv("/content/mnist_test.csv")

train=np.array(train)
test=np.array(test)

m,n=train.shape

Splitting Data

---



In [136]:
y_train=train[:,0]
x_train=(train[:,1:]/255).T

y_test=test[:,0]
x_test=(test[:,1:]/255).T


print(x_train.shape)
print(y_train.shape)

(784, 60000)
(60000,)


Initializing Parameters and Forward Propogation


---



In [137]:
def init_params():
  w1=np.random.rand(10,784)-0.5
  b1=np.random.rand(10,1)-0.5

  w2=np.random.rand(10,10)-0.5
  b2=np.random.rand(10,1)-0.5

  return w1,b1,w2,b2

w1,b1,w2,b2=init_params()

def ReLu(Z):
  return np.maximum(0,Z)

def softmax(Z):
  A = np.exp(Z) / sum(np.exp(Z))
  return A

def forward_prop(w1,b1,w2,b2,x):
  a1=w1.dot(x)+b1
  z1=ReLu(a1)
  z2=w2.dot(a1)+b2
  a2=softmax(z2)
  return z1,a1,z2,a2



Backward Propogation

---



In [138]:
def one_hot(y):
  one_hot_y=np.zeros((y.size,y.max()+1))
  one_hot_y[np.arange(y.size),y]=1
  return one_hot_y.T

def deriv_ReLu(Z):
  return Z>0



In [139]:
def backward_prop(z1,a1,z2,a2,w1,w2,x,y):
  one_hot_y=one_hot(y)

  dz2=a2-one_hot_y
  dw2=1/m*dz2.dot(a1.T)
  db2=1/m*(np.sum(dz2))

  dz1=w2.T.dot(dz2)*deriv_ReLu(z1)
  dw1=1/m*dz1.dot(x.T)
  db1=1/m*(np.sum(dz1))

  return dw1,db1,dw2,db2



Updating parameters

---





In [140]:
def update_params(w1,b1,w2,b2,dw1,db1,dw2,db2,alpha):
  w2=w2-alpha*dw2
  b2=b2-alpha*db2
  w1=w1-alpha*dw1
  b1=b1-alpha*db1
  return w1,b1,w2,b2

Gradient Descent

---



In [141]:
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

def gradient_descent(x,y,iterations,alpha):
  w1,b1,w2,b2=init_params()
  for i in range(iterations):
    z1,a1,z2,a2=forward_prop(w1,b1,w2,b2,x)
    dw1,db1,dw2,db2=backward_prop(z1,a1,z2,a2,w1,w2,x,y)
    w1,b1,w2,b2=update_params(w1,b1,w2,b2,dw1,db1,dw2,db2,alpha)
    if(i%10==0):
      print("Epoch:",i)
      print("Accuracy:",get_accuracy(get_predictions(a2),y))

  return w1,b1,w2,b2


In [143]:
W1,B1,W2,B2=gradient_descent(x_train,y_train,500,0.1)

Epoch: 0
[3 3 3 ... 3 0 3] [5 0 4 ... 5 6 8]
Accuracy: 0.032883333333333334
Epoch: 10
[8 0 7 ... 8 0 8] [5 0 4 ... 5 6 8]
Accuracy: 0.20503333333333335
Epoch: 20
[3 0 7 ... 8 0 0] [5 0 4 ... 5 6 8]
Accuracy: 0.3515
Epoch: 30
[5 0 4 ... 5 0 0] [5 0 4 ... 5 6 8]
Accuracy: 0.44233333333333336
Epoch: 40
[5 0 4 ... 5 0 6] [5 0 4 ... 5 6 8]
Accuracy: 0.49496666666666667
Epoch: 50
[5 0 4 ... 5 0 6] [5 0 4 ... 5 6 8]
Accuracy: 0.5309333333333334
Epoch: 60
[5 0 4 ... 5 0 6] [5 0 4 ... 5 6 8]
Accuracy: 0.5581833333333334
Epoch: 70
[5 0 4 ... 5 0 6] [5 0 4 ... 5 6 8]
Accuracy: 0.5799
Epoch: 80
[5 0 4 ... 5 0 6] [5 0 4 ... 5 6 8]
Accuracy: 0.5974333333333334
Epoch: 90
[5 0 4 ... 5 0 6] [5 0 4 ... 5 6 8]
Accuracy: 0.6133166666666666
Epoch: 100
[5 0 4 ... 5 0 6] [5 0 4 ... 5 6 8]
Accuracy: 0.6268
Epoch: 110
[5 0 4 ... 5 0 6] [5 0 4 ... 5 6 8]
Accuracy: 0.6385333333333333
Epoch: 120
[5 0 4 ... 5 6 6] [5 0 4 ... 5 6 8]
Accuracy: 0.64925
Epoch: 130
[5 0 4 ... 5 6 6] [5 0 4 ... 5 6 8]
Accuracy: 0.659533

Testing Model

---



In [146]:
def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_prop(W1, B1, W2, B2, X)
    predictions = get_predictions(A2)
    return predictions

dev_predictions = make_predictions(x_test, W1, B1, W2, B2)
get_accuracy(dev_predictions, y_test)

[7 2 1 ... 4 8 6] [7 2 1 ... 4 5 6]


np.float64(0.8045)

Final Verdict

---

79.8% Model accuracy on training set

80.04% Model accuracy on test set


