# Building Neural Network from scratch

In [2]:
import numpy as np

#### Input data

In [171]:
X=np.array([0.05, 
   0.1])
X

array([0.05, 0.1 ])

#### Ground Truth

In [172]:
Y = np.array([0.01, 
   0.99])
Y

array([0.01, 0.99])

### Forward Propagation

In [173]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

In [175]:
def forward_prop(W, b , X, activation='sigmoid'):
    h = W@X.T + b
    if  activation=='sigmoid':
        h = sigmoid(h)
    return np.around(h, 4)

#### Hiden Layer 1

In [176]:
W1=np.array([[0.15, 0.25],
    [0.2, 0.3]])
b1 =np.array([0.35,
     0.35])

h1 = forward_prop(W1, b1, X)
print(h1)

[0.5945 0.5963]


#### Output layer

In [177]:
W2=np.array([[0.4, 0.5],
    [0.45, 0.55]])
b2 =np.array([0.6,
     0.6])

O = forward_prop(W2, b2, h1)
print(O)

[0.7569 0.7677]


### Loss

In [178]:
def MSE(y, yhat):
    diff = (yhat -y)
    loss = 0.5*diff.T@diff
    return loss

In [179]:
Loss = MSE(Y, O)
Loss

0.30363845

### Backpropagation

#### Gradient funcitions

In [180]:
def ouputGrad(y, yhat):
    return (yhat-y)

In [181]:
def sigmoidGrad(houtput, outgrad):
    grad = houtput*(1-houtput) * outgrad
    return grad

In [182]:
def weightGrad(hinput, outgrad):
    grad = np.expand_dims(outgrad, axis=1)*hinput.T
    return grad

In [183]:
def hiddenUnitGrad(W, outgrad):
    grad = W@outgrad.T
    return grad

#### Calculating Gradients

- Gradient of Loss wrt output probabilities

In [184]:
Ograd = ouputGrad(Y, O)
Ograd

array([ 0.7469, -0.2223])

- Gradient of Loss wrt output score

In [185]:
a2Grad = sigmoidGrad(O, Ograd)
a2Grad

array([ 0.13743139, -0.03964425])

- Gradient of Loss wrt output 2nd layer Weights

In [186]:
W2grad = weightGrad(h1, a2Grad)
W2grad

array([[ 0.08170296,  0.08195033],
       [-0.02356851, -0.02363987]])

- Gradient of Loss wrt 2nd layer Biases

In [187]:
b2grad = a2Grad

- Gradient of Loss wrt 2nd layer Inputs

In [188]:
h1Grad = hiddenUnitGrad(W2, a2Grad)
h1Grad

array([0.03515043, 0.04003979])

- Gradient of Loss wrt 1st layer aggregation

In [189]:
a1grad = sigmoidGrad(h1, h1Grad)
a1grad

array([0.00847371, 0.00963863])

- Gradient of Loss wrt 1st layer Weights

In [190]:
W1grad = weightGrad(X, a1grad)
W1grad

array([[0.00042369, 0.00084737],
       [0.00048193, 0.00096386]])

- Gradient of Loss wrt 1st layer Biases

In [191]:
b1grad = a1grad

#### Updating Learnable parameters 

In [197]:
def para_update(parameter,gradient, alpha):
    parameter= parameter - alpha*gradient
    return parameter

In [193]:
## learning Rate
alpha = 0.1

In [195]:
print('Old parameters:')
print("W1 :", W1)
print("b1 :", b1)
print("W2 :", W2)
print("b2 :", b2)


Old parameters:
W1 : [[0.15 0.25]
 [0.2  0.3 ]]
b1 : [0.35 0.35]
W2 : [[0.4  0.5 ]
 [0.45 0.55]]
b2 : [0.6 0.6]


In [201]:
## W2 update
W2 = para_update(W2, W2grad, alpha)

## b2 update
b2 = para_update(b2, b2grad, alpha)

## W1 update
W1 = para_update(W1, W1grad, alpha)

## b1 update
b1 = para_update(b1, b1grad, alpha)


In [211]:
print('New parameters:')
print("W1 :", W1)
print("b1 :", b1)
print("W2 :", W2)
print("b2 :", b2)

New parameters:
W1 : [[0.14995763 0.24991526]
 [0.19995181 0.29990361]]
b1 : [0.34915263 0.34903614]
W2 : [[0.3918297  0.49180497]
 [0.45235685 0.55236399]]
b2 : [0.58625686 0.60396443]
