*Building MultiLayer Perceptron*

`1 input node`, `1 hidden layer (10 neurons)`, `1 output node`

In [17]:
import numpy as np
np.random.seed(42)

X=np.random.rand(1).reshape(1,1) # 1 input node value
Y_true=1 # True value

print(X)

[[0.37454012]]


In [18]:
np.random.seed(42)
num_inputs=1 # number of inputs 1 (x)
num_neurons=10 # number of hidden layer nodes (10)

# WEIGHTS
# INPUT LAYER -> HIDDEN LAYER
weight_input_hidden=np.random.uniform(low=-0.1, high=0.1, size=(num_inputs, num_neurons))

# BIAS (APPLIED IN HIDDEN LAYER)
bias_hidden=np.random.uniform(low=-0.1, high=0.1, size=(1, num_neurons))

print(weight_input_hidden)
print(bias_hidden)


[[-0.02509198  0.09014286  0.04639879  0.0197317  -0.06879627 -0.0688011
  -0.08838328  0.07323523  0.020223    0.04161452]]
[[-0.0958831   0.09398197  0.06648853 -0.05753218 -0.06363501 -0.0633191
  -0.03915155  0.00495129 -0.013611   -0.04175417]]


In [19]:
print(X.shape)
print(weight_input_hidden.shape)
print(bias_hidden.shape)


(1, 1)
(1, 10)
(1, 10)


In [20]:
# we'll be using sigmoid activation function
def sigmoid_activation_func(z):
    return 1/(1+np.exp(-z))


In [21]:
# WEIGHT, BIAS FOR 
# HIDDEN LAYER -> OUTPUT LAYER

weight_hidden_output=np.random.rand(1,10)
bias_output=np.random.rand(1).reshape(1,1)


In [22]:
print(weight_hidden_output)
print(bias_hidden)

[[0.61185289 0.13949386 0.29214465 0.36636184 0.45606998 0.78517596
  0.19967378 0.51423444 0.59241457 0.04645041]]
[[-0.0958831   0.09398197  0.06648853 -0.05753218 -0.06363501 -0.0633191
  -0.03915155  0.00495129 -0.013611   -0.04175417]]


In [23]:
print(weight_hidden_output.shape)
print(bias_output.shape)

(1, 10)
(1, 1)


In [25]:
epochs=10000
learning_rate=0.1

for epoch in range(epochs):
    
    # FORWARD PASS
    # INPUT LAYER -> HIDDEN LAYER
    Z_hidden=X@weight_input_hidden+bias_hidden
    A_hidden=sigmoid_activation_func(Z_hidden)

    # HIDDEN LAYER -> OUTPUT LAYER
    Z_output=A_hidden@weight_hidden_output.T+bias_output
    Y_pred=sigmoid_activation_func(Z_output)

    # loss (L) : sum of squared error
    L=0.5*(Y_pred - Y_true)**2

    # BACKPROPAGATION

    # OUTPUT LAYER -> HIDDEN LAYER
    # dL/dW = (dL/dY_pred) * (dY_pred/dZ) * (dZ/dW)
    dL_dY_pred=(Y_pred - Y_true)
    dY_pred_dZ=Y_pred*(1-Y_pred)
    dZ_dW_hidden_output=A_hidden

    # dL/dZ = (dL/dY_pred)*(dY_pred/dZ)
    dL_dZ_output=dL_dY_pred*dY_pred_dZ

    # dL/dW = (dL/dW)*(dZ/dW)
    dL_dW_hidden_output=dL_dZ_output*dZ_dW_hidden_output

    # dL/db = (dL/dY_pred)*(dY_pred/dZ)*(dZ/db)
    # dL/dZ = (dL/dY_pred)*(dY_pred/dZ)
    dZ_db_output=1
    dL_db_output=dL_dZ_output*dZ_db_output

    # UPDATING WEIGHTS, BIAS
    weight_hidden_output-=learning_rate*dL_dW_hidden_output
    bias_output-=learning_rate*dL_db_output

    
    # HIDDEN LAYER -> INPUT LAYER
    # dL/dW = (dL/dA_hidden)*(dA_hidden/dZ)*(dZ/dW)
    # dL/dA_hidden = (dL/dY_pred)*(dY_pred/dZ)*(dZ/dA_hidden)
    # (dL/dY_pred)*(dY_pred/dZ) = (dL/dZ)
    # dL/dA_hidden = (dL/dZ)*(dZ/dA_hidden)
    
    # dZ/dA_hidden
    dZ_dA_hidden=weight_hidden_output

    # dL/dA_hidden
    dL_dA_hidden=dL_dZ_output*dZ_dA_hidden

    # dA_hidden/dZ = A_hidden*(1-A_hidden)
    dA_hidden_dZ=A_hidden*(1-A_hidden)

    # dZ/dW
    dZ_dW_input_hidden=X

    # dL/dZ_hidden=(dL/dA_hidden)*(dA_hidden/dZ)
    dL_dZ_hidden=dL_dA_hidden*dA_hidden_dZ

    # dL/dW = (dL/dZ)*(dZ/dW)
    dL_dW_input_hidden=dL_dZ_hidden*dZ_dW_input_hidden

    # dL/db = (dL/dZ)*(dZ/db)
    dZ_db_hidden=1
    dL_db_hidden=dL_dZ_hidden*dZ_db_hidden

    # WEIGHTS, BIAS UPDATION
    weight_input_hidden-=learning_rate*dL_dW_input_hidden
    bias_hidden-=learning_rate*dL_db_hidden


    if epoch%1000==0:
        loss_val=L.item() if np.ndim(L)==0 or L.size==1 else np.mean(L)
        Y_pred_val=Y_pred.item() if np.ndim(Y_pred)==0 or Y_pred.size==1 else np.mean(Y_pred)

        print(f"Epoch {epoch} | Loss : {loss_val:.4f}, Y_pred : {Y_pred_val:.4f}")


Epoch 0 | Loss : 0.0001, Y_pred : 0.9886
Epoch 1000 | Loss : 0.0001, Y_pred : 0.9891
Epoch 2000 | Loss : 0.0001, Y_pred : 0.9896
Epoch 3000 | Loss : 0.0001, Y_pred : 0.9900
Epoch 4000 | Loss : 0.0000, Y_pred : 0.9904
Epoch 5000 | Loss : 0.0000, Y_pred : 0.9907
Epoch 6000 | Loss : 0.0000, Y_pred : 0.9910
Epoch 7000 | Loss : 0.0000, Y_pred : 0.9913
Epoch 8000 | Loss : 0.0000, Y_pred : 0.9915
Epoch 9000 | Loss : 0.0000, Y_pred : 0.9918
