Multi Layer Perceptron

`10 input nodes`, `10 hidden-layer neurons`, `1 output node`

In [164]:
import numpy as np
# Pin the global NumPy RNG so the jitter drawn below is reproducible.
np.random.seed(42)

# Ten evenly spaced grid values from 0 to 1 (both endpoints included).
x_points = np.linspace(start=0, stop=1, num=10)

# One uniform perturbation in [-0.05, 0.05) per grid value.
noise = np.random.uniform(low=-0.05, high=0.05, size=x_points.shape)

# Network input: the grid values with the jitter added.
x_inputs = noise + x_points

# Scalar target the network is trained to output.
y_true = 1


In [172]:
# hidden layer 10 neurons
np.random.seed(42)
num_inputs=10
num_neurons=10

# WEIGHTS, BIAS FOR INPUT LAYER -> HIDDEN LAYER
# Layout is (num_neurons, num_inputs): one row per hidden neuron, one column
# per input value. The training loop computes `weights_input_hidden @ x_inputs`
# and builds the weight gradient with `np.outer(dl_dz_hidden, x_inputs)`, and
# both of those require this orientation. With num_inputs == num_neurons the
# drawn values are the same as with the transposed shape, but this layout
# keeps working when the two sizes differ.
weights_input_hidden=np.random.uniform(low=-0.1, high=0.1,size=(num_neurons, num_inputs))
# Kept 2-D (1, num_neurons) so adding it to the 1-D pre-activation yields a
# row vector.
bias_hidden=np.random.uniform(low=-0.1, high=0.1, size=(1,num_neurons))

print(weights_input_hidden)
print(bias_hidden)

[[-0.02509198  0.09014286  0.04639879  0.0197317  -0.06879627 -0.0688011
  -0.08838328  0.07323523  0.020223    0.04161452]
 [-0.0958831   0.09398197  0.06648853 -0.05753218 -0.06363501 -0.0633191
  -0.03915155  0.00495129 -0.013611   -0.04175417]
 [ 0.02237058 -0.07210123 -0.04157107 -0.02672763 -0.008786    0.05703519
  -0.06006524  0.00284689  0.01848291 -0.09070992]
 [ 0.02150897 -0.06589518 -0.08698968  0.08977711  0.09312641  0.06167947
  -0.03907725 -0.08046558  0.03684661 -0.0119695 ]
 [-0.07559235 -0.00096462 -0.0931223   0.08186408 -0.048244    0.03250446
  -0.03765778  0.0040136   0.00934206 -0.06302911]
 [ 0.09391693  0.05502656  0.08789979  0.07896547  0.01958     0.08437485
  -0.0823015  -0.06080343 -0.09095454 -0.03493393]
 [-0.02226454 -0.04573019  0.0657475  -0.02864933 -0.0438131   0.00853922
  -0.07181516  0.0604394  -0.08508987  0.09737739]
 [ 0.05444895 -0.06025686 -0.09889558  0.06309229  0.04137147  0.04580143
   0.05425407 -0.08519107 -0.02830685 -0.07682619]
 [

In [170]:
# Show the weight-matrix shape and the input-vector shape, one per line,
# to check they are compatible for the matrix product in the forward pass.
print(weights_input_hidden.shape, x_inputs.shape, sep="\n")

(10, 10)
(10,)


In [171]:
# sigmoid activation function
def sigmoid_activation_function(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

    Evaluated in a numerically stable way: only exp(-|z|) is computed, which
    lies in [0, 1], so large-magnitude inputs never overflow (the direct
    formula overflows in exp(-z) for large negative z and emits a
    RuntimeWarning).

    Parameters
    ----------
    z : scalar or array-like of real numbers
        Pre-activation value(s). Integer and boolean input is converted to
        float; floating-point dtypes are preserved.

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of z,
    with every value in [0, 1].
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)

    # exp(-|z|) is at most 1, so neither branch below can overflow.
    decay = np.exp(-np.abs(z))

    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    # The two expressions are algebraically identical.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with () turns a 0-d array into a NumPy scalar and leaves
    # arrays with one or more dimensions as arrays.
    return result[()]

In [173]:

# Output-layer parameters (hidden layer -> single output node).
# Re-seeding restarts the global NumPy stream, so the values drawn here do
# not depend on how many random numbers earlier cells consumed.
np.random.seed(42)

# Row vector of 10 weights drawn uniformly from [0, 1). The width 10 must
# match the number of hidden neurons, because the forward pass multiplies
# the hidden activations by this matrix transposed.
weights_hidden_output = np.random.random_sample(size=(1, 10))

# A single bias value, stored as a (1, 1) array.
bias_output = np.random.random_sample(size=(1, 1))

print(weights_hidden_output, bias_output, sep="\n")

[[0.37454012 0.95071431 0.73199394 0.59865848 0.15601864 0.15599452
  0.05808361 0.86617615 0.60111501 0.70807258]]
[[0.02058449]]


In [174]:
# Plain gradient descent on the single training example: every iteration runs
# one forward pass, one backward pass and one parameter update.
epochs=1000000
learning_rate=0.01

for epoch in range(epochs):

    # FORWARD PASS
    # INPUT LAYER -> HIDDEN LAYER
    # bias_hidden is 2-D, so the sum broadcasts to a row vector with one
    # entry per hidden neuron.
    z_hidden=weights_input_hidden@x_inputs+bias_hidden
    a_hidden=sigmoid_activation_function(z_hidden)

    # HIDDEN LAYER -> OUTPUT LAYER
    z_output=a_hidden@weights_hidden_output.T+bias_output
    y_pred=sigmoid_activation_function(z_output)

    # loss (L): half squared error between prediction and target
    loss=0.5*(y_pred-y_true)**2

    # BACKPROPAGATION
    # Every gradient below is evaluated with the parameter values that were
    # used in the forward pass. The updates are applied only after all
    # gradients are known. Updating weights_hidden_output first would feed
    # already-modified weights into the hidden-layer gradient.

    # FROM OUTPUT LAYER -> HIDDEN LAYER
    # dL/dw = dL/dy^ * dy^/dZ * dZ/dw
    dl_dy_pred=(y_pred - y_true)
    dy_pred_dz_output=y_pred*(1-y_pred)   # derivative of the sigmoid
    dl_dz_output=dl_dy_pred*dy_pred_dz_output
    dz_output_dw_hidden_output=a_hidden

    dl_dw_hidden_output=dl_dz_output*dz_output_dw_hidden_output # weights gradients

    # dL/db = dL/dy^ * dy^/dZ * dZ/db
    dl_db_output=dl_dz_output*1 # bias gradients

    # FROM HIDDEN LAYER -> INPUT LAYER
    # hidden layer gradients
    # dL/dW = dL/da_hidden * da_hidden/dz_hidden * dz_hidden/dw_input_hidden
    # dL/da_hidden = dL/dy^ * dy^/dZ * dZ/da_hidden
    dl_da_hidden=dl_dz_output*weights_hidden_output   # pre-update weights
    da_hidden_dz_hidden=a_hidden*(1-a_hidden)
    # dl/dz_hidden
    dl_dz_hidden=dl_da_hidden*da_hidden_dz_hidden
    dz_hidden_dw_input_hidden=x_inputs

    # np.outer flattens both arguments, giving one row per hidden neuron and
    # one column per input, the same layout the forward product W @ x uses.
    dl_dw_input_hidden=np.outer(dl_dz_hidden,dz_hidden_dw_input_hidden)
    dl_db_hidden=dl_dz_hidden

    # weights, bias updation (all layers, after all gradients are computed)
    weights_hidden_output-=learning_rate*dl_dw_hidden_output
    bias_output-=learning_rate*dl_db_output
    weights_input_hidden-=learning_rate*dl_dw_input_hidden
    bias_hidden-=learning_rate*dl_db_hidden


    if epoch%10000==0:

        # Convert to a plain scalar when there is a single element,
        # otherwise report the mean.
        loss_val = loss.item() if np.ndim(loss) == 0 or loss.size == 1 else np.mean(loss)
        y_pred_val = y_pred.item() if np.ndim(y_pred) == 0 or y_pred.size == 1 else np.mean(y_pred)

        print(f"Epoch {epoch}| Loss : {loss_val:.4f}, y_pred : {y_pred_val:.4f}")
    
    
    

Epoch 0| Loss : 0.0028, y_pred : 0.9247
Epoch 10000| Loss : 0.0005, y_pred : 0.9690
Epoch 20000| Loss : 0.0002, y_pred : 0.9777
Epoch 30000| Loss : 0.0002, y_pred : 0.9818
Epoch 40000| Loss : 0.0001, y_pred : 0.9843
Epoch 50000| Loss : 0.0001, y_pred : 0.9861
Epoch 60000| Loss : 0.0001, y_pred : 0.9873
Epoch 70000| Loss : 0.0001, y_pred : 0.9883
Epoch 80000| Loss : 0.0001, y_pred : 0.9891
Epoch 90000| Loss : 0.0001, y_pred : 0.9898
Epoch 100000| Loss : 0.0000, y_pred : 0.9903
Epoch 110000| Loss : 0.0000, y_pred : 0.9908
Epoch 120000| Loss : 0.0000, y_pred : 0.9912
Epoch 130000| Loss : 0.0000, y_pred : 0.9916
Epoch 140000| Loss : 0.0000, y_pred : 0.9919
Epoch 150000| Loss : 0.0000, y_pred : 0.9922
Epoch 160000| Loss : 0.0000, y_pred : 0.9925
Epoch 170000| Loss : 0.0000, y_pred : 0.9927
Epoch 180000| Loss : 0.0000, y_pred : 0.9929
Epoch 190000| Loss : 0.0000, y_pred : 0.9931
Epoch 200000| Loss : 0.0000, y_pred : 0.9933
Epoch 210000| Loss : 0.0000, y_pred : 0.9935
Epoch 220000| Loss : 0.0