Simple Neuron implementation from scratch 

In [13]:
import numpy as np

def relu(output):
    return np.maximum(0,output)

weight = np.random.rand()
bias = np.random.randint(1,10)
input = np.random.randint(1,100)

output = np.dot(weight, input) + bias
print(relu(output))

In [21]:
weights = [[.3,.6,.7],
[.2,.54,.87],
[.96,.23,.55]]

inputs = [[1],
[2],
[3]]

bias = [[2],
[3],
[5]]

output = np.dot(weights, inputs) + bias
print(relu(output))

[[5.6 ]
 [6.89]
 [8.07]]


Simple 3 Layered Neural Network (Forward Propagation)

In [19]:
import numpy as np
input_original = np.array([[1],[2],[3],[4],[5]]) #(5x1)

#(5x3) matrix: 5 inputs, 3 neurons
weights_layer1 = np.hstack([
   np.array([[.3],[.2],[.5],[.23],[.6]]), #Neuron 1 Weights
   np.array([[.8],[.232],[.9],[.53],[.66]]), #Neuron 2 Weights
   np.array([[.32],[.27],[.655],[.23],[.46]]) #Neuron 3 Weights
]) 

#(3x2) matrix: 3 inputs, 2 neurons
weights_layer2 = np.hstack([
   np.array([[.3],[.2],[.5]]), #Neuron 1 Weights
   np.array([[.8],[.232],[.9]]), #Neuron 2 Weights
]) 

#(2x1), 2 inputs for 1 neuron
weight_final = np.hstack([
   np.array([[0.63],[.75]]) #Final Neuron Weights
])

bias_layer1 = np.array([[3],[1],[5]]) #(3x1)
bias_layer2 = np.array([[1],[2]]) #(2x1)
bias_final = np.array([[7]])

def hidden_layer1(inputs, weights, bias):
   #(1x5)*(5x3) + (1x3), (inputs^T * weights) + bias^T
   output = np.dot(inputs.T, weights) + bias.T #(1x3)
   return np.maximum(0,output)

def hidden_layer2(inputs, weights, bias):
   #(1x3)*(3x2) + (1x3), (inputs^T * weights) + bias^T
   output = np.dot(inputs, weights) + bias.T #(1x3)
   return np.maximum(0,output)

def final_layer(inputs, weights, bias):
   #(2x1)*(1x1) + (1x1)
   output = np.dot(inputs, weights) + bias
   return np.maximum(0,output)
print("Inputs:\n",input_original)
print("Inputs for 1st layer:\n",hidden_layer1(input_original, weights_layer1, bias_layer1))
inputs_2 = np.array(hidden_layer1(input_original, weights_layer1, bias_layer1))
print("Inputs for 2nd layer:\n",hidden_layer2(inputs_2, weights_layer2, bias_layer2))
inputs_final = np.array(hidden_layer2(inputs_2, weights_layer2, bias_layer2)) 
output_final = final_layer(inputs_final, weight_final, bias_final)
print("Final output:\n",output_final.squeeze())

Inputs:
 [[1]
 [2]
 [3]
 [4]
 [5]]
Inputs for 1st layer:
 [[ 9.12  10.384 11.045]]
Inputs for 2nd layer:
 [[11.3353   21.645588]]
Final output:
 30.37543


3 Layered Back-Propagation Neural Network

In [16]:
import numpy as np


#        ================== Forward Pass ================== 
''' 
Feed Forward Multi Layer Neural Network:
1. y = mx + c | no = w x i + b | neuron_output = weights x inputs + bias
2. activation_function(neuron_output)
'''

#(3x1)
inputs_original = np.array([[1],[2],[3]]) 

#(3x3)
weights_layer1 = np.hstack([
   np.array([[.3],[.34],[.67]]), # Weights for Neuron 1, (3x1)
   np.array([[.2],[.23],[.21]]), # Weights for Neuron 2, (3x1)
   np.array([[.4],[.43],[.11]]) # Weights for Neuron 3, (3x1)
])

#(3x2)
weights_layer2 = np.hstack([
   np.array([[.3],[.34],[.44]]), # Weights for Neuron 1, (3x1)
   np.array([[.2],[.23],[.75]]), # Weights for Neuron 2, (3x1)
])

#(2x1)
weight_final = np.array([[.33],[.66]]) # Weight for final Neuron

bias_layer1 = np.array([[1],[2],[1]]) # Biases for Hidden 1st Layer, (3x1)
bias_layer2 = np.array([[2],[3]]) # Biases for Hidden 2nd Layer, (2x1)
bias_final = np.array([1.1]) # Bias for Final Layer, (1x1)

def hidden_layer1(inputs, weights, bias):
   # (3x1)*(3x3) + (1x3)
   output = np.dot(inputs.T, weights) + bias.T
   return output, np.maximum(0, output)

def hidden_layer2(inputs, weights, bias):
   # (1x3)*(3x2) + (1x2)
   output = np.dot(inputs, weights) + bias.T
   return output, np.maximum(0, output)

def final_layer(inputs, weights, bias):
   # (1x2)*(2x1) + (1x1)
   output = np.dot(inputs, weights) + bias
   return output, output

# 'z' represents raw neuron output without activation function
output_layer_1, z1 = hidden_layer1(inputs_original, weights_layer1, bias_layer1)
output_layer_2, z2 = hidden_layer2(output_layer_1, weights_layer2, bias_layer2)
final_layer, z_final = final_layer(output_layer_2, weight_final, bias_final)
print("================== Forward Pass ==================")
print("1st Layer output:\n", output_layer_1,"\n")
print("2st Layer output:\n", output_layer_2,"\n")
print("Final Layer output:\n", final_layer.squeeze().round(3),'\n')


print("================== Backward Pass ==================")  
'''

==================================== Back Propagation =====================================
1. Loss Function (Mean Squared Error):
   l = 1/2(a - p)^2 | loss = 0.5(actual - predicted)^2

2. Gradients:
Note: Weights, Biases, Final Output are derivated/unpacked to update wrt to gradient/changes/difference between actual & predicted.

   [Final/Output Layer]
      Loss w.r.t Output: (Loss Function Derivative)
         dL/dp = (p - a) | d_loss_output = predicted - actual
      Predicted w.r.t Weight:
         dp/dw = A | d_predicted_weight = neuron_output        ### Note: You can use 'final_output' in place of 'neuron_output' ###
      Loss w.r.t Weight:
         dL/dw = A x (dL/dp) | d_loss_weight = neuron_output x d_loss_output
      Loss w.r.t Bias:
         dL/db = sum(dL/dp) | d_loss_bias = sum(loss_output)

   [Hidden Layer]
      Loss w.r.t  Pre-Activation Neuron Output (Z):
         dL/dZ = dL/dA x dA/dZ = (dL/dp x w).ReLu(Z) | d_loss_raw_neuron_output = (d_loss_output x weight).activation_function(d_raw_neuron_output)
      Loss w.r.t Weight:
         dL/dw = A x (dL/dZ) | d_loss_weight = neuron_output x (d_loss_raw_neuron_output)
      Loss w.r.t Bias:
         dL/db = sum(dL/dZ) | d_loss_bias = sum(d_loss_raw_neuron_output)
   
   [Input Layer]
      Loss w.r.t Pre-Activation Neuron Output (Z):
         dL/dZ = (dL/dZ x w).ReLu(Z) | d_loss_raw_neuron_output = (d_loss_raw_neuron_output x weight).activation_function(d_raw_neuron_output)
      Loss w.r.t Weight:
         dL/dw = i x dL/dZ | d_loss_weight = input x d_loss_raw_neuron_output
      Loss w.r.t Bias:
         dL/db = sum(dL/dZ) | d_loss_bias = sum(d_loss_raw_neuron_output)

   [Update Weights & Biases]
      w = w - (lr x dL/dw) | weight = weight - learning_rate x d_loss_weight
      b = b - (lr x dL/db) | bias = bias - learning_rate x d_loss_weight
'''


def ReLu_derivative(neuron):
   return (neuron > 0).astype(float)
    
def bp_layer_final(A_prev, predicted, actual):
   '''   
   [Final/Output Layer]
      Loss w.r.t Predicted or Output: (Loss Function Derivative)
         dL/dp = (p - a)
      Loss w.r.t Weight:
         dL/dw = A x (dL/dp)
      Loss w.r.t Bias:
         dL/db = sum(dL/dp)
   '''
   '''
    [Final/Output Layer]
      Inputs:
        A_prev: Activation from previous layer (shape: 1xN)
        predicted: Predicted output (shape: 1x1)
        actual: Actual value (scalar)
    '''
   # Loss gradient w.r.t. predicted output (dL/dp) 
   dL_dp = predicted - actual 

   # Gradient for weights (dL/dw) = A_prev^T @ dL_dp
   dL_dw = A_prev.T @ dL_dp
   
   # Gradient for bias (dL/db) = sum(dL_dp)
   dL_db = np.sum(dL_dp)
   return dL_dw, dL_db

def bp_layer_hidden(A_prev, dL_dz_next, weight_next, z_current):
   '''   
   [Hidden Layer]
      Loss w.r.t  Pre-Activation/Raw Neuron Output (Z):
         dL/dZ = dL/dA x dA/dZ = (dL/dp x w).ReLu(Z)
      Loss w.r.t Weight:
         dL/dw = A x (dL/dZ)
      Loss w.r.t Bias:
         dL/db = sum(dL/dZ)
   '''
   '''
    [Hidden Layer]
      Inputs:
        A_prev: Activation from previous layer (shape: 1xM)
        dL_dz_next: Gradient from the next layer (shape: 1xN)
        weight_next: Weights of the next layer (shape: MxN)
        z_current: Pre-activation of the current layer (shape: 1xM)
    '''
   # Gradient w.r.t. pre-activation (dL/dz_current)
   dL_dz_current = (dL_dz_next @ weight_next.T) * ReLu_derivative(z_current) # (1xR)

   # Gradient for weights (dL/dw) = A_prev^T @ dL_dz_current
   dL_dw = A_prev.T @ dL_dz_current # (RxR)

   # Gradient for bias (dL/db) = sum(dL_dz_current)
   dL_db = np.sum(dL_dz_current)
   return dL_dw, dL_db, dL_dz_current

def bp_layer_input(input_original, dL_dz_next, weight_next, z_current):
   '''   
   [Input Layer]
      Loss w.r.t Pre-Activation Neuron Output (Z):
         dL/dZ = (dL/dZ x w).ReLu(Z)
      Loss w.r.t Weight:
         dL/dw = i x dL/dZ
      Loss w.r.t Bias:
         dL/db = sum(dL/dZ)
   '''
   '''
    [Input Layer]
      Inputs:
        input: Input data (shape: 1xD)
        dL_dz_next: Gradient from the next layer (shape: 1xM)
        weight_next: Weights of the next layer (shape: DxM)
        z_current: Pre-activation of the input layer (shape: 1xD)
    '''
   # Gradient w.r.t. pre-activation (dL/dz_current)
   dL_dz_current = (dL_dz_next @ weight_next.T) * ReLu_derivative(z_current)

   # Gradient for weights (dL/dW) = input.T @ dL_dz_current
   dL_dw = input_original.T @ dL_dz_current

    # Gradient for bias (dL/db) = sum(dL_dz_current)
   dL_db = np.sum(dL_dz_current)
   return dL_dw, dL_db

def update_weights(weight, bias, dL_dw, dL_db, learning_rate):
   '''   
   [Update Weights]
      w = w - (lr x dL/dw)
      b = b - (lr x dL/db)
   '''
   '''Update weights and biases using gradients.'''

   weight = weight - (learning_rate * dL_dw)
   bias = bias - (learning_rate * dL_db)
   return weight, bias


# Implementing back propagation
learning_rate = 0.01
actual_output = 1

predicted_output = final_layer.round(3).squeeze()
error = actual_output - predicted_output
print("Error(Actual-Predicted): ",error)

# Computing loss gradient (dL/dy_pred)
dL_dp = (final_layer - actual_output) * ReLu_derivative(z_final) # final_layer is the predicted output

# Storing gradients for final layer weights and biases
dL_dw_final, dL_db_final = bp_layer_final(
   A_prev=output_layer_2, # Activation from previous (hidden) layer
   predicted=final_layer, 
   actual=actual_output
)

# Storing gradients for hidden layer2 weights and biases
dL_dw_layer2, dL_db_layer2, dL_dz_layer2 = bp_layer_hidden(
   A_prev=output_layer_1, # Activation from previous layer (hidden layer 1)
   dL_dz_next=dL_dp, # Gradient from final layer
   weight_next=weight_final, # Weights of the next layer (final layer)
   z_current=z2 # Pre-activation of hidden layer 2 (Z2)
)

# Storing gradients for hidden layer1 weights and biases
dL_dw_layer1, dL_db_layer1, dL_dz_layer1 = bp_layer_hidden(
   A_prev=inputs_original.T, # Input data (transposed to match dimensions)
   dL_dz_next=dL_dz_layer2,  # Gradient from hidden layer 2
   weight_next=weights_layer2, # Weights of the next layer (hidden layer 2)
   z_current=z1 # Pre-activation of hidden layer 1 (Z1)
)

# Updating final layer weights and biases
weight_final, bias_final = update_weights(
   weight_final, 
   bias_final,
   dL_dw_final,
   dL_db_final,
   learning_rate
)

# Updating hidden layer2 weights and biases
weights_layer2, bias_layer2 = update_weights(
   weights_layer2,
   bias_layer2,
   dL_dw_layer2,
   dL_db_layer2,
   learning_rate
)

# Updating first hidden layer weights and biases
weights_layer1, bias_layer1 = update_weights(
   weights_layer1,
   bias_layer1,
   dL_dw_layer1,
   dL_db_layer1,
   learning_rate
)

print("\nUpdated Weight & Bias of 1st Hidden Layer:\n", weights_layer1, bias_layer1)
print("\nUpdated Weight & Bias of 2nd Hidden Layer:\n", weights_layer2, bias_layer2)
print("\nUpdated Weight & Bias of Final Layer:\n", weight_final, bias_final)

1st Layer output:
 [[3.99 3.29 2.59]] 

2st Layer output:
 [[5.4552 6.4972]] 

Final Layer output:
 7.188 

Error(Actual-Predicted):  -6.188

Updated Weight & Bias of 1st Hidden Layer:
 [[ 0.1570487   0.03662708  0.00382068]
 [ 0.0540974  -0.09674583 -0.36235864]
 [ 0.2411461  -0.28011875 -1.07853796]] [[0.29749646]
 [1.29749646]
 [0.29749646]]

Updated Weight & Bias of 2nd Hidden Layer:
 [[-0.51482241 -1.42964483]
 [-0.33187111 -1.11374223]
 [-0.08891981 -0.30783963]] [[1.38735157]
 [2.38735157]]

Updated Weight & Bias of Final Layer:
 [[-3.04587851]
 [-3.36070646]] [0.4811632]
