## Importing libraries & initializing weights and biases

In [84]:
import numpy as np

In [85]:
## Seed NumPy's global random generator so the random draws below are reproducible
np.random.seed(42)

In [86]:
## ANN structure: layer widths used to size the weight matrices and bias rows
size_of_input = 8  # number of input features per sample
size_of_hiddens = [4,2,4]  # widths of the three hidden layers, in order
size_of_output = 1  # width of the output layer

In [87]:
## Draw every weight matrix and bias row from np.random.rand (uniform on [0, 1))
## One (fan_in, fan_out) pair per layer, listed from the input side to the output side
_layer_shapes = (
    (size_of_input, size_of_hiddens[0]),
    (size_of_hiddens[0], size_of_hiddens[1]),
    (size_of_hiddens[1], size_of_hiddens[2]),
    (size_of_hiddens[2], size_of_output),
)

# All weights are drawn first (W1..W4), then all biases (b1..b4); for a given
# seed the order of these RNG calls determines the resulting values.
weights = {
    f"W{idx}": np.random.rand(fan_in, fan_out)
    for idx, (fan_in, fan_out) in enumerate(_layer_shapes, start=1)
}

biases = {
    f"b{idx}": np.random.rand(1, fan_out)
    for idx, (_, fan_out) in enumerate(_layer_shapes, start=1)
}

In [88]:
# Sanity check: report the shape of every parameter array, weights first, then biases
for params in (weights, biases):
    for name, array in params.items():
        print(f"{name} shape: {array.shape}")
        print('\n')

W1 shape: (8, 4)


W2 shape: (4, 2)


W3 shape: (2, 4)


W4 shape: (4, 1)


b1 shape: (1, 4)


b2 shape: (1, 2)


b3 shape: (1, 4)


b4 shape: (1, 1)




## Forward-Pass 

In [89]:
# ReLU activation function
def relu(x):
    """Rectified linear unit: element-wise max of ``x`` and zero."""
    rectified = np.maximum(x, 0)
    return rectified

# Sigmoid activation function
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    The textbook form calls ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here the identity
    ``sigmoid(x) = exp(-log(1 + exp(-x)))`` is used instead, with
    ``np.logaddexp(0, -x)`` computing ``log(1 + exp(-x))`` stably.

    Args:
        x: Scalar or NumPy array of pre-activations.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))
    
def forward_pass(X, weights, biases):
    """Propagate ``X`` through three ReLU layers and a sigmoid output layer.

    Args:
        X: Input batch fed into the first layer.
        weights: Dict holding the layer matrices under ``'W1'``..``'W4'``.
        biases: Dict holding the layer bias rows under ``'b1'``..``'b4'``.

    Returns:
        Tuple ``(A1, A2, A3, A4)``: the activations of the three hidden
        layers followed by the sigmoid output. The hidden activations are
        returned so that backpropagation can reuse them.
    """
    hidden_activations = []
    signal = X

    # Hidden layers 1-3: affine transform followed by ReLU
    for layer in ("1", "2", "3"):
        pre_activation = np.dot(signal, weights["W" + layer]) + biases["b" + layer]
        signal = relu(pre_activation)
        hidden_activations.append(signal)

    # Output layer: affine transform followed by sigmoid
    output_pre_activation = np.dot(signal, weights["W4"]) + biases["b4"]
    A4 = sigmoid(output_pre_activation)

    A1, A2, A3 = hidden_activations
    return A1, A2, A3, A4

## Smoke-test forward_pass on a single random sample
np.random.seed(0)  # re-seeds the global NumPy RNG before drawing the test input
TestX = np.random.rand(1, size_of_input)  # one row with size_of_input features
A1, A2, A3, A4 = forward_pass(TestX, weights, biases)

print("Output of forward pass (predicted classification probabilities):")
print(A4)

Output of forward pass (predicted classification probabilities):
[[0.99999899]]


## Backpropagation and Gradient Descent

In [90]:
# ReLU activation function
def relu(x):
    """Rectified linear unit: element-wise max of ``x`` and zero.

    NOTE(review): this repeats the ``relu`` already defined in the
    forward-pass cell; once this cell runs, this definition is the one in use.
    """
    rectified = np.maximum(x, 0)
    return rectified

# Derivative of ReLU
def relu_derivative(x):
    """Return 1.0 where ``x`` is strictly positive and 0.0 elsewhere."""
    positive_mask = x > 0
    return positive_mask.astype(float)

# Sigmoid activation function
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    The textbook form calls ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here the identity
    ``sigmoid(x) = exp(-log(1 + exp(-x)))`` is used instead, with
    ``np.logaddexp(0, -x)`` computing ``log(1 + exp(-x))`` stably.

    NOTE(review): this repeats the ``sigmoid`` already defined in the
    forward-pass cell; once this cell runs, this definition is the one in use.

    Args:
        x: Scalar or NumPy array of pre-activations.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

# Derivative of Sigmoid
def sigmoid_derivative(x):
    """Derivative of the sigmoid at ``x``: ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    activation = sigmoid(x)
    return activation * (1 - activation)

## Binary Loss Function (Cross-Entropy)
def binary_loss(y_true, y_pred, eps=1e-12):
    """Mean binary cross-entropy between targets and predicted probabilities.

    Predictions are clipped into ``[eps, 1 - eps]`` before the logarithm so
    that a saturated prediction of exactly 0 or 1 yields a large finite loss
    instead of ``inf`` or ``nan``.

    Args:
        y_true: Array of binary targets; its first axis is the sample axis.
        y_pred: Array of predicted probabilities, broadcastable with ``y_true``.
        eps: Clipping margin that keeps ``log`` away from zero.

    Returns:
        The summed cross-entropy divided by the number of samples
        (``y_true.shape[0]``).
    """
    y_true = np.asarray(y_true)
    M = y_true.shape[0]  # number of samples (rows)
    # Guard against log(0) when the sigmoid output saturates.
    y_pred = np.clip(y_pred, eps, 1 - eps)
    loss = -np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)) / M
    return loss
    
## Backpropagation function
def backpropagation(X, y, weights, biases, learning_rate=0.01):
    """Run one full-batch gradient-descent step and return the loss.

    Performs a forward pass, backpropagates the binary cross-entropy error
    through the sigmoid output layer and the three ReLU hidden layers, then
    updates the arrays stored in ``weights`` and ``biases`` in place.

    Args:
        X: Input batch; ``X.shape[0]`` is taken as the number of samples.
        y: Binary targets, one per sample. A 1-D array is reshaped to a
            column so that ``A4 - y`` stays element-wise instead of
            broadcasting a column against a row into a square matrix.
        weights: Dict with keys ``"W1"``..``"W4"``; modified in place.
        biases: Dict with keys ``"b1"``..``"b4"``; modified in place.
        learning_rate: Step size applied to every gradient.

    Returns:
        The loss of the forward pass made at the start of this call, i.e.
        measured before the parameter update.
    """
    M = X.shape[0]  # number of samples (rows)

    # Align 1-D labels with the column-shaped network output; without this,
    # NumPy broadcasting would turn the error term into an (M, M) matrix.
    y = np.asarray(y)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    # Forward pass
    A1, A2, A3, A4 = forward_pass(X, weights, biases)

    ## Output layer error: gradient of the cross-entropy loss w.r.t. the
    ## output pre-activation (sigmoid and loss derivatives combined)
    dZ4 = A4 - y

    ## Gradients of the output layer
    dW4 = np.dot(A3.T, dZ4) / M  ## Gradient of weights for W4
    db4 = np.sum(dZ4, axis=0, keepdims=True) / M  ## Gradient of bias for b4

    ## Backpropagate to hidden layer 3
    dA3 = np.dot(dZ4, weights["W4"].T)  ## Gradient of A3 (from dZ4)
    # A ReLU output is positive exactly where its input was positive, so the
    # activation itself can be used to build the derivative mask.
    dZ3 = dA3 * relu_derivative(A3)

    ## Gradients for hidden layer 3
    dW3 = np.dot(A2.T, dZ3) / M  ## Gradient of weights for W3
    db3 = np.sum(dZ3, axis=0, keepdims=True) / M  ## Gradient of bias for b3

    ## Backpropagate to hidden layer 2
    dA2 = np.dot(dZ3, weights["W3"].T)  ## Gradient of A2 (from dZ3)
    dZ2 = dA2 * relu_derivative(A2)

    ## Gradients for hidden layer 2
    dW2 = np.dot(A1.T, dZ2) / M  ## Gradient of weights for W2
    db2 = np.sum(dZ2, axis=0, keepdims=True) / M  ## Gradient of bias for b2

    ## Backpropagate to hidden layer 1
    dA1 = np.dot(dZ2, weights["W2"].T)  ## Gradient of A1 (from dZ2)
    dZ1 = dA1 * relu_derivative(A1)

    ## Gradients for hidden layer 1
    dW1 = np.dot(X.T, dZ1) / M  ## Gradient of weights for W1
    db1 = np.sum(dZ1, axis=0, keepdims=True) / M  ## Gradient of bias for b1

    ## Gradient-descent update; all gradients above were computed from the
    ## pre-update parameters, so the updates are applied only now.
    weights["W1"] -= learning_rate * dW1
    weights["W2"] -= learning_rate * dW2
    weights["W3"] -= learning_rate * dW3
    weights["W4"] -= learning_rate * dW4

    biases["b1"] -= learning_rate * db1
    biases["b2"] -= learning_rate * db2
    biases["b3"] -= learning_rate * db3
    biases["b4"] -= learning_rate * db4

    # Loss of the forward pass above (A4 predates the update just applied)
    loss = binary_loss(y, A4)
    return loss

## Example input and labels ( random ) 

In [91]:
np.random.seed(42)
X_train = np.random.rand(5, 8)  # 5 samples, 8 features
y_train = np.random.randint(0, 2, (5, 1))  # one binary label (0 or 1) per sample

# Fresh random parameters for training: one (fan_in, fan_out) pair per layer
_train_layer_shapes = ((8, 4), (4, 2), (2, 4), (4, 1))

# Weights are drawn first (W1..W4), then biases (b1..b4)
weights = {
    f"W{idx}": np.random.rand(fan_in, fan_out)
    for idx, (fan_in, fan_out) in enumerate(_train_layer_shapes, start=1)
}

biases = {
    f"b{idx}": np.random.rand(1, fan_out)
    for idx, (_, fan_out) in enumerate(_train_layer_shapes, start=1)
}

# Training loop: num_epochs full-batch updates, logging the loss every 5th epoch
num_epochs = 100
learning_rate = 0.01
for epoch in range(num_epochs):
    # One backpropagation step on the whole training set; keep the reported loss
    loss = backpropagation(X_train, y_train, weights, biases, learning_rate)
    if epoch % 5:
        continue
    print(f"Epoch {epoch}, Loss: {loss:.5f}")

Epoch 0, Loss: 9.79649
Epoch 5, Loss: 4.55347
Epoch 10, Loss: 1.84276
Epoch 15, Loss: 0.75787
Epoch 20, Loss: 0.67549
Epoch 25, Loss: 0.67243
Epoch 30, Loss: 0.67207
Epoch 35, Loss: 0.67180
Epoch 40, Loss: 0.67153
Epoch 45, Loss: 0.67126
Epoch 50, Loss: 0.67100
Epoch 55, Loss: 0.67073
Epoch 60, Loss: 0.67046
Epoch 65, Loss: 0.67019
Epoch 70, Loss: 0.66993
Epoch 75, Loss: 0.66966
Epoch 80, Loss: 0.66939
Epoch 85, Loss: 0.66912
Epoch 90, Loss: 0.66885
Epoch 95, Loss: 0.66858
