# Backpropagation

In [1]:
import numpy as np

<p align="center"><img src="./attachments/Example.png" alt="" width="600"/></p>

**Neural Networks**

In [2]:
class NeuralNet:
    """Two-layer, fully connected sigmoid network that narrates every step.

    The network computes ``a1 = sigmoid(W1 @ a0 + b1)`` and
    ``a2 = sigmoid(W2 @ a1 + b2)``. Each public method prints its intermediate
    results so a worked example can be followed by hand.

    Sign convention: the deltas are built from ``(y - y_hat)``. For the cost
    ``C = 0.5 * sum((y - a2)**2)`` that is the *negative* of ``dC/dz``, so the
    arrays returned by ``backpropagation`` are descent directions (the printed
    labels call them "dC/d..."), and ``update_weights`` therefore *adds*
    ``eta * grad``.
    """

    def __init__(self, W1_initial, W2_initial, b1 = None, b2 = None):
        """Store private float copies of the parameters.

        Args:
            W1_initial: Array-like of shape (hidden, inputs).
            W2_initial: Array-like of shape (outputs, hidden).
            b1: Optional bias for layer 1; zeros when omitted.
            b2: Optional bias for layer 2; zeros when omitted.
        """
        # np.array(..., dtype=float) always copies, so the caller's arrays are
        # never modified, and integer inputs do not break the in-place "+="
        # performed by update_weights.
        self.W1 = np.array(W1_initial, dtype=float)
        self.W2 = np.array(W2_initial, dtype=float)

        self.b1 = self._as_bias_column(b1, self.W1.shape[0])
        self.b2 = self._as_bias_column(b2, self.W2.shape[0])

        # Caches filled by forward_pass and consumed by the other methods.
        self.a0 = None
        self.z1 = None
        self.a1 = None
        self.z2 = None
        self.a2 = None

    @staticmethod
    def _as_bias_column(bias, n_units):
        """Return an owned float column vector of shape (n_units, 1)."""
        if bias is None:
            return np.zeros((n_units, 1))
        # A 1-D bias of shape (n,) would broadcast "W @ a + b" into an (n, n)
        # matrix, so force a column; broadcast_to also rejects wrong lengths.
        column = np.array(bias, dtype=float).reshape(-1, 1)
        return np.broadcast_to(column, (n_units, 1)).copy()

    @staticmethod
    def sigmoid(z):
        """Logistic function 1 / (1 + exp(-z)), applied element-wise."""
        return 1 / (1 + np.exp(-z))
    
    @staticmethod
    def sigmoid_derivative(a):
        """Sigmoid derivative expressed through its output ``a = sigmoid(z)``."""
        return a*(1 - a)
    
    def forward_pass(self, a0):
        """Propagate ``a0`` through both layers, caching z1, a1, z2 and a2.

        Args:
            a0: Input of shape (inputs, 1).

        Returns:
            The output activation ``a2``.
        """
        print("--- Forward Pass ---")
        self.a0 = a0
        print(f"Input a0: {self.a0}\n")

        # Layer 1
        self.z1 = self.W1 @ self.a0 + self.b1
        self.a1 = self.sigmoid(self.z1)
        print(f"z1 = W1 @ a0: \n{self.z1}\n")
        print(f"a1 = sigmoid(z1): \n{self.a1}\n")

        # Layer 2
        self.z2 = self.W2 @ self.a1 + self.b2
        self.a2 = self.sigmoid(self.z2)
        print(f"z2 = W2 @ a1: \n{self.z2}\n")
        print(f"a2 = sigmoid(z2): \n{self.a2}\n")
        print(f"Predicted y_hat = a2: \n{self.a2[0, 0]}\n")

        return self.a2

    def calculate_cost(self, y):
        """Return ``0.5 * sum((y - a2)**2)`` for the cached output.

        Prints a hint and returns None when forward_pass has not run yet.
        """
        if self.a2 is None:
            print("Run Forward pass")
            return None
        cost = 0.5 * np.sum((y - self.a2)**2)
        print(f"Cost: \n{cost}\n")
        return cost
    
    def backpropagation(self, y):
        """Compute the update directions for all weights and biases.

        Args:
            y: Target value(s), broadcastable against ``a2``.

        Returns:
            Tuple ``(grad_W1, grad_b1, grad_W2, grad_b2)``, or None (after
            printing a hint) when forward_pass has not run yet. See the class
            docstring for the sign convention of these arrays.
        """
        if self.a2 is None or self.a1 is None or self.a0 is None or self.z1 is None or self.z2 is None:
            print("Run Forward pass")
            return None

        print("--- Backpropagation ---")
        y_hat = self.a2

        # 1. Delta^L (L=2)
        delta2 = (y - y_hat) * self.sigmoid_derivative(y_hat)
        print(f" delta^L = delta2 = (y - y_hat) * sigmoid_derivative(y_hat): \n{delta2}\n")

        # 2. Delta^l (l=1)
        # delta^l = [ (W^{l+1})^T @ delta^{l+1} ] .* sigma'(z^l)
        delta1 = (self.W2.T @ delta2) * self.sigmoid_derivative(self.a1)
        print(f"delta1 = (W2.T @ delta2) * sigmoid_derivative(a1): \n{delta1}\n")

        # 3. Gradient
        # dC/dW^l = delta^l @ (a^{l-1})^T
        # dC/db^l = delta^l
        grad_W2 = delta2 @ self.a1.T
        grad_W1 = delta1 @ self.a0.T
        # Summing over the sample axis leaves a single-column delta unchanged
        # and keeps the result in the bias shape (units, 1) when a0 holds
        # several columns, matching how the matrix products above already
        # accumulate over columns.
        grad_b2 = delta2.sum(axis=1, keepdims=True)
        grad_b1 = delta1.sum(axis=1, keepdims=True)

        print(f"Gradient dC/dW2 = delta2 @ a1.T: \n{grad_W2}\n")
        print(f"Gradient dC/db2 = delta2: \n{grad_b2}\n")
        print(f"Gradient dC/dW1 = delta1 @ a0.T: \n{grad_W1}\n")
        print(f"Gradient dC/db1 = delta1: \n{grad_b1}\n")

        return grad_W1, grad_b1, grad_W2, grad_b2
    
    def update_weights(self, grad_W1, grad_b1, grad_W2, grad_b2, eta):
        """Apply one step of size ``eta`` to the weights and biases, in place.

        Args:
            grad_W1, grad_b1, grad_W2, grad_b2: Arrays returned by
                backpropagation. A bias gradient of None leaves that bias
                untouched.
            eta: Learning rate.
        """
        print(f"Learning rate: {eta}\n")

        W1_old = self.W1.copy()
        W2_old = self.W2.copy()

        self.W1 += eta * grad_W1
        self.W2 += eta * grad_W2

        # The biases are trainable parameters too; previously their gradients
        # were accepted here but never applied.
        if grad_b1 is not None:
            self.b1 += eta * grad_b1
        if grad_b2 is not None:
            self.b2 += eta * grad_b2

        print(f"W1_old: \n{W1_old}\n")
        print(f"W1_new = W1_old + eta * grad_W1: \n{self.W1}\n")
        print(f"W2_old: \n{W2_old}\n")
        print(f"W2_new = W2_old + eta * grad_W2: \n{self.W2}\n")

**Example**

In [3]:
# Training sample: one input column vector and its scalar target
a0_input = np.array([0.35, 0.7]).reshape(2, 1)
y_target = 0.5

# Starting weights handed to NeuralNet (W1 is used for layer 1, W2 for layer 2)
W1_init = np.array([
    [0.2, 0.2],
    [0.3, 0.3],
])
W2_init = np.array([[0.3, 0.9]])

# Step size used by the weight update
learning_rate = 1

In [4]:
# 1. Init
network = NeuralNet(W1_init, W2_init)

# 2. Forward Pass
# forward_pass stores the input on the network itself, so there is no need to
# assign network.a0 by hand beforehand.
y_hat_output = network.forward_pass(a0_input)

# 3. Cost
initial_cost = network.calculate_cost(y_target)

# 4. Backpropagation -> Gradient
gradients = network.backpropagation(y_target)

# 5. Update Weights
# backpropagation returns None when no forward pass has been run; test for that
# sentinel explicitly rather than relying on tuple truthiness.
if gradients is not None:
    grad_W1, grad_b1, grad_W2, grad_b2 = gradients
    network.update_weights(grad_W1, grad_b1, grad_W2, grad_b2, learning_rate)

--- Forward Pass ---
Input a0: [[0.35]
 [0.7 ]]

z1 = W1 @ a0: 
[[0.21 ]
 [0.315]]

a1 = sigmoid(z1): 
[[0.55230791]
 [0.57810523]]

z2 = W2 @ a1: 
[[0.68598708]]

a2 = sigmoid(z2): 
[[0.66507364]]

Predicted y_hat = a2: 
0.6650736395247564

Cost: 
0.013624653232974609

--- Backpropagation ---
 delta^L = delta2 = (y - y_hat) * sigmoid_derivative(y_hat): 
[[-0.03677027]]

delta1 = (W2.T @ delta2) * sigmoid_derivative(a1): 
[[-0.00272759]
 [-0.00807143]]

Gradient dC/dW2 = delta2 @ a1.T: 
[[-0.02030851 -0.02125708]]

Gradient dC/db2 = delta2: 
[[-0.03677027]]

Gradient dC/dW1 = delta1 @ a0.T: 
[[-0.00095466 -0.00190931]
 [-0.002825   -0.00565   ]]

Gradient dC/db1 = delta1: 
[[-0.00272759]
 [-0.00807143]]

Learning rate: 1

W1_old: 
[[0.2 0.2]
 [0.3 0.3]]

W1_new = W1_old + eta * grad_W1: 
[[0.19904534 0.19809069]
 [0.297175   0.29435   ]]

W2_old: 
[[0.3 0.9]]

W2_new = W2_old + eta * grad_W2: 
[[0.27969149 0.87874292]]



****