# Backpropagation

In [1]:
import numpy as np

def relu_prime(Z):
    return np.where(Z > 0, 1, 0)

def cost(yHat, y):
    return 0.5 * np.sum((yHat - y)**2)

def cost_prime(yHat, y):
    return yHat - y

def backprop(x, y, Wh, Wo, lr):
    # Forward propagation
    Zh = np.dot(x, Wh)
    H = relu_prime(Zh)

    Zo = np.dot(H, Wo)
    yHat = relu_prime(Zo)

    # Layer Error
    Eo = cost_prime(yHat, y) * relu_prime(Zo)
    Eh = np.dot(Eo, Wo.T) * relu_prime(Zh)

    # Cost derivative for weights
    dWo = np.dot(H.T, Eo)
    dWh = np.dot(x.T, Eh)

    # Update weights
    Wh -= lr * dWh
    Wo -= lr * dWo

# Example usage
# Assuming x, y, Wh, Wo, lr are defined elsewhere
x = np.random.rand(2, 3)  # Example input matrix with shape (2, 3)
y = np.random.rand(2, 3)  # Example target matrix with shape (2, 3)
Wh = np.random.rand(3, 3)  # Example weights matrix for hidden layer
Wo = np.random.rand(3, 3)  # Example weights matrix for output layer
lr = 0.01  # Example learning rate

backprop(x, y, Wh, Wo, lr)


In [3]:
x, y, Wh, Wo, lr

(array([[0.91849798, 0.31459573, 0.15589525],
        [0.50063489, 0.81648542, 0.73793416]]),
 array([[0.56485251, 0.68927135, 0.55176539],
        [0.06018269, 0.01881898, 0.68293117]]),
 array([[0.61179315, 0.18823299, 0.81642904],
        [0.21938286, 0.34381293, 0.48395619],
        [0.73458135, 0.19232727, 0.16410045]]),
 array([[0.43221483, 0.46838498, 0.47532897],
        [0.7697312 , 0.82596855, 0.41345834],
        [0.16945669, 0.35066125, 0.51430952]]),
 0.01)

In [11]:
import torch

def relu_prime(Z):
    return torch.where(Z > 0, torch.tensor(1.0), torch.tensor(0.0))

def cost(yHat, y):
    return 0.5 * torch.sum((yHat - y)**2)

def cost_prime(yHat, y):
    return yHat - y

def backprop(x, y, Wh, Wo, lr):
    # Convert NumPy arrays to PyTorch tensors
    x = torch.tensor(x, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32)
    Wh = torch.tensor(Wh, dtype=torch.float32, requires_grad=True)
    Wo = torch.tensor(Wo, dtype=torch.float32, requires_grad=True)

    # Forward propagation
    Zh = torch.matmul(x, Wh)
    H = relu_prime(Zh)

    Zo = torch.matmul(H, Wo)
    yHat = relu_prime(Zo)

    # Layer Error
    Eo = cost_prime(yHat, y) * relu_prime(Zo)
    Eh = torch.matmul(Eo, Wo.t()) * relu_prime(Zh)

    # Cost derivative for weights
    dWo = torch.matmul(H.t(), Eo)
    dWh = torch.matmul(x.t(), Eh)

    # Update weights
    with torch.no_grad():
        Wh -= lr * dWh
        Wo -= lr * dWo

# Example usage
# Assuming x, y, Wh, Wo, lr are defined elsewhere
x = torch.rand((2, 3), dtype=torch.float32)
y = torch.rand((2, 3), dtype=torch.float32)
Wh = torch.rand((3, 3), dtype=torch.float32, requires_grad=True)
Wo = torch.rand((3, 3), dtype=torch.float32, requires_grad=True)
lr = 0.01

backprop(x, y, Wh, Wo, lr)


  x = torch.tensor(x, dtype=torch.float32)
  y = torch.tensor(y, dtype=torch.float32)
  Wh = torch.tensor(Wh, dtype=torch.float32, requires_grad=True)
  Wo = torch.tensor(Wo, dtype=torch.float32, requires_grad=True)


In [12]:
x, y, Wh, Wo, lr

(tensor([[0.6442, 0.9653, 0.4251],
         [0.6686, 0.0571, 0.2971]]),
 tensor([[0.2838, 0.3286, 0.2702],
         [0.1200, 0.4288, 0.6314]]),
 tensor([[0.3451, 0.5670, 0.5865],
         [0.7618, 0.7717, 0.2840],
         [0.4032, 0.7573, 0.0625]], requires_grad=True),
 tensor([[0.1922, 0.0263, 0.9965],
         [0.5663, 0.1059, 0.9733],
         [0.4465, 0.0786, 0.8636]], requires_grad=True),
 0.01)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Create a random 2x3 tensor and set requires_grad=True to track computation
x = torch.randn(2, 3, requires_grad=True)
print("Input Tensor:")
print(x)

# Define a simple linear layer with 3 input features and 2 output features
linear_layer = nn.Linear(3, 2)

# Forward pass
y = linear_layer(x)
print("\nOutput Tensor after Forward Pass:")
print(y)

# Create a random target tensor
target = torch.randn(2, 2)

# Define a loss function, here we use Mean Squared Error (MSE) loss
criterion = nn.MSELoss()

# Calculate the loss
loss = criterion(y, target)
print("\nLoss after Forward Pass:")
print(loss.item())

# Perform backpropagation
loss.backward()

# Access the gradients of the input tensor (dx/dw)
gradients = x.grad
print("\nGradients after Backward Pass:")
print(gradients)

# Update the weights using an optimizer (e.g., Stochastic Gradient Descent)
optimizer = optim.SGD(linear_layer.parameters(), lr=0.01)
optimizer.step()

# Check if the weights have been updated
updated_weights = linear_layer.weight
print("\nUpdated Weights after Optimization:")
print(updated_weights)

