## Gradients - Analytic vs PyTorch

In [1]:
import torch
import numpy as np

# Sanity check: for the linear model z = w^T X + b with mean-squared-error
# loss, the hand-derived gradients should match what autograd reports.

# Toy problem with 1 feature and 5 samples; seeding keeps it reproducible.
np.random.seed(0)

# The draw order (inputs, targets, weight, bias) determines the values.
X_np = np.random.randn(1, 5)
y_np = np.random.randn(1, 5)
w_np = np.random.randn(1, 1)
b_np = np.random.randn(1, 1)
m = X_np.shape[1]  # number of samples = number of columns of X_np

# float32 torch copies of the float64 NumPy arrays
X = torch.from_numpy(X_np).float()
y = torch.from_numpy(y_np).float()

# Parameters are leaf tensors so that autograd populates their .grad
w = torch.from_numpy(w_np).float().requires_grad_()
b = torch.from_numpy(b_np).float().requires_grad_()

# ----------- Autograd (PyTorch) -------------
z = torch.matmul(w.T, X) + b
loss = ((z - y) ** 2).sum() / m
loss.backward()

# Pull the gradients out as plain NumPy arrays
dw_torch = w.grad.detach().clone().numpy()
db_torch = b.grad.detach().clone().numpy()

# ----------- Closed form (NumPy) -------------
# With L = (1/m) * sum((z - y)^2):
#   dL/dw = (2/m) * X @ (z - y)^T
#   dL/db = (2/m) * sum(z - y), written as a product with a row of ones
z_np = w_np.T @ X_np + b_np
residual_np = z_np - y_np
grad_scale = 2 * (1 / m)
dw_np = (grad_scale * X_np) @ residual_np.T
db_np = (grad_scale * np.ones((1, m))) @ residual_np.T

# ----------- Side-by-side report -------------
# Differences on the order of float32 rounding error are expected, because
# the torch path runs in float32 and the NumPy path in float64.
print("Gradient w.r.t w (PyTorch):", dw_torch)
print("Gradient w.r.t w (Analytic):", dw_np)
print("Difference:", np.abs(dw_np - dw_torch))

print("\nGradient w.r.t b (PyTorch):", db_torch)
print("Gradient w.r.t b (Analytic):", db_np)
print("Difference:", np.abs(db_np - db_torch))
