In [5]:
# Back to NN basics: work through the forward pass and the hand-derived backward pass of a single linear layer
import torch

# --- Problem sizes -----------------------------------------------------------
batch_size = 10
input_dim = 1
output_dim = 5

# --- Layer parameters --------------------------------------------------------
# Plain tensors with autograd disabled: every gradient below is derived by hand.
W = torch.randn(input_dim, output_dim, requires_grad=False)  # (input_dim, output_dim)
b = torch.randn(output_dim, requires_grad=False)             # (output_dim,)

# --- Sample data -------------------------------------------------------------
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# so that anything else referring to this variable keeps working.
input = torch.randn(batch_size, input_dim)                   # (batch_size, input_dim)
# Stand-in for the upstream gradient dL/d(output); in a real network this comes
# from the next layer or from the derivative of the loss.
dOutput = torch.randn(batch_size, output_dim)                # (batch_size, output_dim)

# --- Forward pass (for reference): output = input . W + b --------------------
output = torch.matmul(input, W) + b                          # b broadcasts over the batch

# --- Backward pass, computed manually ----------------------------------------
# dL/d(input) = dOutput . W^T  -> what a preceding layer would receive.
dInput = torch.matmul(dOutput, W.t())
# dL/db: b is added to every row, so its gradient sums over the batch axis.
db = torch.sum(dOutput, dim=0)
# dL/dW = input^T . dOutput  -> same shape as W.
dW = torch.matmul(input.t(), dOutput)

# --- Report ------------------------------------------------------------------
print(
    "Manually calculated gradient w.r.t weights:\n",
    dW,
    dW.shape,
)
print(
    "Manually calculated gradient w.r.t biases:\n",
    db,
    db.shape,
)
# Only the shape is printed for the input gradient.
print(
    "Gradient w.r.t input (for previous layers, if any):\n",
    dInput.shape,
)


Manually calculated gradient w.r.t weights:
 tensor([[ 3.5633, -4.2768,  1.4483, -1.3435, -4.8739]]) torch.Size([1, 5])
Manually calculated gradient w.r.t biases:
 tensor([ 4.6487, -1.0000,  2.2650, -0.2986, -3.8942]) torch.Size([5])
Gradient w.r.t input (for previous layers, if any):
 torch.Size([10, 1])
