In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Simple neural network with one hidden layer
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(2, 2)  # Input to hidden layer
        self.fc2 = nn.Linear(2, 1)  # Hidden to output layer

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize the network
model = SimpleNN()

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Dummy data: 4 samples, 2 features each
inputs = torch.tensor([[0.5, 0.3], [0.2, 0.8], [0.9, 0.4], [0.7, 0.6]], requires_grad=True)
targets = torch.tensor([[1.0], [0.0], [1.0], [0.0]])

# Hook function to print gradients during backpropagation
def print_gradients(module, grad_input, grad_output):
    print(f'Gradients at {module}:')
    print('grad_input:', grad_input)
    print('grad_output:', grad_output)

# Register hooks for each layer
for layer in model.children():
    layer.register_backward_hook(print_gradients)

# Training loop
for epoch in range(2):  # Run for 2 epochs
    print(f'Epoch {epoch+1}')
    
    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    
    # Backward pass and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print updated weights after each step
    for name, param in model.named_parameters():
        print(f'{name} - Weights: {param.data}, Gradient: {param.grad}')
    
    print(f'Loss: {loss.item()}\n')



Epoch 1
Gradients at Linear(in_features=2, out_features=1, bias=True):
grad_input: (tensor([-1.8877]), tensor([[0.3304, 0.5020],
        [0.0591, 0.0899],
        [0.3607, 0.5480],
        [0.1089, 0.1654]]), tensor([[-1.0568],
        [-1.1180]]))
grad_output: (tensor([[-0.7259],
        [-0.1299],
        [-0.7926],
        [-0.2392]]),)
Gradients at Linear(in_features=2, out_features=2, bias=True):
grad_input: (tensor([0.8591, 1.3053]), tensor([[ 0.2709, -0.1162],
        [ 0.0485, -0.0208],
        [ 0.2958, -0.1268],
        [ 0.0893, -0.0383]]), tensor([[0.5778, 0.8780],
        [0.3560, 0.5409]]))
grad_output: (tensor([[0.3304, 0.5020],
        [0.0591, 0.0899],
        [0.3607, 0.5480],
        [0.1089, 0.1654]]),)
fc1.weight - Weights: tensor([[ 0.4532,  0.6333],
        [ 0.2289, -0.6560]]), Gradient: tensor([[0.5778, 0.3560],
        [0.8780, 0.5409]])
fc1.bias - Weights: tensor([-0.0214,  0.6890]), Gradient: tensor([0.8591, 1.3053])
fc2.weight - Weights: tensor([[-0.4445, -

