In [1]:
import torch
import torch.nn as nn
import torch.optim as optim


In [3]:
# Define a simple neural network (1 hidden layer)
class SimpleNN(nn.Module):
    """Fully connected network with a single ReLU hidden layer.

    Computes ``output(relu(hidden(x)))``. The default sizes reproduce the
    original fixed 2 -> 3 -> 1 architecture, so ``SimpleNN()`` behaves exactly
    as before; pass different sizes instead of redefining the class.

    Args:
        input_size: Number of features per input sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size=2, hidden_size=3, output_size=1):
        super().__init__()
        # Layers are created in the same order as before (hidden, then output)
        # so a seeded run draws identical initial weights.
        self.hidden = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.output = nn.Linear(hidden_size, output_size)  # hidden -> output
        self.activation = nn.ReLU()

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Tensor whose last dimension has ``input_size`` entries.

        Returns:
            Tensor whose last dimension has ``output_size`` entries. No
            activation is applied after the output layer.
        """
        hidden_out = self.activation(self.hidden(x))  # ReLU after the first layer
        return self.output(hidden_out)

In [5]:
# Seed PyTorch's global RNG before the model is built: nn.Linear draws its
# initial weights randomly, so without a seed the prediction, loss and
# gradients printed by later cells change on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Create a model instance
model = SimpleNN()

# Loss: mean squared error between prediction and target (regression)
loss_function = nn.MSELoss()

# Optimizer: stochastic gradient descent over every model parameter
optimizer = optim.SGD(model.parameters(), lr=0.1)

# A single training example: one row with 2 input features, and its target
x = torch.tensor([[0.5, -1.5]], dtype=torch.float32)
y_true = torch.tensor([[1.0]], dtype=torch.float32)

In [6]:
# One full training step on the single example: forward pass, loss,
# backpropagation, then one SGD update. Re-running this cell applies another
# update to the same `model`.

# **FEEDFORWARD: Compute predictions**
y_pred = model(x)  # Forward pass
print("Prediction:", y_pred.item())

# **LOSS CALCULATION: Compute error**
loss = loss_function(y_pred, y_true)
print("Loss:", loss.item())

# **BACKPROPAGATION: Compute gradients**
optimizer.zero_grad()  # backward() accumulates into .grad, so clear old values first
loss.backward()        # Compute gradients using chain rule (automatic differentiation)

# Print computed gradients
for name, param in model.named_parameters():
    print(f"Gradient of {name}:", param.grad)

# **UPDATE WEIGHTS: Gradient Descent step**
optimizer.step()  # Update weights based on gradients

# Print updated parameters.
# detach() gives a gradient-free view of the values; unlike the legacy `.data`
# attribute it stays visible to autograd's in-place modification checks.
for name, param in model.named_parameters():
    print(f"Updated {name}:", param.detach())

Prediction: 0.1302935630083084
Loss: 0.756389319896698
Gradient of hidden.weight: tensor([[ 0.0753, -0.2258],
        [ 0.0000,  0.0000],
        [ 0.4264, -1.2791]])
Gradient of hidden.bias: tensor([0.1505, 0.0000, 0.8527])
Gradient of output.weight: tensor([[-0.9181,  0.0000, -0.1490]])
Gradient of output.bias: tensor([-1.7394])
Updated hidden.weight: tensor([[ 0.0396, -0.5521],
        [-0.5144,  0.1722],
        [-0.1322, -0.2185]])
Updated hidden.bias: tensor([-0.3728, -0.1067, -0.4744])
Updated output.weight: tensor([[ 0.0053,  0.1361, -0.4753]])
Updated output.bias: tensor([0.3919])


In [7]:
# Define a simple neural network (1 hidden layer)
class SimpleNN(nn.Module):
    """Fully connected network with a single ReLU hidden layer.

    Computes ``output(relu(hidden(x)))``. The default sizes give the
    2 -> 4 -> 1 architecture used in this cell, so ``SimpleNN()`` behaves
    exactly as before; other widths can be requested without editing the class.

    Args:
        input_size: Number of features per input sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size=2, hidden_size=4, output_size=1):
        super().__init__()
        # Creation order (hidden, then output) is kept so a seeded run draws
        # identical initial weights.
        self.hidden = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.output = nn.Linear(hidden_size, output_size)  # hidden -> output
        self.activation = nn.ReLU()

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Tensor whose last dimension has ``input_size`` entries.

        Returns:
            Tensor whose last dimension has ``output_size`` entries. No
            activation is applied after the output layer.
        """
        hidden_out = self.activation(self.hidden(x))  # ReLU after the first layer
        return self.output(hidden_out)

# Seed PyTorch's global RNG before the model is built: nn.Linear draws its
# initial weights randomly, so without a seed every gradient and loss printed
# by the loop below changes on each Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Create a model instance
model = SimpleNN()

# Loss: mean squared error between prediction and target (regression)
loss_function = nn.MSELoss()

# Optimizer: stochastic gradient descent over every model parameter
optimizer = optim.SGD(model.parameters(), lr=0.1)

# A single training example: one row with 2 input features, and its target
x = torch.tensor([[0.5, -1.5]], dtype=torch.float32)
y_true = torch.tensor([[1.0]], dtype=torch.float32)

# Training loop to observe weight changes
epochs = 10
for epoch in range(epochs):
    # **FEEDFORWARD: Compute predictions**
    y_pred = model(x)

    # **LOSS CALCULATION: Compute error**
    loss = loss_function(y_pred, y_true)

    # **BACKPROPAGATION: Compute gradients**
    optimizer.zero_grad()  # backward() accumulates into .grad, so clear old values first
    loss.backward()        # Compute gradients using chain rule (automatic differentiation)

    # Print computed gradients, each wrapped in a row of asterisks
    for name, param in model.named_parameters():
        print("*"*10)
        print(f"Gradient of {name}:", param.grad)
        print("*"*10)

    # **UPDATE WEIGHTS: Gradient Descent step**
    optimizer.step()

    # `loss` was computed before this epoch's update, so this is the error of
    # the weights the epoch started with.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")

    # Print updated parameters.
    # detach() gives a gradient-free view of the values; unlike the legacy
    # `.data` attribute it stays visible to autograd's in-place checks.
    for name, param in model.named_parameters():
        print(f"Updated {name}:", param.detach())

**********
Gradient of hidden.weight: tensor([[-0.0525,  0.1574],
        [ 0.0150, -0.0451],
        [ 0.0000,  0.0000],
        [ 0.0000,  0.0000]])
**********
**********
Gradient of hidden.bias: tensor([-0.1049,  0.0301,  0.0000,  0.0000])
**********
**********
Gradient of output.weight: tensor([[-1.4218, -0.8808,  0.0000,  0.0000]])
**********
**********
Gradient of output.bias: tensor([-1.0153])
**********
Epoch 1/10, Loss: 0.25771385431289673
Updated hidden.weight: tensor([[ 0.4059, -0.5126],
        [-0.1471, -0.4026],
        [ 0.0841,  0.5874],
        [-0.5229,  0.4426]])
Updated hidden.bias: tensor([0.4652, 0.3267, 0.3401, 0.3606])
Updated output.weight: tensor([[ 0.2455,  0.0585, -0.0540,  0.0874]])
Updated output.bias: tensor([0.4749])
**********
Gradient of hidden.weight: tensor([[-0.0300,  0.0900],
        [-0.0071,  0.0214],
        [ 0.0000,  0.0000],
        [ 0.0000,  0.0000]])
**********
**********
Gradient of hidden.bias: tensor([-0.0600, -0.0143,  0.0000,  0.0000]