In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
#create NN model
class SimpleNN(nn.Module):
    """Small fully connected network: Linear -> ReLU -> Linear.

    The last layer has no activation, so ``forward`` returns raw scores
    (logits) rather than probabilities.

    Args:
        in_features: Number of input features per sample (default 2).
        hidden_features: Width of the hidden layer (default 16).
        out_features: Number of outputs per sample (default 1).
    """

    def __init__(self, in_features: int = 2, hidden_features: int = 16,
                 out_features: int = 1):
        super().__init__()
        # Layers are created in the same order as before, so a given RNG seed
        # still produces the same initial weights for the default sizes.
        self.layer1 = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_features, out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the un-activated outputs for a batch ``x``.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``out_features``.
        """
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)

# Seed the global RNG before building the model so the randomly initialised
# weights (and any later unseeded torch random call) are identical on every
# Restart & Run All.
torch.manual_seed(0)
model = SimpleNN()

In [7]:
# Dummy data: 100 two-feature samples drawn from a standard normal.
# A dedicated, seeded generator makes the dataset identical on every run
# without touching the global RNG state.
DATA_SEED = 42
data_rng = torch.Generator().manual_seed(DATA_SEED)
X = torch.randn(100, 2, generator=data_rng)

# Label is 1.0 when the two features sum to a positive number, else 0.0;
# reshaped to a column so it matches the model's (N, 1) output.
y = (X[:, 0] + X[:, 1] > 0).float().view(-1, 1)
X.shape, y.shape

(torch.Size([100, 2]), torch.Size([100, 1]))

In [9]:
# Binary cross-entropy evaluated directly on raw logits: the sigmoid is
# applied inside the loss, and the per-sample losses are averaged.
criterion = nn.BCEWithLogitsLoss(reduction="mean")

In [11]:
# Adam optimizer over all of the model's parameters.
LEARNING_RATE = 0.01
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [13]:
# Forward pass only: run the whole dataset through the model and measure the
# loss. No gradients are computed and no weights change in this cell.
outputs = model(X)
loss = criterion(input=outputs, target=y)

print("Loss:", loss.item())

Loss: 0.7159508466720581


In [15]:
# Backward pass for the `loss` produced by the preceding forward-pass cell.
# NOTE: run this once per forward pass. Calling backward() again on the same
# `loss` without recomputing it typically raises a RuntimeError, because
# autograd frees the graph's buffers after the first backward call.
optimizer.zero_grad()  # clear old gradients so they do not accumulate
loss.backward()        # compute gradients of the loss w.r.t. the parameters

In [17]:
# Apply one parameter update using the gradients computed by loss.backward().
optimizer.step()

In [19]:
# Repeat the full forward / backward / update cycle so the model can learn.
for epoch in range(20):
    # Clear gradients first; this is equivalent to clearing them right before
    # backward() because the forward pass never reads .grad.
    optimizer.zero_grad()

    # Forward pass on the full batch, then the loss on the raw outputs.
    outputs = model(X)
    loss = criterion(outputs, y)

    # Backpropagate and apply one optimizer update.
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}, Loss = {loss.item():.4f}")

Epoch 1, Loss = 0.7004
Epoch 2, Loss = 0.6857
Epoch 3, Loss = 0.6718
Epoch 4, Loss = 0.6585
Epoch 5, Loss = 0.6458
Epoch 6, Loss = 0.6337
Epoch 7, Loss = 0.6218
Epoch 8, Loss = 0.6101
Epoch 9, Loss = 0.5984
Epoch 10, Loss = 0.5866
Epoch 11, Loss = 0.5747
Epoch 12, Loss = 0.5627
Epoch 13, Loss = 0.5505
Epoch 14, Loss = 0.5381
Epoch 15, Loss = 0.5255
Epoch 16, Loss = 0.5127
Epoch 17, Loss = 0.4997
Epoch 18, Loss = 0.4866
Epoch 19, Loss = 0.4733
Epoch 20, Loss = 0.4599


# Loss Functions & Optimizers in PyTorch

## Objective
This notebook introduces loss functions and optimizers, demonstrating how neural networks learn through gradient-based optimization.

---

## Key Concepts

**Loss Function**  
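Quantifies prediction error as a single scalar. This notebook uses `nn.BCEWithLogitsLoss`, which applies a sigmoid to the model's raw outputs and then computes binary cross-entropy against the 0/1 targets.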

**Backpropagation**  
Computes the gradients of the loss with respect to every model parameter; in PyTorch this is triggered by calling `loss.backward()`.

**Optimizer**  
Updates the parameters using those gradients; here `optim.Adam` does this each time `optimizer.step()` is called.

---

## Key Insight
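Training a neural network consists of repeating four steps: a forward pass, loss evaluation, gradient computation (after clearing the previous gradients with `optimizer.zero_grad()`, since PyTorch accumulates them), and a parameter update.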