# Feedforward Neural Networks with torch.nn

**Month 2, Week 1** — Sequence Models

Now we use PyTorch's high-level API to build proper neural networks.

## What You'll Learn

1. `nn.Module` — base class for all models
2. `nn.Linear` — fully connected layers
3. Activation functions (ReLU, Sigmoid)
4. `torch.optim` — optimizers
5. Complete training pipeline

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Device setup: use the MPS backend when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

---

## 1. nn.Module — The Base Class

Every PyTorch model inherits from `nn.Module`. You define:
- `__init__`: create layers
- `forward`: define computation

In [None]:
class SimpleNet(nn.Module):
    """Fully connected network with one hidden layer: Linear -> ReLU -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two linear layers and the activation.

        Args:
            input_size: number of features in each input row.
            hidden_size: width of the hidden layer.
            output_size: number of values produced per input row.
        """
        super().__init__()
        # Registration order (layer1, layer2, relu) is the order print(model) shows.
        self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.layer2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return layer2's output for `x`; no activation is applied after layer2."""
        hidden = self.relu(self.layer1(x))  # linear transformation + non-linearity
        return self.layer2(hidden)          # output layer

# Instantiate a small network: 2 input features -> 8 hidden units -> 1 output,
# then print its layer summary.
model = SimpleNet(2, 8, 1)
print(model)

In [None]:
# Inspect parameters: list each parameter tensor's name and shape while
# accumulating the total number of scalar parameters.
print("Model parameters:")
total_params = 0
for name, param in model.named_parameters():
    print(f"  {name}: {param.shape}")
    total_params += param.numel()

print(f"\nTotal parameters: {total_params}")

---

## 2. Create Training Data

Let's create a binary classification problem (two moons).

In [None]:
from sklearn.datasets import make_moons

# Generate the two-moons points and their class labels (seeded via random_state)
X_np, y_np = make_moons(n_samples=1000, noise=0.2, random_state=42)

# Convert to float32 tensors; labels get a trailing dimension so y is a column
X = torch.tensor(X_np, dtype=torch.float32)
y = torch.tensor(y_np, dtype=torch.float32).reshape(-1, 1)

# Split train/test: the first `train_size` rows train, the remainder test
train_size = 800
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

print(f"Train: {X_train.shape}, Test: {X_test.shape}")

In [None]:
# Visualize: scatter the whole dataset, coloured by class label
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(X_np[:, 0], X_np[:, 1], c=y_np, cmap='coolwarm', alpha=0.7)
ax.set_title('Two Moons Dataset')
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
fig.colorbar(points, ax=ax, label='Class')
plt.show()

---

## 3. Loss Function and Optimizer

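- **Loss function**: measures how wrong predictions are
- **Optimizer**: updates weights to reduce loss

`nn.BCEWithLogitsLoss` applies the sigmoid internally, so the model outputs raw logits and has no final sigmoid layer.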

In [None]:
# Seed PyTorch's RNG so the random weight initialisation is repeatable from
# run to run, in line with the dataset, which is generated with a fixed
# random_state.
torch.manual_seed(42)

# Create fresh model
model = SimpleNet(input_size=2, hidden_size=16, output_size=1)

# Binary classification loss. It expects raw logits and applies the sigmoid
# itself, which is why the model has no final sigmoid layer.
criterion = nn.BCEWithLogitsLoss()

# Optimizer (Adam is a good default)
optimizer = optim.Adam(model.parameters(), lr=0.01)

print(f"Loss: {criterion}")
print(f"Optimizer: {optimizer}")

---

## 4. Training Loop

In [None]:
def _binary_accuracy(net, inputs, targets):
    """Return the fraction of predictions that equal `targets`.

    Puts `net` in evaluation mode, runs it on `inputs` without gradient
    tracking, turns the outputs into probabilities with a sigmoid and
    thresholds them at 0.5.

    Args:
        net: model whose outputs are treated as logits.
        inputs: tensor passed to `net`.
        targets: tensor compared element-wise with the thresholded predictions.

    Returns:
        The accuracy as a Python float.
    """
    net.eval()  # Set to evaluation mode
    with torch.no_grad():
        predictions = (torch.sigmoid(net(inputs)) > 0.5).float()
        return (predictions == targets).float().mean().item()


# Training history
train_losses = []      # one entry per epoch
test_accuracies = []   # one entry per periodic evaluation (epochs 0, 10, 20, ...)

num_epochs = 100

for epoch in range(num_epochs):
    # ===== TRAINING =====
    model.train()  # Set to training mode

    # Forward pass (full batch: the whole training set at once)
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Backward pass: clear old gradients, backpropagate, update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    train_losses.append(loss.item())

    # ===== EVALUATION =====
    # Every 10th epoch only; the accuracy plot uses range(0, num_epochs, 10)
    # as its x axis, so this interval must stay in step with it.
    if epoch % 10 == 0:
        accuracy = _binary_accuracy(model, X_test, y_test)
        test_accuracies.append(accuracy)
        print(f"Epoch {epoch:3d}: loss = {loss.item():.4f}, test_acc = {accuracy:.3f}")

# Evaluate once more after the last weight update. Reusing the value from the
# loop would report the accuracy of the last periodic check (epoch 90 for 100
# epochs), not of the finished model. The result is deliberately not appended
# to test_accuracies so that list keeps one entry per 10 epochs.
final_accuracy = _binary_accuracy(model, X_test, y_test)
print(f"\nFinal test accuracy: {final_accuracy:.1%}")

In [None]:
# Plot training curve: per-epoch loss on the left, periodic test accuracy on the right
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 4))

loss_ax.plot(train_losses)
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')

# Accuracy was recorded every 10th epoch, so the x positions step by 10
acc_ax.plot(range(0, num_epochs, 10), test_accuracies)
acc_ax.set_title('Test Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')

fig.tight_layout()
plt.show()

---

## 5. Visualize Decision Boundary

In [None]:
import numpy as np

# Create grid: a 100 x 100 lattice covering the data with a 0.5 margin per side
x_min, x_max = X_np[:, 0].min() - 0.5, X_np[:, 0].max() + 0.5
y_min, y_max = X_np[:, 1].min() - 0.5, X_np[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                     np.linspace(y_min, y_max, 100))

# Predict on grid: flatten the lattice into (x, y) rows, run the model, and
# reshape the sigmoid probabilities back to the lattice shape
grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)
model.eval()
with torch.no_grad():
    Z = torch.sigmoid(model(grid)).numpy().reshape(xx.shape)

# Plot
plt.figure(figsize=(8, 6))
contour = plt.contourf(xx, yy, Z, levels=50, cmap='coolwarm', alpha=0.8)
plt.scatter(X_np[:, 0], X_np[:, 1], c=y_np, cmap='coolwarm', edgecolors='black')
plt.title('Decision Boundary')
# Pass the contour set explicitly. A bare plt.colorbar() attaches to the
# current mappable, which here is the scatter drawn last, so the bar labelled
# P(class=1) would describe the class labels rather than the probabilities.
plt.colorbar(contour, label='P(class=1)')
plt.show()

---

## 6. Using nn.Sequential (Shortcut)

For simple architectures, use `nn.Sequential` instead of writing a class.

In [None]:
# A slightly deeper model built with Sequential: 2 -> 16 -> 8 -> 1.
# It is not identical to SimpleNet, which has a single hidden layer; the exact
# SimpleNet(2, 16, 1) equivalent would be Linear(2, 16) -> ReLU -> Linear(16, 1).
model_seq = nn.Sequential(
    nn.Linear(2, 16),
    nn.ReLU(),
    nn.Linear(16, 8),
    nn.ReLU(),
    nn.Linear(8, 1)
)

print(model_seq)

---

## Summary

| Component | Purpose |
|-----------|----------|
| `nn.Module` | Base class for models |
| `nn.Linear(in, out)` | Fully connected layer |
| `nn.ReLU()` | Activation function |
| `nn.BCEWithLogitsLoss()` | Binary classification loss |
| `optim.Adam(params, lr)` | Optimizer |
| `model.train()` | Enable dropout/batchnorm training behavior |
| `model.eval()` | Switch dropout/batchnorm to inference behavior |

## Training Loop Template

```python
model = MyModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in dataloader:
        # Forward
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        
        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```

## Next

Add DataLoader for batch processing, then apply to IMDB sentiment classification!