# Residual Networks (ResNet)
In this notebook, we explore ResNets, understand their architecture, and implement a ResNet-18 model using PyTorch.

### **1. Why Residual Networks?**
Deep networks often suffer from the vanishing gradient problem, making it difficult to train very deep architectures. ResNet introduces skip connections that allow gradients to flow directly through the network, enabling the training of networks with hundreds or even thousands of layers.

In [9]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.models import resnet18
import matplotlib.pyplot as plt

# Step 1: Data Preparation
def prepare_data(batch_size=64):
    """Build the CIFAR-10 training and test data loaders.

    Every image is converted to a tensor and each of its three channels is
    normalized with mean 0.5 and standard deviation 0.5. The datasets are
    rooted at ``./data`` and requested with ``download=True``.

    Args:
        batch_size: Number of samples per batch, used for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.
    """
    channel_mean = (0.5, 0.5, 0.5)
    channel_std = (0.5, 0.5, 0.5)
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    def make_loader(is_train):
        # The training split is shuffled; the test split keeps dataset order.
        split = datasets.CIFAR10(
            root='./data', train=is_train, download=True, transform=preprocessing
        )
        return torch.utils.data.DataLoader(
            split, batch_size=batch_size, shuffle=is_train
        )

    return make_loader(True), make_loader(False)

train_loader, test_loader = prepare_data()

Files already downloaded and verified
Files already downloaded and verified


### **2. Understanding the ResNet Architecture**

In [10]:
def print_resnet_summary():
    """Print the layer-by-layer structure of an untrained ResNet-18."""
    # resnet18() with no arguments builds a randomly initialised network.
    # The `pretrained=` keyword is deprecated in recent torchvision releases
    # (which take `weights=` instead), so it is left out to keep this call
    # working across versions. Request pretrained weights here if you want
    # to use the model for transfer learning.
    model = resnet18()
    print(model)

print_resnet_summary()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

### **3. Define Training and Evaluation Functions**

In [11]:
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` and print the mean batch loss after every epoch.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        epochs: Number of full passes over ``train_loader``.

    Raises:
        ValueError: If ``train_loader`` yields no batches in an epoch.
    """
    model.train()

    # Run every batch on the device that holds the model's parameters, so the
    # same loop works whether the model lives on the CPU or on a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in train_loader:
            if device is not None:
                inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Counting batches avoids relying on len(train_loader), which not
            # every iterable of batches supports.
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; cannot compute an average loss")

        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / num_batches:.4f}")

def evaluate_model(model, test_loader):
    """Print the classification accuracy of ``model`` over ``test_loader``.

    Args:
        model: Classifier to evaluate; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()

    # Run every batch on the device that holds the model's parameters, so the
    # same loop works whether the model lives on the CPU or on a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in test_loader:
            if device is not None:
                inputs, labels = inputs.to(device), labels.to(device)
            # The predicted class is the index of the largest output score.
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; cannot compute accuracy")

    print(f"Accuracy: {100 * correct / total:.2f}%")

### **4. Instantiate and Train ResNet**

In [14]:
%%time

# Load the ResNet-18 model
model = resnet18(pretrained=False, num_classes=10)
# model = model.cuda()

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
train_model(model, train_loader, criterion, optimizer, epochs=1)

# Step 5: Evaluate the Model
evaluate_model(model, test_loader)

Epoch 1/1, Loss: 1.3707
Accuracy: 61.49%
CPU times: user 42min 14s, sys: 8min 52s, total: 51min 6s
Wall time: 6min 17s


### **5. Visualizing Results**

In [None]:
def visualize_predictions(model, test_loader, num_images=10):
    """Show images from the first test batch with predicted and true labels.

    Args:
        model: Classifier used for the predictions; it is switched to
            evaluation mode.
        test_loader: Iterable yielding ``(images, labels)`` batches. Images
            are assumed to be normalized with mean 0.5 and std 0.5 per
            channel, as ``prepare_data`` does.
        num_images: Maximum number of images to display. Fewer are shown
            when the first batch is smaller.

    Raises:
        ValueError: If there is no image to display.
    """
    model.eval()
    images, labels = next(iter(test_loader))

    # Feed the batch to the model on the model's own device, but keep the
    # original tensors for plotting.
    first_param = next(model.parameters(), None)
    model_inputs = images if first_param is None else images.to(first_param.device)

    with torch.no_grad():
        outputs = model(model_inputs)
        _, predictions = torch.max(outputs, 1)
    predictions = predictions.cpu()

    # Undo the (x - 0.5) / 0.5 normalization and clamp to the [0, 1] range
    # imshow expects for float images; then reorder NCHW -> NHWC.
    pixels = (images * 0.5 + 0.5).clamp(0, 1)
    pixels = pixels.cpu().numpy().transpose((0, 2, 3, 1))

    count = min(num_images, len(pixels))
    if count <= 0:
        raise ValueError("no images to display")

    # squeeze=False keeps `axes` two-dimensional even when count == 1.
    fig, axes = plt.subplots(1, count, figsize=(15, 5), squeeze=False)
    for i, ax in enumerate(axes[0]):
        ax.imshow(pixels[i])
        ax.set_title(f"Pred: {predictions[i].item()}\nTrue: {labels[i].item()}")
        ax.axis('off')
    plt.show()

visualize_predictions(model, test_loader)

### **Conclusion**
In this notebook, we explored the ResNet architecture and trained a ResNet-18 model on the CIFAR-10 dataset. Residual connections help alleviate vanishing gradients and enable the training of very deep networks. Try experimenting with deeper versions like ResNet-50 or ResNet-101!