# Convolutional Neural Networks (CNNs): Complete Guide

This notebook provides comprehensive coverage of CNNs from basic concepts to advanced architectures.

## Table of Contents
1. [Setup and Imports](#setup)
2. [Understanding Convolutions](#convolution)
3. [Building a Basic CNN](#basic-cnn)
4. [Classic Architectures](#architectures)
5. [Transfer Learning](#transfer)
6. [Visualization](#visualization)

## 1. Setup and Imports <a name="setup"></a>

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Seed NumPy and PyTorch with the same fixed value so reruns of the
# notebook draw the same random numbers.
np.random.seed(42)
torch.manual_seed(42)

# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

## 2. Understanding Convolutions <a name="convolution"></a>

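Let's visualize how convolution operations work. Note that, as in most deep-learning libraries, the "convolution" implemented below slides the kernel over the image without flipping it (strictly speaking, a cross-correlation).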

In [None]:
def manual_conv2d(input_img, kernel):
    """Slide ``kernel`` over ``input_img`` and return the "valid" response map.

    Loop-based implementation for educational purposes. The kernel is applied
    as given (it is not flipped), so this is the cross-correlation that
    deep-learning libraries call "convolution". No padding is added and the
    window moves one pixel at a time.

    Args:
        input_img: 2D array-like of shape ``(ih, iw)``.
        kernel: 2D array-like of shape ``(kh, kw)`` that fits inside the image.

    Returns:
        Float ``np.ndarray`` of shape ``(ih - kh + 1, iw - kw + 1)``.

    Raises:
        ValueError: If either argument is not two-dimensional, or the kernel
            is larger than the image along any axis.
    """
    input_img = np.asarray(input_img)
    kernel = np.asarray(kernel)
    if input_img.ndim != 2 or kernel.ndim != 2:
        raise ValueError(
            f"expected 2D image and kernel, got {input_img.ndim}D and {kernel.ndim}D"
        )

    kh, kw = kernel.shape
    ih, iw = input_img.shape

    # Without this check a kernel one row/column too large would silently
    # produce an empty result, and a larger one a cryptic NumPy error.
    if kh > ih or kw > iw:
        raise ValueError(
            f"kernel {kernel.shape} does not fit inside image {input_img.shape}"
        )

    # Output dimensions for a stride-1 window with no padding
    oh = ih - kh + 1
    ow = iw - kw + 1

    output = np.zeros((oh, ow))

    for i in range(oh):
        for j in range(ow):
            # Element-wise product of the window with the kernel, then sum
            output[i, j] = np.sum(input_img[i:i+kh, j:j+kw] * kernel)

    return output

# A 10x10 array of standard-normal noise stands in for an image.
image = np.random.randn(10, 10)

# Sobel kernels: sobel_x differences neighbouring columns, sobel_y
# differences neighbouring rows.
sobel_x = np.array([
    [-1, 0, 1],
    [-2, 0, 2],
    [-1, 0, 1],
])

sobel_y = np.array([
    [-1, -2, -1],
    [0, 0, 0],
    [1, 2, 1],
])

# Filter the image once with each kernel.
edge_x, edge_y = (manual_conv2d(image, k) for k in (sobel_x, sobel_y))

# Show the input next to both filter responses, one panel each.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
panels = [
    (image, 'Original Image'),
    (edge_x, 'Sobel X (Vertical Edges)'),
    (edge_y, 'Sobel Y (Horizontal Edges)'),
]
for ax, (pixels, title) in zip(axes, panels):
    ax.imshow(pixels, cmap='gray')
    ax.set_title(title)
plt.tight_layout()
plt.show()

## 3. Building a Basic CNN <a name="basic-cnn"></a>

Let's build a simple CNN for MNIST digit classification.

In [None]:
class SimpleCNN(nn.Module):
    """Three conv/pool stages followed by a two-layer classifier head.

    The head is sized for feature maps that are 3x3 after the third pooling
    step, which is what a 1x28x28 input produces (28 -> 14 -> 7 -> 3).

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Convolutional layers (3x3 kernels, padding=1 keeps the spatial size)
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Pooling: 2x2 window, stride 2, shared by all three stages
        self.pool = nn.MaxPool2d(2, 2)

        # Fully connected layers
        self.fc1 = nn.Linear(128 * 3 * 3, 256)
        self.fc2 = nn.Linear(256, num_classes)

        # Dropout for regularization
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Map a batch of images to class logits.

        Args:
            x: Tensor of shape ``(batch, 1, H, W)`` whose spatial size pools
                down to 3x3 (e.g. 28x28).

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.

        Raises:
            RuntimeError: If the flattened features do not match the width
                expected by ``fc1``.
        """
        # Conv1 + ReLU + Pool: 28x28 -> 14x14
        x = self.pool(F.relu(self.conv1(x)))

        # Conv2 + ReLU + Pool: 14x14 -> 7x7
        x = self.pool(F.relu(self.conv2(x)))

        # Conv3 + ReLU + Pool: 7x7 -> 3x3
        x = self.pool(F.relu(self.conv3(x)))

        # Flatten everything except the batch axis. Keeping the batch axis
        # fixed means a wrong-sized input fails loudly in fc1 instead of being
        # silently regrouped into a different number of samples.
        x = torch.flatten(x, 1)

        # FC layers
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x

# Build the network, move it to the selected device and show its layout.
model = SimpleCNN()
model = model.to(device)
print(model)

# Tally the parameter tensors: every element counts toward the total, and
# those with requires_grad set also count as trainable.
total_params = sum(weight.numel() for weight in model.parameters())
trainable_params = sum(
    weight.numel()
    for weight in filter(lambda w: w.requires_grad, model.parameters())
)
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

### Training the Simple CNN

In [None]:
# Preprocessing: convert each image to a tensor, then standardize it with a
# fixed mean/std (presumably the MNIST training-set statistics).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Datasets live under ./data; only the training split requests a download.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)

# Shuffled mini-batches for training, large fixed-order batches for testing.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

# Loss function and optimizer for `model`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

def train_epoch(model, loader, criterion, optimizer, device=None):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module to optimize; it is switched to training mode.
        loader: Iterable yielding ``(data, target)`` tensor batches.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor.
        optimizer: Optimizer whose ``zero_grad``/``step`` apply the update.
        device: Device each batch is moved to. When omitted, the device of the
            model's first parameter is used (CPU for a parameter-free model).

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch loss values,
        and the percentage of samples whose arg-max prediction equals the
        target, measured over the samples actually iterated.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    if device is None:
        # Batches have to live wherever the model's weights are.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    total_loss = 0.0
    correct = 0
    seen = 0
    num_batches = 0

    for data, target in tqdm(loader, desc='Training'):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        pred = output.argmax(dim=1)
        correct += pred.eq(target).sum().item()

        # Count what was really processed: with drop_last=True or a subset
        # sampler this is smaller than len(loader.dataset).
        seen += target.size(0)
        num_batches += 1

    avg_loss = total_loss / num_batches
    accuracy = 100. * correct / seen
    return avg_loss, accuracy

def test_epoch(model, loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += criterion(output, target).item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
    
    avg_loss = total_loss / len(loader)
    accuracy = 100. * correct / len(loader.dataset)
    return avg_loss, accuracy

# Train for a few epochs (uncomment to run)
# num_epochs = 5
# for epoch in range(num_epochs):
#     train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
#     test_loss, test_acc = test_epoch(model, test_loader, criterion)
#     print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Train Acc={train_acc:.2f}%, "
#           f"Test Loss={test_loss:.4f}, Test Acc={test_acc:.2f}%")

## 4. Classic Architectures <a name="architectures"></a>

Let's implement some classic CNN architectures.

In [None]:
# LeNet-5
class LeNet5(nn.Module):
    """LeNet-5 style network: two 5x5 conv/pool stages and three linear layers.

    The first linear layer expects 16 feature maps of size 4x4, which is what
    a 1x28x28 input produces (28 -> 24 -> 12 -> 8 -> 4).

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Map a batch of images to class logits.

        Args:
            x: Tensor of shape ``(batch, 1, H, W)`` whose spatial size reduces
                to 4x4 after both conv/pool stages (e.g. 28x28).

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.

        Raises:
            RuntimeError: If the flattened features do not match the width
                expected by ``fc1``.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch axis so a wrong-sized input
        # raises in fc1 rather than being regrouped across samples.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Residual Block for ResNet
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped by a skip connection.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution (and of the projection on the
            skip path, when one is needed).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main path: conv -> BN -> (ReLU) -> conv -> BN
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip path: an empty Sequential passes x through untouched; a 1x1
        # conv + BN projection is used whenever the main path changes the
        # tensor's shape.
        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        branch = self.conv1(x)
        branch = F.relu(self.bn1(branch))
        branch = self.bn2(self.conv2(branch))
        branch += self.shortcut(x)
        return F.relu(branch)

# Simple ResNet
class SimpleResNet(nn.Module):
    """Small residual network: conv stem, three residual stages, linear head.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Stem: single-channel input lifted to 64 feature maps
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)

        # Residual stages; the last two use stride 2 and double the channels
        self.layer1 = ResidualBlock(64, 64)
        self.layer2 = ResidualBlock(64, 128, stride=2)
        self.layer3 = ResidualBlock(128, 256, stride=2)

        # Head: pool each of the 256 maps to one value, then classify
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        pooled = self.avg_pool(features)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

# Instantiate both reference architectures on the selected device.
lenet = LeNet5()
lenet = lenet.to(device)
resnet = SimpleResNet()
resnet = resnet.to(device)

# Report each network's size as the number of elements in its parameters.
print("LeNet-5 Parameters:", sum(weight.numel() for weight in lenet.parameters()))
print("Simple ResNet Parameters:", sum(weight.numel() for weight in resnet.parameters()))

## 5. Transfer Learning <a name="transfer"></a>

Using pre-trained models for new tasks.

In [None]:
# Load pre-trained ResNet18
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of
# a `weights=` argument — confirm against the installed version before changing.
pretrained_model = models.resnet18(pretrained=True)

# Freeze all layers
# This runs BEFORE the head is replaced, so the new `fc` created below is not
# touched by the loop and is the only part left with requires_grad=True.
for param in pretrained_model.parameters():
    param.requires_grad = False

# Replace final layer for our task
# The new head reuses the old head's input width and emits 10 logits.
num_features = pretrained_model.fc.in_features
pretrained_model.fc = nn.Linear(num_features, 10)  # 10 classes for MNIST
# NOTE(review): torchvision's ResNet-18 stem presumably expects 3-channel
# input, while an MNIST pipeline yields 1-channel tensors — confirm the images
# are converted (or the first conv replaced) before training on this model.

pretrained_model = pretrained_model.to(device)

# Count trainable parameters
# `trainable` sums only tensors with requires_grad set; `total` sums all.
trainable = sum(p.numel() for p in pretrained_model.parameters() if p.requires_grad)
total = sum(p.numel() for p in pretrained_model.parameters())
print(f"Trainable parameters: {trainable:,} / {total:,}")

# Fine-tuning strategy
def unfreeze_layers(model, num_layers=1):
    """Re-enable gradients for the last ``num_layers`` children of ``model``.

    "Layers" are the model's direct children as returned by
    ``model.children()``; children without parameters still count toward
    ``num_layers``. Asking for more layers than exist unfreezes all of them.

    Args:
        model: Module whose trailing children should become trainable.
        num_layers: How many children, counted from the end, to unfreeze.
            ``0`` leaves the model untouched.

    Raises:
        ValueError: If ``num_layers`` is negative.
    """
    if num_layers < 0:
        raise ValueError(f"num_layers must be non-negative, got {num_layers}")
    if num_layers == 0:
        # layers[-0:] is the whole list, so slicing would unfreeze everything.
        return

    layers = list(model.children())
    for layer in layers[-num_layers:]:
        for param in layer.parameters():
            param.requires_grad = True

# Example: Unfreeze last 2 layers
# unfreeze_layers(pretrained_model, 2)

## 6. Visualization <a name="visualization"></a>

Visualizing what CNNs learn.

In [None]:
def visualize_filters(model, layer_name='conv1'):
    """Plot up to 16 filters of one layer on a 4x4 grid.

    For each filter only input-channel 0 is drawn, in grayscale, regardless
    of how many input channels the layer has. Unused grid cells stay blank.

    Args:
        model: Object exposing the layer as an attribute.
        layer_name: Attribute name of the layer; it must have a ``weight``
            tensor indexable as ``[filter, channel, ...]``.
    """
    layer = getattr(model, layer_name)
    # detach() yields the weights without autograd history before the copy
    # to host memory and conversion to NumPy.
    filters = layer.weight.detach().cpu().numpy()

    n_filters = min(16, filters.shape[0])
    fig, axes = plt.subplots(4, 4, figsize=(12, 12))

    for i, ax in enumerate(axes.flatten()):
        if i < n_filters:
            # Single- and multi-channel filters are both shown through
            # their first input channel.
            ax.imshow(filters[i, 0], cmap='gray')
            ax.set_title(f'Filter {i}')
        ax.axis('off')

    plt.suptitle(f'Filters from {layer_name}')
    plt.tight_layout()
    plt.show()

def visualize_feature_maps(model, image, layer_name='conv1'):
    """Plot up to 16 output channels of one layer for a single input.

    A forward hook captures the layer's output during one gradient-free
    forward pass in evaluation mode; the hook is removed again even if that
    pass fails.

    Args:
        model: Module to run; it is switched to evaluation mode.
        image: One un-batched input tensor; a batch axis is added before the
            forward pass and the tensor is moved to the module-level
            ``device``.
        layer_name: Attribute name of the layer whose output is shown.

    Raises:
        KeyError: If the forward pass never invokes the chosen layer, so no
            output was captured.
    """
    activation = {}

    def hook(module, inputs, output):
        activation['features'] = output

    # Register hook
    layer = getattr(model, layer_name)
    handle = layer.register_forward_hook(hook)

    try:
        # Forward pass
        model.eval()
        with torch.no_grad():
            _ = model(image.unsqueeze(0).to(device))
    finally:
        # Always detach the hook so a failed pass cannot leave it attached
        # to the model for later calls.
        handle.remove()

    # Drop only the batch axis; a bare squeeze() would also remove the
    # channel axis when the layer has a single output channel.
    features = activation['features'].cpu()[0]

    # Visualize
    n_features = min(16, features.shape[0])
    fig, axes = plt.subplots(4, 4, figsize=(12, 12))

    for i, ax in enumerate(axes.flatten()):
        if i < n_features:
            ax.imshow(features[i], cmap='viridis')
            ax.set_title(f'Channel {i}')
        ax.axis('off')

    plt.suptitle(f'Feature Maps from {layer_name}')
    plt.tight_layout()
    plt.show()

# Visualize filters from our simple CNN
# visualize_filters(model, 'conv1')

# Visualize feature maps for a sample image
# sample_image, _ = next(iter(test_loader))
# visualize_feature_maps(model, sample_image[0], 'conv1')

## Summary

In this notebook, we covered:

1. **Convolution Operations**: Understanding the fundamental building block
2. **Basic CNN**: Building and training a simple CNN for MNIST
3. **Classic Architectures**: LeNet-5 and ResNet implementations
4. **Transfer Learning**: Using pre-trained models effectively
5. **Visualization**: Understanding what CNNs learn

## Next Steps

- Implement other architectures (VGG, Inception, EfficientNet)
- Try advanced techniques (data augmentation, learning rate schedules)
- Apply CNNs to different datasets (CIFAR-10, ImageNet)
- Explore object detection (YOLO, Faster R-CNN)
- Study semantic segmentation (U-Net, DeepLab)

## Best Practices

1. Start with pre-trained models when possible
2. Use data augmentation for better generalization
3. Apply batch normalization for stable training
4. Use residual connections for very deep networks
5. Monitor training with visualization and metrics
6. Use appropriate regularization (dropout, weight decay)