In [2]:
from google.colab import drive
drive.mount('/content/drive')

# Why mount Google Drive?
#   - The Colab environment is reset every time the session ends.
#   - All files on the runtime are deleted when you disconnect.
# Mounting Drive lets work saved under the mount point be kept permanently.

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os

# Folder on the mounted Drive used as the dataset root by the cells below.
SAVE_DIR = "/content/drive/MyDrive/cifar10_data/"

# Create the folder (including missing parents); no error if it already exists.
os.makedirs(SAVE_DIR, exist_ok=True)

In [4]:
# ============================================================
# 1. Imports & Device
# ============================================================
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision # a library that contains computer vision tools for PyTorch
import torchvision.transforms as transforms # Transforms are functions to prepare images for training

from torch.utils.data import DataLoader

print("PyTorch version:", torch.__version__)

# Prefer the GPU when CUDA is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


PyTorch version: 2.9.0+cu126
Using device: cuda


In [5]:
# ============================================================
# 2. CIFAR-10 Dataset & DataLoaders
#    CIFAR-10: 32×32 color images, 10 classes
# ============================================================

# Per-channel (R, G, B) mean and standard deviation used for normalization
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)

# Preprocessing pipeline: convert to tensor, then normalize each channel
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Arguments shared by both splits; only the `train` flag differs
_cifar_kwargs = dict(root=SAVE_DIR, download=True, transform=transform)
train_dataset = torchvision.datasets.CIFAR10(train=True, **_cifar_kwargs)
test_dataset = torchvision.datasets.CIFAR10(train=False, **_cifar_kwargs)

# Mini-batch size for both loaders (adjustable)
batch_size = 64

# Only the training split is shuffled; the test split keeps a fixed order
_loader_kwargs = dict(batch_size=batch_size, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

# Display names; position i is used as the name of label i by the evaluation code
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Sanity check: report how many images each split contains
print(f"Training dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

Training dataset size: 50000
Test dataset size: 10000


In [6]:
# ============================================================
# 3. Define CNN (matches the lecture architecture)
#  It takes 3×32×32 images and outputs logits (unnormalized scores) for 10 classes.

#    Input:  3×32×32
#    Conv1:  3 -> 16 filters, 5×5, no padding
#      Output size: 16×28×28
#    Pool1: 2×2 -> 16×14×14
#    Conv2: 16 -> 32 filters, 5×5
#      Output size: 32×10×10
#    Pool2: 2×2 -> 32×5×5
#    FC:    32*5*5 -> 10 classes
# ============================================================

class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier for 3×32×32 images.

    Shape flow for a single image:
        3×32×32  --conv1 (5×5)--> 16×28×28 --pool (2×2)--> 16×14×14
        16×14×14 --conv2 (5×5)--> 32×10×10 --pool (2×2)--> 32×5×5
        32×5×5   --flatten-->     800      --fc-->         num_classes

    Args:
        num_classes: Number of output logits produced by the final linear
            layer. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Convolution output size along one spatial dimension:
        #   out = floor((in + 2*padding - kernel_size) / stride) + 1
        # conv1 (3 -> 16 channels): floor((32 + 0 - 5) / 1) + 1 = 28
        #   (3, 32, 32) -> (16, 28, 28)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16,
                               kernel_size=5, stride=1, padding=0)

        # conv2 (16 -> 32 channels) runs on the pooled 14×14 maps:
        #   floor((14 + 0 - 5) / 1) + 1 = 10
        #   (16, 14, 14) -> (32, 10, 10)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32,
                               kernel_size=5, stride=1, padding=0)

        # 2×2 max pooling with stride 2, shared by both stages:
        #   out = floor((in - 2) / 2) + 1, i.e. 28 -> 14 and 10 -> 5
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head on the flattened 32×5×5 = 800 features
        self.fc = nn.Linear(32 * 5 * 5, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, 32, 32). Other spatial sizes yield
                a flattened width different from the 800 features ``fc``
                expects.

        Returns:
            Tensor of shape (batch, num_classes) holding raw logits; no
            softmax is applied here.
        """
        # Stage 1: conv -> ReLU -> pool, giving (batch, 16, 14, 14)
        x = self.pool(torch.relu(self.conv1(x)))

        # Stage 2: conv -> ReLU -> pool, giving (batch, 32, 5, 5)
        x = self.pool(torch.relu(self.conv2(x)))

        # Keep the batch dimension and flatten the rest: (batch, 800)
        x = torch.flatten(x, start_dim=1)

        # Linear layer produces one logit per class
        return self.fc(x)

# Build the network and move it to the selected device
model = SimpleCNN().to(device)
# Print the module to list its layers
print(model)


SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=800, out_features=10, bias=True)
)


In [7]:
# ============================================================
# 4. Loss Function & Optimizer
# ============================================================

# Step size passed to Adam
LEARNING_RATE = 1e-3

# Cross-entropy loss between the model outputs and the target labels
criterion = nn.CrossEntropyLoss()

# Adam updates every parameter of `model`
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Number of full passes over the training set (lower this for a quick in-class demo)
num_epochs = 50


In [8]:
# ============================================================
# 5. Training Loop
# ============================================================

def train_one_epoch(epoch):
    """Run one full pass over ``train_loader`` and update the model weights.

    Uses the notebook-level ``model``, ``train_loader``, ``optimizer``,
    ``criterion`` and ``device``.

    Args:
        epoch: Zero-based epoch index; only used in the printed summary line.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the loss averaged per sample and
        the fraction of samples classified correctly during this pass.
    """
    model.train()
    loss_sum = 0.0   # batch losses weighted by batch length
    hits = 0         # correctly classified samples so far
    seen = 0         # samples processed so far

    for batch_idx, (images, targets) in enumerate(train_loader):
        images = images.to(device)
        targets = targets.to(device)

        # Reset gradients, run the forward pass, then backpropagate and step
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by its length so the epoch figure is per sample
        n_samples = images.size(0)
        loss_sum += loss.item() * n_samples
        seen += n_samples

        # The predicted class is the index of the largest output
        hits += (logits.argmax(dim=1) == targets).sum().item()

        # Progress line on every 200th batch
        if batch_idx % 200 == 199:
            print(f"  [Batch {batch_idx+1:3d}] Loss: {loss.item():.4f}")

    epoch_loss = loss_sum / seen
    epoch_acc = hits / seen
    print(f"Epoch {epoch+1} Train Loss: {epoch_loss:.4f} | Train Acc: {epoch_acc*100:.2f}%")
    return epoch_loss, epoch_acc

In [9]:
# Train for num_epochs passes over the training set.
# The (loss, accuracy) tuple returned by train_one_epoch is not stored here.
for epoch in range(num_epochs):
    print(f"===== Epoch {epoch+1}/{num_epochs} =====")
    train_one_epoch(epoch)

===== Epoch 1/50 =====
  [Batch 200] Loss: 1.5199
  [Batch 400] Loss: 1.1879
  [Batch 600] Loss: 1.3530
Epoch 1 Train Loss: 1.4696 | Train Acc: 47.45%
===== Epoch 2/50 =====
  [Batch 200] Loss: 1.2057
  [Batch 400] Loss: 1.4738
  [Batch 600] Loss: 1.0567
Epoch 2 Train Loss: 1.1666 | Train Acc: 59.22%
===== Epoch 3/50 =====
  [Batch 200] Loss: 0.7569
  [Batch 400] Loss: 0.9955
  [Batch 600] Loss: 0.9997
Epoch 3 Train Loss: 1.0617 | Train Acc: 63.18%
===== Epoch 4/50 =====
  [Batch 200] Loss: 1.1755
  [Batch 400] Loss: 1.0142
  [Batch 600] Loss: 0.9914
Epoch 4 Train Loss: 0.9892 | Train Acc: 65.73%
===== Epoch 5/50 =====
  [Batch 200] Loss: 0.7400
  [Batch 400] Loss: 0.9936
  [Batch 600] Loss: 0.7868
Epoch 5 Train Loss: 0.9410 | Train Acc: 67.47%
===== Epoch 6/50 =====
  [Batch 200] Loss: 0.8945
  [Batch 400] Loss: 0.7878
  [Batch 600] Loss: 0.9196
Epoch 6 Train Loss: 0.9037 | Train Acc: 68.75%
===== Epoch 7/50 =====
  [Batch 200] Loss: 0.9553
  [Batch 400] Loss: 0.8824
  [Batch 600] Los

In [12]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters whose ``requires_grad`` flag is false are skipped.
    """
    trainable_total = 0
    for tensor in model.parameters():
        if not tensor.requires_grad:
            continue
        trainable_total += tensor.numel()
    return trainable_total

# Report how many trainable parameters the current model has
params = count_parameters(model)
print(f"This model has {params:,} trainable parameters")

This model has 22,058 trainable parameters


In [13]:
# ============================================================
# 6. Evaluation on Test Set
# ============================================================

def evaluate(model, loader, class_names=None, device=None):
    """Print overall and per-class accuracy of ``model`` on ``loader``.

    Args:
        model: Module mapping an input batch to scores of shape
            (batch, num_classes). It is switched to eval mode and left there.
        loader: Iterable of ``(inputs, labels)`` batches, where ``labels`` is
            a 1-D integer tensor of class indices below ``len(class_names)``.
        class_names: Display names, one per class, in label-index order.
            Defaults to the notebook-level ``classes``.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter, or the CPU if it has none.

    Returns:
        None. The results are printed.
    """
    if class_names is None:
        class_names = classes
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    num_classes = len(class_names)
    model.eval()

    # Per-class counters stay on `device` so each batch is counted in one
    # vectorized call instead of one Python iteration per sample.
    class_correct = torch.zeros(num_classes, dtype=torch.long, device=device)
    class_total = torch.zeros(num_classes, dtype=torch.long, device=device)

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)

            # Labels of the correctly classified samples only
            hit_labels = labels[predicted.eq(labels)]

            # minlength keeps the histogram length fixed even when some
            # classes (or all samples) are absent from the batch
            class_total += torch.bincount(labels, minlength=num_classes)
            class_correct += torch.bincount(hit_labels, minlength=num_classes)

    class_correct = class_correct.tolist()
    class_total = class_total.tolist()
    total = sum(class_total)
    correct = sum(class_correct)

    # An empty loader has no accuracy to report; avoid dividing by zero
    if total > 0:
        overall_acc = correct / total
        print(f"\nTest Accuracy: {overall_acc * 100:.2f}%")
    else:
        print("\nTest Accuracy: N/A (no samples)")

    print("\nPer-class accuracy:")
    for i, cls in enumerate(class_names):
        if class_total[i] > 0:
            acc = 100.0 * class_correct[i] / class_total[i]
            print(f"  {cls:5s}: {acc:5.2f}%")
        else:
            print(f"  {cls:5s}: N/A")

# Print overall and per-class accuracy of the model on the test loader
evaluate(model, test_loader)



Test Accuracy: 68.31%

Per-class accuracy:
  plane: 68.60%
  car  : 81.00%
  bird : 57.80%
  cat  : 55.40%
  deer : 53.20%
  dog  : 58.90%
  frog : 74.50%
  horse: 74.70%
  ship : 81.10%
  truck: 77.90%


In [15]:
import torch
from torchvision import datasets, transforms

# Step 1: Load the training split without normalization.
# A separate name is used so the normalizing `transform` that the training and
# test datasets were built with is not overwritten when this cell runs.
raw_transform = transforms.ToTensor()  # Only convert to tensor, no normalization

dataset = datasets.CIFAR10(root=SAVE_DIR, train=True, download=True,
                          transform=raw_transform)

loader = torch.utils.data.DataLoader(dataset, batch_size=1000, shuffle=False)

# Step 2: Per-channel mean.
# Pixel sums and counts are accumulated, rather than averaging batch means,
# so the result stays exact even if the last batch is smaller than the others.
# Accumulating in float64 avoids round-off in the large running totals.
channel_sum = torch.zeros(3, dtype=torch.float64)
pixels_per_channel = 0

for images, _ in loader:
    # images shape: (batch_size, 3, 32, 32)
    channel_sum += images.sum(dim=[0, 2, 3]).double()
    pixels_per_channel += images.size(0) * images.size(2) * images.size(3)

mean = (channel_sum / pixels_per_channel).float()

# Step 3: Per-channel standard deviation around the global mean
# (population form: divide by the pixel count, not count - 1).
sq_dev_sum = torch.zeros(3, dtype=torch.float64)

for images, _ in loader:
    sq_dev_sum += ((images - mean.view(1, 3, 1, 1)) ** 2).sum(dim=[0, 2, 3]).double()

std = torch.sqrt(sq_dev_sum / pixels_per_channel).float()

print(f"Mean: {mean}")
print(f"Std: {std}")

Mean: tensor([0.4914, 0.4822, 0.4465])
Std: tensor([0.2470, 0.2435, 0.2616])
