In [12]:
# Mount Google Drive at /content/drive.
# The Colab runtime's local filesystem is reset when the session ends and its
# files are deleted on disconnect, so anything that must persist between
# sessions should be written under the mounted Drive instead.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
import os

# Directory used as the CIFAR-10 dataset root; created up front so later
# cells can read from / download into it without checking for existence.
SAVE_DIR = "/content/drive/MyDrive/cifar10_data/"
os.makedirs(SAVE_DIR, exist_ok=True)

In [14]:
# ============================================================
# 1. Imports & Device
# ============================================================
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18, ResNet18_Weights

from torch.utils.data import DataLoader

# Seed PyTorch's global RNG so that weight initialisation and DataLoader
# shuffling start from the same state on every Restart & Run All.
# NOTE(review): this does not make GPU kernels bit-for-bit deterministic;
# see the PyTorch reproducibility notes if exact repeatability is required.
SEED = 42
torch.manual_seed(SEED)

print("PyTorch version:", torch.__version__)

# Prefer the GPU when the runtime has one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ======

PyTorch version: 2.9.0+cu126
Using device: cuda


In [15]:
# ============================================================
# 2. CIFAR-10 Dataset & DataLoaders
# ============================================================

# --- Transforms ---
# 32×32 pipeline (SimpleCNN & ResNet18_Scratch): tensor conversion followed by
# per-channel normalisation with the mean/std given below.
_normalize_32 = transforms.Normalize(
    mean=(0.4914, 0.4822, 0.4465),
    std=(0.2470, 0.2435, 0.2616),
)
transform_32 = transforms.Compose([transforms.ToTensor(), _normalize_32])

# 224×224 pipeline (pretrained ResNet18): reuse the preprocessing preset that
# ships with the pretrained weights (Resize, CenterCrop, ToTensor, Normalize).
weights = ResNet18_Weights.DEFAULT
imagenet_transforms = weights.transforms()
transform_224 = imagenet_transforms


def _load_cifar10(train, transform, download):
    """Build one CIFAR-10 split rooted at SAVE_DIR with the given transform."""
    return torchvision.datasets.CIFAR10(
        root=SAVE_DIR, train=train, download=download, transform=transform
    )


def _make_loader(dataset, shuffle):
    """Wrap a dataset in a DataLoader using the notebook-wide batch size."""
    return DataLoader(dataset, batch_size=batch_size,
                      shuffle=shuffle, num_workers=2)


# --- Datasets ---
# The 32×32 datasets are created first and may download the archive; the
# 224×224 datasets reuse the same files on disk with a different transform.
train_dataset_32 = _load_cifar10(train=True, transform=transform_32, download=True)
test_dataset_32 = _load_cifar10(train=False, transform=transform_32, download=True)
train_dataset_224 = _load_cifar10(train=True, transform=transform_224, download=False)
test_dataset_224 = _load_cifar10(train=False, transform=transform_224, download=False)

# --- DataLoaders ---
batch_size = 64

# Training loaders shuffle every epoch; test loaders keep a fixed order.
train_loader_32 = _make_loader(train_dataset_32, shuffle=True)
test_loader_32 = _make_loader(test_dataset_32, shuffle=False)
train_loader_224 = _make_loader(train_dataset_224, shuffle=True)
test_loader_224 = _make_loader(test_dataset_224, shuffle=False)

# Human-readable class names, indexed by label id.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

print(f"Training dataset size (32): {len(train_dataset_32)}")
print(f"Test dataset size (32): {len(test_dataset_32)}")

Training dataset size (32): 50000
Test dataset size (32): 10000


In [16]:
from torchvision.models import resnet18, ResNet18_Weights

In [17]:
# ============================================================
# 3. Models: SimpleCNN, ResNet18 for CIFAR-10, and Pretrained ResNet18
# ============================================================

class SimpleCNN(nn.Module):
    """Small two-convolution baseline classifier for 3×32×32 images.

    Layout: conv 5×5 -> ReLU -> max-pool 2×2 -> conv 5×5 -> ReLU ->
    max-pool 2×2 -> flatten -> linear.

    The linear layer's input size is fixed at 32*5*5, which matches 32×32
    inputs only; other spatial sizes will fail at the final layer.

    Args:
        num_classes: Number of output logits. Defaults to 10 (CIFAR-10), so
            ``SimpleCNN()`` builds the same network as before.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Conv1: (3, 32, 32) -> (16, 28, 28)
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=16,
            kernel_size=5, stride=1, padding=0
        )

        # Conv2: (16, 14, 14) -> (32, 10, 10)
        self.conv2 = nn.Conv2d(
            in_channels=16, out_channels=32,
            kernel_size=5, stride=1, padding=0
        )

        # Max pooling (2×2), shared by both convolution stages
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # FC: 32*5*5 -> num_classes
        self.fc = nn.Linear(32 * 5 * 5, num_classes)

    def forward(self, x):
        """Map a (batch, 3, 32, 32) tensor to (batch, num_classes) logits."""
        # Conv1 + ReLU + Pool: (3,32,32) -> (16,14,14)
        x = self.pool(torch.relu(self.conv1(x)))
        # Conv2 + ReLU + Pool: (16,14,14) -> (32,5,5)
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension: (batch, 32*5*5)
        x = torch.flatten(x, start_dim=1)
        # Raw logits; no softmax is applied here
        return self.fc(x)


def create_resnet18_for_cifar10(num_classes=10):
    """
    Build an untrained ResNet18 adapted to 32x32 inputs.

    Changes relative to the stock torchvision model:
      - the stem convolution becomes 3x3, stride 1, padding 1
      - the stem max-pool is replaced by an identity (no downsampling)
      - the classifier head outputs `num_classes` logits

    No pretrained weights are loaded (weights=None), so the model trains
    from scratch.
    """
    model = resnet18(weights=None)

    # Small-image stem: keep the full 32x32 resolution going into layer1.
    model.conv1 = nn.Conv2d(
        in_channels=3, out_channels=64,
        kernel_size=3, stride=1, padding=1, bias=False,
    )
    # The images are already tiny, so skip the pooling step entirely.
    model.maxpool = nn.Identity()

    # New classification head sized for the target dataset.
    feature_dim = model.fc.in_features
    model.fc = nn.Linear(feature_dim, num_classes)
    return model


def create_resnet18_pretrained_imagenet(num_classes=10, freeze_backbone=False):
    """
    ResNet18 with ImageNet pretrained weights and a fresh classifier head.
    Expects 224x224 inputs with ImageNet-style transforms.

    Args:
        num_classes: Number of output logits of the replacement `fc` layer.
        freeze_backbone: When True, every pretrained parameter has
            `requires_grad` set to False so only the new `fc` layer is
            trainable. When False (default) the whole network is fine-tuned.

    Returns:
        The modified torchvision ResNet18 module.
    """
    net = resnet18(weights=ResNet18_Weights.DEFAULT)

    if freeze_backbone:
        # Freeze before swapping the head so the new layer (created below)
        # keeps its default requires_grad=True.
        # NOTE(review): BatchNorm running statistics are buffers, not
        # parameters, so they presumably still update in train() mode.
        for param in net.parameters():
            param.requires_grad = False

    # Replace final FC layer for CIFAR-10
    in_features = net.fc.in_features
    net.fc = nn.Linear(in_features, num_classes)

    return net


# Instantiate the three models under comparison, in a fixed order, and move
# each one to the selected device right after construction.
_model_factories = {
    "SimpleCNN": SimpleCNN,
    "ResNet18_Scratch_32": create_resnet18_for_cifar10,
    "ResNet18_Pretrained_224": create_resnet18_pretrained_imagenet,
}
models_dict = {
    label: factory().to(device) for label, factory in _model_factories.items()
}

# Dump each architecture for reference.
for name, m in models_dict.items():
    print(f"\n{name} architecture:", m, sep="\n")



SimpleCNN architecture:
SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=800, out_features=10, bias=True)
)

ResNet18_Scratch_32 architecture:
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,

In [26]:
# ============================================================
# 4. Training Helper
# ============================================================

def train_model(model, optimizer, criterion,
                train_loader, val_loader,
                epochs=5, device="cuda"):
    """Train `model` for `epochs` passes and evaluate after every epoch.

    Args:
        model: Module to optimise; it is updated in place.
        optimizer: Optimizer already bound to `model`'s parameters.
        criterion: Callable `criterion(outputs, labels)` returning a scalar loss.
        train_loader: Iterable of `(images, labels)` batches. It does not need
            to implement `__len__`; batches are counted while iterating.
        val_loader: Iterable passed to `evaluate_model` after each epoch.
        epochs: Number of passes over `train_loader`.
        device: Device that each batch is moved to; the model must already
            live on the same device.

    Returns:
        dict with two lists of length `epochs`: "train_loss" (mean of the
        per-batch loss values) and "val_acc" (result of `evaluate_model`).

    Raises:
        ValueError: If `train_loader` yields no batches in an epoch.

    Relies on `evaluate_model` being defined at module level before this
    function is called.
    """
    history = {"train_loss": [], "val_acc": []}

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()              # clear gradients left from the previous batch
            outputs = model(images)            # forward pass
            loss = criterion(outputs, labels)  # loss against the true labels
            loss.backward()                    # backpropagate
            optimizer.step()                   # update parameters

            running_loss += loss.item()
            num_batches += 1

        # Counting batches (instead of len(train_loader)) supports loaders
        # without __len__ and lets an empty loader fail with a clear message
        # rather than a ZeroDivisionError.
        if num_batches == 0:
            raise ValueError(
                "train_loader yielded no batches; cannot compute an average training loss"
            )

        avg_loss = running_loss / num_batches
        history["train_loss"].append(avg_loss)

        # ----- Validation phase ----- (evaluation during training)
        val_acc = evaluate_model(model, val_loader, device=device)
        history["val_acc"].append(val_acc)

        print(f"Epoch {epoch+1}/{epochs}  "
              f"Loss: {avg_loss:.4f}  Val Acc: {val_acc:.4f}")

    return history

In [27]:
# ============================================================
# 5. Evaluation Helper
# ============================================================
def evaluate_model(model, loader, device="cuda"):
    """Return the fraction of samples in `loader` that `model` classifies correctly.

    The model is switched to eval mode and left in that mode. Each batch is
    moved to `device`, and the predicted class is the index of the largest
    output along dimension 1.
    """
    model.eval()
    num_correct = 0
    num_seen = 0

    # Gradients are not needed for scoring, so disable tracking.
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            predicted = logits.argmax(dim=1)

            num_correct += predicted.eq(batch_labels).sum().item()
            num_seen += batch_labels.size(0)

    return num_correct / num_seen

In [28]:
# ============================================================
# 6. Train all three models
# ============================================================

def _loaders_for(model_name):
    """Return (train_loader, eval_loader) matching the model's input size."""
    if model_name == "ResNet18_Pretrained_224":
        return train_loader_224, test_loader_224
    return train_loader_32, test_loader_32


# NOTE(review): the models in models_dict are optimised in place, so running
# this cell again continues training from the current weights rather than
# starting over. Re-create models_dict first for a clean 5-epoch run.
# NOTE(review): the loader used for per-epoch "validation" is the test split.
results = {}

for model_name, model in models_dict.items():
    banner = "=" * 70
    print("\n" + banner)
    print(f"Training model: {model_name}")
    print(banner)

    train_loader, val_loader = _loaders_for(model_name)

    # Every model gets a fresh loss object and its own Adam optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    history = train_model(
        model, optimizer, criterion,
        train_loader, val_loader,
        epochs=5, device=device,
    )
    results[model_name] = history


# After training: report final test accuracy for each model
print("\nFinal test accuracies:")
for model_name, model in models_dict.items():
    _, test_loader = _loaders_for(model_name)

    print("Model:", model_name)
    acc = evaluate_model(model, test_loader, device=device)
    print(f"{model_name}: {acc:.4f}")



Training model: SimpleCNN
Epoch 1/5  Loss: 0.7619  Val Acc: 0.6864
Epoch 2/5  Loss: 0.7458  Val Acc: 0.6873
Epoch 3/5  Loss: 0.7251  Val Acc: 0.6781
Epoch 4/5  Loss: 0.7156  Val Acc: 0.6943
Epoch 5/5  Loss: 0.7060  Val Acc: 0.6953

Training model: ResNet18_Scratch_32
Epoch 1/5  Loss: 0.0778  Val Acc: 0.8352
Epoch 2/5  Loss: 0.0562  Val Acc: 0.8296
Epoch 3/5  Loss: 0.0566  Val Acc: 0.8374
Epoch 4/5  Loss: 0.0446  Val Acc: 0.8316
Epoch 5/5  Loss: 0.0473  Val Acc: 0.8488

Training model: ResNet18_Pretrained_224
Epoch 1/5  Loss: 0.1216  Val Acc: 0.8833
Epoch 2/5  Loss: 0.0840  Val Acc: 0.8930
Epoch 3/5  Loss: 0.0733  Val Acc: 0.8953
Epoch 4/5  Loss: 0.0660  Val Acc: 0.8972
Epoch 5/5  Loss: 0.0496  Val Acc: 0.9035

Final test accuracies:
Model: SimpleCNN
SimpleCNN: 0.6953
Model: ResNet18_Scratch_32
ResNet18_Scratch_32: 0.8488
Model: ResNet18_Pretrained_224
ResNet18_Pretrained_224: 0.9035


In [31]:
# Count trainable parameters
def count_parameters(model):
    """Return the total number of elements across parameters with requires_grad=True."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


In [32]:
# Report: one row per model, name padded to 30 characters, count with
# thousands separators.
header = "\nTrainable Parameters Per Model"
rule = "=" * 40
print(header)
print(rule)

for name, model in models_dict.items():
    params = count_parameters(model)
    row = f"{name:30s} {params:,}"
    print(row)



Trainable Parameters Per Model
SimpleCNN                      22,058
ResNet18_Scratch_32            11,173,962
ResNet18_Pretrained_224        11,181,642
