**Configuration**
- Model: `VGG19`
- Dataset: `Tiny ImageNet`

In [1]:
!pip install -qU torch torchvision tdqm accelerate datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.1/779.1 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m106.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.4/302.4 kB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m57.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.2/176.2 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.1/168.1 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [3

In [2]:
import torch
from datasets import load_dataset
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from torch import nn
from torchvision import models
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import datetime
from tqdm import tqdm
from torchvision.transforms import ToPILImage, ConvertImageDtype

In [3]:
# Hyperparameters (tunable knobs for the run)
LEARN_RATE = 1e-3  # step size handed to the optimizer
NUM_EPOCHS = 5  # Much more than this isn't feasible w/o better hardware

In [4]:
# TensorBoard setup: each run gets its own timestamped log directory under runs/
current_time = format(datetime.datetime.now(), "%Y%m%d-%H%M%S")
writer = SummaryWriter(log_dir=f"runs/part_a2_{current_time}")

### Step 1. Load and Transform Dataset

In [5]:
# Preprocessing applied to every image before it reaches the network.
transform = transforms.Compose(
    [
        # Forcibly convert images to RGB so every sample has three channels
        transforms.Lambda(lambda img: img.convert("RGB")),
        # Resize images to 224x224 for VGG19
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # Per-channel normalization (mean/std values torchvision documents for
        # its ImageNet-pretrained models)
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [6]:
# Fetch Tiny ImageNet from the Hugging Face Hub (all available splits)
dataset = load_dataset(path="zh-plus/tiny-imagenet")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/3.90k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/3.52k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/146M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/14.6M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [7]:
# Dataset wrapper to make the dataset compatible with PyTorch
class HDFDataset(Dataset):
    """Expose one split of a split-indexed dataset through the torch ``Dataset`` API.

    Args:
        dataset: Object indexable by split name (``dataset[split]``); each
            element of the selected split must provide ``'image'`` and
            ``'label'`` keys.
        split: Name of the split to wrap.
        transform: Optional callable applied to the image of every sample.
    """

    def __init__(self, dataset, split, transform=None):
        self.dataset = dataset[split]
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the wrapped split."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``, transforming the image if configured."""
        record = self.dataset[idx]
        image, label = record['image'], record['label']

        # A missing (falsy) transform means the raw image is returned untouched.
        if not self.transform:
            return image, label
        return self.transform(image), label

In [17]:
# Training split: wrapped for PyTorch and served in shuffled mini-batches
train_dataset = HDFDataset(dataset, split="train", transform=transform)
trainloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# "valid" split: used as the held-out test set, served in its stored order
test_dataset = HDFDataset(dataset, split="valid", transform=transform)
testloader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

### Step 2. Set Up the Model

In [9]:
# Load VGG19 with its ImageNet-pretrained weights. The explicit `weights=` enum
# replaces the `pretrained=True` flag, which torchvision has deprecated;
# IMAGENET1K_V1 is the checkpoint that flag used to select.
vgg19 = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)
vgg19

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:02<00:00, 201MB/s]


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [10]:
# Freeze the features part: no gradients are computed for these parameters
for param in vgg19.features.parameters():
    param.requires_grad_(False)

In [11]:
# Swap the last classifier layer for a 200-way output (Tiny ImageNet has 200 classes)
vgg19.classifier[6] = nn.Linear(in_features=4096, out_features=200)

In [12]:
# Make sure we're using an accelerator when one exists:
# MPS (macOS) is checked first, then CUDA, with CPU as the fallback.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

vgg19 = vgg19.to(device)

# Show which device was selected
device

device(type='cuda')

### Step 3. Train

In [13]:
# Cross-entropy loss for the classification objective
criterion = nn.CrossEntropyLoss()
# Only train the classifier parameters: they are the only ones handed to Adam
optimizer = Adam(params=vgg19.classifier.parameters(), lr=LEARN_RATE)

In [14]:
def train_model(model, criterion, optimizer, trainloader, num_epochs, device, writer, log_every=100):
    """Train ``model`` for ``num_epochs`` passes over ``trainloader``.

    Every ``log_every`` batches the mean loss and the accuracy over that window
    are printed and, when a writer is supplied, recorded as the scalars
    ``train/loss`` and ``train/accuracy``. The window counters are then reset;
    they are also reset at the start of each epoch.

    Args:
        model: Module to optimize; switched to training mode.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the updates.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        num_epochs: Number of passes over ``trainloader``.
        device: Device the batches are moved to before the forward pass.
        writer: Object with ``add_scalar(tag, value, step)`` and ``flush()``
            (e.g. a TensorBoard ``SummaryWriter``), or ``None`` to skip
            scalar logging.
        log_every: Size, in batches, of the reporting window (default 100).

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError(f"log_every must be >= 1, got {log_every}")

    model.train()
    global_step = 0  # batches processed across all epochs; x-axis for the scalars
    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        running_loss = 0.0
        correct = 0
        total = 0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            global_step += 1

            running_loss += loss.item()
            # detach() keeps the prediction bookkeeping out of the autograd graph
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if (i + 1) % log_every == 0:
                window_loss = running_loss / log_every
                window_accuracy = correct / total
                print(f'Batch {i + 1} loss {window_loss} accuracy {window_accuracy}')
                if writer is not None:
                    writer.add_scalar("train/loss", window_loss, global_step)
                    writer.add_scalar("train/accuracy", window_accuracy, global_step)
                running_loss = 0.0
                correct = 0
                total = 0

    if writer is not None:
        # Push buffered scalars out so they are visible once training returns.
        writer.flush()
    print('Finished Training')

# Kick off training with the objects configured in the cells above
train_model(
    model=vgg19,
    criterion=criterion,
    optimizer=optimizer,
    trainloader=trainloader,
    num_epochs=NUM_EPOCHS,
    device=device,
    writer=writer,
)

Epoch 1/5
Batch 100 loss 5.2797394418716435 accuracy 0.02125
Batch 200 loss 4.765668776035309 accuracy 0.059375
Batch 300 loss 4.6066068840026855 accuracy 0.0859375
Batch 400 loss 4.438774464130401 accuracy 0.09375
Batch 500 loss 4.434711554050446 accuracy 0.093125
Batch 600 loss 4.275138933658599 accuracy 0.1090625
Batch 700 loss 4.279490172863007 accuracy 0.1196875
Batch 800 loss 4.1705221891403195 accuracy 0.1240625
Batch 900 loss 4.143067836761475 accuracy 0.128125
Batch 1000 loss 4.107845695018768 accuracy 0.1403125
Batch 1100 loss 4.044803915023803 accuracy 0.141875
Batch 1200 loss 4.123792860507965 accuracy 0.1428125
Batch 1300 loss 3.970970766544342 accuracy 0.1540625
Batch 1400 loss 4.036695141792297 accuracy 0.1571875
Batch 1500 loss 4.028885328769684 accuracy 0.15
Batch 1600 loss 4.01763551235199 accuracy 0.1521875
Batch 1700 loss 3.9774263215065004 accuracy 0.165625
Batch 1800 loss 3.931585879325867 accuracy 0.1609375
Batch 1900 loss 3.8521954298019407 accuracy 0.17375
Batc

In [18]:
def evaluate_model(model, criterion, testloader, device):
    """Score ``model`` on ``testloader`` without computing gradients.

    Args:
        model: Module to evaluate; switched to eval mode.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        testloader: Sized iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(test_loss, accuracy)``: the per-batch losses averaged over the number
        of batches, and the percentage of samples whose highest-scoring output
        matches the label. The accuracy is also printed.
    """
    model.eval()
    loss_sum, hits, seen = 0, 0, 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            loss_sum += criterion(logits, labels).item()

            # Index of the largest output along dim 1 is the predicted class
            predicted = torch.max(logits.data, 1)[1]
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()

    test_loss = loss_sum / len(testloader)
    accuracy = 100 * hits / seen
    print(f'Accuracy of the network on the test images: {accuracy} %')
    return test_loss, accuracy

# Evaluate on the held-out loader; the returned (loss, accuracy) pair is the cell output
evaluate_model(
    model=vgg19,
    criterion=criterion,
    testloader=testloader,
    device=device,
)

Accuracy of the network on the test images: 25.67 %


(3.273022875618249, 25.67)

In [None]:
# Save the model checkpoint (state dict only, i.e. parameters and buffers)
torch.save(obj=vgg19.state_dict(), f='part_a2_vgg19_tiny_imagenet_model.pth')