**Configuration**
- Model: `ResNet`
- Dataset: `Tiny ImageNet`

In [1]:
!pip install -qU torch torchvision tdqm accelerate datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.1/779.1 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m90.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.4/302.4 kB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m46.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.2/176.2 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.1/168.1 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31

In [2]:
import torch
from datasets import load_dataset
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from torch import nn
from torchvision import models
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import datetime
from tqdm import tqdm
from torchvision.transforms import ToPILImage, ConvertImageDtype

In [3]:
# Hyperparameters
LEARN_RATE = 0.001
NUM_EPOCHS = 5 # Much more than this isn't feasible w/o better hardware

In [4]:
# TensorBoard setup
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
writer = SummaryWriter(f"runs/part_a2_{current_time}")

### Step 1. Load and Transform Dataset

In [5]:
transform = transforms.Compose([
    transforms.Lambda(lambda x: x.convert("RGB")),  # Forcibly convert images to RGB
    transforms.Resize((224, 224)),  # Resize images to 224x224 for VGG19
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [6]:
# Load dataset
dataset = load_dataset("zh-plus/tiny-imagenet")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/3.90k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/3.52k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/146M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/14.6M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [7]:
# Dataset wrapper to make the dataset compatible with PyTorch
class HDFDataset(Dataset):
    def __init__(self, dataset, split, transform=None):
        self.dataset = dataset[split]
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        image = item['image']
        label = item['label']

        if self.transform:
            image = self.transform(image)

        return image, label

In [17]:
train_dataset = HDFDataset(dataset, "train", transform=transform)
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = HDFDataset(dataset, "valid", transform=transform)
testloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

### Step 2. Setup the Model

In [9]:
resnet = models.resnet50(pretrained=True)
resnet

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 205MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [10]:
# Freeze the features part
for param in resnet.parameters():
    param.requires_grad = False

In [11]:
# Modify the classifier part
resnet.fc = nn.Linear(2048, 200) # Tiny has 200 classes

In [12]:
# Make sure we're using GPU
device = torch.device(
    "mps"  # for macOS
    if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available() else "cpu"
)
resnet = resnet.to(device)


device

device(type='cuda')

### Step 3. Train

In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = Adam(resnet.parameters(), lr=LEARN_RATE)

In [16]:
def train_model(model, criterion, optimizer, trainloader, num_epochs, device, writer):
    model.train()
    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        running_loss = 0.0
        correct = 0
        total = 0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if i % 100 == 99:
                print(f'Batch {i + 1} loss {running_loss / 100} accuracy {correct / total}')
                running_loss = 0.0
                correct = 0
                total = 0

    print('Finished Training')

train_model(resnet, criterion, optimizer, trainloader, NUM_EPOCHS, device, writer)

Epoch 1/5
Batch 100 loss 5.1307581472396855 accuracy 0.0721875
Batch 200 loss 3.609424171447754 accuracy 0.246875
Batch 300 loss 2.862915503978729 accuracy 0.3765625
Batch 400 loss 2.5843717396259307 accuracy 0.423125
Batch 500 loss 2.4267102169990538 accuracy 0.44875
Batch 600 loss 2.2464229130744933 accuracy 0.4725
Batch 700 loss 2.2385704278945924 accuracy 0.4740625
Batch 800 loss 2.1746776378154755 accuracy 0.485625
Batch 900 loss 2.0422186386585235 accuracy 0.5171875
Batch 1000 loss 2.027884657382965 accuracy 0.5171875
Batch 1100 loss 2.068278428316116 accuracy 0.5025
Batch 1200 loss 2.039944714307785 accuracy 0.5190625
Batch 1300 loss 2.0415841591358186 accuracy 0.513125
Batch 1400 loss 1.9577576899528504 accuracy 0.5259375
Batch 1500 loss 2.034742678999901 accuracy 0.52375
Batch 1600 loss 1.9963741314411163 accuracy 0.53
Batch 1700 loss 1.9098365235328674 accuracy 0.5428125
Batch 1800 loss 1.991488628387451 accuracy 0.5325
Batch 1900 loss 1.8571053290367125 accuracy 0.5584375
Ba

In [18]:
def evaluate_model(model, criterion, testloader, device):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_loss /= len(testloader)
    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the test images: {accuracy} %')
    return test_loss, accuracy

evaluate_model(resnet, criterion, testloader, device)

Accuracy of the network on the test images: 60.16 %


(1.827039578447517, 60.16)

In [None]:
# Save the model checkpoint
torch.save(resnet.state_dict(), 'part_a2_vgg19_tiny_imagenet_model.pth')