**Configuration**
- Model: `Custom`
- Dataset: `CIFAR100`

In [1]:
!pip install -qU torch torchvision tdqm accelerate

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from torch import nn
from torchvision import models
from torch.utils.data import DataLoader
from torch.optim import Adam
import datetime
from tqdm import tqdm

In [3]:
# Hyperparameters
LEARN_RATE = 0.001
NUM_EPOCHS = 5 # Much more than this isn't feasible w/o better hardware

In [4]:
# TensorBoard setup
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
writer = SummaryWriter(f"runs/part_a2_{current_time}")

### Step 1. Load and Transform Dataset

In [5]:
transform = transforms.Compose([
    transforms.Resize(224),  # Resize images to 224x224 for VGG19
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [6]:
trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:06<00:00, 24488503.97it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data


In [7]:
testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=32, shuffle=False)

Files already downloaded and verified


### Step 2. Setup the Model

In [12]:
# Load the custom CNN checkpoint file
fp = '../part-a1/a1_model.pth'
model = torch.load(fp)
model.values()

odict_values([tensor([[[[-2.0290e-01,  1.7223e-01,  1.5180e-01],
          [ 1.9915e-01,  1.2849e-01,  1.1465e-01],
          [ 1.4957e-03, -1.8137e-01, -1.7823e-01]],

         [[-1.9633e-01,  3.0216e-02,  1.7089e-01],
          [-3.0560e-01, -9.2878e-02,  1.2916e-01],
          [-1.2268e-01, -1.1305e-01,  1.6237e-02]],

         [[-2.6653e-01,  1.8940e-01,  9.8983e-02],
          [-8.1535e-02,  1.0475e-01,  4.3740e-02],
          [-5.5085e-02, -1.3659e-02,  2.8113e-01]]],


        [[[ 2.2881e-01,  2.5824e-01,  1.8466e-01],
          [ 2.2774e-01,  1.4768e-01, -1.7847e-01],
          [ 9.7916e-03,  1.1156e-01, -3.6046e-01]],

         [[ 1.0318e-01, -1.0701e-01,  1.7691e-01],
          [ 6.0102e-02,  8.0882e-02,  5.7220e-03],
          [-2.3493e-01, -3.7935e-02, -3.7562e-01]],

         [[ 3.2029e-02, -1.3108e-01,  3.0833e-01],
          [-1.8796e-01,  1.2265e-01, -1.3713e-02],
          [-1.8799e-01, -9.8174e-02, -2.0062e-01]]],


        [[[-2.6644e-01,  1.5288e-01,  7.0215e-02],
 

In [9]:
# # Freeze the features part
# for param in model.features.parameters():
#     param.requires_grad = False

In [15]:
# Add the final layer
model = model + nn.Linear(1000, 100)

TypeError: unsupported operand type(s) for +: 'collections.OrderedDict' and 'Linear'

In [11]:
# Make sure we're using GPU
device = torch.device(
    "mps"  # for macOS
    if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available() else "cpu"
)
vgg19 = vgg19.to(device)


device

device(type='cuda')

### Step 3. Train

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = Adam(vgg19.classifier.parameters(), lr=LEARN_RATE)  # Only train the classifier parameters

In [13]:
def train_model(model, criterion, optimizer, trainloader, num_epochs, device, writer):
    model.train()
    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        running_loss = 0.0
        correct = 0
        total = 0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if i % 100 == 99:
                print(f'Batch {i + 1} loss {running_loss / 100} accuracy {correct / total}')
                running_loss = 0.0
                correct = 0
                total = 0

    print('Finished Training')

train_model(vgg19, criterion, optimizer, trainloader, NUM_EPOCHS, device, writer)

Epoch 1/5
Batch 100 loss 4.044419877529144 accuracy 0.1046875
Batch 200 loss 3.1314492440223693 accuracy 0.2421875
Batch 300 loss 3.050539536476135 accuracy 0.2671875
Batch 400 loss 3.0528750085830687 accuracy 0.2765625
Batch 500 loss 3.0478632283210754 accuracy 0.2928125
Batch 600 loss 3.094944188594818 accuracy 0.29375
Batch 700 loss 3.0456761503219605 accuracy 0.28625
Batch 800 loss 2.909969925880432 accuracy 0.3171875
Batch 900 loss 2.8132975721359252 accuracy 0.334375
Batch 1000 loss 2.806931126117706 accuracy 0.33875
Batch 1100 loss 2.7887758576869963 accuracy 0.338125
Batch 1200 loss 2.8247710263729093 accuracy 0.341875
Batch 1300 loss 2.787786042690277 accuracy 0.34125
Batch 1400 loss 2.800323547124863 accuracy 0.33625
Batch 1500 loss 2.7855466341972352 accuracy 0.33375
Epoch 2/5
Batch 100 loss 2.5533981490135194 accuracy 0.3896875
Batch 200 loss 2.5564373564720153 accuracy 0.3771875
Batch 300 loss 2.5026396179199217 accuracy 0.3940625
Batch 400 loss 2.6352894508838656 accuracy

In [14]:
def evaluate_model(model, criterion, testloader, device):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_loss /= len(testloader)
    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the test images: {accuracy} %')
    return test_loss, accuracy

evaluate_model(vgg19, criterion, testloader, device)

Accuracy of the network on the test images: 49.81 %


(1.9452560488789226, 49.81)

In [15]:
# Save the model checkpoint
torch.save(vgg19.state_dict(), 'part_a2_vgg19_cifar100_model.pth')