# Utils

In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torchinfo import summary

# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [2]:
# Human-readable names for the ten classes, handed to `test_class` below.
# Presumably listed in the dataset's label-index order (0..9) — TODO confirm.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
from lab1_utils import train_loop, train, test, get_lr
from lab1_utils import multiple_diagnostic, test_class

In [15]:
def count_trainable_parameters(model):
    """Return the number of scalar values held in ``model``'s trainable parameters.

    Iterates over ``model.parameters()`` and adds up ``numel()`` for every
    parameter whose ``requires_grad`` flag is truthy; frozen parameters are
    skipped. A model with no trainable parameters yields ``0``.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [18]:
# Batch size used by the three loaders built in this cell. It is assigned here
# because this cell sits before the hyper-parameter cell in the notebook, so a
# fresh "Restart & Run All" would otherwise fail with a NameError.
batch_size = 64

# Per-channel mean / std handed to Normalize in both pipelines.
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random crop + horizontal flip as augmentation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  # some augmentation
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Evaluation pipeline: deterministic, no augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                       download=True, transform=transform_train)
# Second view of the same training images, but with the evaluation transform.
# The validation subset is taken from this view so that validation metrics are
# not perturbed by random crops / flips. (Files were fetched by the call above,
# hence download=False.)
dataset_eval = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=False, transform=transform_test)

# create a split for train/validation. We can use early stop
# The permutation is drawn from a seeded generator so the 40000/10000 split is
# the same on every run, and the two subsets are disjoint by construction.
n_val = 10000
split_generator = torch.Generator().manual_seed(0)
shuffled_indices = torch.randperm(len(dataset), generator=split_generator).tolist()
trainset = torch.utils.data.Subset(dataset, shuffled_indices[:-n_val])
valset = torch.utils.data.Subset(dataset_eval, shuffled_indices[-n_val:])

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2,
                                          drop_last=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size,
                                        shuffle=False, num_workers=2,
                                        drop_last=False)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2,
                                         drop_last=False)

Files already downloaded and verified
Files already downloaded and verified


In [21]:
# Shared training hyper-parameters.
max_epochs = 10   # forwarded to train_loop as `max_epochs`
lr = 1e-2         # NOTE(review): the visible train_loop calls pass lr=0.001 literally, not this value
batch_size = 64   # mini-batch size read when the DataLoaders are constructed

# Exercise 1

Scegliamo di usare `resnet18`

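Due prove di fine-tuning:
- `resnet18_1`: sostituire il layer finale di classificazione con un nuovo `nn.Linear` a 10 uscite, partendo dai pesi pre-addestrati del modello
- `resnet18_2`: mettere in coda un MLP a 10 uscite al posto del layer finale di classificazione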

### Classifier layer

In [26]:
# Load ResNet-18 with the pretrained weights selected by weights="DEFAULT".
resnet18_1 = models.resnet18(weights="DEFAULT")

# Replace the classifier with a fresh 10-way linear layer (random weights),
# keeping the input width of the layer it replaces.
n_features = resnet18_1.fc.in_features
resnet18_1.fc = nn.Linear(n_features, 10)

# Alternative kept for reference: seed the new layer with the first 10 rows of
# the original classifier's weight matrix instead of random values.
# im_weights = resnet18_1.fc.weight[:10]
# resnet18_1.fc = nn.Linear(resnet18_1.fc.in_features, 10)
# resnet18_1.fc.weight.data = im_weights.data

# Move the model to the selected device.
resnet18_1 = resnet18_1.to(device)

# Debug helpers:
# print(resnet18_1)
# print(resnet18_1.fc.weight.data)

In [27]:
# Freeze every parameter of the network ...
resnet18_1.requires_grad_(False)

# ... then re-enable gradients on the classification head only (weight + bias
# of `fc`), so that it is the only part left trainable.
resnet18_1.fc.requires_grad_(True)

# Cell output: how many parameters are still trainable.
count_trainable_parameters(resnet18_1)
# print(summary(resnet18))

5130

In [24]:
# start with resnet18 classifier's weights
# NOTE(review): the execution count suggests this cell was run before the model
# cell was last edited; presumably its recorded output belongs to the
# "classifier weights" initialisation that is now commented out — confirm.
# It calls train_loop on the same `resnet18_1` object as the identical cell
# further down.

criterion = nn.CrossEntropyLoss()

# Keyword arguments forwarded to train_loop.
resnet18_1_dict = {
    "model": resnet18_1,
    "criterion": criterion,
    "device": device,
    "lr": 0.001,
    "momentum": 0.9,
    "max_epochs": max_epochs,
    "do_test": True,
}

stats = train_loop(trainloader, valloader, **resnet18_1_dict)

# Per-class report, computed on the validation loader.
print("=========")
test_class(resnet18_1, device, criterion, valloader, classes)

Epoch: 1, Learning rate: 0.001000
Training - Loss: 2.5263, Accuracy: 0.26, Runtime: 11.23
Test - Loss: 1.9734, Accuracy: 0.35
Epoch: 2, Learning rate: 0.001000
Training - Loss: 1.8313, Accuracy: 0.37, Runtime: 10.94
Test - Loss: 1.7972, Accuracy: 0.38
Epoch: 3, Learning rate: 0.001000
Training - Loss: 1.7486, Accuracy: 0.39, Runtime: 11.21
Test - Loss: 1.7323, Accuracy: 0.40
Epoch: 4, Learning rate: 0.001000
Training - Loss: 1.7052, Accuracy: 0.40, Runtime: 11.31
Test - Loss: 1.7274, Accuracy: 0.39
Epoch: 5, Learning rate: 0.001000
Training - Loss: 1.6992, Accuracy: 0.40, Runtime: 11.27
Test - Loss: 1.7056, Accuracy: 0.41
Epoch: 6, Learning rate: 0.001000
Training - Loss: 1.6820, Accuracy: 0.41, Runtime: 11.34
Test - Loss: 1.7170, Accuracy: 0.40
Epoch: 7, Learning rate: 0.001000
Training - Loss: 1.6842, Accuracy: 0.41, Runtime: 11.28
Test - Loss: 1.7165, Accuracy: 0.40
Epoch: 8, Learning rate: 0.001000
Training - Loss: 1.6737, Accuracy: 0.41, Runtime: 11.76
Test - Loss: 1.6782, Accurac

In [28]:
# Train only the new classification head of `resnet18_1`.
criterion = nn.CrossEntropyLoss()

# Keyword arguments forwarded to train_loop.
resnet18_1_dict = {
    "model": resnet18_1,
    "criterion": criterion,
    "device": device,
    "lr": 0.001,
    "momentum": 0.9,
    "max_epochs": max_epochs,
    "do_test": True,
}

stats = train_loop(trainloader, valloader, **resnet18_1_dict)

# Per-class report, computed on the validation loader.
print("=========")
test_class(resnet18_1, device, criterion, valloader, classes)

Epoch: 1, Learning rate: 0.001000
Training - Loss: 1.8957, Accuracy: 0.33, Runtime: 10.94
Test - Loss: 1.7687, Accuracy: 0.39
Epoch: 2, Learning rate: 0.001000
Training - Loss: 1.7259, Accuracy: 0.40, Runtime: 10.95
Test - Loss: 1.7212, Accuracy: 0.41
Epoch: 3, Learning rate: 0.001000
Training - Loss: 1.6999, Accuracy: 0.40, Runtime: 10.86
Test - Loss: 1.7111, Accuracy: 0.40
Epoch: 4, Learning rate: 0.001000
Training - Loss: 1.6926, Accuracy: 0.41, Runtime: 10.84
Test - Loss: 1.6973, Accuracy: 0.41
Epoch: 5, Learning rate: 0.001000
Training - Loss: 1.6823, Accuracy: 0.41, Runtime: 10.91
Test - Loss: 1.7063, Accuracy: 0.41
Epoch: 6, Learning rate: 0.001000
Training - Loss: 1.6845, Accuracy: 0.41, Runtime: 11.31
Test - Loss: 1.6755, Accuracy: 0.42
Epoch: 7, Learning rate: 0.001000
Training - Loss: 1.6744, Accuracy: 0.41, Runtime: 11.30
Test - Loss: 1.6951, Accuracy: 0.41
Epoch: 8, Learning rate: 0.001000
Training - Loss: 1.6719, Accuracy: 0.41, Runtime: 11.34
Test - Loss: 1.6940, Accurac

### Add an MLP head

In [38]:
# Load ResNet-18 with the pretrained weights selected by weights="DEFAULT".
resnet18_2 = models.resnet18(weights="DEFAULT")

# Read the head's input width from the layer being replaced rather than
# hard-coding it, the same way the single-layer variant `resnet18_1` does.
head_in_features = resnet18_2.fc.in_features

# Replace the classifier with a three-layer MLP ending in 10 logits.
resnet18_2.fc = nn.Sequential(
    nn.Linear(head_in_features, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 10)  # logits
)
resnet18_2 = resnet18_2.to(device)
print(resnet18_2)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [37]:
# Hand-computed size of the MLP head: for each Linear layer, weights
# (inputs * outputs) plus biases (outputs).
sum(n_in * n_out + n_out for n_in, n_out in ((512, 256), (256, 128), (128, 10)))

165514

In [39]:
# Freeze every parameter of the network ...
resnet18_2.requires_grad_(False)

# ... then re-enable gradients on the whole MLP head. Only its Linear layers
# (indices 0, 2 and 4) carry a weight and a bias; they keep the random
# initialisation they were created with.
resnet18_2.fc.requires_grad_(True)

# Cell output: how many parameters are still trainable.
count_trainable_parameters(resnet18_2)
# print(summary(resnet18))

165514

In [40]:
# Train only the MLP head of `resnet18_2`.
criterion = nn.CrossEntropyLoss()

# Keyword arguments forwarded to train_loop.
resnet18_2_dict = {
    "model": resnet18_2,
    "criterion": criterion,
    "device": device,
    "lr": 0.001,
    "momentum": 0.9,
    "max_epochs": max_epochs,
    "do_test": True,
}

stats = train_loop(trainloader, valloader, **resnet18_2_dict)

# Per-class report, computed on the validation loader.
print("=========")
test_class(resnet18_2, device, criterion, valloader, classes)

Epoch: 1, Learning rate: 0.001000
Training - Loss: 2.1468, Accuracy: 0.24, Runtime: 11.33
Test - Loss: 1.9370, Accuracy: 0.32
Epoch: 2, Learning rate: 0.001000
Training - Loss: 1.8304, Accuracy: 0.35, Runtime: 11.59
Test - Loss: 1.7798, Accuracy: 0.37
Epoch: 3, Learning rate: 0.001000
Training - Loss: 1.7404, Accuracy: 0.39, Runtime: 11.63
Test - Loss: 1.7309, Accuracy: 0.39
Epoch: 4, Learning rate: 0.001000
Training - Loss: 1.7030, Accuracy: 0.40, Runtime: 11.32
Test - Loss: 1.7252, Accuracy: 0.40
Epoch: 5, Learning rate: 0.001000
Training - Loss: 1.6922, Accuracy: 0.40, Runtime: 11.57
Test - Loss: 1.6892, Accuracy: 0.40
Epoch: 6, Learning rate: 0.001000
Training - Loss: 1.6746, Accuracy: 0.41, Runtime: 11.62
Test - Loss: 1.6982, Accuracy: 0.41
Epoch: 7, Learning rate: 0.001000
Training - Loss: 1.6667, Accuracy: 0.41, Runtime: 11.68
Test - Loss: 1.6535, Accuracy: 0.42
Epoch: 8, Learning rate: 0.001000
Training - Loss: 1.6552, Accuracy: 0.41, Runtime: 11.71
Test - Loss: 1.6710, Accurac

# Exercise 2

Cambiare il learning rate dei layer finali, oppure rendere addestrabile un layer di convoluzione interno e assegnargli un learning rate diverso.

# Exercise 3

Model selection strategy