# Assigment 2: PyTorch Model Zoo

The [PyTorch "Model Zoo"](https://pytorch.org/vision/stable/models.html) provides a large number of pre-trained CNN models and vision [data sets](https://pytorch.org/vision/stable/datasets.html)...

In [2]:
#imports
import torch
import torchvision
import torchvision.transforms as transforms

In [3]:
#transform input data (image) to tensor
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

#set batch size
batch_size = 64

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

100%|██████████| 170M/170M [00:04<00:00, 40.1MB/s]


## Assignment 1:
Load a "*ResNet18*" from the torchvision model zoo and train it for 10 epochs

In [4]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
from torch.optim.lr_scheduler import MultiStepLR

resnet18 = models.resnet18().cuda()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet18.parameters(), lr=0.01, momentum=0.9)


In [9]:
running_loss = 0.0
for epoch in range(10):  # loop over the dataset multiple times
    print("---epoch:",epoch)
    for i, data in enumerate(trainloader, 0):

        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].cuda(),data[1].cuda()
        #print(labels)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = resnet18(inputs)
        #print(outputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 1000 == 999:    # every 1000 mini-batches...
            #print(running_loss / 1000)
            running_loss = 0.0
print('Finished Training')

---epoch: 0
---epoch: 1
---epoch: 2
---epoch: 3
---epoch: 4
---epoch: 5
---epoch: 6
---epoch: 7
---epoch: 8
---epoch: 9
Finished Training


In [10]:
correct = 0
total = 0

# since we're not training, we don't need to calculate the gradients for our outputs
resnet18.eval()

for data in testloader:
        images, labels = data[0].cuda(),data[1].cuda()
        # calculate outputs by running images through the network
        outputs = resnet18(images)
        #print(outputs)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        #print(predicted)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 73 %


## Assigment 2:
Load a **pre-trained** (on ImageNet) "*ResNet18*" from the torchvision model zoo and *fine-tune* it for ten epochs

In [11]:
resnet18 = models.resnet18(weights='DEFAULT') #use pre-trained model

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 184MB/s]


In [12]:
#print model architecture of the pre-trained model
print(resnet18)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
#print layer names
for name, layer in resnet18.named_children():
    print(name)

conv1
bn1
relu
maxpool
layer1
layer2
layer3
layer4
avgpool
fc


In [14]:
#resnet 18 has been pretrained on ImageNet
# -> 1000 classes = 1000 output neurons in the last FC layer
# -> Cifar 10 has only 10 classes!
# -> better change this!
resnet18.fc = torch.nn.Linear(in_features=512, out_features=10, bias=True) #this is now new and un-trained!
resnet18.cuda() #move model to GPU

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet18.parameters(), lr=0.01, momentum=0.9)



In [15]:
running_loss = 0.0
for epoch in range(10):  # loop over the dataset multiple times
    print("---epoch:",epoch)
    for i, data in enumerate(trainloader, 0):

        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].cuda(),data[1].cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = resnet18(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 1000 == 999:    # every 1000 mini-batches...
            #print(running_loss / 1000)
            running_loss = 0.0
print('Finished Training')

---epoch: 0
---epoch: 1
---epoch: 2
---epoch: 3
---epoch: 4
---epoch: 5
---epoch: 6
---epoch: 7
---epoch: 8
---epoch: 9
Finished Training


In [16]:
correct = 0
total = 0

resnet18.cuda()

# since we're not training, we don't need to calculate the gradients for our outputs
resnet18.eval()

for data in testloader:
        images, labels = data[0].cuda(),data[1].cuda()
        # calculate outputs by running images through the network
        outputs = resnet18(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 81 %


### Alternative Solution: train only last FC Layer...

In [58]:
resnet18v2 = models.resnet18(weights='DEFAULT') #get new pre-trained model

In [59]:
# Freeze all layers -> turn of gradient updates
for param in resnet18v2.parameters():
    param.requires_grad = False

In [60]:
resnet18v2.fc = torch.nn.Linear(in_features=512, out_features=10, bias=True) #this is now new and un-trained!

In [61]:
for param in resnet18v2.parameters():
    print(param.requires_grad)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
True
True


In [62]:
resnet18v2.cuda() #move model to GPU

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet18v2.fc.parameters(), lr=0.001, momentum=0.9)

In [63]:
running_loss = 0.0
for epoch in range(10):  # loop over the dataset multiple times
    print("---epoch:",epoch)
    for i, data in enumerate(trainloader, 0):

        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].cuda(),data[1].cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = resnet18v2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 1000 == 999:    # every 1000 mini-batches...
            #print(running_loss / 1000)
            running_loss = 0.0
print('Finished Training')


---epoch: 0
---epoch: 1
---epoch: 2
---epoch: 3
---epoch: 4
---epoch: 5
---epoch: 6
---epoch: 7
---epoch: 8
---epoch: 9
Finished Training


In [64]:
correct = 0
total = 0

# set model to eval mode
resnet18v2.eval()

for data in testloader:
        images, labels = data[0].cuda(),data[1].cuda()
        # calculate outputs by running images through the network
        outputs = resnet18v2(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 47 %
