In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models.alexnet as alexnet

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Markdown as md

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [3]:
transform = transforms.Compose(
    [transforms.Resize(256),
    transforms.CenterCrop(224),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 47468935.08it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [4]:
def train_model(net, optimizer, criterion, epochs):

  for epoch in range(epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
      # if i <= 2000:
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            # print(i)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                running_loss = 0.0

  print('Finished Training')



In [5]:
def predict_test():
  correct = 0
  total = 0
  # since we're not training, we don't need to calculate the gradients for our outputs
  with torch.no_grad():
      for data in testloader:
          images, labels = data
          images = images.to(device)
          labels = labels.to(device)
          # calculate outputs by running images through the network
          net.to(device)
          outputs = net(images)
          outputs = outputs.to(device)
          # the class with the highest energy is what we choose as prediction
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

  #print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
  md(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')


## FINE TUNING with AlexNet


In [6]:
# input_size = 224

num_classes = 10
net = alexnet(pretrained=True)
num_ftrs = net.classifier[6].in_features
net.classifier[6] = nn.Linear(num_ftrs, num_classes)
net = net.to(device)

criterion = nn.CrossEntropyLoss().to(device)
optimizer_adam = optim.Adam(net.parameters(), lr=0.0001)
train_model(net, optimizer_adam, criterion, 2)

# Predict on test and report Accuracy
predict_test()

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:04<00:00, 60.7MB/s]


[1,  2000] loss: 1.105
[1,  4000] loss: 0.784
[1,  6000] loss: 0.697
[1,  8000] loss: 0.662
[1, 10000] loss: 0.647
[1, 12000] loss: 0.623
[2,  2000] loss: 0.498
[2,  4000] loss: 0.496
[2,  6000] loss: 0.516
[2,  8000] loss: 0.494
[2, 10000] loss: 0.499
[2, 12000] loss: 0.510
Finished Training


## FEATURE EXTRACTOR from AlexNet

In [7]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

# input_size = 224
num_classes = 10
feature_extract = True
net = alexnet(pretrained=True)
set_parameter_requires_grad(net, feature_extract)
num_ftrs = net.classifier[6].in_features
net.classifier[6] = nn.Linear(num_ftrs, num_classes)
net = net.to(device)


criterion = nn.CrossEntropyLoss().to(device)
optimizer_adam = optim.Adam(net.parameters(), lr=0.0001)
train_model(net, optimizer_adam, criterion, 2)

# Predict on test and report Accuracy
predict_test()

[1,  2000] loss: 1.242
[1,  4000] loss: 0.974
[1,  6000] loss: 0.921
[1,  8000] loss: 0.886
[1, 10000] loss: 0.870
[1, 12000] loss: 0.865
[2,  2000] loss: 0.833
[2,  4000] loss: 0.817
[2,  6000] loss: 0.818
[2,  8000] loss: 0.814
[2, 10000] loss: 0.788
[2, 12000] loss: 0.800
Finished Training


### Observation:

In all the tests I ran with different number of epochs, the Fine Tuning approach performs much better than the Feature Extractor approach. 
I also noticed that the Fine Tuning takes longer time than the Feature Extractor approach.

I think the difference in performance was because Fine Tuning retrains the layers with the the data we provided, thereby training weights that are more adapted to the new dataset, while the Feature Extractor approach mostly uses weights from the original dataset that the model was trained