**Politecnico di Torino**

**01TXFSM - Machine learning and Deep learning**

**Homework 2**

**Alberto Maria Falletta - s277971**


**Install requirements**

In [0]:
# torch and torchvision are pinned to the versions this notebook was written against.
!pip3 install 'torch==1.4.0'
!pip3 install 'torchvision==0.5.0'
# NOTE(review): the two packages below are not pinned, so the installed
# version may differ between runs - consider pinning them as well.
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'



**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet, resnet18, resnet50, resnet34

from PIL import Image
from tqdm import tqdm

import random
import matplotlib.pyplot as plt

**Functions**

In [0]:
def get_key(in_dict, in_value):
  """
  Reverse lookup: return the first key of in_dict whose value equals in_value.

  The notebook uses it to decode an integer label back into its class name
  through a class_to_idx dictionary. When no entry matches, the sentinel
  string "Key not found!" is returned instead of raising.
  """
  matching_keys = (name for name, idx in in_dict.items() if in_value == idx)
  return next(matching_keys, "Key not found!")


def print_occurrences(in_dataset, filename, save=False):
  """
  Draw a horizontal bar chart with the number of images per class.

  Labels are read from in_dataset.samples, indexed from 0 to
  len(in_dataset) - 1; every entry is unpacked as a pair whose second element
  is the integer label. Labels are decoded to class names with get_key and
  in_dataset.class_to_idx. `filename` is used as the chart title and, when
  `save` is True, the chart is also written to '<filename>.png' before it is
  shown.
  """
  # Tally how many samples carry each integer label (first-seen order is kept).
  label_counts = {}
  for position in range(len(in_dataset)):
    _, label = in_dataset.samples[position]
    label_counts[label] = label_counts.get(label, 0) + 1

  # Parallel lists: decoded class names (y axis) and their counts (x axis).
  class_names = [get_key(in_dataset.class_to_idx, label) for label in label_counts]
  image_counts = [label_counts[label] for label in label_counts]

  # Plot
  fig, ax = plt.subplots(figsize=(14, 25))
  ax.barh(class_names, image_counts, align='center', alpha=0.5)
  ax.set_xlabel('Number of images')
  ax.set_ylabel('Classes')
  ax.set_title(filename)
  # Write each count just to the right of the end of its bar.
  for row, count in enumerate(image_counts):
    plt.text(count + 0.2, row, str(count), color='steelblue', va="center")
  if save:
    plt.savefig(filename + '.png')
  plt.show()


def make_indexes(total_index_list, mode, seed=None):
  """
  Split a list of indexes in two parts, one for training and one for validation.

  Args:
    total_index_list: sequence of integer indexes to split. It is never
      modified: the function only works on copies.
    mode: 'random' shuffles the indexes and cuts the shuffled list in half
      (the validation half receives the extra element when the length is odd).
      Any other value (the notebook passes 'equal') sends even index values to
      training and odd index values to validation, then shuffles each part
      independently.
    seed: optional seed. When given, a private random.Random(seed) performs
      the shuffling, so the split is reproducible and the global random state
      is left untouched. When None the global `random` module is used.

  Returns:
    (train_indexes, val_indexes): two newly created lists.
  """
  rng = random if seed is None else random.Random(seed)

  if mode == 'random':
    # Shuffle a copy: shuffling in place would reorder the caller's list.
    shuffled = list(total_index_list)
    rng.shuffle(shuffled)
    half = len(shuffled) // 2
    return shuffled[:half], shuffled[half:]

  # Parity split on the index value itself (not on its position in the list).
  in_train_indexes = [index for index in total_index_list if index % 2 == 0]
  in_val_indexes = [index for index in total_index_list if index % 2 != 0]

  # Shuffle the elements
  rng.shuffle(in_train_indexes)
  rng.shuffle(in_val_indexes)

  return in_train_indexes, in_val_indexes


def print_accuracy_loss_plot(in_loss_list, in_accuracy_list, filename):
  """
  Plot loss (left panel) and validation accuracy (right panel) per epoch.

  The x axis of both panels is 0 .. len(in_loss_list) - 1, so the two lists
  are expected to have the same length. The figure is saved to
  '<filename>.png' and then shown.
  """
  epoch_axis = list(range(len(in_loss_list)))
  _, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(14, 7))

  loss_ax.plot(epoch_axis, in_loss_list, c='blue', label='Loss')
  loss_ax.set_xlabel('Epochs')
  loss_ax.set_ylabel('Loss')

  accuracy_ax.plot(epoch_axis, in_accuracy_list, c='green', label='Validation Accuracy')
  accuracy_ax.set_xlabel('Epochs')
  accuracy_ax.set_ylabel('Validation Accuracy')

  plt.savefig(filename + '.png')
  plt.show()

**Set Arguments**

In [0]:
DEVICE = 'cuda' # Device the network and the batches are moved to: 'cuda' or 'cpu'

NUM_CLASSES = 102  # 101 + 1: there is an extra Background class that should be removed.
                   # NOTE(review): if the dataset class already filters Background out, this should be 101 - confirm.

BATCH_SIZE = 128     # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to have comparable results

LR = 5e-2            # The initial learning rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 60      # Total number of training epochs (iterations over the dataset)
STEP_SIZE = 50       # How many epochs before decreasing the learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for the learning rate step-down

LOG_FREQUENCY = 10   # The training loss is printed every LOG_FREQUENCY optimization steps

pretrained = True  # True: fine-tune pretrained weights (ImageNet mean/std are used for normalization); False: train from scratch
network = "r"       # "a" selects AlexNet, "r" selects ResNet-18
param = "fully_connected"  # parameters to optimize ("complete_network", "fully_connected", "convolutional")

**Define Data Preprocessing**

In [0]:
# The training and the evaluation phase share the same pipeline
# (resize -> center crop -> tensor -> normalize). Only the normalization
# statistics depend on the "pretrained" flag of the arguments cell:
# ImageNet's mean and std when the network is pretrained, 0.5 / 0.5 otherwise.

if pretrained:
  # https://github.com/pytorch/examples/blob/master/imagenet/main.py
  norm_mean, norm_std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
else:
  norm_mean, norm_std = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)

# Define transforms for the training phase
train_transform = transforms.Compose([
  transforms.Resize(256),      # Resizes the short side of the PIL image to 256
  transforms.CenterCrop(224),  # Crops a central square patch of the image:
                               # 224 because torchvision's AlexNet needs a 224x224 input!
                               # Remember this when applying different transformations, otherwise you get an error
  # Optional augmentations, disabled by default:
  # transforms.RandomCrop(224, pad_if_needed=True, padding_mode='edge'),
  # transforms.RandomHorizontalFlip(p=0.5),
  # transforms.RandomGrayscale(p=0.1),
  # transforms.RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=3),
  # transforms.RandomRotation(30, resample=False, expand=False, center=None, fill=None),
  # transforms.RandomVerticalFlip(p=0.5),
  transforms.ToTensor(),       # Turns the PIL Image into a torch.Tensor
  transforms.Normalize(mean=norm_mean, std=norm_std),  # Normalizes the tensor with mean and standard deviation
])

# Define transforms for the evaluation phase
eval_transform = transforms.Compose([
  transforms.Resize(256),
  transforms.CenterCrop(224),
  transforms.ToTensor(),
  transforms.Normalize(mean=norm_mean, std=norm_std),
])

**Prepare Dataset**

In [0]:
# Clone the github repository with the data.
# The clone is skipped when ./Caltech101 already exists, so re-running the
# cell does not download the data again; the checkout is renamed to 'Caltech101'.
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/albeffe/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101/101_ObjectCategories'
from my_folder.my_caltech_dataset import Caltech

# Prepare Pytorch train/test Datasets.
# The 'train' split is instantiated twice: once with the training transform
# and once with the evaluation transform. A Subset delegates item access to
# the dataset it wraps, so building the validation subset on top of the
# training-transform dataset would apply any training augmentation to the
# validation images as well.
# NOTE(review): assumes Caltech yields the samples of a split in the same
# order on every instantiation - confirm against the dataset class.
trainval_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
trainval_eval_dataset = Caltech(DATA_DIR, split='train', transform=eval_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)
assert len(trainval_eval_dataset) == len(trainval_dataset), "the two views of the train split differ in size"

# For visualization purposes
visualization_flag = False
if visualization_flag:
  print('Train Dataset + Validation Dataset: {}'.format(len(trainval_dataset)), "\n")
  print_occurrences(trainval_dataset, 'Training & Validation Sets', True)
  print_occurrences(test_dataset, 'Test Set', True)

# Disjoint index sets: train_indexes select from the training-transform view,
# val_indexes from the evaluation-transform view of the same split.
train_indexes, val_indexes = make_indexes(list(range(len(trainval_dataset))), 'equal')
train_dataset = Subset(trainval_dataset, train_indexes)
val_dataset = Subset(trainval_eval_dataset, val_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 2892
Valid Dataset: 2892
Test Dataset: 2893


**Prepare Dataloaders**

In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling).
# Options shared by the three loaders are kept in one place.
loader_options = dict(batch_size=BATCH_SIZE, num_workers=4)

# Training: reshuffled at every epoch, the last incomplete batch is dropped.
train_dataloader = DataLoader(train_dataset, shuffle=True, drop_last=True, **loader_options)
# Validation and test: fixed order, every sample is kept.
val_dataloader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_dataloader = DataLoader(test_dataset, shuffle=False, **loader_options)

**Prepare Network**

In [0]:
# Network selection.
# The last fully connected layer of the chosen torchvision model is replaced
# by a new linear layer with NUM_CLASSES outputs. Its input size is read from
# the layer being replaced instead of being hard-coded, so the cell keeps
# working if a different architecture of the same family is swapped in.

if network == "a":
  net = alexnet(pretrained=pretrained)
  net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)
elif network == 'r':
  net = resnet18(pretrained=pretrained)
  net.fc = nn.Linear(net.fc.in_features, NUM_CLASSES)
else:
  # Fail here rather than leaving `net` undefined (or stale from a previous run).
  raise ValueError('Unknown network "{}": expected "a" (AlexNet) or "r" (ResNet-18)'.format(network))

**Prepare Training**

In [0]:
# Loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Parameters to optimize:
# https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
#
# torchvision's AlexNet exposes `features` (convolutional part) and
# `classifier` (fully connected part). ResNet has neither attribute: its only
# fully connected layer is `fc`, and everything else is treated as the
# convolutional part (this includes the batch-norm parameters).
if network == "a":
  head_parameters = list(net.classifier.parameters())
  backbone_parameters = list(net.features.parameters())
elif network == "r":
  head_parameters = list(net.fc.parameters())
  backbone_parameters = [p for name, p in net.named_parameters() if not name.startswith('fc.')]
else:
  raise ValueError('Unknown network "{}": expected "a" or "r"'.format(network))

if param == "complete_network":
  parameters_to_optimize = net.parameters()
elif param == "fully_connected":
  parameters_to_optimize = head_parameters
elif param == "convolutional":
  parameters_to_optimize = backbone_parameters
else:
  # Fail here rather than leaving parameters_to_optimize undefined (or stale).
  raise ValueError('Unknown param "{}": expected "complete_network", "fully_connected" or "convolutional"'.format(param))

# Optimizers
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
# optimizer = optim.Adam(parameters_to_optimize, lr=LR, weight_decay=WEIGHT_DECAY)
# optimizer = optim.Adadelta(parameters_to_optimize, lr=LR, rho=0.9, eps=1e-06, weight_decay=WEIGHT_DECAY)
# optimizer = optim.RMSprop(parameters_to_optimize, lr=LR, alpha=0.99, eps=1e-08, weight_decay=WEIGHT_DECAY, momentum=MOMENTUM, centered=False)

# Scheduler
# A scheduler dynamically changes learning rate
# The most common scheduler is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**Train**

In [0]:
net = net.to(DEVICE)

# Let cuDNN benchmark and pick the fastest algorithms for the input sizes it sees.
# The flag has to be assigned: merely reading the attribute has no effect.
cudnn.benchmark = True

current_step = 0
loss_list = []            # training loss of the last batch of each epoch
valid_accuracy_list = []  # validation accuracy after each epoch

# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  # get_last_lr() reports the learning rate currently in use; get_lr() is an
  # internal helper of the scheduler and is not meant to be called here.
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_last_lr()))

  # Sets module in training mode (validation below switches it to evaluation
  # mode, so it must be re-enabled at the start of every epoch)
  net.train()

  # Iterate over the dataset
  for images, labels in train_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    train_outputs = net(images)

    # Compute loss based on output and ground truth
    loss = criterion(train_outputs, labels)

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    current_step += 1

  # Step the scheduler
  scheduler.step()

  # Only the loss of the last batch is recorded, so the plotted curve is a
  # per-epoch sample of the training loss, not an epoch average.
  loss_list.append(loss.item())

  # Validation
  net.train(False) # Set Network to evaluation mode
  valid_running_corrects = 0
  # No gradients are needed for evaluation: disabling autograd avoids building
  # the computation graph and saves memory and time.
  with torch.no_grad():
    for images, labels in val_dataloader:
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      # Forward Pass
      valid_outputs = net(images)

      # Get predictions (index of the highest score for each sample)
      _, valid_preds = torch.max(valid_outputs, 1)

      # Update Corrects
      valid_running_corrects += torch.sum(valid_preds == labels).item()

  # Calculate Accuracy
  valid_accuracy = valid_running_corrects / float(len(val_dataset))
  print('Validation Accuracy {}'.format(valid_accuracy))
  print()

  valid_accuracy_list.append(valid_accuracy)

print_accuracy_loss_plot(loss_list, valid_accuracy_list, "Run_plot")

Starting epoch 1/60, LR = [0.05]
Step 0, Loss 5.090920448303223
Step 10, Loss 2.3347580432891846
Step 20, Loss 0.9952521920204163
Validation Accuracy 0.4529737206085754

Starting epoch 2/60, LR = [0.05]
Step 30, Loss 0.604526937007904
Step 40, Loss 0.4152956008911133
Validation Accuracy 0.7627939142461964

Starting epoch 3/60, LR = [0.05]
Step 50, Loss 0.27601921558380127
Step 60, Loss 0.2111152857542038
Validation Accuracy 0.809820193637621

Starting epoch 4/60, LR = [0.05]
Step 70, Loss 0.15590143203735352
Step 80, Loss 0.1568639576435089
Validation Accuracy 0.8654910096818811

Starting epoch 5/60, LR = [0.05]


KeyboardInterrupt: ignored

**Test**

In [0]:
net = net.to(DEVICE)
net.train(False) # Set Network to evaluation mode

running_corrects = 0
# No gradients are needed for inference: disabling autograd avoids building
# the computation graph and saves memory and time.
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions (index of the highest score for each sample)
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))