
**Install requirements**

In [2]:
# Install pinned versions of torch and torchvision (the torch.hub calls later
# in this notebook reference the matching 'pytorch/vision:v0.4.2' tag).
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.4.2'
# NOTE(review): Pillow-SIMD and pillow presumably both provide the `PIL`
# package, and pillow is upgraded again at the end of this cell -- confirm
# that installing both is intended.
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'
!pip install --upgrade pillow

Requirement already up-to-date: pillow in /usr/local/lib/python3.6/dist-packages (6.2.1)


**Import libraries**

In [0]:
import os
import logging
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

**Set Arguments**

In [0]:
# Device every model and batch is moved to.
DEVICE = 'cuda' # 'cuda' or 'cpu'

# Output size of the replaced final linear layer of each network.
NUM_CLASSES = 101 

# AlexNet trained from scratch: candidate batch sizes and learning rates.
# The training cells pair these lists by index (entry i of each list
# forms one configuration).
#no transfer alexnet 
BATCH_SIZE = [320, 256]
LR = [3e-2, 2e-2]

# AlexNet with pretrained weights: candidate batch sizes and learning rates,
# again paired by index.
#transfer alexnet
BATCH_SIZE_TRANSFER = [150, 256, 512]
LR_TRANSFER = [1e-3, 1e-4, 6e-3]

# SGD settings shared by every optimizer created in this notebook.
MOMENTUM = 0.9      
WEIGHT_DECAY = 5e-5  

# Default schedule: number of epochs, and StepLR step size / decay factor.
NUM_EPOCHS = 30            
STEP_SIZE = 25
GAMMA = 0.1 

# The training loop prints the batch loss every LOG_FREQUENCY optimisation steps.
LOG_FREQUENCY = 10

# ResNet: candidate batch sizes / learning rates (paired by index) and the
# schedules meant for training from scratch and with pretrained weights.
#resnet
BATCH_SIZE_RESNET = [32]
LR_RESNET = [1e-2]
NUM_EPOCHS_RESNET = 15
STEP_SIZE_RESNET = 12
NUM_EPOCHS_RESNET_TRANSFER = 30
STEP_SIZE_RESNET_TRANSFER = 25

**Define Data Preprocessing**

In [0]:
'''
#1-2
#AlexNet and ResNet without transfer learning
train_transform = transforms.Compose([transforms.Resize(256), 
                                      transforms.CenterCrop(224),  
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])
'''


# 3- TRANSFER LEARNING WITH ALEXNET AND RESNET

# Per-channel statistics used to normalise the input tensors
# (presumably those of the data the pretrained weights were trained on).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean = _NORM_MEAN, std = _NORM_STD)


def _resize_crop_pipeline():
  """Build a fresh Resize -> CenterCrop -> ToTensor -> normalize pipeline."""
  steps = [
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      normalize,
  ]
  return transforms.Compose(steps)


# Training and evaluation use the same deterministic preprocessing here.
train_transform = _resize_crop_pipeline()
eval_transform = _resize_crop_pipeline()


**Caltech dataset**

In [0]:
from torchvision.datasets import VisionDataset

from PIL import Image

import os
import os.path
import sys


def pil_loader(path):
    """Read the image file at ``path`` and return it converted to RGB."""
    # Opening the file ourselves lets the ``with`` block close the handle,
    # which avoids a ResourceWarning
    # (https://github.com/python-pillow/Pillow/issues/835).
    with open(path, 'rb') as handle:
        rgb_image = Image.open(handle).convert('RGB')
    return rgb_image


def make_dataset(dir, label_idx):
  """List every file found in the class sub-directories of ``dir``.

  Args:
    dir: root directory containing one sub-directory per class.
    label_idx: mapping from class (sub-directory) name to integer label.

  Returns:
    A list of ``(relative_path, label)`` tuples, where ``relative_path`` is
    ``<class name>/<file name>``. Classes appear in the iteration order of
    ``label_idx`` and files are sorted by name within each class.
  """
  images = []
  for target, label in label_idx.items():
    class_dir = os.path.join(dir, target)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and any index-based split built on top of it) reproducible.
    for file in sorted(os.listdir(class_dir)):
      images.append((os.path.join(target, file), label))
  return images

class Caltech(VisionDataset):
    """Image dataset restricted to the files listed in a split file.

    Every sub-directory of ``root`` except ``BACKGROUND_Google`` is one class;
    labels are the positions of the class names in case-insensitive sorted
    order. Only images whose ``<class>/<file>`` path is listed in the split
    file are exposed.

    Args:
        root: directory containing one sub-directory per class.
        split: file name of the split list (one relative image path per line).
            It is looked up next to ``root`` (in its parent directory) and,
            if not found there, in ``Homework2-Caltech101``.
        transform: optional callable applied to the loaded RGB image.
        target_transform: optional callable applied to the integer label.

    Attributes:
        label_list: sorted class names.
        label_idx: mapping class name -> integer label.
        samples: every ``(relative_path, label)`` found under ``root``.
        dataset: indices into ``samples`` that belong to the split.
    """

    def __init__(self, root, split='train', transform = None, target_transform = None):
        super(Caltech, self).__init__(root, transform = transform, target_transform = target_transform)

        self.split = split
        self.root = root
        self.transform = transform
        self.target_transform = target_transform

        # Class names: sub-directories of root, without the background class.
        # Stray files in root are skipped so listing their "content" cannot fail.
        self.label_list = [d for d in os.listdir(self.root)
                           if d != 'BACKGROUND_Google'
                           and os.path.isdir(os.path.join(self.root, d))]
        self.label_list = sorted(self.label_list, key = str.casefold)
        self.label_idx = {label: idx for idx, label in enumerate(self.label_list)}
        self.samples = make_dataset(self.root, self.label_idx)

        # Read the split file once and compare whole lines. A substring search
        # would also accept e.g. 'ant/image_0001.jpg' because it is contained
        # in 'elephant/image_0001.jpg', leaking images between splits.
        split_entries = self._read_split_entries(split)
        self.dataset = [i for i, (img, _) in enumerate(self.samples)
                        if img.replace(os.sep, '/') in split_entries]

    def _read_split_entries(self, split):
        """Return the set of non-empty, stripped lines of the split file."""
        path = os.path.join(os.path.dirname(os.path.normpath(self.root)), split)
        if not os.path.isfile(path):
            # Location that was previously hard-coded.
            path = os.path.join('Homework2-Caltech101', split)
        with open(path) as f:
            return {line.strip() for line in f if line.strip()}

    def __getitem__(self, index):
        """Return ``(image, label)`` for the ``index``-th sample of the split."""
        idx = self.dataset[index]
        rel_path, label = self.samples[idx]
        image = pil_loader(os.path.join(self.root, rel_path))

        # Apply the optional preprocessing; without a transform the loaded
        # PIL image itself is returned.
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

    def __len__(self):
        """Number of samples that belong to the split."""
        return len(self.dataset)


**Prepare Dataset**

In [0]:
# Clone github repository with data
if not os.path.isdir('./Homework2-Caltech101'):
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git

DATA_DIR = 'Homework2-Caltech101/101_ObjectCategories'

#PART 1-2 -3
train_dataset_t = Caltech(DATA_DIR, 'train.txt', transform = train_transform)
train_dataset_v = Caltech(DATA_DIR, 'train.txt', transform = train_transform)
test_dataset = Caltech(DATA_DIR,    'test.txt', transform = eval_transform)

val_indexes = [idx for idx in range(len(train_dataset_v)) if idx % 2]
train_indexes = [idx for idx in range(len(train_dataset_t)) if not idx % 2]

val_dataset = Subset(train_dataset_v, val_indexes)
train_dataset = Subset(train_dataset_t, train_indexes)

**Computation of accuracy**

In [0]:
def test_accuracy(net, dataloader, type_of_set):  
    ########TESTING PHASE###########

    # check accuracy on whole test set
    correct = 0
    total = 0
    net.train(False)  
    with torch.no_grad(): 
        for data in dataloader:
            images, labels = data
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            outputs = net(images)  # predictions
            _, predicted = torch.max(outputs.data, 1)  # predicted labels
            total += labels.size(0)
            correct += torch.sum(predicted == labels.data).data.item()  # compare with ground truth
    accuracy = 100 * correct / total
    print('Accuracy of the network on the %s set: %d %%' %(type_of_set, accuracy))
    net.train(True)
    return accuracy

**Evaluation on Test set**

In [0]:
def evaluate(net, dataloader = None):
  """Measure and print the accuracy of ``net`` on the test data.

  Args:
    net: the model to evaluate; it is moved to ``DEVICE``.
    dataloader: iterable yielding ``(images, labels)`` batches. When omitted,
      the notebook-level ``test_dataloader`` is used.

  Returns:
    The accuracy as a percentage (float), ``0.0`` if no sample was seen.
  """
  if dataloader is None:
    dataloader = test_dataloader

  best_net = net.to(DEVICE)
  was_training = best_net.training
  best_net.train(False)

  running_corrects = 0
  n_samples = 0
  try:
    # Inference only: without no_grad autograd would record every forward pass.
    with torch.no_grad():
      for images, labels in tqdm(dataloader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward Pass
        outputs = best_net(images)

        # Get predictions
        _, preds = torch.max(outputs, 1)

        # Update Corrects
        running_corrects += torch.sum(preds == labels).item()
        n_samples += labels.size(0)
  finally:
    # Leave the network in the mode the caller had it in.
    best_net.train(was_training)

  # Calculate Accuracy over the samples actually seen
  accuracy = 100 * running_corrects / float(n_samples) if n_samples else 0.0

  print('Test Accuracy: {}'.format(accuracy))
  return accuracy

**Train and Validation function**

In [0]:
def training_and_val(net, n_epoch, optimizer,scheduler, train_dataloader, val_dataloader):
  """Train ``net`` and plot per-epoch accuracy and loss curves.

  After every epoch the accuracy on the training and on the validation loader
  is measured with ``test_accuracy`` and the learning-rate scheduler is
  stepped. When training is finished two figures are shown: train/validation
  accuracy per epoch and mean training loss per epoch.

  Args:
    net: the model to train (already on ``DEVICE``).
    n_epoch: number of epochs.
    optimizer: optimizer updating the parameters to train.
    scheduler: learning-rate scheduler, stepped once per epoch.
    train_dataloader: loader yielding ``(images, labels)`` training batches.
    val_dataloader: loader yielding ``(images, labels)`` validation batches.

  Returns:
    A tuple ``(losses, accuracies_train, accuracies_val)`` of numpy arrays
    with one entry per epoch.
  """
  losses = np.empty(n_epoch)
  accuracies_train = np.empty(n_epoch)
  accuracies_val = np.empty(n_epoch)
  # max(..., 1) keeps the mean-loss division defined for an empty loader.
  n_batches = max(len(train_dataloader), 1)
  criterion = nn.CrossEntropyLoss()
  current_step = 0

  # Start iterating over the epochs
  for epoch in range(n_epoch):
    running_loss = 0.0
    # Read the learning rate from the optimizer: that is the value actually
    # applied, whereas what scheduler.get_lr() reports depends on the
    # PyTorch version.
    current_lr = [group['lr'] for group in optimizer.param_groups]
    print('Starting epoch {}/{}, LR = {}'.format(epoch+1, n_epoch, current_lr))

    net.train()
    # Iterate over the dataset
    for images, labels in train_dataloader:
      # Bring data over the device of choice
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      optimizer.zero_grad()

      outputs = net(images)

      loss = criterion(outputs, labels)
      running_loss += loss.item()

      if current_step % LOG_FREQUENCY == 0:
        print('Step {}, Loss {}'.format(current_step, loss.item()))

      loss.backward()
      optimizer.step()

      current_step += 1

    # Per-epoch statistics
    losses[epoch] = running_loss / n_batches
    accuracies_train[epoch] = test_accuracy(net, train_dataloader, 'train')
    accuracies_val[epoch] = test_accuracy(net, val_dataloader, 'validation')

    # Step the scheduler
    scheduler.step()

  # Accuracy curves. The figure is created BEFORE plotting; creating it
  # afterwards left an extra empty figure next to every plot.
  plt.figure()
  plt.plot(accuracies_train, 'r', label = 'Accuracy on Train')
  plt.plot(accuracies_val,   'g', label = 'Accuracy on Validation')
  plt.legend(loc = 'best')
  plt.xlabel('Epochs')
  plt.ylabel('Accuracy values')
  plt.grid()
  plt.show()

  # Training-loss curve
  plt.figure()
  plt.plot(losses,           'b', label = 'Training Loss')
  plt.legend(loc = 'best')
  plt.xlabel('Epochs')
  plt.ylabel('Loss values')
  plt.grid()
  plt.show()

  return losses, accuracies_train, accuracies_val

**Train without transfer learning**

In [0]:
#PART 1-2
# Architecture trained from scratch in this cell: 'resnet' or 'alexnet'.
# The matching hyper-parameter lists and schedule are selected from it, so the
# model and its settings can no longer get out of sync.
ARCH = 'resnet'

if ARCH == 'resnet':
  lr_values, bs_values = LR_RESNET, BATCH_SIZE_RESNET
  n_epoch, ss = NUM_EPOCHS_RESNET, STEP_SIZE_RESNET
else:
  lr_values, bs_values = LR, BATCH_SIZE
  n_epoch, ss = NUM_EPOCHS, STEP_SIZE

# Enable the cuDNN auto-tuner. The flag must be assigned: the bare expression
# `cudnn.benchmark` only reads it and has no effect.
cudnn.benchmark = True

# Learning rates and batch sizes are paired by position.
for learning_rate, bs in zip(lr_values, bs_values):
  if ARCH == 'resnet':
    net = torch.hub.load('pytorch/vision:v0.4.2', 'resnet101')
    net.fc = nn.Linear(2048, NUM_CLASSES)
  else:
    net = alexnet()
    net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  net = net.to(DEVICE)

  train_dataloader = DataLoader(train_dataset, batch_size = bs, shuffle = True,  num_workers = 4, drop_last = True)
  val_dataloader   = DataLoader(val_dataset,   batch_size = bs, shuffle = True,  num_workers = 4)

  # Train every layer
  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr = learning_rate, momentum = MOMENTUM, weight_decay = WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = ss, gamma = GAMMA)

  training_and_val(net, n_epoch, optimizer,scheduler, train_dataloader, val_dataloader)

**Transfer Learning**

In [0]:
#PART 3 TRANSFER LEARNING
# Pretrained architecture fine-tuned in this cell: 'resnet' or 'alexnet'.
# The matching hyper-parameter lists and schedule are selected from it, so the
# model and its settings can no longer get out of sync.
ARCH = 'resnet'

if ARCH == 'resnet':
  lr_values, bs_values = LR_RESNET, BATCH_SIZE_RESNET
  n_epoch, ss = NUM_EPOCHS_RESNET_TRANSFER, STEP_SIZE_RESNET_TRANSFER
else:
  lr_values, bs_values = LR_TRANSFER, BATCH_SIZE_TRANSFER
  n_epoch, ss = NUM_EPOCHS, STEP_SIZE

# Enable the cuDNN auto-tuner. The flag must be assigned: the bare expression
# `cudnn.benchmark` only reads it and has no effect.
cudnn.benchmark = True

# Learning rates and batch sizes are paired by position.
for learning_rate, bs in zip(lr_values, bs_values):
  if ARCH == 'resnet':
    net = torch.hub.load('pytorch/vision:v0.4.2', 'resnet101', pretrained = True)
    net.fc = nn.Linear(2048, NUM_CLASSES)
  else:
    net = alexnet(pretrained = True)
    net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  net = net.to(DEVICE)

  #Prepare Training
  #for tuning of BATCH_SIZE
  train_dataloader = DataLoader(train_dataset, batch_size = bs, shuffle = True,  num_workers = 4, drop_last = True)
  val_dataloader   = DataLoader(val_dataset,   batch_size = bs, shuffle = True,  num_workers = 4)

  # Fine-tune every layer
  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr = learning_rate, momentum = MOMENTUM, weight_decay = WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = ss, gamma = GAMMA)

  training_and_val(net, n_epoch, optimizer, scheduler, train_dataloader, val_dataloader)

**Freeze**

In [0]:
# Best hyper-parameters found in the transfer-learning run, set by hand.
best_lr = 1e-3
best_bs = 150
best_epoch = 30
best_step_size = 25

# Loaders rebuilt with the selected batch size
train_dataloader = DataLoader(train_dataset, batch_size = best_bs, shuffle = True,  num_workers = 4, drop_last = True)
val_dataloader   = DataLoader(val_dataset,   batch_size = best_bs, shuffle = True,  num_workers = 4)
test_dataloader  = DataLoader(test_dataset,  batch_size = best_bs, shuffle = False, num_workers = 4)

# Enable the cuDNN auto-tuner. The flag must be assigned: the bare expression
# `cudnn.benchmark` only reads it and has no effect.
cudnn.benchmark = True

# i == 0: train only the fully connected layers (convolutional layers frozen)
# i == 1: train only the convolutional layers (fully connected layers frozen)
for i in [0, 1]:

  new_net = alexnet(pretrained = True)
  new_net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  new_net = new_net.to(DEVICE)

  if i == 0:
    trainable, frozen = new_net.classifier, new_net.features
  else:
    trainable, frozen = new_net.features, new_net.classifier

  # The frozen part is left out of the optimizer (so it is never updated) and
  # additionally marked as not requiring gradients, so autograd does not
  # compute and accumulate gradients nobody uses.
  for param in frozen.parameters():
    param.requires_grad = False
  parameters_to_optimize = trainable.parameters()

  optimizer = optim.SGD(parameters_to_optimize, lr = best_lr, momentum = MOMENTUM, weight_decay = WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = best_step_size, gamma = GAMMA)
  training_and_val(new_net, best_epoch, optimizer, scheduler, train_dataloader, val_dataloader)
  evaluate(new_net)

**Data Augmentation**

In [0]:
#part 4 - Data Augmentation
# Best hyper-parameters found in the transfer-learning run, set by hand.
best_lr = 1e-3
best_bs = 150
best_epoch = 30
best_step_size = 25

# Loaders rebuilt with the selected batch size
val_dataloader   = DataLoader(val_dataset,   batch_size = best_bs, shuffle=True,  num_workers=4)
test_dataloader  = DataLoader(test_dataset,  batch_size = best_bs, shuffle=False, num_workers=4)

# Enable the cuDNN auto-tuner. The flag must be assigned: the bare expression
# `cudnn.benchmark` only reads it and has no effect.
cudnn.benchmark = True

normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])

# The three augmentation pipelines to compare. All start from a random resized
# crop; they differ in the extra random transformation.
augmentations = [
    # 0: random grayscale
    transforms.Compose([transforms.RandomResizedCrop(224),
                        transforms.RandomGrayscale(),
                        transforms.ToTensor(),
                        normalize]),
    # 1: random horizontal flip
    transforms.Compose([transforms.RandomResizedCrop(224),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        normalize]),
    # 2: random erasing (applied to the normalised tensor)
    transforms.Compose([transforms.RandomResizedCrop(224),
                        transforms.ToTensor(),
                        normalize,
                        transforms.RandomErasing()]),
]

for i, new_transform in enumerate(augmentations):
  # Training subset (even positions of the training split) with the
  # augmentation under test; validation and test data stay un-augmented.
  train_dataset_t_da = Caltech(DATA_DIR, 'train.txt', transform = new_transform)

  train_indexes_da = [idx for idx in range(len(train_dataset_t_da)) if not idx % 2]

  train_dataset_da = Subset(train_dataset_t_da, train_indexes_da)
  train_dataloader_da = DataLoader(train_dataset_da, batch_size = best_bs, shuffle = True,  num_workers = 4, drop_last = True)

  #alexnet

  net = alexnet(pretrained = True)
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

  net = net.to(DEVICE)

  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr = best_lr, momentum = MOMENTUM, weight_decay = WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = best_step_size, gamma = GAMMA)

  training_and_val(net, best_epoch, optimizer, scheduler, train_dataloader_da, val_dataloader)
  evaluate(net)

**Evaluation on test set with best Hyperparameters**

In [0]:
# Values taken from the finished hyper-parameter search. Run this cell after
# every search to retrain the network, so the evaluated weights match the
# selected hyper-parameters.
best_lr = 1e-3
best_bs = 150
best_epoch = 30
best_step_size = 25

# Network to retrain and evaluate.
ARCH = 'alexnet'     # 'alexnet' or 'resnet'
PRETRAINED = True    # False to train without transfer learning

if ARCH == 'resnet':
  best_net = torch.hub.load('pytorch/vision:v0.4.2', 'resnet101', pretrained = PRETRAINED)
  best_net.fc = nn.Linear(2048, NUM_CLASSES)
else:
  best_net = alexnet(pretrained = PRETRAINED)
  best_net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

best_net = best_net.to(DEVICE)# this will bring the network to GPU if DEVICE is cuda

# Enable the cuDNN auto-tuner. The flag must be assigned: the bare expression
# `cudnn.benchmark` only reads it and has no effect.
cudnn.benchmark = True

parameters_to_optimize = best_net.parameters()
optimizer = optim.SGD(parameters_to_optimize, lr = best_lr, momentum = MOMENTUM, weight_decay = WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = best_step_size, gamma = GAMMA)
train_dataloader = DataLoader(train_dataset, batch_size = best_bs, shuffle=True,  num_workers=4, drop_last=True)
val_dataloader   = DataLoader(val_dataset,   batch_size = best_bs, shuffle=True,  num_workers=4)
test_dataloader  = DataLoader(test_dataset,  batch_size = best_bs, shuffle=False, num_workers=4)
# Retrain so the weights reflect the best hyper-parameters, then test.
training_and_val(best_net, best_epoch, optimizer, scheduler, train_dataloader, val_dataloader)
evaluate(best_net)