
**Install requirements**

In [None]:
# Upgrade pip, then install the pinned libraries this notebook was written against.
# Each line is an IPython shell escape, so it runs in the runtime's shell rather than in Python.
# NOTE(review): the recorded output below reports a pip dependency conflict involving
# torchvision right after torch 1.3.1 replaces the preinstalled torch. torchvision 0.5.0
# presumably expects torch 1.4.0 rather than 1.3.1 — confirm the two pins are a matching pair.
!python -m pip install -U pip
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/54/eb/4a3642e971f404d69d4f6fa3885559d67562801b99d7592487f1ecc4e017/pip-20.3.3-py2.py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 4.0MB/s 
[?25hInstalling collected packages: pip
  Found existing installation: pip 19.3.1
    Uninstalling pip-19.3.1:
      Successfully uninstalled pip-19.3.1
Successfully installed pip-20.3.3
Collecting torch==1.3.1
  Downloading torch-1.3.1-cp36-cp36m-manylinux1_x86_64.whl (734.6 MB)
[K     |████████████████████████████████| 734.6 MB 21 kB/s 
Installing collected packages: torch
  Attempting uninstall: torch
    Found existing installation: torch 1.7.0+cu101
    Uninstalling torch-1.7.0+cu101:
      Successfully uninstalled torch-1.7.0+cu101
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchvision 0.8.1+cu101 requires t



**Imports**

In [None]:
import os
import logging
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn
import torchvision
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from torchvision.datasets import VisionDataset
from PIL import Image
import os
import os.path
import sys
import matplotlib.pyplot as plt
from torchvision.models import alexnet
import numpy as np
from PIL import ImageDraw
import matplotlib.image as mpimg

**Function used to plot the loss and accuracy**

In [None]:
def plotting(loss_train, loss_val, acc_train, acc_val):
  """Plot per-epoch training/validation loss and accuracy curves.

  Two figures are created. The first holds the loss curves and is also written
  to a PNG file named 'Losses' in the working directory; the second holds the
  accuracy curves and is only left open for display.

  Args:
    loss_train: per-epoch training losses; its length defines the number of epochs.
    loss_val: per-epoch validation losses, same length as loss_train.
    acc_train: per-epoch training accuracies, same length as loss_train.
    acc_val: per-epoch validation accuracies, same length as loss_train.
  """
  epochs = len(loss_train)
  x = list(range(1, epochs + 1))

  # Roughly six labelled ticks for long runs. With fewer than 12 epochs the
  # spacing would be 0 or 1, which used to raise ZeroDivisionError or yield an
  # empty tick list, so short runs simply label every epoch.
  x_div = epochs // 6
  if x_div > 1:
    xdisp = [i for i in range(epochs) if i % x_div == 1]
  else:
    xdisp = x

  plt.figure()
  plt.plot(x, loss_train, 'r', label='Training loss')
  plt.plot(x, loss_val, 'b', label='Validation loss')
  plt.legend()
  plt.xlabel('Epochs')
  plt.ylabel('Loss')
  plt.xticks(xdisp)
  plt.savefig('Losses', format='png')

  plt.figure()
  plt.plot(x, acc_train, 'r', label='Training accuracy')
  plt.plot(x, acc_val, 'b', label='Validation accuracy')
  plt.legend()
  plt.xticks(xdisp)
  plt.xlabel('Epochs')
  plt.ylabel('Accuracy')

**Image loader and dataset class implementing the logic to read the Caltech-101 dataset**

In [None]:
def pil_loader(path):
    """Open the image file at `path` and return it converted to RGB mode.

    The conversion is performed while the file is still open, so the returned
    image no longer depends on the file handle.
    """
    with open(path, 'rb') as handle:
        picture = Image.open(handle)
        rgb_picture = picture.convert('RGB')
    return rgb_picture

class Caltech(VisionDataset):
    """Caltech-101 dataset that eagerly loads every image listed in a split file.

    The list of images is read from a text file named after `split` (with a
    '.txt' suffix) located in the first path component of `root`. Each line is
    an image path relative to `root`; the part before the first '/' is used as
    the class name. Lines starting with 'BACKGROUND_Google' are skipped.

    When `split == 'train'` the kept lines are dealt out alternately: the first
    goes to the training half (`self.images`), the second to the validation half
    (`self.images_val`), the third to training again, and so on. Assuming images
    of one class sit on consecutive lines, each class ends up with a near-equal
    share in both halves. For any other split every image goes to `self.images`.

    Class names are mapped to integer labels in order of first appearance in
    the list file (`self.indexes`).
    NOTE(review): the mapping is rebuilt per instance, so train and test labels
    agree only if both list files introduce the classes in the same order —
    confirm against the data files.

    Args:
        root: directory containing the class sub-folders.
        split: name of the list file without extension, e.g. 'train' or 'test'.
        transform: transform applied when `train_or_val` is neither 'train' nor 'val'.
        transform_train: transform applied when `train_or_val == 'train'`.
        transform_val: transform applied when `train_or_val == 'val'`.
        train_or_val: 'train' to serve the training half, 'val' to serve the
            validation half, anything else to serve `self.images` with `transform`.
    """

    # Class folders that are excluded from the dataset.
    IGNORED_CLASSES = ("BACKGROUND_Google",)

    def __init__(self, root, split, transform=None, transform_train=None, transform_val=None, train_or_val=None):
        super(Caltech, self).__init__(root, transform=transform)
        self.split = split
        self.train_or_val = train_or_val
        self.transform = transform
        self.transform_train = transform_train
        self.transform_val = transform_val

        self.images = []      # (PIL image, class name) pairs: training half, or the whole split
        self.images_val = []  # (PIL image, class name) pairs: validation half (train split only)
        self.indexes = {}     # class name -> integer label

        list_path = root.split("/")[0] + "/" + split + ".txt"
        to_validation = False  # flips after every kept line of the train split
        # The context manager closes the list file even if an image fails to load.
        with open(list_path, "r") as listing:
            for line in listing:
                relative_path = line.rstrip()
                # Blank lines would otherwise make pil_loader try to open a directory.
                if not relative_path or relative_path.startswith(self.IGNORED_CLASSES):
                    continue
                label = relative_path.split("/")[0]
                sample = (pil_loader(root + "/" + relative_path), label)
                if self.split == 'train' and to_validation:
                    self.images_val.append(sample)
                else:
                    self.images.append(sample)
                if label not in self.indexes:
                    self.indexes[label] = len(self.indexes)
                if self.split == 'train':
                    to_validation = not to_validation

    def __getitem__(self, index):
        """Return `(transformed image, integer label)` for the sample at `index`.

        The image and its label are always taken from the same list. Reading
        the validation label from the training list returns a wrong class
        wherever the two halves are not aligned (after any class with an odd
        number of images).
        """
        if self.train_or_val == 'train':
            samples, transform = self.images, self.transform_train
        elif self.train_or_val == 'val':
            samples, transform = self.images_val, self.transform_val
        else:
            samples, transform = self.images, self.transform

        image, class_name = samples[index]
        if transform is not None:
            image = transform(image)
        return image, self.indexes[class_name]

    def __len__(self):
        """Number of samples served in the current `train_or_val` mode."""
        if self.train_or_val == 'val':
            return len(self.images_val)
        return len(self.images)



**Function used to build the datasets and dataloaders**

In [None]:
def _center_crop_pipeline(mean, std):
  """Build the deterministic Resize(256) -> CenterCrop(224) -> ToTensor -> Normalize pipeline."""
  return transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      transforms.Normalize(mean=mean, std=std),
  ])


def init(pre_trained, batch_size, num_workers=4):
  """Download Caltech-101 if needed and build the datasets and dataloaders.

  Args:
    pre_trained: truthy to normalise with the mean/std [0.485, 0.456, 0.406] /
      [0.229, 0.224, 0.225]; falsy to normalise every channel with mean 0.5 and
      std 0.5.
    batch_size: batch size used by all three dataloaders.
    num_workers: worker processes per dataloader (defaults to 4, the value that
      was previously hard-coded).

  Returns:
    (train_dataloader, val_dataloader, test_dataloader,
     train_dataset, val_dataset, test_dataset)

  Raises:
    subprocess.CalledProcessError: if cloning the dataset repository fails.
  """
  # Truthiness test instead of `== True` / `== False`, so the transforms are
  # always defined whatever value is passed.
  if pre_trained:
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
  else:
    mean, std = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)

  # All three splits currently share the same deterministic preprocessing.
  # Training-only augmentation (RandomHorizontalFlip, ColorJitter, RandomCrop
  # were tried) would be inserted before ToTensor in train_transform only.
  train_transform = _center_crop_pipeline(mean, std)
  val_transform = _center_crop_pipeline(mean, std)
  test_transform = _center_crop_pipeline(mean, std)

  if not os.path.isdir('./Caltech101'):
    import subprocess
    # Clone straight into the target folder; check=True surfaces a failed
    # download here instead of as a missing-file error later on.
    subprocess.run(
        ['git', 'clone', 'https://github.com/MachineLearning2020/Homework2-Caltech101.git', 'Caltech101'],
        check=True)
  DATA_DIR = 'Caltech101/101_ObjectCategories'

  # NOTE(review): the 'train' list is loaded twice, once per instance below.
  train_dataset = Caltech(DATA_DIR, split='train', train_or_val='train', transform_train=train_transform)
  val_dataset = Caltech(DATA_DIR, split='train', train_or_val='val', transform_val=val_transform)
  test_dataset = Caltech(DATA_DIR, split='test', transform=test_transform)

  # Alternative split kept for reference: a stratified 50/50 split with scikit-learn.
  #   labels = [train_dataset[i][1] for i in range(len(train_dataset))]
  #   from sklearn.model_selection import train_test_split
  #   train, val = train_test_split(train_dataset, stratify=labels, test_size=0.5, random_state=42)

  print('Train Dataset: {}'.format(len(train_dataset)))
  print('Valid Dataset: {}'.format(len(val_dataset)))
  print('Test Dataset: {}'.format(len(test_dataset)))

  # Only the training loader shuffles and drops the last incomplete batch.
  train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)
  val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
  test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

  return train_dataloader, val_dataloader, test_dataloader, train_dataset, val_dataset, test_dataset

**Training, validation and test**

In [None]:
def _evaluate(net, dataloader, criterion, show_progress=False):
  """Run `net` in eval mode over `dataloader`; return (mean batch loss, accuracy)."""
  net.eval()
  batches = tqdm(dataloader) if show_progress else dataloader
  loss_sum = 0.0
  correct = 0
  seen = 0
  # No gradients are needed for evaluation; skipping autograd saves memory and time.
  with torch.no_grad():
    for images, labels in batches:
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)
      outputs = net(images)
      loss_sum += criterion(outputs, labels).item()
      _, preds = torch.max(outputs, 1)
      correct += torch.sum(preds == labels).item()
      seen += labels.size(0)
  return loss_sum / max(len(dataloader), 1), correct / float(max(seen, 1))


def train_net(net, layer, train_dataloader,  val_dataloader, test_dataloader, train_dataset, val_dataset, test_dataset, NUM_CLASSES, BATCH_SIZE,LR, MOMENTUM, WEIGHT_DECAY, NUM_EPOCHS, STEP_SIZE, GAMMA, LOG_FREQUENCY):
  """Train `net` with SGD + StepLR, validating before every epoch and testing at the end.

  Validation runs before the training pass of each epoch, so the first entry of
  the validation history reflects the network as it was passed in.

  Args:
    net: model to train; must expose `.features` / `.classifier` when `layer`
      is 'conv' / 'fully'.
    layer: which parameters are optimised: 'all', 'fully' (net.classifier) or
      'conv' (net.features).
    train_dataloader, val_dataloader, test_dataloader: loaders for the three splits.
    train_dataset, val_dataset, test_dataset: kept for interface compatibility;
      accuracies are computed from the number of samples actually iterated.
    NUM_CLASSES, BATCH_SIZE, LOG_FREQUENCY: accepted for interface compatibility; unused.
    LR, MOMENTUM, WEIGHT_DECAY: SGD hyperparameters.
    NUM_EPOCHS: number of epochs.
    STEP_SIZE, GAMMA: StepLR period (in epochs) and multiplicative decay factor.

  Returns:
    (train_loss_list, train_acc_list, val_loss_list, val_acc_list), one entry per epoch.

  Raises:
    ValueError: if `layer` is not one of 'all', 'fully', 'conv'.
  """
  # Reads the module-level DEVICE. Move the model first so the optimizer is
  # built over parameters that already live on the target device.
  net = net.to(DEVICE)

  if layer == 'all':
    parameters_to_optimize = net.parameters()
  elif layer == 'fully':
    parameters_to_optimize = net.classifier.parameters()
  elif layer == 'conv':
    parameters_to_optimize = net.features.parameters()
  else:
    raise ValueError("layer must be 'all', 'fully' or 'conv', got {!r}".format(layer))

  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)
  print('preparing validation!')

  # The bare expression `cudnn.benchmark` had no effect; the flag must be assigned.
  cudnn.benchmark = True

  val_acc_list = []
  val_loss_list = []
  train_loss_list = []
  train_acc_list = []

  for epoch in range(NUM_EPOCHS):
    # Read the learning rate actually in use straight from the optimizer.
    current_lr = [group['lr'] for group in optimizer.param_groups]
    print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

    # Validation happens before training so a pretrained model is scored as-is.
    val_loss, val_accuracy = _evaluate(net, val_dataloader, criterion)
    print('Validation Loss: {}'.format(val_loss))
    print('Validation Accuracy: {}'.format(val_accuracy))
    val_acc_list.append(val_accuracy)
    val_loss_list.append(val_loss)

    net.train()
    running_corrects = 0
    samples_seen = 0
    train_loss = 0.0

    for images, labels in train_dataloader:
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)
      optimizer.zero_grad()
      outputs = net(images)
      loss = criterion(outputs, labels)
      train_loss += loss.item()
      loss.backward()
      optimizer.step()
      _, preds = torch.max(outputs.data, 1)
      running_corrects += torch.sum(preds == labels.data).item()
      samples_seen += labels.size(0)

    train_loss = train_loss / max(len(train_dataloader), 1)
    print('Training Loss: {}'.format(train_loss))
    # Divide by the samples actually seen: a loader built with drop_last=True
    # skips part of the dataset, so len(train_dataset) understates accuracy.
    accuracy = running_corrects / float(max(samples_seen, 1))
    print('Training Accuracy: {}'.format(accuracy))
    train_acc_list.append(accuracy)
    train_loss_list.append(train_loss)
    scheduler.step()

  # Final evaluation on the held-out test split.
  _, test_accuracy = _evaluate(net, test_dataloader, criterion, show_progress=True)
  print('Test Accuracy: {}'.format(test_accuracy))

  return train_loss_list, train_acc_list, val_loss_list, val_acc_list

**Hyperparameters**

In [None]:
# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# notebook still runs on a runtime without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
NUM_CLASSES = 101      # passed to train_net; matches the 101-way classification head
BATCH_SIZE = 10        # batch size handed to init() for all dataloaders
LR = 1e-3              # initial SGD learning rate
MOMENTUM = 0.9         # SGD momentum
WEIGHT_DECAY = 5e-5    # SGD weight decay
NUM_EPOCHS = 50        # number of training epochs
STEP_SIZE = 30         # StepLR: epochs between learning-rate decays
GAMMA = 0.1            # StepLR: multiplicative decay factor
LOG_FREQUENCY = 10     # passed to train_net, which does not currently use it
pre_trained=True       # selects the normalisation statistics used by init()
parameters='conv'      # layers optimised by train_net: 'all', 'fully' or 'conv'

**Loading, training, evaluating, testing the network and plotting the loss and the accuracy**

In [None]:
# Build the dataloaders and datasets for the configured normalisation and batch size.
train_dataloader, val_dataloader, test_dataloader,train,val,test=init(pre_trained, BATCH_SIZE)

import torchvision.models as models
# Honour the pre_trained flag so the loaded weights match the normalisation
# that init() selected from the same flag (it was previously always True here).
net = models.alexnet(pretrained=pre_trained)
# Swap the final classifier layer for one with NUM_CLASSES outputs, reusing the
# existing layer's input width instead of hard-coding it.
net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)
# Move the model only after the new head is attached so every layer lands on DEVICE.
net = net.to(DEVICE)

train_loss_list, train_acc_list, val_loss_list, val_acc_list=train_net(net, parameters, train_dataloader, val_dataloader, test_dataloader, train, val, test, NUM_CLASSES, BATCH_SIZE,LR, MOMENTUM, WEIGHT_DECAY, NUM_EPOCHS, STEP_SIZE, GAMMA, LOG_FREQUENCY)
plotting(train_loss_list, val_loss_list, train_acc_list, val_acc_list)