
**Install requirements**

In [1]:
!pip3 install 'torch==1.3.1' 
!pip3 install 'torchvision==0.4.2'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'



**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 101 # 101 + 1: There is am extra Background class that should be removed 
# ^ Set the number of classes to 101 since i filtered out the BACKGROUND class 

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 30      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

**Caltech_101 class provided by teachers**

In [0]:
from torchvision.datasets import VisionDataset

from PIL import Image

import os
import os.path
import sys


def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


class Caltech(VisionDataset):
    def __init__(self, root, split='train', transform=None, target_transform=None):
        super(Caltech, self).__init__(root, transform=transform, target_transform=target_transform)

        self.split = split # This defines the split you are going to use
                           # (split files are called 'train.txt' and 'test.txt')

        '''
        - Here you should implement the logic for reading the splits files and accessing elements
        - If the RAM size allows it, it is faster to store all data in memory
        - PyTorch Dataset classes use indexes to read elements
        - You should provide a way for the __getitem__ method to access the image-label pair
          through the index
        - Labels should start from 0, so for Caltech you will have lables 0...100 (excluding the background class) 
        '''

        # Open file in read only mode and read all lines
        file = open(self.split, "r")
        lines = file.readlines()

        file.close()

        # Filter out the lines which start with 'BACKGROUND_Google' as asked in the homework
        self.elements = [i for i in lines if not i.startswith('BACKGROUND_Google')]

        # Delete BACKGROUND_Google class from dataset labels
        self.classes = sorted(os.listdir(os.path.join(self.root, "")))
        self.classes.remove("BACKGROUND_Google")


    def __getitem__(self, index):
        ''' 
        __getitem__ should access an element through its index
        Args:
            index (int): Index
        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        '''

        img = pil_loader(os.path.join(self.root, self.elements[index].rstrip()))

        target = self.classes.index(self.elements[index].rstrip().split('/')[0])

        image, label = img, target # Provide a way to access image and label via index
                           # Image should be a PIL Image
                           # label can be int

        # Applies preprocessing when accessing the image
        if self.transform is not None:
            image = self.transform(image)

        return image, label

    def __len__(self):
        '''
        The __len__ method returns the length of the dataset
        It is mandatory, as this is used by several other components
        '''
        # Provides a way to get the length (number of elements) of the dataset
        length =  len(self.elements)
        return length

**Define Data Preprocessing**

In [0]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])

# Imagenet transforms 
img_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

**Prepare Dataset**

In [31]:
from sklearn.model_selection import train_test_split #for splitting stuff

# Clone github repository with data
if not os.path.isdir('./Homework2-Caltech101'):
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git

# Commands to execute when there is an error saying directory ./Homework2-Caltech101/ is already present and not empty
!rm -r ./Homework2-Caltech101/
!git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git

DATA_DIR = 'Homework2-Caltech101/101_ObjectCategories'
SPLIT_TRAIN = 'Homework2-Caltech101/train.txt'
SPLIT_TEST = 'Homework2-Caltech101/test.txt'

# # Prepare Pytorch train/test Datasets
# train_dataset = torchvision.datasets.ImageFolder(DATA_DIR, transform=train_transform)
# test_dataset = torchvision.datasets.ImageFolder(DATA_DIR, transform=eval_transform)

# # Indexes help divide the dataset into 4/5 for the training set and 1/5 for the test set
# train_indexes = [idx for idx in range(len(train_dataset)) if idx % 5]
# test_indexes = [idx for idx in range(len(test_dataset)) if not idx % 5]

# train_dataset = Subset(train_dataset, train_indexes)
# test_dataset = Subset(test_dataset, test_indexes)

# # Check dataset sizes
# print('Train Dataset: {}'.format(len(train_dataset)))
# print('Test Dataset: {}'.format(len(test_dataset)))

# 1 - Data preparation
myTrainDS = Caltech(DATA_DIR, split = SPLIT_TRAIN, transform=train_transform)
myTestDS = Caltech(DATA_DIR, split = SPLIT_TEST, transform=eval_transform)

print('My Train DS: {}'.format(len(myTrainDS)))
print('My Test DS: {}'.format(len(myTestDS)))


# 2 - Training from scratch
# Open the train.txt file and instantiate two lists 
training = open(SPLIT_TRAIN, "r")
buffer = training.readlines()
buffer_class = []

training.close()

# Add each class of the buffer's corresponding entry to the buffer_class list 
# In this way the element in buffer[i] will be of class buffer_class[i]
for ent in buffer:
  cl = ent.rstrip().split('/')[0]
  buffer_class.append(cl)

# I used the train_test_split method with the option stratify to have same percentage of examples of each class
# both in train and validation dataset
train, validation, y_tr, y_val = train_test_split(buffer, buffer_class, test_size=0.5, stratify=buffer_class)

# Here i simply create two files and write down the new entry and validation splits.
t = open("Homework2-Caltech101/train_2.txt", "w+")
v = open("Homework2-Caltech101/validation_2.txt", "w+")

for e in train:
  t.write(e)

for e in validation:
  v.write(e)

t.close()
v.close()

TRAIN2 = 'Homework2-Caltech101/train_2.txt'
VALID2 = 'Homework2-Caltech101/validation_2.txt'

# Now from here I can create two datasets for the train2 and val2
train2 = Caltech(DATA_DIR, split = TRAIN2, transform=train_transform)
valid2 = Caltech(DATA_DIR, split = VALID2, transform=eval_transform)

imgnet = Caltech(DATA_DIR, split = SPLIT_TEST, transform=img_transform)

Cloning into 'Homework2-Caltech101'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects:  33% (1/3)[Kremote: Counting objects:  66% (2/3)[Kremote: Counting objects: 100% (3/3)[Kremote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 9256 (delta 0), reused 2 (delta 0), pack-reused 9253[K
Receiving objects: 100% (9256/9256), 129.48 MiB | 32.43 MiB/s, done.
Resolving deltas: 100% (3/3), done.
Checking out files: 100% (9149/9149), done.
My Train DS: 5784
My Test DS: 2893


**Prepare Dataloaders**

In [0]:
# # Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
# train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
# test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# 1 - Data preparation - My dataloaders for this point
myTrain_dataloader = DataLoader(myTrainDS, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
myTest_dataloader = DataLoader(myTestDS, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# 2 - Training from scratch
TDL2 = DataLoader(train2, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
EDL2 = DataLoader(valid2, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
# Test data loader remains the same from the previous point

# 3 - Transfer Learning 
imgnet_dataloader = DataLoader(imgnet, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Prepare Network**

In [0]:
net = alexnet() # Loading AlexNet model
bestnet = alexnet() # Best network from validation phase
ptalexnet = alexnet(pretrained=True)
# AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet's classes
# We need 101 outputs for Caltech-101
net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d
bestnet.classifier[6] = nn.Linear(4096, NUM_CLASSES)


ptalexnet.classifier[6] = nn.Linear(4096, NUM_CLASSES)
# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is mandatory to study torchvision.models.alexnet source code

**Prepare Training**

In [0]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet
best_parameters_to_optimize = bestnet.parameters()
pt_parameters_to_optimize = ptalexnet.parameters()
# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
best_optimizer = optim.SGD(best_parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
pt_optimizer = optim.SGD(pt_parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)
best_scheduler = optim.lr_scheduler.StepLR(best_optimizer, step_size=STEP_SIZE, gamma=GAMMA)
pt_scheduler = optim.lr_scheduler.StepLR(pt_optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**Train**

In [10]:
import copy
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0
# Start iterating over the epochs - SOURCE CODE AS GIVEN BY TEACHERS

# for epoch in range(NUM_EPOCHS):
#   print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

#   # Iterate over the dataset
#   for images, labels in train_dataloader:

#     # Bring data over the device of choice
#     images = images.to(DEVICE)
#     labels = labels.to(DEVICE)

#     net.train() # Sets module in training mode

#     # PyTorch, by default, accumulates gradients after each backward pass
#     # We need to manually set the gradients to zero before starting a new iteration
#     optimizer.zero_grad() # Zero-ing the gradients

#     # Forward pass to the network
#     outputs = net(images)

#     # Compute loss based on output and ground truth
#     loss = criterion(outputs, labels)

#     # Log loss
#     if current_step % LOG_FREQUENCY == 0:
#       print('Step {}, Loss {}'.format(current_step, loss.item()))

#     # Compute gradients for each layer and update weights
#     loss.backward()  # backward pass: computes gradients
#     optimizer.step() # update weights based on accumulated gradients

#     current_step += 1

#   # Step the scheduler
#   scheduler.step() 


################## MY TRAINING PROCESS FOR 1 - Data Preparation ##################
# for epoch in range(NUM_EPOCHS):
#   print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

#   # Iterate over the dataset
#   for images, labels in myTrain_dataloader:

#     # Bring data over the device of choiceTest Pretrained Net Accuracy: 0.01106118216384376
#     images = images.to(DEVICE)
#     labels = labels.to(DEVICE)

#     net.train() # Sets module in training mode

#     # PyTorch, by default, accumulates gradients after each backward pass
#     # We need to manually set the gradients to zero before starting a new iteration
#     optimizer.zero_grad() # Zero-ing the gradients

#     # Forward pass to the network
#     outputs = net(images)

#     # Compute loss based on output and ground truth
#     loss = criterion(outputs, labels)

#     # Log loss
#     if current_step % LOG_FREQUENCY == 0:
#       print('Step {}, Loss {}'.format(current_step, loss.item()))

#     # Compute gradients for each layer and update weights
#     loss.backward()  # backward pass: computes gradients
#     optimizer.step() # update weights based on accumulated gradients

#     current_step += 1

#   # Step the scheduler
#   scheduler.step() 


################## MY TRAINING PROCESS FOR 2 - Training from scratch ##################
best_acc = -1
for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

  # Iterate over the dataset
  for images, labels in TDL2:

    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    net.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs = net(images)

    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  ### Validation phase ###
  net.train(False)

  running_corrects = 0
  for images_v, labels_v in EDL2:
    images_v = images_v.to(DEVICE)
    labels_v = labels_v.to(DEVICE)

    # Forward Pass
    outputs = net(images_v)

    # Get predictions
    _, preds = torch.max(outputs.data, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels_v.data).data.item()

  # Calculate Accuracy
  accuracy = running_corrects / float(len(valid2))
  print("\nAccuracy = {}\n".format(accuracy))

  if accuracy > best_acc:
    bestnet = copy.deepcopy(net)
    best_acc = accuracy

  # Step the scheduler to change the LR
  scheduler.step()

print("\n\n\n best net is: ")
print(bestnet)

Starting epoch 1/30, LR = [0.002]
Step 0, Loss 4.614223480224609

Accuracy = 0.023167358229598894

Starting epoch 2/30, LR = [0.002]

Accuracy = 0.09163208852005532

Starting epoch 3/30, LR = [0.002]
Step 10, Loss 4.612725257873535

Accuracy = 0.09232365145228216

Starting epoch 4/30, LR = [0.002]

Accuracy = 0.09232365145228216

Starting epoch 5/30, LR = [0.002]
Step 20, Loss 4.60761833190918

Accuracy = 0.09232365145228216

Starting epoch 6/30, LR = [0.002]

Accuracy = 0.09232365145228216

Starting epoch 7/30, LR = [0.002]
Step 30, Loss 4.601624965667725

Accuracy = 0.09232365145228216

Starting epoch 8/30, LR = [0.002]

Accuracy = 0.09232365145228216

Starting epoch 9/30, LR = [0.002]
Step 40, Loss 4.59588098526001

Accuracy = 0.09232365145228216

Starting epoch 10/30, LR = [0.002]


KeyboardInterrupt: ignored

**Test**

In [0]:
### Normal net test phase for point 1 and 2 to compare with the best net ###
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(myTest_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(myTestDS))

print('\nTest Normal Net Accuracy: {}'.format(accuracy))
### end normal net test phase ###

######## 2 - Training from scratch - My test set of best model ########
bestnet = bestnet.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
bestnet.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(myTest_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = bestnet(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(myTestDS))

print('\nTest Best Net Accuracy: {}'.format(accuracy))

**Train and Test AlexNet with pre-trained weights**

In [35]:
import copy
# By default, everything is loaded to cpu
ptalexnet = ptalexnet.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0

best_acc = -1
for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, pt_scheduler.get_lr()))

  # Iterate over the dataset
  for images, labels in imgnet_dataloader:

    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    ptalexnet.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    pt_optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs = ptalexnet(images)

    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    pt_optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  ### Validation phase ###
  ptalexnet.train(False)

  running_corrects = 0
  for images_v, labels_v in EDL2:
    images_v = images_v.to(DEVICE)
    labels_v = labels_v.to(DEVICE)

    # Forward Pass
    outputs = ptalexnet(images_v)

    # Get predictions
    _, preds = torch.max(outputs.data, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels_v.data).data.item()

  # Calculate Accuracy
  accuracy = running_corrects / float(len(imgnet))
  print("\nAccuracy = {}\n".format(accuracy))

  if accuracy > best_acc:
    bestnet = copy.deepcopy(ptalexnet)
    best_acc = accuracy

  # Step the scheduler to change the LR
  pt_scheduler.step()



  ### TEST PHASE ###
bestnet = bestnet.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
bestnet.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(myTest_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = bestnet(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(myTestDS))

print('\nTest Best Pretrained Net Accuracy: {}'.format(accuracy))

Starting epoch 1/30, LR = [0.6]
Step 0, Loss 4.410877704620361
Step 10, Loss nan

Accuracy = 0.05012098167991704

Starting epoch 2/30, LR = [0.6]
Step 20, Loss nan

Accuracy = 0.05012098167991704

Starting epoch 3/30, LR = [0.6]
Step 30, Loss nan

Accuracy = 0.05012098167991704

Starting epoch 4/30, LR = [0.6]
Step 40, Loss nan


KeyboardInterrupt: ignored