
**Install requirements**

In [None]:
#!pip3 install 'torch==1.3.1'
#!pip3 install 'torchvision==0.5.0'
#!pip3 install 'Pillow-SIMD'
#!pip3 install 'tqdm'

**Import libraries**

In [None]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet, resnet50, resnet18, vgg16

from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import StratifiedShuffleSplit


 



**Set Arguments**

In [None]:
# Run-wide configuration: every tunable of the experiment lives in this cell.
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 101 # Caltech-101 object classes; the extra BACKGROUND_Google class in the data is excluded

BATCH_SIZE = 64    # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to have comparable results

LR = 0.01   # The initial learning rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 30     # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20 # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10 # The training loss is printed every LOG_FREQUENCY optimisation steps
NET= "AlexNet" # Network to build; allowed values: "AlexNet", "resnet18", "vgg16"
PRETRAINED= False # Set to False to skip loading pretrained weights
FREEZE= 0  # 0 optimizes every parameter, 1 freezes the conv layers, 2 freezes the fc layers
ALGORITHM= 'SGD' # Optimizer: 'SGD' (with momentum) or 'adam'
AUGMENTATION = 0 # 0 disables augmentation; 1, 2 or 3 selects dataAug_transform1/2/3 for the training split


**Define Data Preprocessing**

In [None]:
# Per-channel (mean, std) pairs handed to transforms.Normalize
_HALF_STATS = ((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
_IMAGENET_STATS = ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))


def _build_transform(stats, crop=None, extra=()):
  """Compose Resize(256) -> crop -> extra ops -> ToTensor -> Normalize(*stats).

  stats: (mean, std) tuple forwarded to transforms.Normalize.
  crop:  cropping transform; defaults to CenterCrop(224). The crop must yield
         224x224 patches because torchvision's AlexNet needs a 224x224 input.
  extra: PIL-level transforms applied after the crop and before ToTensor.
  """
  if crop is None:
    crop = transforms.CenterCrop(224)  # central square patch of the resized image
  steps = [transforms.Resize(256), crop]  # Resize brings the short side to 256
  steps.extend(extra)
  steps.append(transforms.ToTensor())  # PIL Image -> torch.Tensor
  steps.append(transforms.Normalize(*stats))
  return transforms.Compose(steps)


# Training phase
train_transform = _build_transform(_HALF_STATS)

# Same geometry, normalized with the statistics in _IMAGENET_STATS
alexNet_transform = _build_transform(_IMAGENET_STATS)

# Evaluation phase
eval_transform = _build_transform(_HALF_STATS)

# Augmentation 1: random vertical flip + colour jitter
dataAug_transform1 = _build_transform(
    _IMAGENET_STATS,
    extra=(transforms.RandomVerticalFlip(),
           transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5)))

# Augmentation 2: random 224 crop + horizontal flip with probability 0.7
dataAug_transform2 = _build_transform(
    _IMAGENET_STATS,
    crop=transforms.RandomCrop(224),
    extra=(transforms.RandomHorizontalFlip(p=0.7),))

# Augmentation 3: random rotation of up to 45 degrees + colour jitter
dataAug_transform3 = _build_transform(
    _IMAGENET_STATS,
    extra=(transforms.RandomRotation(45),
           transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5)))





**Prepare Dataset**

In [None]:
from torchvision.datasets import VisionDataset
from PIL import Image
import os
import os.path
import sys
import glob

# Clone github repository with data
# The clone is skipped when ./Caltech101 already exists, so re-running this cell
# does not download the data again.
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

# Directory of the image folders, relative to the notebook's working directory
DATA_DIR = 'Caltech101/101_ObjectCategories'
# NOTE: this imported Caltech is shadowed by the Caltech class defined further
# down in this cell, so the local definition is the one actually instantiated.
from Caltech101.caltech_dataset import Caltech

def pil_loader(path):
  """Open the image file at `path` and return it as an RGB PIL image."""
  # The conversion happens while the file handle is still open, so the pixel
  # data is fully read before the `with` block closes the file.
  with open(path, 'rb') as handle:
    return Image.open(handle).convert('RGB')




# pil_loader('Caltech101//101_ObjectCategories//accordion//image_0002.jpg')

class Caltech(VisionDataset):
    """Caltech-101 split read from a text file of relative image paths.

    `root` is the directory holding one sub-folder per class. The split file
    `<split>.txt` is looked up in the parent directory of `root` (for
    root='Caltech101/101_ObjectCategories' that is 'Caltech101/<split>.txt').
    Every non-empty line of the split file is a path relative to `root` whose
    first component is the class name; 'BACKGROUND_Google' entries are skipped.

    Args:
        root: directory containing the per-class image folders.
        split: name of the split file without extension ('train' or 'test').
        transform: optional callable applied to the loaded PIL image.
        target_transform: optional callable applied to the integer label.

    Attributes:
        classes: sorted list of class names (without 'BACKGROUND_Google').
        class_to_idx: mapping class name -> integer label.
        instances: list of (relative_path, label) pairs, one per sample.
        samples: [relative_paths, labels], two parallel lists (used for
            stratified splitting).
        intTargets: list of integer labels, parallel to `instances`.
        pathDataset, targets: kept for backward compatibility; left empty.
    """

    def __init__(self, root, split='train', transform=None, target_transform=None):
        super(Caltech, self).__init__(root, transform=transform, target_transform=target_transform)

        self.split = split
        self.root = root
        self.transform = transform
        self.target_transform = target_transform

        # os.listdir returns entries in arbitrary order: sort them so that the
        # class -> label mapping is the same on every run and every machine.
        self.classes = sorted(name for name in os.listdir(root) if name != 'BACKGROUND_Google')
        self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

        self.pathDataset = []
        self.targets = []
        self.intTargets = []
        self.samples = [[], []]
        self.instances = []

        # The split files sit next to the image directory, i.e. in root's parent.
        split_file = os.path.join(os.path.dirname(os.path.normpath(root)), split + '.txt')
        with open(split_file, 'r') as fp:
            for line in fp:
                rel_path = line.strip()
                if not rel_path:
                    continue  # tolerate blank lines (e.g. a trailing newline)
                class_name = rel_path.split('/')[0]
                if class_name == 'BACKGROUND_Google':
                    continue
                label = self.class_to_idx[class_name]
                self.intTargets.append(label)
                self.samples[0].append(rel_path)
                self.samples[1].append(label)
                self.instances.append((rel_path, label))

    def __getitem__(self, index):
        """Return the (image, label) pair stored at position `index`."""
        rel_path, label = self.instances[index]
        image = pil_loader(os.path.join(self.root, rel_path))

        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

    def __len__(self):
        """Number of samples in this split."""
        return len(self.instances)
          
          
                

# Prepare Pytorch train/test Datasets



# Prepare Pytorch train/val/test Datasets

# Augmentation pipelines selectable through AUGMENTATION, each paired with the
# message printed when it is chosen.
_AUG_CHOICES = {
  1: (dataAug_transform1, "first transformation applied"),
  2: (dataAug_transform2, "second transformation applied"),
  3: (dataAug_transform3, "third transformation applied"),
}

# The pretrained AlexNet is fed with alexNet_transform; every other setup uses
# train_transform / eval_transform.
uses_alexnet_pipeline = NET == 'AlexNet' and bool(PRETRAINED)

if AUGMENTATION != 0 and AUGMENTATION not in _AUG_CHOICES:
  raise ValueError("AUGMENTATION must be 0, 1, 2 or 3, got {!r}".format(AUGMENTATION))
if AUGMENTATION != 0 and not uses_alexnet_pipeline:
  # The augmentation pipelines share alexNet_transform's normalization, which
  # is only used for the pretrained AlexNet.
  raise ValueError("AUGMENTATION != 0 is only supported with NET='AlexNet' and PRETRAINED=True")

if uses_alexnet_pipeline:
  plain_transform = alexNet_transform
  test_dataset = Caltech(DATA_DIR, split='test', transform=alexNet_transform)
else:
  plain_transform = train_transform
  test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

# Un-augmented view of the training file: source of the validation samples and,
# when augmentation is off, of the training samples as well. It uses the same
# normalization as the test set so that validation and test are comparable.
train_full = Caltech(DATA_DIR, split='train', transform=plain_transform)

if AUGMENTATION != 0:
  aug_transform, aug_message = _AUG_CHOICES[AUGMENTATION]
  # Augmented view of the same file: only the training half is drawn from it,
  # so augmentation never touches the validation samples.
  train_aug = Caltech(DATA_DIR, split='train', transform=aug_transform)
  print(aug_message)

# Stratified 50/50 split of the training file into train and validation indices
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
train_indexes, val_indexes = next(sss.split(train_full.samples[0], train_full.samples[1]))

val_dataset = Subset(train_full, val_indexes)
if AUGMENTATION != 0:
  train_dataset = Subset(train_aug, train_indexes)
  print("augmentation split")
else:
  train_dataset = Subset(train_full, train_indexes)

# The two halves must partition the training file
assert len(train_dataset) + len(val_dataset) == len(train_full)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

# New Section

**Prepare Dataloaders**

In [None]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
from torchvision import utils
import matplotlib.pyplot as plt
# Peek at the first training sample: an (image, label) pair
print(train_dataset[0])

# Settings common to the three loaders
_loader_opts = dict(batch_size=BATCH_SIZE, num_workers=4)

# Training batches are reshuffled every epoch and the incomplete last batch is dropped
train_dataloader = DataLoader(train_dataset, shuffle=True, drop_last=True, **_loader_opts)

# Validation and test data are read in stored order and keep every sample
val_dataloader = DataLoader(val_dataset, shuffle=False, **_loader_opts)
test_dataloader = DataLoader(test_dataset, shuffle=False, **_loader_opts)

#xb, yb = next(iter(train_dataloader))
#out = torchvision.utils.make_grid(xb)
#plt.imshow(out.numpy().transpose((1, 2, 0)))


**Prepare Network**

In [None]:
# Build the backbone selected by NET and replace its last fully connected layer
# with a fresh NUM_CLASSES-way layer (the torchvision models ship with 1000
# ImageNet outputs). PRETRAINED decides, for every backbone, whether the
# ImageNet weights are loaded.
if NET == "AlexNet":
  net = alexnet(pretrained=PRETRAINED)
  net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)
elif NET == "resnet18":
  net = resnet18(pretrained=PRETRAINED)
  # The layer itself must be replaced: assigning to `net.fc.out_features` only
  # changes an attribute and leaves the 1000-way weight matrix in place.
  net.fc = nn.Linear(net.fc.in_features, NUM_CLASSES)
elif NET == "vgg16":
  net = vgg16(pretrained=PRETRAINED)
  net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)
else:
  raise ValueError("Unsupported NET {!r}: expected 'AlexNet', 'resnet18' or 'vgg16'".format(NET))

print(net)

# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is strongly suggested to study torchvision.models.alexnet source code

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

**Prepare Training**

In [None]:
## Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize.
# nn.Module objects implement the Composite Pattern, so a subset of the
# parameters is reached through a submodule, e.g. net.classifier.parameters().
# Parameters that are not handed to the optimizer are never updated, which is
# how the FREEZE modes are realised.
if FREEZE == 0:
  parameters_to_optimize = net.parameters() # optimize over all the parameters
elif FREEZE in (1, 2):
  # 1: freeze conv layers -> train only `classifier`
  # 2: freeze FC layers   -> train only `features`
  trained_part = 'classifier' if FREEZE == 1 else 'features'
  if not hasattr(net, trained_part):
    raise ValueError("FREEZE={} needs a network with a '{}' submodule, which NET={!r} lacks"
                     .format(FREEZE, trained_part, NET))
  parameters_to_optimize = getattr(net, trained_part).parameters()
else:
  raise ValueError("FREEZE must be 0, 1 or 2, got {!r}".format(FREEZE))

# Define optimizer
# An optimizer updates the weights based on loss
if ALGORITHM == 'SGD':
  optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
elif ALGORITHM == 'adam':
  optimizer = optim.Adam(parameters_to_optimize, lr=LR, weight_decay=WEIGHT_DECAY)
else:
  raise ValueError("ALGORITHM must be 'SGD' or 'adam', got {!r}".format(ALGORITHM))

# Define scheduler
# A scheduler dynamically changes learning rate
# The step(-down) schedule multiplies the learning rate by GAMMA every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**Train**

In [None]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

# Assigning True enables the cuDNN autotuner; merely referencing the attribute
# has no effect.
cudnn.benchmark = True

# Per-epoch histories (one entry per epoch), consumed by the plots further down
accuracy_train = []
accuracy_val = []
loss_train = []
loss_val = []

current_step = 0
# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  # Read the learning rate straight from the optimizer: this is the value
  # actually used during the epoch.
  current_lrs = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lrs))

  # ---------------- training ----------------
  net.train() # Sets module in training mode
  running_corrects = 0
  running_loss = 0.0
  samples_seen = 0 # differs from len(train_dataset) because the loader drops the last partial batch
  for images, labels in train_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # PyTorch accumulates gradients across backward passes,
    # so they are reset before every iteration
    optimizer.zero_grad()

    # Forward pass and loss against the ground truth
    outputs_training = net(images)
    loss = criterion(outputs_training, labels)

    # Bookkeeping for the epoch statistics (criterion returns the batch mean,
    # hence the re-weighting by batch size)
    _, preds = torch.max(outputs_training.detach(), 1)
    batch_size = labels.size(0)
    running_corrects += torch.sum(preds == labels).item()
    running_loss += loss.item() * batch_size
    samples_seen += batch_size

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  accuracy = running_corrects / float(max(samples_seen, 1))
  accuracy_train.append(accuracy)
  loss_train.append(running_loss / max(samples_seen, 1))

  # ---------------- validation ----------------
  net.train(False)
  running_corrects = 0
  running_loss = 0.0
  with torch.no_grad(): # no gradients are needed for evaluation
    for images, labels in tqdm(val_dataloader):
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      # Forward Pass
      outputs = net(images)

      # Get predictions
      _, preds = torch.max(outputs, 1)

      # Update corrects and the summed loss
      running_corrects += torch.sum(preds == labels).item()
      running_loss += criterion(outputs, labels).item() * labels.size(0)

  # Accuracy and mean loss over the validation set
  accuracy = running_corrects / float(len(val_dataset))
  accuracy_val.append(accuracy)
  loss_val.append(running_loss / len(val_dataset))

  # Step the scheduler
  scheduler.step()
 

**Validation**

In [None]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.eval() # Set Network to evaluation mode

# Summed (not averaged) loss per batch, so that dividing the total by the
# dataset size below yields the mean loss per sample.
criterion1 = nn.CrossEntropyLoss(reduction='sum')

running_corrects = 0
running_loss = 0
with torch.no_grad(): # no gradients are needed for evaluation
  for images, labels in tqdm(val_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()
    loss = criterion1(outputs, labels)

    running_loss += loss.item()

# Accuracy and mean loss over the validation set
accuracy = running_corrects / float(len(val_dataset))
lossval = running_loss / len(val_dataset)
val accuracy [0.09194607673695127, 0.10127894918769444, 0.18216384376080194, 0.21776702385067404, 0.23608710680954026, 0.22813688212927757, 0.24680262703076392, 0.2799861735222952, 0.32042862080884893, 0.3435879709643968, 0.3840304182509506, 0.39405461458693397, 0.39854821984099553, 0.44071897684064987, 0.44866920152091255, 0.4742481852748012, 0.49982716902868995, 0.4787417905288628, 0.511925337020394, 0.5070860698237124, 0.538195644659523, 0.5475285171102662, 0.5551330798479087, 0.5506394745938472, 0.5437262357414449, 0.5513307984790875, 0.5523677843069478, 0.5461458693397857, 0.5471828551676461, 0.5492568268233667]

import matplotlib.pyplot as plt

# Length of each per-epoch history
for history in (accuracy_train, accuracy_val, loss_train, loss_val):
  print(len(history))

# Final validation figures followed by the last training epoch's figures
print('Validation Accuracy: {}'.format(accuracy))
print("val accuracy", accuracy_val)
print("loss validation", lossval)
last_epoch = NUM_EPOCHS-1
print("trainin accuracy", accuracy_train[last_epoch])
print("loss train", loss_train[last_epoch])

# Two stacked panels: accuracy on top, loss below
fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(accuracy_train, label='Training Accuracy')
acc_ax.plot(accuracy_val, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss_train, label='Training Loss')
loss_ax.plot(loss_val, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')

plt.show()


#plt.plot(loss_train, linestyle='-', marker='o')
#plt.plot(loss_val, linestyle='-', marker='o')
#plt.title('loss accuracy')
#plt.ylabel('Loss')
#plt.xlabel('Epoch')

#plt.legend(['Train', 'Val'], loc='upper right')

#plt.show

**Test**

In [None]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
with torch.no_grad(): # no gradients are needed for evaluation
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions: index of the highest score for each sample
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 46/46 [00:12<00:00,  3.54it/s]

Test Accuracy: 0.5506394745938472



