<a href="https://colab.research.google.com/github/dadebulba/DeepLearningProject/blob/main/DeepLearningProject_withaugmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning Project - Person classification task learning

Mount Google Drive and extract `dataset.zip` into the `dataset` folder. Change the path below to match the location of your dataset.

In [None]:
# Colab-only: mount the user's Google Drive under /content/drive so the
# dataset archive stored there can be read by the unzip command below.
from google.colab import drive
drive.mount('/content/drive')
!unzip "/content/drive/MyDrive/UNITN/5° anno/Deep Learning 2021/dataset.zip" -d dataset

Import the necessary libraries.

In [None]:
import torch
import torchvision
import torch.nn.functional as F
import torchvision.transforms as T
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
import os
from os import listdir
from os.path import isfile, join
from torch.utils.tensorboard import SummaryWriter
from PIL import Image
from sklearn.model_selection import train_test_split
import random
# Seed Python's `random` module so that random.shuffle (used for the
# train/validation split) is reproducible.
# NOTE(review): NumPy and PyTorch generators are not seeded in this cell.
random.seed(10)

# print cuda info
print(f"Cuda available: {torch.cuda.is_available()}")
print(f"Cuda device count: {torch.cuda.device_count()}")

## Data Preprocessing

In [None]:
def setupLabelsDict(annotations_frame):
    """Assign a slot of the target encoding to every "column-value" label.

    Every column of ``annotations_frame`` except ``id`` contributes one slot
    for each integer between its minimum and its maximum value (inclusive).
    Slots are numbered consecutively, following the column order.

    Input arguments
      annotations_frame: dataframe containing annotations associated to people ids

    Returns: dict whose keys are the labels (formatted as column-value) and
    whose values are the corresponding indexes in the encoding
    """
    slot_of_label = {}
    next_slot = 0
    for column in annotations_frame:
        # id identifies the sample, it is not an attribute to predict
        if column == "id":
            continue
        values = annotations_frame[column]
        # the value range of the column decides how many slots it needs
        lowest = min(values)
        highest = max(values)
        for value in range(lowest, highest + 1):
            slot_of_label[f"{column}-{value}"] = next_slot
            next_slot += 1
    return slot_of_label

def getTargetEncoding(id, annotations_frame, labels):
    """Build the target encoding of the sample identified by ``id``.

    The encoding is a list of ``len(labels)`` integers, all zero except for
    the slots selected by the sample's attribute values: for every column
    other than ``id`` the slot ``labels["<column>-<value>"]`` is incremented.
    When several rows share the same id, only the first one is used.

    Input arguments
      id: which id has to get the encoding
      annotations_frame: used to localize the original target values
      labels: dict mapping "<column>-<value>" labels to encoding indexes

    Returns: target encoding for the corresponding id
    """
    target = [0] * len(labels)
    matching_rows = annotations_frame.loc[annotations_frame['id'] == id]
    for column in matching_rows.columns:
        if column == 'id':
            continue
        first_value = matching_rows[column].iloc[0]
        slot = labels["%s-%s" % (column, first_value)]
        target[slot] += 1
    return target

In [None]:
def convertAnnotationsFrame(annotations_frame, train_dir):
    """Expand the per-person annotations into one row per training image.

    Image files are matched to a person through the integer prefix of their
    file name (the text before the first "_"). Files whose prefix is not an
    integer are ignored.

    Input arguments
      annotations_frame: original dataframe parsed from annotations_train.csv;
                         its first column is the person id and the following
                         27 columns are the attributes, in the order listed
                         in ``attribute_names`` below. If None, the frame is
                         loaded from 'dataset/annotations_train.csv'.
      train_dir: directory containing the images used for training

    Returns: dataframe with annotations for every image of the specified
    dataset, where id is the image file name

    Raises: ValueError if the frame has fewer attribute columns than expected
    """
    # Names given to the attribute columns of the output, matched by position
    # with the columns that follow the person id in annotations_frame.
    attribute_names = (
        "age", "backpack", "bag", "handbag", "clothes", "down", "up", "hair",
        "hat", "gender",
        "upblack", "upwhite", "upred", "uppurple", "upyellow", "upgray",
        "upblue", "upgreen",
        "downblack", "downwhite", "downpink", "downpurple", "downyellow",
        "downgray", "downblue", "downgreen", "downbrown",
    )

    if annotations_frame is None:
        # Fallback kept for callers that relied on the file being (re)loaded here.
        annotations_frame = pd.read_csv('dataset/annotations_train.csv')

    # Index the image files by person id once, instead of rescanning the
    # whole directory listing for every annotated person.
    files_by_person = {}
    for file_name in listdir(train_dir):
        try:
            person_id = int(file_name.split("_")[0])
        except ValueError:
            # Not an image named "<person id>_...": nothing to annotate.
            continue
        files_by_person.setdefault(person_id, []).append(file_name)

    rows = []
    for entry in annotations_frame.itertuples(index=False):
        values = entry[1:1 + len(attribute_names)]
        if len(values) != len(attribute_names):
            raise ValueError(
                "annotations_frame must contain the person id followed by "
                "%d attribute columns" % len(attribute_names))
        for file_name in files_by_person.get(int(entry[0]), []):
            row = {"id": file_name}
            row.update(zip(attribute_names, values))
            rows.append(row)

    # Passing the columns explicitly keeps the schema even when no image matched.
    return pd.DataFrame(rows, columns=["id", *attribute_names])


## Fine-tuning ResNet-50

In [None]:
def initialize_resnet(num_classes):
  """Build a pretrained ResNet-50 whose classifier is replaced by a new head.

  The original fully connected layer is swapped for a stack of three linear
  layers (backbone features -> 1024 -> 512 -> num_classes) followed by a
  sigmoid, so every output neuron is an independent value in (0, 1).

  Input arguments
    num_classes: number of classes in the dataset.
                 This is equal to the number of output neurons.

  Returns: the ResNet-50 with the new head, ready to be fine tuned
  """
  backbone = torchvision.models.resnet50(pretrained=True)

  # NOTE(review): there is no activation between the linear layers, so the
  # head is equivalent to a single affine map followed by the sigmoid.
  head = []
  width = backbone.fc.in_features
  for next_width in (1024, 512, num_classes):
    head.append(torch.nn.Linear(in_features=width, out_features=next_width))
    width = next_width
  head.append(torch.nn.Sigmoid())

  backbone.fc = torch.nn.Sequential(*head)
  return backbone

Cost function

In [None]:
def get_cost_function():
  """Create the loss used to train the network.

  Returns: Binary Cross Entropy Loss function with its default settings:
  https://pytorch.org/docs/stable/generated/torch.nn.BCELoss.html
  """
  return torch.nn.BCELoss()

Optimizer

In [None]:
def get_optimizer(net, lr):
  """Create the optimizer that updates all the parameters of ``net``.

  Input arguments
    net: network used to setup the optimizer
    lr: used learning rate

  Returns: the Adam optimizer, configured with betas (0.5, 0.999)
  """
  adam_settings = {"lr": lr, "betas": (0.5, 0.999)}
  return torch.optim.Adam(net.parameters(), **adam_settings)

In [None]:
def test(net, data_loader, cost_function, num_classes, device='cuda:0'):
  """Evaluate ``net`` on ``data_loader`` without updating its parameters.

  Accuracy is the percentage of output neurons whose rounded value equals
  the target, averaged over all samples.

  Input arguments
    net: trained network
    data_loader: iterable yielding (inputs, targets) batches of test data
    cost_function: cost function used to compute the loss. If it exposes a
                   ``reduction`` attribute equal to "mean" (or has no such
                   attribute) its result is treated as a per-batch average.
    num_classes: number of target classes
    device: which device to use during testing phase (default is GPU)

  Returns: mean loss per sample and accuracy (in percent)

  Raises: ValueError if data_loader yields no samples
  """
  # A batch-averaged loss must be weighted by the batch size before summing,
  # otherwise dividing by the number of samples shrinks it by the batch size.
  loss_is_batch_mean = getattr(cost_function, "reduction", "mean") == "mean"

  samples = 0
  cumulative_loss = 0.
  cumulative_accuracy = 0.

  net.eval() # Strictly needed if network contains layers which has different behaviours between train and test
  with torch.no_grad():
    for inputs, targets in data_loader:
      # Load data into the device; targets must be float for BCELoss
      inputs = inputs.to(device)
      targets = targets.to(torch.float32).to(device)

      # Forward pass
      outputs = net(inputs)
      # Apply the loss
      loss = cost_function(outputs, targets)

      # Compute cumulative accuracy and loss
      batch_size = inputs.shape[0]
      samples += batch_size
      batch_loss = loss.item() # Note: the .item() is needed to extract scalars from tensors
      if loss_is_batch_mean:
        batch_loss *= batch_size
      cumulative_loss += batch_loss
      predicted = torch.round(outputs)
      cumulative_accuracy += predicted.eq(targets).sum().item() / num_classes

  if samples == 0:
    raise ValueError("data_loader yielded no samples")

  return cumulative_loss / samples, cumulative_accuracy / samples * 100


# Despite its name this is not a unit test: keep test runners such as pytest
# from collecting it when this code is imported by a test module.
test.__test__ = False

def train(net,data_loader,optimizer,cost_function, num_classes, device='cuda:0'):
  """Train ``net`` for one epoch over ``data_loader``.

  Accuracy is the percentage of output neurons whose rounded value equals
  the target, averaged over all samples.

  Input arguments
    net: network to train
    data_loader: iterable yielding (inputs, targets) batches of training data
    optimizer: optimizer that updates the parameters of ``net``
    cost_function: cost function used to compute the loss. If it exposes a
                   ``reduction`` attribute equal to "mean" (or has no such
                   attribute) its result is treated as a per-batch average.
    num_classes: number of target classes
    device: which device to use during training phase (default is GPU)

  Returns: mean loss per sample and accuracy (in percent) obtained in current epoch

  Raises: ValueError if data_loader yields no samples
  """
  # A batch-averaged loss must be weighted by the batch size before summing,
  # otherwise dividing by the number of samples shrinks it by the batch size.
  loss_is_batch_mean = getattr(cost_function, "reduction", "mean") == "mean"

  samples = 0
  cumulative_loss = 0.
  cumulative_accuracy = 0.

  net.train() # Strictly needed if network contains layers which has different behaviours between train and test
  for inputs, targets in data_loader:
    # Load data into the device; targets must be float for BCELoss
    inputs = inputs.to(device)
    targets = targets.to(torch.float32).to(device)

    # Forward pass
    outputs = net(inputs)

    # Apply the loss
    loss = cost_function(outputs, targets)

    # Backward pass
    loss.backward()

    # Update parameters
    optimizer.step()

    # Reset the gradients so they do not accumulate into the next batch
    optimizer.zero_grad()

    # Compute cumulative accuracy and loss
    batch_size = inputs.shape[0]
    samples += batch_size
    batch_loss = loss.item()
    if loss_is_batch_mean:
      batch_loss *= batch_size
    cumulative_loss += batch_loss
    # The metric does not need gradients
    predicted = torch.round(outputs.detach())
    cumulative_accuracy += predicted.eq(targets).sum().item() / num_classes

  if samples == 0:
    raise ValueError("data_loader yielded no samples")

  return cumulative_loss / samples, cumulative_accuracy / samples * 100

In [None]:
class PeopleTrainDataset(Dataset):
    """People training dataset containing images and annotations."""

    def __init__(self, X, Y, transform, image_size=128):
        """
        Args:
            X: list of image names, that are the locations where to load them
            Y: list of labels for each image
            transform: transform applied to every loaded image (it must
                return a (C, H, W) tensor), or None to simply convert the
                image to a tensor.
            image_size: side, in pixels, of the square image given to the network.
        """
        self.transform = transform
        self.X = X
        self.Y = Y
        self.image_size = image_size

    def __len__(self):
        return len(self.Y)

    def _resize(self, image):
        """Resize a (C, H, W) tensor to (C, image_size, image_size)."""
        # F.interpolate expects a leading batch dimension. Without it a
        # (C, H, W) tensor is read as (N, C, L) and only the width is resized.
        side = (self.image_size, self.image_size)
        return F.interpolate(image.unsqueeze(0), size=side).squeeze(0)

    def __getitem__(self, idx):
        """Return the (image tensor, labels) tuple stored at position idx."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Loading an image: the context manager closes the file, and the
        # conversion guarantees the 3 channels expected by the network.
        with Image.open(self.X[idx]) as image_file:
            image = image_file.convert("RGB")

        # If transform is specified apply to loaded image otherwise simply convert to tensor
        if self.transform is not None:
            image = self.transform(image)
        else:
            image = T.ToTensor()(image)

        # Bound image to the configured size to input it to network
        image = self._resize(image)

        # Build single sample as tuple of image with corresponding labels
        return (image, self.Y[idx])

class PeopleTestDataset(Dataset):
    """People test dataset containing only images."""

    def __init__(self, root_dir, image_size=128):
        """
        Args:
            root_dir: Directory with all the images.
            image_size: side, in pixels, of the square image given to the network.
        """
        self.root_dir = root_dir
        self.img_files = list(listdir(root_dir))
        self.image_size = image_size

    def __len__(self):
        # One sample per file found in root_dir
        return len(self.img_files)

    def _resize(self, image):
        """Resize a (C, H, W) tensor to (C, image_size, image_size)."""
        # F.interpolate expects a leading batch dimension. Without it a
        # (C, H, W) tensor is read as (N, C, L) and only the width is resized.
        side = (self.image_size, self.image_size)
        return F.interpolate(image.unsqueeze(0), size=side).squeeze(0)

    def __getitem__(self, idx):
        """Return the image tensor stored at position idx."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Load image and bound it to the configured size to input it to network
        image = io.imread("%s/%s" % (self.root_dir, self.img_files[idx]))
        image = T.ToTensor()(image)
        return self._resize(image)

In [None]:
def split_training_data(root_dir, annotations_frame, labels, val_fraction=0.2):
  """Split the annotated images into training and validation sets by person.

  All the images of a person (the integer before the first "_" of the image
  name) end up in the same set, so no identity is shared between training
  and validation. The people reserved for validation are drawn with the
  ``random`` module, hence the split depends on its seed.

  Input arguments
    root_dir: directory containing the dataset images
    annotations_frame: dataframe where each image is associated to a label;
                       the ``id`` column holds the image file name and every
                       other column an attribute value
    labels: dict mapping "<column>-<value>" labels to encoding indexes
    val_fraction: fraction of the people reserved for validation

  Returns:
    train_X: inputs (image paths) used during training
    val_X: inputs (image paths) used during validation
    train_Y: targets (encoding tensors) of training inputs
    val_Y: targets (encoding tensors) of validation inputs
  """
  label_columns = [column for column in annotations_frame.columns if column != "id"]

  # Build path, target and person id of every image in a single pass, so the
  # three always refer to the same row of the annotations (same encoding
  # rule as getTargetEncoding, without one dataframe lookup per image).
  samples = []
  ordered = annotations_frame[["id", *label_columns]]
  for image_name, *values in ordered.itertuples(index=False, name=None):
    encoding = [0] * len(labels)
    for column, value in zip(label_columns, values):
      encoding[labels["%s-%s" % (column, value)]] += 1
    person_id = int(str(image_name).split("_")[0])
    samples.append((person_id, os.path.join(root_dir, image_name), torch.tensor(encoding)))

  # Choose the people reserved for validation; sorting first makes the
  # outcome depend only on the random seed.
  person_ids = sorted({person_id for person_id, _, _ in samples})
  random.shuffle(person_ids)
  val_ids = set(person_ids[:int(len(person_ids) * val_fraction)])

  train_X, val_X, train_Y, val_Y = [], [], [], []
  for person_id, image_path, target in samples:
    if person_id in val_ids:
      val_X.append(image_path)
      val_Y.append(target)
    else:
      train_X.append(image_path)
      train_Y.append(target)

  return train_X, val_X, train_Y, val_Y

def get_data(augmented_annotations_frame, labels, batch_size, img_root, test_batch_size=256):
  """Create the dataloaders for training, validation and testing.

  Training and validation images come from "<img_root>/train" and are
  divided by split_training_data; test images come from "<img_root>/test".
  No transform is applied to the images. Only the training loader shuffles
  its samples.

  Input arguments
    augmented_annotations_frame: dataframe where each image is associated to a label
    labels: newly computed labels
    batch_size: batch size used during training phase
    img_root: directory containing the dataset images
    test_batch_size: batch size used during validation and test phase

  Returns:
    train_loader: DataLoader used during training
    val_loader: DataLoader used during validation
    test_loader: DataLoader used during testing
  """
  train_dir = "%s/train" % (img_root)
  test_dir = "%s/test" % (img_root)

  # Get splitted data
  X_train, X_val, y_train, y_val = split_training_data(
      root_dir=train_dir,
      annotations_frame=augmented_annotations_frame,
      labels=labels)

  # Wrap the three sets into datasets
  training_data = PeopleTrainDataset(X=X_train, Y=y_train, transform=None)
  validation_data = PeopleTrainDataset(X=X_val, Y=y_val, transform=None)
  test_data = PeopleTestDataset(root_dir=test_dir)

  # Initialize dataloaders
  make_loader = torch.utils.data.DataLoader
  return (
      make_loader(training_data, batch_size, shuffle=True, num_workers=0),
      make_loader(validation_data, test_batch_size, shuffle=False, num_workers=0),
      make_loader(test_data, test_batch_size, shuffle=False, num_workers=0),
  )

In [1]:
def log_values(writer, step, loss, accuracy, prefix):
  """Record loss and accuracy of one step under the "<prefix>/" tags of writer."""
  loss_tag = f"{prefix}/loss"
  writer.add_scalar(loss_tag, loss, step)
  accuracy_tag = f"{prefix}/accuracy"
  writer.add_scalar(accuracy_tag, accuracy, step)

def main(batch_size=256, 
         device='cuda:0', 
         learning_rate=0.001, 
         epochs=5, 
         img_root='./dataset'):
  """Train the network on the people dataset and log the metrics to Tensorboard.

  Metrics are logged at step -1 before training, at steps 0..epochs-1 after
  each epoch and at step ``epochs`` after training, when the training set is
  evaluated again with the network in evaluation mode.

  Input arguments
    batch_size: batch size used during training phase
    device: device used to train and evaluate the network
    learning_rate: learning rate given to the optimizer
    epochs: number of training epochs
    img_root: directory containing annotations_train.csv and the train/test
              image folders

  Returns: the trained network
  """
  #Instantiate the SummaryWriter for Tensorboard visualization
  writer = SummaryWriter(log_dir="runs/resnet50_5epoch")

  # The writer is closed even if something fails along the way
  try:
    #Parse annotations 
    annotations_frame = pd.read_csv("%s/annotations_train.csv" % (img_root))

    #Get labels for every training image
    augmented_annotations_frame = convertAnnotationsFrame(annotations_frame, "%s/train" % (img_root))

    #Get dictionary where each label is associated with a position in the one-hot encoding
    labels = setupLabelsDict(augmented_annotations_frame)
    num_classes = len(labels)

    # Instantiates dataloaders
    train_loader, val_loader, test_loader = get_data(augmented_annotations_frame=augmented_annotations_frame, labels=labels, batch_size=batch_size, img_root=img_root)

    # Instantiates the model
    net = initialize_resnet(num_classes=num_classes).to(device)

    # Instantiates the optimizer
    optimizer = get_optimizer(net, learning_rate)

    # Instantiates the cost function
    cost_function = get_cost_function()

    def report(step, train_loss, train_accuracy, val_loss, val_accuracy):
      # Print and log the metrics collected at the given step
      print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_accuracy))
      print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss, val_accuracy))
      print('-----------------------------------------------------')
      log_values(writer, step, train_loss, train_accuracy, "Train")
      log_values(writer, step, val_loss, val_accuracy, "Validation")

    def evaluate(data_loader):
      # The device must be forwarded, otherwise test() falls back to 'cuda:0'
      return test(net, data_loader, cost_function, num_classes=num_classes, device=device)

    print('Before training:')
    train_loss, train_accuracy = evaluate(train_loader)
    val_loss, val_accuracy = evaluate(val_loader)
    report(-1, train_loss, train_accuracy, val_loss, val_accuracy)

    for e in range(epochs):
      train_loss, train_accuracy = train(net, train_loader, optimizer, cost_function, num_classes=num_classes, device=device)
      val_loss, val_accuracy = evaluate(val_loader)
      print('Epoch: {:d}'.format(e+1))
      report(e, train_loss, train_accuracy, val_loss, val_accuracy)

    print('After training:')
    train_loss, train_accuracy = evaluate(train_loader)
    val_loss, val_accuracy = evaluate(val_loader)
    # Logged one step after the last epoch so it does not overlap its values
    # (and so that epochs=0 is supported)
    report(epochs, train_loss, train_accuracy, val_loss, val_accuracy)
  finally:
    # Closes the logger
    writer.close()

  #output the trained network
  return net

In [None]:
# Free GPU memory
# Ask PyTorch to release its cached CUDA memory, then run Python's garbage
# collector, so a new training run starts from a clean state.
torch.cuda.empty_cache()
import gc
gc.collect()

# clear runs
#! rm -r runs
%reload_ext tensorboard
%tensorboard --logdir=runs/resnet50_5epoch

In [None]:
# Run the whole pipeline with the default hyper-parameters of main()
net = main()

#save the trained model
# Only the state_dict (the parameters) is stored: to load it back, the same
# architecture has to be rebuilt first.
torch.save(net.state_dict(), "./resnet50_5epoch.pth")

In [None]:
## Make a zip containing the runs if need to save them
#import shutil
#shutil.make_archive('runs_23_08', 'zip', 'runs')

In [None]:
## Save the model directly in Google Drive if needed for other tasks
#torch.save(net.state_dict(), "/content/drive/MyDrive/UNITN/5° anno/Deep Learning 2021/models/resnet50_5epoch")