### Notebook for training a Faster-RCNN model on a custom dataset 

This notebook trains a Faster-RCNN model with PyTorch on a custom dataset, for example for swimmer head detection. It can be used online (with Google Colab, for example) or on a local machine. 

You can find the Pytorch implementation of Faster-RCNN here :  https://pytorch.org/vision/stable/_modules/torchvision/models/detection/faster_rcnn.html

If you plan to use this notebook with Google Colab, you first need to mount your Google Drive by executing the first cell.

In [None]:
# If you use Google Colab (skip this cell on a local machine)

import os

from google.colab import drive

# Mount Google Drive inside the Colab VM, then work from the Drive root so
# that the relative paths used in the rest of the notebook resolve there.
drive.mount('/content/gdrive')
# Absolute path: with a relative path, running this cell a second time fails
# because the working directory has already been moved into the Drive.
os.chdir('/content/gdrive/My Drive/')

Then we import the different modules.

engine, utils and transforms are imported from the .py files you should have in your working directory. They can be found here : https://github.com/pytorch/vision/tree/master/references/detection

In [None]:
# Importing the necessary modules 

import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.utils.data
import torchvision
from PIL import Image, ImageDraw
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator

import transforms as T  
import utils
from engine import train_one_epoch, evaluate

You also need to fill in the paths to the annotation file, to the dataset images and to the file where the trained model will be saved.

In [None]:
# Path to the .csv containing the dataset annotations in Pascal VOC format
path_label = './labels_nageur.csv'
# Path to the folder containing the dataset images
path_images = './images'
# Path of the file the trained weights are written to at the end of the notebook
path_model = './model.pt'

We define a function to parse the data. Then we create the class for our dataset, named "SwimmerDataset".


In [None]:
def parse_one_annot(path_to_data_file, filename):
    """Return the bounding boxes annotated for one image.

    Args:
        path_to_data_file: path to the annotation .csv; it must provide the
            columns "filename", "xmin", "ymin", "xmax" and "ymax".
        filename: value of the "filename" column identifying the image.

    Returns:
        A 2-D NumPy array with one row per annotated box and the columns
        (xmin, ymin, xmax, ymax), i.e. Pascal VOC ordering. The array has
        zero rows when the image has no annotation.
    """
    annotations = pd.read_csv(path_to_data_file)
    box_columns = ["xmin", "ymin", "xmax", "ymax"]
    is_requested_image = annotations["filename"] == filename
    return annotations.loc[is_requested_image, box_columns].values

class SwimmerDataset(torch.utils.data.Dataset):
    """Detection dataset pairing each image of a folder with its boxes.

    Every file found in ``root`` is treated as one sample; its bounding boxes
    are looked up by file name in the annotation .csv through
    ``parse_one_annot``. All boxes are given the single foreground label 1.

    Args:
        root: directory that directly contains the dataset images.
        data_file: path to the annotation .csv (Pascal VOC box columns).
        transforms: optional callable ``(img, target) -> (img, target)``
            applied to each sample; when None the PIL image is returned as is.
    """

    def __init__(self, root, data_file, transforms=None):
        self.root = root
        self.transforms = transforms
        # Sorted so that an index always refers to the same image.
        self.imgs = sorted(os.listdir(root))
        self.path_to_data_file = data_file

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the sample at position ``idx``.

        ``target`` is a dict with the keys "boxes", "labels", "image_id",
        "area" and "iscrowd".
        """
        # load images and bounding boxes
        img_name = self.imgs[idx]
        img = Image.open(os.path.join(self.root, img_name)).convert("RGB")
        box_list = parse_one_annot(self.path_to_data_file, img_name)
        boxes = torch.as_tensor(box_list, dtype=torch.float32)
        num_objs = len(box_list)

        # only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        # boxes are (xmin, ymin, xmax, ymax): area = height * width
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        # Returned in every case: the sample must not become None when no
        # transforms were supplied.
        return img, target

    def __len__(self):
        """Return the number of images found in ``root``."""
        return len(self.imgs)

We define a function to instantiate the model. We use a **ResNet-50 FPN pretrained backbone** for the Faster-RCNN. **Anchor sizes** are an important parameter that you can modify by changing the sizes and aspect ratios of *anchor_generator*. Faster-RCNN has many other parameters that have been left at their default values here. These can be found in the link given above for the Faster-RCNN implementation.

We also define the transforms applied to the dataset during training (including data augmentation). These transforms are those defined in the transforms.py file, but it is possible to define new ones.

In [None]:
def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) with custom anchors and a new head.

    Args:
        num_classes: number of classes predicted by the box head, background
            included.

    Returns:
        The torchvision Faster R-CNN model, with its RPN anchor generator and
        its box predictor replaced.
    """
    # load an object detection model pre-trained on COCO
    # NOTE: recent torchvision releases prefer the `weights=` argument over
    # `pretrained=True`; kept as is for compatibility with older versions.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # create an anchor_generator for the FPN which by default has 5 outputs:
    # one anchor size per output, the same three aspect ratios for each
    anchor_sizes = ((10,), (20,), (30,), (40,), (50,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=aspect_ratios)

    # Attach the generator to the RPN: without this assignment it is built
    # and discarded, and the model silently keeps its default anchors.
    # 1 size x 3 ratios keeps 3 anchors per location, which is what the
    # pre-trained RPN head outputs.
    model.rpn.anchor_generator = anchor_generator

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

def get_transform(train):
    """Compose the transforms applied to each (image, target) sample.

    Args:
        train: when truthy, the data-augmentation transforms are appended
            after the tensor conversion.

    Returns:
        A ``T.Compose`` of the selected transforms.
    """
    # The PIL image is always converted into a PyTorch tensor first.
    steps = [T.ToTensor()]

    if train:
        # data augmentation, training only
        steps.extend([
            T.RandomHorizontalFlip(0.5),
            T.RandomPhotometricDistort(),
            T.RandomZoomOut(),
        ])

    return T.Compose(steps)

We split the dataset between training dataset (80%) and validation dataset (20%). We also choose the **batch size** for the training (currently 4).

In [None]:
# Build the dataset twice from the paths configured earlier in the notebook
# (path_images, path_label): same images, but only the training copy gets
# the data-augmentation transforms.
dataset = SwimmerDataset(root=path_images,
                         data_file=path_label,
                         transforms=get_transform(train=True))

dataset_test = SwimmerDataset(root=path_images,
                              data_file=path_label,
                              transforms=get_transform(train=False))

# split the dataset between train and test set

# Uncomment to make the random split reproducible:
# torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()

nb_test = int(0.2 * len(dataset))
# Explicit split index: a negative slice would give an empty training set
# (and put everything in validation) when nb_test is 0.
split = len(indices) - nb_test
dataset = torch.utils.data.Subset(dataset, indices[:split])
dataset_test = torch.utils.data.Subset(dataset_test, indices[split:])

# define training and validation data loaders
data_loader_train = torch.utils.data.DataLoader(
    dataset, batch_size=4, shuffle=True, num_workers=0,  # batch_size here
    collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=utils.collate_fn)

print("The dataset contains {} images : {} for training and {} for validation".format(len(indices), len(dataset), len(dataset_test)))

We instantiate the model with only 2 classes (the background and the object we want to detect). We use a **learning rate scheduler** to decrease the learning rate during training. The training is done on the GPU if CUDA is available (leading to much faster training); otherwise it is done on the CPU.

In this cell we define the **optimizer** (SGD) and its hyperparameters such as the **learning rate**, the **momentum** and the **weight_decay**.

In [None]:
# Train on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two classes: the background and the object to detect
num_classes = 2

# Instantiate the model and place it on the selected device
model = get_model(num_classes)
model.to(device)

# SGD optimizer over the parameters that are actually trainable
params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0005,
)

# Scheduler multiplying the learning rate by gamma every step_size epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

We finally train our Faster-RCNN.

In [None]:
num_epochs = 10

for epoch in range(num_epochs):
    # One pass over the training loader; print_freq sets how many batches
    # separate two progress printouts.
    training = train_one_epoch(
        model,
        optimizer,
        data_loader_train,
        device,
        epoch,
        print_freq=40,
    )

    # Advance the learning rate schedule by one epoch
    lr_scheduler.step()

    # Measure performance on the validation loader after each epoch
    evaluate(model, data_loader_test, device=device)

Then we save the weights of the trained model.

In [None]:
# Write the trained weights (the model's state_dict) to path_model

torch.save(obj=model.state_dict(), f=path_model)