## Downloading Dependencies

In [None]:

!pip install cython
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-ebv5swjd
  Running command git clone -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-ebv5swjd
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25l[?25hdone
  Created wheel for pycocotools: filename=pycocotools-2.0-cp37-cp37m-linux_x86_64.whl size=265176 sha256=0e4dbb8a869c272c825d5b1dc9d46c13109547c173bf869b7138be24f97c377b
  Stored in directory: /tmp/pip-ephem-wheel-cache-t0cpo9z3/wheels/e2/6b/1d/344ac773c7495ea0b85eb228bc66daec7400a143a92d36b7b1
Successfully built pycocotools
Installing collected packages: pycocotools
  Attempting uninstall: pycocotools
    Found ex

In [None]:
import json
import math
import os
import random
import sys
import time
import cv2

import numpy as np
import torch
import torch.utils.data
import torchvision

from PIL import Image, ImageDraw
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from torchvision import transforms
from torchvision.ops import batched_nms
from torchvision.transforms import functional as F

In [None]:
!nvidia-smi

Fri Jun 17 16:04:12 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    29W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from typing import List, Tuple

import torchvision.models as models

from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.faster_rcnn import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign

In `references/detection/,` we have a number of helper functions to simplify training and evaluating detection models.
Here, we will use `references/detection/utils.py` and `references/detection/coco_eval.py`.

Let's copy those files (and their dependencies) in here so that they are available in the notebook

In [None]:
%%shell

# Download TorchVision repo to use some files from references/detection
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/coco_eval.py ../

Cloning into 'vision'...
remote: Enumerating objects: 158732, done.[K
remote: Counting objects: 100% (3364/3364), done.[K
remote: Compressing objects: 100% (316/316), done.[K
remote: Total 158732 (delta 3050), reused 3299 (delta 3031), pack-reused 155368[K
Receiving objects: 100% (158732/158732), 313.17 MiB | 17.87 MiB/s, done.
Resolving deltas: 100% (141019/141019), done.
Note: checking out 'v0.3.0'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:

  git checkout -b <new-branch-name>

HEAD is now at be376084d version check against PyTorch's CUDA version




In [None]:
import utils
from coco_eval import CocoEvaluator

In [None]:
# check if cuda GPU is available, make sure you're using GPU runtime on Google Colab
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device) # you should output "cuda"

cuda


In [None]:
# mount your google drive
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


## Object Detection Dataset
We will be providing the base dataset that will be used for the first task of the Object Detection competition.

In [None]:
# split json into train and val

with open('/content/drive/MyDrive/Qualifiers/CV Training Dataset/training_data_new.json') as f:
  data = json.load(f)

In [None]:
class TILDataset(torch.utils.data.Dataset):
    def __init__(self, root, annotation, transforms=None, sub_folder='Images'):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        self.ids = list(sorted(self.coco.imgs.keys()))
        cats = self.coco.loadCats(self.coco.getCatIds())
        self.cat2name = {cat['id']:cat['name'] for cat in cats} # maps category id to category name (useful for visualization)
        self.sub_folder = sub_folder

    def __getitem__(self, index):
        coco = self.coco
        img_id = self.ids[index] # Image ID
        ann_ids = coco.getAnnIds(imgIds=img_id) # get annotation id from coco
        coco_annotation = coco.loadAnns(ann_ids) # target coco_annotation file for an image
        path = coco.loadImgs(img_id)[0]['file_name'] # path for input image
        img = Image.open(os.path.join(self.root, self.sub_folder, path)).convert('RGB') # open the input image

        # number of objects in the image
        num_objs = len(coco_annotation)

        # Bounding boxes for objects
        # In coco format, bbox = [xmin, ymin, width, height]
        # In pytorch, the input should be [xmin, ymin, xmax, ymax]
        boxes = []
        for i in range(num_objs):
            xmin = coco_annotation[i]['bbox'][0]
            ymin = coco_annotation[i]['bbox'][1]
            xmax = xmin + coco_annotation[i]['bbox'][2]
            ymax = ymin + coco_annotation[i]['bbox'][3]
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # Labels
        labels = []
        for i in range(num_objs):
            labels.append(coco_annotation[i]['category_id'])
        labels = torch.as_tensor(labels, dtype=torch.int64)

        # Tensorise img_id
        img_id = torch.tensor([img_id])

        # Size of bbox (Rectangular)
        areas = []
        for i in range(num_objs):
            areas.append(coco_annotation[i]['area'])
        areas = torch.as_tensor(areas, dtype=torch.float32)

        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Annotation is in dictionary format
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = img_id
        target["area"] = areas
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.ids)

That's all for the dataset. Let's see how the outputs are structured for this dataset

In [None]:
til_root = '/content/drive/MyDrive/Qualifiers/CV Training Dataset' # extracted training dataset path
train_annotation = '/content/drive/MyDrive/src/train.json'
train_annotation = '/content/drive/MyDrive/Qualifiers/CV Training Dataset/training_data_new.json'  # taking all training data no val
val_annotation = '/content/drive/MyDrive/src/val.json'

easter_annotation = '/content/drive/MyDrive/Easter Egg 1 - 01Jun/CV_Datasets/easter_egg_day_1.json'

## Setting up the Model

In this object detection example, we will make use of Faster R-CNN model with a ResNet50-FPN backbone. To understand the underlying code structure, you can read this [article](https://zhuanlan.zhihu.com/p/145842317) (right click and translate to English).

Feel free to explore with different hyper-parameters to see what works best!

In [None]:
# hyper-parameters
params = {'BATCH_SIZE': 16,
          'LR': 5e-4,  # 1e-4
          'CLASSES': 2+1,
          'MAXEPOCHS': 10,
          'BACKBONE': 'resnet50',
          'FPN': True,
          # 'ANCHOR_SIZE': ((32,), (64,), (128,), (256,), (512,)),
          'ANCHOR_SIZE': ((256,), (16,), (32,), (64,), (128,)),
          'ASPECT_RATIOS': ((0.5, 1.0, 2.0),),
          'MIN_SIZE': 256,
          'MAX_SIZE': 256,
          'IMG_MEAN': [0.485, 0.456, 0.406],
          'IMG_STD': [0.229, 0.224, 0.225],
          'IOU_THRESHOLD': 0.5
          }

In [None]:
!wget https://github.com/airctic/icedata/releases/download/m1/pennfudan_maskrcnn_resnet50_fpn.zip
!unzip /content/pennfudan_maskrcnn_resnet50_fpn.zip


--2022-06-17 16:05:13--  https://github.com/airctic/icedata/releases/download/m1/pennfudan_maskrcnn_resnet50_fpn.zip
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/293939172/d447a600-f439-11ea-999e-2e6ab49f8766?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20220617%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20220617T160513Z&X-Amz-Expires=300&X-Amz-Signature=d01c05f024e9741f5d18bc2873de28b929590d898bdead00d02ee4392dea872a&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=293939172&response-content-disposition=attachment%3B%20filename%3Dpennfudan_maskrcnn_resnet50_fpn.zip&response-content-type=application%2Foctet-stream [following]
--2022-06-17 16:05:13--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/293939172/d447a600-f

In [None]:
# model = torchvision.models.detection.fcos_resnet50_fpn(pretrained=True, trainable_backbone_layers=4)
# model.head.classification_head.cls_logits.out_channels = 3
# model.transform.max_size = 800

In [None]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
num_classes = 3

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes=2, pretrained_backbone=False)  # ,trainable_backbone_layers=1


In [None]:
# model.load_state_dict(torch.load('/content/pennfundan_maskrcnn_resnet50fpn.pth', map_location=device), strict=False)
model.load_state_dict(torch.load('/content/pennfundan_maskrcnn_resnet50fpn.pth', map_location=device), strict=False)


_IncompatibleKeys(missing_keys=[], unexpected_keys=['roi_heads.mask_head.mask_fcn1.weight', 'roi_heads.mask_head.mask_fcn1.bias', 'roi_heads.mask_head.mask_fcn2.weight', 'roi_heads.mask_head.mask_fcn2.bias', 'roi_heads.mask_head.mask_fcn3.weight', 'roi_heads.mask_head.mask_fcn3.bias', 'roi_heads.mask_head.mask_fcn4.weight', 'roi_heads.mask_head.mask_fcn4.bias', 'roi_heads.mask_predictor.conv5_mask.weight', 'roi_heads.mask_predictor.conv5_mask.bias', 'roi_heads.mask_predictor.mask_fcn_logits.weight', 'roi_heads.mask_predictor.mask_fcn_logits.bias'])

In [None]:
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.transform.min_size = (720,)
model.transform.max_size = 720



In [None]:
# continue training from prev checkpoint
# save_path = '/content/drive/MyDrive/model_weights/cv/frcnn_pt_06-07-2022-23:34:40_e1'
# model.load_state_dict(torch.load(save_path, map_location=device))


# chkpt_path = '/content/drive/MyDrive/model_weights/cv/frcnn_pt_06-07-2022-02:03:30_e2'
# model.load_state_dict(torch.load(chkpt_path, map_location=device))

In [None]:
# move model to the right device
for idx, (name, param) in enumerate(model.named_parameters()):
  print(idx, name, param.requires_grad)

0 backbone.body.conv1.weight True
1 backbone.body.layer1.0.conv1.weight True
2 backbone.body.layer1.0.conv2.weight True
3 backbone.body.layer1.0.conv3.weight True
4 backbone.body.layer1.0.downsample.0.weight True
5 backbone.body.layer1.1.conv1.weight True
6 backbone.body.layer1.1.conv2.weight True
7 backbone.body.layer1.1.conv3.weight True
8 backbone.body.layer1.2.conv1.weight True
9 backbone.body.layer1.2.conv2.weight True
10 backbone.body.layer1.2.conv3.weight True
11 backbone.body.layer2.0.conv1.weight True
12 backbone.body.layer2.0.conv2.weight True
13 backbone.body.layer2.0.conv3.weight True
14 backbone.body.layer2.0.downsample.0.weight True
15 backbone.body.layer2.1.conv1.weight True
16 backbone.body.layer2.1.conv2.weight True
17 backbone.body.layer2.1.conv3.weight True
18 backbone.body.layer2.2.conv1.weight True
19 backbone.body.layer2.2.conv2.weight True
20 backbone.body.layer2.2.conv3.weight True
21 backbone.body.layer2.3.conv1.weight True
22 backbone.body.layer2.3.conv2.weigh

In [None]:
# # for param in model.parameters():
# #   param.requires_grad = False

# for idx, (name, param) in enumerate(model.named_parameters()):
#     if idx <= 42:  # up till layer 3
#         param.requires_grad = False
#     else:
#         param.requires_grad = True
#     print(idx, name, param.requires_grad)

In [None]:
# for name, parameter in model.named_parameters():
#   if 'head' in name:
#     parameter.requires_grad = True


In [None]:
for name, parameter in model.named_parameters():
  if parameter.requires_grad:
    print(name)


backbone.body.conv1.weight
backbone.body.layer1.0.conv1.weight
backbone.body.layer1.0.conv2.weight
backbone.body.layer1.0.conv3.weight
backbone.body.layer1.0.downsample.0.weight
backbone.body.layer1.1.conv1.weight
backbone.body.layer1.1.conv2.weight
backbone.body.layer1.1.conv3.weight
backbone.body.layer1.2.conv1.weight
backbone.body.layer1.2.conv2.weight
backbone.body.layer1.2.conv3.weight
backbone.body.layer2.0.conv1.weight
backbone.body.layer2.0.conv2.weight
backbone.body.layer2.0.conv3.weight
backbone.body.layer2.0.downsample.0.weight
backbone.body.layer2.1.conv1.weight
backbone.body.layer2.1.conv2.weight
backbone.body.layer2.1.conv3.weight
backbone.body.layer2.2.conv1.weight
backbone.body.layer2.2.conv2.weight
backbone.body.layer2.2.conv3.weight
backbone.body.layer2.3.conv1.weight
backbone.body.layer2.3.conv2.weight
backbone.body.layer2.3.conv3.weight
backbone.body.layer3.0.conv1.weight
backbone.body.layer3.0.conv2.weight
backbone.body.layer3.0.conv3.weight
backbone.body.layer3.0.

In [None]:
# construct an optimizer

# model_params = [p for p in model.parameters() if p.requires_grad]
model_params = [
    {"params": model.backbone.body.conv1.parameters(), "lr": 1e-7},
    {"params": model.backbone.body.layer1.parameters(), "lr": 1e-6},
    {"params": model.backbone.body.layer2.parameters(), "lr": 1e-6},
    {"params": model.backbone.body.layer3.parameters(), "lr": 1e-5},
    {"params": model.backbone.body.layer4.parameters(), "lr": 1e-5},
    {"params": model.backbone.fpn.parameters(), "lr": 1e-5},
    {"params": model.rpn.parameters(), "lr": 1e-4},
    {"params": model.roi_heads.parameters(), "lr": 1e-3},
]
# optimizer = torch.optim.SGD(model_params, 
#                             lr=params['LR'],
#                             momentum=0.9, 
#                             weight_decay=0.0005)

# optimizer = torch.optim.AdamW(model_params, lr=params['LR'], weight_decay = 0.01)
# optimizer = torch.optim.AdamW(model_params, lr=1e-4, weight_decay = 0.01)

# for finetuning
# optimizer = torch.optim.AdamW(model_params, lr=1e-3, weight_decay = 0.01)
optimizer = torch.optim.AdamW(model_params, weight_decay = 0.01)

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.5)




model.to(device)


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(720,), max_size=720, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu)

## Data Augmentation

Let's write some helper functions for data augmentation / transformation.

Do not just stop here, add in your own data augmentations! Remember to also augment the bounding boxes accordingly.

In [None]:
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target

In [None]:
# converts the image, a PIL image, into a PyTorch Tensor
class ToTensor(object):
    def __call__(self, image, target):
        image = F.to_tensor(image)
        return image, target

In [None]:
# randomly horizontal flip the images and ground-truth labels
class RandomHorizontalFlip(object):
    def __init__(self, prob):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            height, width = image.shape[-2:]
            image = image.flip(-1)
            bbox = target["boxes"]
            bbox[:, [0, 2]] = width - bbox[:, [2, 0]]
            target["boxes"] = bbox
        return image, target

In [None]:
class Blur_7(object):
    def __init__(self, max_kernel=(6, 6)):
        self.max_kernel = max_kernel

    def __call__(self,img,target):
        img = np.array(img)
        img = cv2.blur(img, (7, 7))
        img = transforms.ToPILImage()(img).convert("RGB")
        return img, target


In [None]:
class Blur_12(object):
    def __init__(self, max_kernel=(6, 6)):
        self.max_kernel = max_kernel

    def __call__(self,img,target):
        img = np.array(img)
        img = cv2.blur(img, (12, 12))
        img = transforms.ToPILImage()(img).convert("RGB")
        return img, target

In [None]:
class RandomGrayscale(object):
    """Randomly convert image to grayscale with a probability of p (default 0.1).
    If the image is torch Tensor, it is expected
    to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions

    Args:
        p (float): probability that image should be converted to grayscale.

    Returns:
        PIL Image or Tensor: Grayscale version of the input image with probability p and unchanged
        with probability (1-p).
        - If input image is 1 channel: grayscale version is 1 channel
        - If input image is 3 channel: grayscale version is 3 channel with r == g == b

    """

    def __init__(self, p=0.1):
        self.p = p

    def __call__(self, img, target):
        """
        Args:
            img (PIL Image or Tensor): Image to be converted to grayscale.

        Returns:
            PIL Image or Tensor: Randomly grayscaled image.
        """
        num_output_channels = F.get_image_num_channels(img)
        if torch.rand(1) < self.p:
            img = F.rgb_to_grayscale(img, num_output_channels=num_output_channels)
            return img, target
        return img , target


In [None]:
import random

class RandomErasing(object):
    '''
    Class that performs Random Erasing in Random Erasing Data Augmentation by Zhong et al. 
    -------------------------------------------------------------------------------------
    probability: The probability that the operation will be performed.
    sl: min erasing area
    sh: max erasing area
    r1: min aspect ratio
    mean: erasing value
    -------------------------------------------------------------------------------------
    '''
    def __init__(self, probability = 0.5, sl = 0.02, sh = 0.4, r1 = 0.3, mean=[0.4914, 0.4822, 0.4465]):
        self.probability = probability
        self.mean = mean
        self.sl = sl
        self.sh = sh
        self.r1 = r1
       
    def __call__(self, img, target):
        """
        img: tensor
        target: dict
        """
        if random.uniform(0, 1) > self.probability:
            return img, target

        for attempt in range(100):
            area = img.size()[1] * img.size()[2]
       
            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(self.r1, 1/self.r1)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w < img.size()[2] and h < img.size()[1]:
                x1 = random.randint(0, img.size()[1] - h)
                y1 = random.randint(0, img.size()[2] - w)
                if img.size()[0] == 3:
                    img[0, x1:x1+h, y1:y1+w] = self.mean[0]
                    img[1, x1:x1+h, y1:y1+w] = self.mean[1]
                    img[2, x1:x1+h, y1:y1+w] = self.mean[2]
                else:
                    img[0, x1:x1+h, y1:y1+w] = self.mean[0]
                return img, target

        return img, target



In [None]:
# TODO: add more transforms

def get_transform(train):
    if train:
        transforms = Compose([
            ToTensor(), 
            RandomHorizontalFlip(0.5),
            # RandomCrop(),

        ])
    else: # during evaluation, no augmentations will be done
        transforms = Compose([
            ToTensor()
        ])
    
    return transforms

# def get_transform_flip(train):
#     if train:
#         transforms = Compose([
#             ToTensor(), 
#             RandomHorizontalFlip(1)
#         ])
#     else: # during evaluation, no augmentations will be done
#         transforms = Compose([
#             ToTensor()
#         ])
    
#     return transforms

def get_transform_grayscale(train):
  if train:
      transforms = Compose([
          ToTensor(), 
          RandomGrayscale(1)
      ])
  else: 
      transforms = Compose([
          ToTensor()
      ])
  
  return transforms

def get_transform_blur(train):
  if train:
      transforms = Compose([
          Blur(),
          ToTensor()
      ])
  else: 
      transforms = Compose([
          ToTensor()
      ])
  
  return transforms
  
def get_transform_erase(train):
  if train:
      transforms = Compose([
          ToTensor(), 
          RandomErasing(probability=1, sh=0.3)
      ])
  else: 
      transforms = Compose([
          ToTensor()
      ])
  
  return transforms

In [None]:
easter_root = '/content/drive/MyDrive/Easter Egg 1 - 01Jun/CV_Datasets'
train_easter = TILDataset(easter_root, easter_annotation, get_transform(train=True), sub_folder='New Images')
train_easter_blur = TILDataset(easter_root, easter_annotation, get_transform_blur(train=True), sub_folder='New Images')
train_easter_gray = TILDataset(easter_root, easter_annotation, get_transform_grayscale(train=True), sub_folder='New Images')
train_easter_erase = TILDataset(easter_root, easter_annotation, get_transform_erase(train=True), sub_folder='New Images')

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [None]:
# train_dataset_flip = TILDataset(til_root, train_annotation, get_transform_flip(train=True))
train_dataset_og = TILDataset(til_root, train_annotation, get_transform(train=True))
train_dataset_blur = TILDataset(til_root, train_annotation, get_transform_blur(train=True))
train_dataset_gray = TILDataset(til_root, train_annotation, get_transform_grayscale(train=True))
train_dataset_erase = TILDataset(til_root, train_annotation, get_transform_erase(train=True))

val_dataset = TILDataset(til_root, val_annotation, get_transform(train=False))


train_dataset_list = [
              train_dataset_og,
              # train_dataset_flip,
              train_dataset_blur,
              train_dataset_gray,
              train_easter,
              train_easter_blur,
              train_easter_gray
]
train_augmented_dataset = torch.utils.data.ConcatDataset(train_dataset_list)
print(len(train_augmented_dataset))

loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
loading annotations into memory...
Done (t=0.39s)
creating index...
index created!
loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
loading annotations into memory...
Done (t=0.30s)
creating index...
index created!
18240


## Data Loaders

Let's now set up our data loaders so that we can streamline the batch loading of data for our model training later on.

We now have the dataset class, the models and the data transforms. Let's instantiate them

In [None]:
NUM_WORKERS = 2

# define training and validation data loaders
train_loader = torch.utils.data.DataLoader(
    train_augmented_dataset, batch_size=params['BATCH_SIZE'], shuffle=True, num_workers=NUM_WORKERS,
    collate_fn=utils.collate_fn)

val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=16, shuffle=False, num_workers=NUM_WORKERS,
    collate_fn=utils.collate_fn)

In [None]:
# lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, 1e-3, epochs = 2, 
                                                # steps_per_epoch = len(train_loader), pct_start=0.1)

## Model Training

And now let's train the model, evaluating at the end of every epoch.

In [None]:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 100
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses, **loss_dict)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

In [None]:
from datetime import datetime
import pytz

@torch.no_grad()
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    coco = data_loader.dataset.coco
    iou_types = ["bbox"]
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)

        outputs = [{k: v for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator


In [None]:
now = datetime.now(pytz.timezone('Singapore')) # current date and time
now = now.strftime("%m-%d-%Y-%H:%M:%S")
weights_folder = '/content/drive/MyDrive/model_weights/cv'
now

'06-18-2022-00:49:02'

In [None]:
starting_epoch = 0
for epoch in range(starting_epoch, starting_epoch + 4):
    # train for one epoch, printing every 1 iteration
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the test dataset
    # res = evaluate(model, val_loader, device=device) # TODO: to extract AP all 0.5-0.95


    # save_path = os.path.join(weights_folder, f'frcnn_{now}_e{epoch}')
    save_path = os.path.join(weights_folder, f'frcnn_pt_{now}_e{epoch}')
    torch.save(model.state_dict(), save_path)
    # save model weights



Epoch: [0]  [   0/1140]  eta: 4:19:29  lr: 0.000000  loss: 1.8241 (1.8241)  loss_classifier: 1.2891 (1.2891)  loss_box_reg: 0.3567 (0.3567)  loss_objectness: 0.1519 (0.1519)  loss_rpn_box_reg: 0.0264 (0.0264)  time: 13.6574  data: 11.7975  max mem: 8584
Epoch: [0]  [  10/1140]  eta: 1:53:02  lr: 0.000000  loss: 1.3272 (1.3970)  loss_classifier: 0.7913 (0.8296)  loss_box_reg: 0.3776 (0.3756)  loss_objectness: 0.1519 (0.1656)  loss_rpn_box_reg: 0.0245 (0.0262)  time: 6.0024  data: 4.6713  max mem: 9007
Epoch: [0]  [  20/1140]  eta: 1:43:03  lr: 0.000000  loss: 0.9279 (1.1102)  loss_classifier: 0.3778 (0.5739)  loss_box_reg: 0.3569 (0.3615)  loss_objectness: 0.1414 (0.1502)  loss_rpn_box_reg: 0.0223 (0.0245)  time: 5.1138  data: 3.8540  max mem: 9007
Epoch: [0]  [  30/1140]  eta: 1:39:51  lr: 0.000000  loss: 0.7392 (0.9957)  loss_classifier: 0.2395 (0.4636)  loss_box_reg: 0.3319 (0.3524)  loss_objectness: 0.1355 (0.1547)  loss_rpn_box_reg: 0.0205 (0.0249)  time: 5.0656  data: 3.8185  max 

In [None]:
save_path = os.path.join(weights_folder, f'frcnn_pt_{now}_e{epoch}')
torch.save(model.state_dict(), save_path)

In [None]:
# evaluate(model, val_loader, device=device) # TODO: to extract AP all 0.5-0.95


Test:  [ 0/19]  eta: 0:01:08  model_time: 0.6091 (0.6091)  evaluator_time: 0.0897 (0.0897)  time: 3.6090  data: 2.7256  max mem: 4060
Test:  [18/19]  eta: 0:00:01  model_time: 0.5907 (0.5780)  evaluator_time: 0.0757 (0.0645)  time: 1.3425  data: 0.5237  max mem: 4060
Test: Total time: 0:00:25 (1.3465 s / it)
Averaged stats: model_time: 0.5907 (0.5780)  evaluator_time: 0.0757 (0.0645)
Accumulating evaluation results...
DONE (t=0.10s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.785
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.973
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.912
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.401
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.732
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.825
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets

<coco_eval.CocoEvaluator at 0x7fa593e1a250>

## Fine-tuning

In [None]:
for param in model.parameters():
  param.requires_grad = True

In [None]:
# construct an optimizer
# TODO: try AdamW optimizer

model_params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.SGD(model_params, 
#                             lr=params['LR'],
#                             momentum=0.9, 
#                             weight_decay=0.0005)

# optimizer = torch.optim.AdamW(model_params, lr=params['LR'], weight_decay = 0.01)
optimizer = torch.optim.AdamW(model_params, lr=1e-7, weight_decay = 0.01)

# def opt_func(params, **kwargs): return OptimWrapper(optimizer)
# and a learning rate scheduler which decreases the learning rate by 2x every 3 epochs
# TODO: try cosine_scheduler

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.5)

In [1]:
starting_epoch = 2
for epoch in range(starting_epoch, starting_epoch + params['MAXEPOCHS']):
    # train for one epoch, printing every 1 iteration
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the test dataset
    res = evaluate(model, val_loader, device=device) # TODO: to extract AP all 0.5-0.95


    # save_path = os.path.join(weights_folder, f'frcnn_{now}_e{epoch}')
    save_path = os.path.join(weights_folder, f'frcnn_pt_{now}_e{epoch}')
    torch.save(model.state_dict(), save_path)
    # save model weights



In [None]:
# note: do inference in CV_inference!

## Visualization of results

Now that training has finished, let's have a look at what it actually predicts.

In [None]:
# pick one image from the validation set
img, _ = val_dataset[391]

model.eval()
with torch.no_grad():
    prediction = model([img.to(device)])

prediction

Printing the prediction shows that we have a list of dictionaries. Each element of the list corresponds to a different image. As we have a single image, there is a single dictionary in the list.
The dictionary contains the predictions for the image we passed. In this case, we can see that it contains `boxes`, `labels`, and `scores` as fields.

Let's inspect the image and the predicted boxes.
For that, we need to convert the image, which has been rescaled to 0-1 and had the channels flipped so that we have it in `[C, H, W]` format.

In [None]:
# convert the image, which has been rescaled to 0-1 and had the channels flipped
pred_img = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(pred_img)

img_preds = prediction[0]
for i in range(len(img_preds["boxes"])):
    x1, y1, x2, y2 = img_preds["boxes"][i]
    label = int(img_preds["labels"][i])
    score = float(img_preds["scores"][i])

    draw.rectangle(((x1, y1), (x2, y2)), outline="red")
    text = f'{dataset.cat2name[label]}: {score}'
    draw.text((x1+5, y1+5), text)

display(pred_img)

## Post-processing

We might notice that there are duplicate detections in the image. Let's post-process the detections with non-maximum suppression.

** Update: FasterRCNN already has NMS built into it, so you actually do not need to do NMS again.

In [None]:
img_preds = prediction[0]
keep_idx = batched_nms(boxes=img_preds["boxes"], scores=img_preds["scores"], idxs=img_preds["labels"], iou_threshold=params['IOU_THRESHOLD'])

Check the predictions again after applying nms.

** Update: You should not see any difference unless you have specified a lower IoU threshold than the default of 0.5.

In [None]:
# convert the image, which has been rescaled to 0-1 and had the channels flipped
pred_img = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(pred_img)

for i in range(len(img_preds["boxes"])):
    if i in keep_idx:
        x1, y1, x2, y2 = img_preds["boxes"][i]
        label = int(img_preds["labels"][i])
        score = float(img_preds["scores"][i])

        draw.rectangle(((x1, y1), (x2, y2)), outline="red")
        text = f'{dataset.cat2name[label]}: {score}'
        draw.text((x1+5, y1+5), text)

display(pred_img)

Now, let's further filter out the non-confident detections.

In [None]:
det_threshold = 0.5

# convert the image, which has been rescaled to 0-1 and had the channels flipped
pred_img = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(pred_img)

for i in range(len(img_preds["boxes"])):
    if i in keep_idx:
        x1, y1, x2, y2 = img_preds["boxes"][i]
        label = int(img_preds["labels"][i])
        score = float(img_preds["scores"][i])

        # filter out non-confident detections
        if score > det_threshold:
            draw.rectangle(((x1, y1), (x2, y2)), outline="red")
            text = f'{dataset.cat2name[label]}: {score}'
            draw.text((x1+5, y1+5), text)

display(pred_img)

## Generate Predictions on Test Images

In [None]:
til_test_root = "/content/drive/MyDrive/Qualifiers/CV Interim Dataset/" # extracted testing images path
test_img_root = os.path.join(til_test_root, "Images")
img_dir = os.scandir(test_img_root)
test_annotation = os.path.join(til_test_root, 'interim_no_annotations.json')

In [None]:
# load model weights (if not using the current trained model)
model.load_state_dict(torch.load(save_path, map_location=device))
model.to(device)
model.eval()

In [None]:
with open(test_annotation, 'r') as f:
  test_images = json.load(f)

Let's visualize some predictions on the test images. Run this a few times to visualize different images.

In [None]:
img = Image.open(next(img_dir).path).convert('RGB')
draw = ImageDraw.Draw(img)
det_threshold = 0.5

# do the prediction
with torch.no_grad():
    img_tensor = transforms.ToTensor()(img)
    img_preds = model([img_tensor.to(device)])[0]

for i in range(len(img_preds["boxes"])):
    x1, y1, x2, y2 = img_preds["boxes"][i]
    label = int(img_preds["labels"][i])
    score = float(img_preds["scores"][i])

    # filter out non-confident detections
    if score > det_threshold:
        draw.rectangle(((x1, y1), (x2, y2)), outline="red")
        text = f'{dataset.cat2name[label]}: {score}'
        draw.text((x1+5, y1+5), text)

display(img)

## Submission of Results

Submission json file should be in [COCO format](https://cocodataset.org/#format-results).

```
[{
    "image_id": int, 
    "category_id": int, 
    "bbox": [x,y,width,height], 
    "score": float,
}]
```

Refer to **sample_submission_cv.json** for an example.

For this competition, the metric for evaluation will be mAP @ 0.50:0.95

In [None]:
# generate detections on the folder of test images (this will be used for submission)
from tqdm.notebook import tqdm

detections = []
with torch.no_grad():
    for image in tqdm(test_images['images'], total=len(test_images['images'])):
        img_id = int(image['id'])

        img = Image.open(os.path.join(test_img_root, image['file_name'])).convert('RGB')
        img_tensor = transforms.ToTensor()(img)

        preds = model([img_tensor.to(device)])[0]

        for i in range(len(preds["boxes"])):
            x1, y1, x2, y2 = preds["boxes"][i]
            label = int(preds["labels"][i])
            score = float(preds["scores"][i])

            left = int(x1)
            top = int(y1)
            width = int(x2 - x1)
            height = int(y2 - y1)

            detections.append({'image_id':img_id, 'category_id':label, 'bbox':[left, top, width, height], 'score':score})

In [None]:
test_pred_json = os.path.join('/content/drive/MyDrive/results/cv', f"test_preds_{now}.json")
with open(test_pred_json, 'w') as f:
    json.dump(detections, f)