### Notebook 11: Final model training

Here we take the best model from the previous notebook and fine-tune it in two stages: first on the validation data, then again on the training data with a lower learning rate, before making our final submission.

In [1]:
import os
import detectron2
from pathlib import Path
import cv2
import numpy as np
import torch
import json
import pycocotools.mask as mask_util
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog, DatasetMapper
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.logger import setup_logger
from detectron2.evaluation.evaluator import DatasetEvaluator
from detectron2.data import transforms as T
from detectron2.data import build_detection_test_loader, build_detection_train_loader
from detectron2.modeling import DatasetMapperTTA
setup_logger()

<Logger detectron2 (DEBUG)>

In [2]:
# NOTE: the split names are deliberately swapped in this cell. The first
# training stage fine-tunes on the validation annotations, so the name
# 'sartorius_train' points at annotations_val.json and 'sartorius_val'
# points at annotations_train.json.
dataDir = Path('../')
register_coco_instances(
    name='sartorius_val',
    metadata={},
    json_file='../sartorius-annotations-coco-format/annotations_train.json',
    image_root=dataDir,
)
register_coco_instances(
    name='sartorius_train',
    metadata={},
    json_file='../sartorius-annotations-coco-format/annotations_val.json',
    image_root=dataDir,
)

In [2]:
# Per-class post-processing parameters. Both lists are indexed by the most
# frequent predicted class of an image (see `score`).
# Minimum detection score an instance must reach to be kept.
score_threshold = [0.204, 0.386, 0.568]
# Minimum number of pixels a mask must still cover, after removing pixels
# already claimed by earlier masks, to be kept.
min_mask_area = [75, 150, 75]
def precision_at(threshold, iou):
    """Count matches in an IoU array at a single IoU threshold.

    Args:
        threshold: IoU value an entry must strictly exceed to count as a match.
        iou: numpy array of IoU values. `score` passes the result of
            `mask_util.iou(enc_preds, enc_targs, ...)`; presumably rows are
            predictions and columns are targets -- TODO confirm.

    Returns:
        A tuple `(true_positives, false_positives, false_negatives)` where
        * true_positives  = number of rows with exactly one match,
        * false_positives = number of columns with no match,
        * false_negatives = number of rows with no match.
        For an array that is not at least 2-D, true_positives and
        false_negatives are 0 and only the axis-0 count is computed.
    """
    hits = iou > threshold
    # Entries along axis 0 that never exceed the threshold.
    unmatched_axis0 = np.sum(hits, axis=0) == 0
    if len(hits.shape) > 1:
        matches_per_row = np.sum(hits, axis=1)
        n_true_pos = np.sum(matches_per_row == 1)
        n_false_neg = np.sum(matches_per_row == 0)
    else:
        # Degenerate (non-matrix) input: there are no rows to inspect.
        n_true_pos = np.sum(0)
        n_false_neg = np.sum(0)
    return n_true_pos, np.sum(unmatched_axis0), n_false_neg

def score(pred, targ):
    """Score one image's predictions against its ground-truth annotations.

    Args:
        pred: dict with an 'instances' entry exposing `pred_classes`,
            `scores` and `pred_masks` tensors.
        targ: iterable of annotation dicts, each with a 'segmentation' entry
            that is passed straight to `mask_util.iou` (presumably RLE
            encoded -- TODO confirm against the dataset mapper).

    Returns:
        0. when no instance reaches the score threshold; otherwise the mean
        over the IoU thresholds `np.arange(0.5, 1.0, 0.05)` of
        `tp / (tp + fp + fn)` as counted by `precision_at`.
    """
    instances = pred['instances']
    # The image is treated as belonging to its most frequent predicted class;
    # that class selects the score threshold and the minimum mask area.
    pred_class = torch.mode(instances.pred_classes)[0]
    keep = instances.scores >= score_threshold[pred_class]
    pred_masks = instances.pred_masks[keep].cpu().numpy()
    if len(pred_masks) == 0:
        return 0.

    enc_preds = []
    # Pixels already claimed by an accepted mask; later masks lose them so the
    # accepted masks never overlap.
    occupied = np.zeros(pred_masks[0].shape, dtype=int)
    for candidate in pred_masks:
        visible = (candidate * (1 - occupied)).astype(bool)
        if visible.sum() < min_mask_area[pred_class]:
            continue
        occupied += visible
        enc_preds.append(mask_util.encode(np.asarray(visible, order='F')))

    enc_targs = [annotation['segmentation'] for annotation in targ]
    ious = mask_util.iou(enc_preds, enc_targs, [0] * len(enc_targs))

    precisions = []
    for iou_threshold in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(iou_threshold, ious)
        precisions.append(tp / (tp + fp + fn))
    return np.mean(precisions)

class MAPIOUEvaluator(DatasetEvaluator):
    """Evaluator that reports the mean of the per-image `score` values.

    Ground-truth annotations are read once from the dataset catalog and
    looked up by 'image_id' while predictions are processed.
    """

    def __init__(self, dataset_name):
        """Cache the annotations of every image in `dataset_name`.

        Args:
            dataset_name: name of a dataset registered in `DatasetCatalog`.
        """
        dataset_dicts = DatasetCatalog.get(dataset_name)
        self.annotations_cache = {item['image_id']:item['annotations'] for item in dataset_dicts}
        # Create the score list here as well as in reset(), so that calling
        # process() or evaluate() on a fresh instance does not raise
        # AttributeError.
        self.scores = []

    def reset(self):
        """Discard all scores accumulated so far."""
        self.scores = []

    def process(self, inputs, outputs):
        """Append one score per (input, output) pair.

        Args:
            inputs: iterable of dicts, each with an 'image_id' entry.
            outputs: iterable of dicts, each with an 'instances' entry.
        """
        for inp, out in zip(inputs, outputs):
            if len(out['instances']) == 0:
                # No detections at all: the image scores zero.
                self.scores.append(0)
            else:
                targ = self.annotations_cache[inp['image_id']]
                self.scores.append(score(out, targ))

    def evaluate(self):
        """Return the mean of the accumulated scores under the key "mAP IoU"."""
        return {"mAP IoU": np.mean(self.scores)}

class Trainer(DefaultTrainer):
    """DefaultTrainer using the MAPIOUEvaluator and an augmented train loader."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a MAPIOUEvaluator for `dataset_name` (other arguments are unused)."""
        return MAPIOUEvaluator(dataset_name)

    @classmethod
    def build_train_loader(cls, cfg):
        """Build the training loader with the augmentation pipeline below."""
        augmentations = [
            T.RandomContrast(0.95,1.05),
            T.RandomBrightness(0.95,1.05),
            T.RandomFlip(prob=0.5, horizontal=True, vertical=False),
            T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
            T.ResizeShortestEdge(
                short_edge_length=(832, 864, 896, 928, 960, 992, 1024),
                max_size=9999,
                sample_style='choice',
            ),
        ]
        train_mapper = DatasetMapper(cfg, is_train=True, augmentations=augmentations)
        return build_detection_train_loader(cfg, mapper=train_mapper)

In [4]:
# train on validation set
# Stage 1: fine-tune the checkpoint from the previous notebook on the dataset
# registered as 'sartorius_train' and evaluate on 'sartorius_val'.
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.INPUT.MIN_SIZE_TEST = 1024
cfg.INPUT.MAX_SIZE_TEST = 3000
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_10.2/model_0009679.pth'

# Load the dataset once; its size drives the checkpoint and evaluation periods.
n_train_images = len(DatasetCatalog.get('sartorius_train'))

cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0002
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 3000
cfg.SOLVER.STEPS = list(range(1000,3000,1000))
# Checkpoint once per pass over the training images.
cfg.SOLVER.CHECKPOINT_PERIOD = n_train_images // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 4000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[24], [40], [80], [128], [256]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 3.0]]
cfg.MODEL.RPN.IOU_THRESHOLDS = [0.2, 0.7]
cfg.MODEL.RPN.NMS_THRESH = 0.75
# Single source of truth for the test-time score threshold. The earlier
# `.3` assignment was always overwritten by this value and has been removed,
# as has the repeated RPN.BATCH_SIZE_PER_IMAGE assignment.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.03
cfg.TEST.DETECTIONS_PER_IMAGE = 700
# Same expression as before (multiply first, then floor-divide).
cfg.TEST.EVAL_PERIOD = 2 * n_train_images // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
# Move the run out of the shared output directory so the next stage starts
# clean; use cfg.OUTPUT_DIR rather than repeating its default by hand.
os.rename(cfg.OUTPUT_DIR, "output_11.1")

[32m[02/13 11:03:10 d2.data.datasets.coco]: [0mLoaded 121 images in COCO format from ../sartorius-annotations-coco-format/annotations_val.json
[32m[02/13 11:03:10 d2.data.datasets.coco]: [0mLoaded 121 images in COCO format from ../sartorius-annotations-coco-format/annotations_val.json
[32m[02/13 11:03:13 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),

  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[02/13 11:03:34 d2.utils.events]: [0m eta: 0:36:37  iter: 19  total_loss: 1.42  loss_cls: 0.3744  loss_box_reg: 0.5738  loss_mask: 0.2941  loss_rpn_cls: 0.04519  loss_rpn_loc: 0.1105  time: 0.8887  data_time: 0.2772  lr: 3.2362e-06  max_mem: 7961M
[32m[02/13 11:03:52 d2.utils.events]: [0m eta: 0:37:05  iter: 39  total_loss: 1.392  loss_cls: 0.3584  loss_box_reg: 0.5753  loss_mask: 0.3034  loss_rpn_cls: 0.04174  loss_rpn_loc: 0.1149  time: 0.9118  data_time: 0.2166  lr: 6.4322e-06  max_mem: 8235M
[32m[02/13 11:04:11 d2.utils.events]: [0m eta: 0:35:52  iter: 59  total_loss: 1.324  loss_cls: 0.3392  loss_box_reg: 0.5418  loss_mask: 0.2838  loss_rpn_cls: 0.04178  loss_rpn_loc: 0.1064  time: 0.9201  data_time: 0.2265  lr: 9.6282e-06  max_mem: 8235M
[32m[02/13 11:04:26 d2.utils.events]: [0m eta: 0:35:23  iter: 79  total_loss: 1.267  loss_cls: 0.3406  loss_box_reg: 0.5228  loss_mask: 0.281  loss_rpn_cls: 0.02557  loss_rpn_loc: 0.0964  time: 0.8680  data_time: 0.0402  lr: 1.2824e-0

In [3]:
# train on training set
# Stage 2: continue from the stage-1 checkpoint, this time on the real
# training split with a lower base learning rate.
dataDir=Path('../')
# Registering a name that is already in the catalogs fails, so drop any
# registration left over from an earlier cell in the same kernel first.
for stale_name in ('sartorius_train', 'sartorius_val'):
    if stale_name in DatasetCatalog.list():
        DatasetCatalog.remove(stale_name)
    if stale_name in MetadataCatalog.list():
        MetadataCatalog.remove(stale_name)
register_coco_instances('sartorius_train',{}, '../sartorius-annotations-coco-format/annotations_train.json', dataDir)
register_coco_instances('sartorius_val',{},'../sartorius-annotations-coco-format/annotations_val.json', dataDir)

cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.INPUT.MIN_SIZE_TEST = 1024
cfg.INPUT.MAX_SIZE_TEST = 3000
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_11.1/model_0001439.pth'

# Load the dataset once; its size drives the checkpoint and evaluation periods.
n_train_images = len(DatasetCatalog.get('sartorius_train'))

cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0001
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
# Checkpoint and evaluate once per pass over the training images.
cfg.SOLVER.CHECKPOINT_PERIOD = n_train_images // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 4000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[24], [40], [80], [128], [256]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 3.0]]
cfg.MODEL.RPN.IOU_THRESHOLDS = [0.2, 0.7]
cfg.MODEL.RPN.NMS_THRESH = 0.75
# Single source of truth for the test-time score threshold. The earlier
# `.3` assignment was always overwritten by this value and has been removed,
# as has the repeated RPN.BATCH_SIZE_PER_IMAGE assignment.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.03
cfg.TEST.DETECTIONS_PER_IMAGE = 700
cfg.TEST.EVAL_PERIOD = n_train_images // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
# Keep this run's checkpoints under their own directory name; use
# cfg.OUTPUT_DIR rather than repeating its default by hand.
os.rename(cfg.OUTPUT_DIR, "output_11.2")

[32m[02/13 13:03:09 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/13 13:03:10 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/13 13:03:13 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[02/13 13:03:33 d2.utils.events]: [0m eta: 1:56:50  iter: 19  total_loss: 1.251  loss_cls: 0.3154  loss_box_reg: 0.4978  loss_mask: 0.2924  loss_rpn_cls: 0.03218  loss_rpn_loc: 0.106  time: 0.8024  data_time: 0.2694  lr: 1.9981e-06  max_mem: 8018M
[32m[02/13 13:03:50 d2.utils.events]: [0m eta: 1:53:56  iter: 39  total_loss: 1.265  loss_cls: 0.3243  loss_box_reg: 0.5256  loss_mask: 0.2806  loss_rpn_cls: 0.03559  loss_rpn_loc: 0.09594  time: 0.8130  data_time: 0.1411  lr: 3.9961e-06  max_mem: 8018M
[32m[02/13 13:03:59 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7ff0e10fe310> to CPU due to CUDA OOM
[32m[02/13 13:04:06 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7ff0e10fe310> to CPU due to CUDA OOM
[32m[02/13 13:04:13 d2.utils.events]: [0m eta: 1:58:03  iter: 59  total_loss: 1.359  loss_cls: 0.341  loss_box_reg: 0.563  loss_mask: 0.2898  loss_rpn_cls: 0.04548  loss_rpn_loc: 0.1179  time: 0.9290  data_time: 0.

We select the model obtained after iteration 9919 (which has the best score)