### Notebook 9: Mask R-CNN hyperparameter tuning

In [1]:
import os
import detectron2
from pathlib import Path
import cv2
import numpy as np
import torch
import json
import pycocotools.mask as mask_util
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.logger import setup_logger
from detectron2.evaluation.evaluator import DatasetEvaluator
setup_logger()

<Logger detectron2 (DEBUG)>

In [2]:
dataDir=Path('../')
register_coco_instances('sartorius_train',{}, '../sartorius-annotations-coco-format/annotations_train.json', dataDir)
register_coco_instances('sartorius_val',{},'../sartorius-annotations-coco-format/annotations_val.json', dataDir)

[32m[02/06 18:20:17 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json


In [3]:
score_threshold = [0.204, 0.386, 0.568]
min_mask_area = [75, 180, 75]
def precision_at(threshold, iou):
    matches = iou > threshold
    false_positives = np.sum(matches, axis=0) == 0
    if len(matches.shape)>1:
        false_negatives = np.sum(matches, axis=1) == 0
        true_positives = np.sum(matches, axis=1) == 1
    else:
        false_negatives = 0
        true_positives = 0
    return np.sum(true_positives), np.sum(false_positives), np.sum(false_negatives)

def score(pred, targ):
    pred_class = torch.mode(pred['instances'].pred_classes)[0]
    take = pred['instances'].scores >= score_threshold[pred_class]
    pred_masks = pred['instances'].pred_masks[take].cpu().numpy()
    if len(pred_masks)==0:
        return 0.
    else:
        enc_preds = []
        used = np.zeros(pred_masks[0].shape, dtype=int)
        for mask in pred_masks:
            mask = (mask * (1-used)).astype(bool)
            if mask.sum() >= min_mask_area[pred_class]:
                used += mask
                enc_preds.append(mask_util.encode(np.asarray(mask, order='F')) )
        enc_targs = list(map(lambda x:x['segmentation'], targ))
        ious = mask_util.iou(enc_preds, enc_targs, [0]*len(enc_targs))
        prec = []
        for t in np.arange(0.5, 1.0, 0.05):
            tp, fp, fn = precision_at(t, ious)
            p = tp / (tp + fp + fn)
            prec.append(p)
        return np.mean(prec)

class MAPIOUEvaluator(DatasetEvaluator):
    def __init__(self, dataset_name):
        dataset_dicts = DatasetCatalog.get(dataset_name)
        self.annotations_cache = {item['image_id']:item['annotations'] for item in dataset_dicts}
            
    def reset(self):
        self.scores = []

    def process(self, inputs, outputs):
        for inp, out in zip(inputs, outputs):
            if len(out['instances']) == 0:
                self.scores.append(0)    
            else:
                targ = self.annotations_cache[inp['image_id']]
                self.scores.append(score(out, targ))

    def evaluate(self):
        return {"mAP IoU": np.mean(self.scores)}

class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        return MAPIOUEvaluator(dataset_name)

In [4]:
# Increasing the batch size per image (the number of ROIs per image) to 512
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.1")

[32m[02/03 16:46:35 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/03 16:46:36 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/03 16:46:39 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/03 16:46:40 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[02/03 16:46:52 d2.utils.events]: [0m eta: 1:07:12  iter: 19  total_loss: 3.051  loss_cls: 1.419  loss_box_reg: 0.3043  loss_mask: 0.6935  loss_rpn_cls: 0.3503  loss_rpn_loc: 0.2986  time: 0.5016  data_time: 0.1857  lr: 9.9905e-06  max_mem: 3969M
[32m[02/03 16:47:03 d2.utils.events]: [0m eta: 1:03:26  iter: 39  total_loss: 2.779  loss_cls: 1.318  loss_box_reg: 0.1775  loss_mask: 0.6867  loss_rpn_cls: 0.3173  loss_rpn_loc: 0.2404  time: 0.5358  data_time: 0.2030  lr: 1.998e-05  max_mem: 6384M
[32m[02/03 16:47:19 d2.utils.events]: [0m eta: 1:06:06  iter: 59  total_loss: 2.71  loss_cls: 1.118  loss_box_reg: 0.2774  loss_mask: 0.6758  loss_rpn_cls: 0.3015  loss_rpn_loc: 0.264  time: 0.6170  data_time: 0.3703  lr: 2.997e-05  max_mem: 6384M
[32m[02/03 16:47:31 d2.utils.events]: [0m eta: 1:06:26  iter: 79  total_loss: 2.493  loss_cls: 0.9342  loss_box_reg: 0.4445  loss_mask: 0.6509  loss_rpn_cls: 0.2431  loss_rpn_loc: 0.2533  time: 0.6229  data_time: 0.2430  lr: 3.9961e-05  max_me

In [5]:
# Increasing the number of top scoring RPN proposals to keep before applying NMS (15000 for train and 10000 for test) and after applying NMS (3000 for train and 2000 for test)
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.2")

[32m[02/04 09:00:32 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 09:00:33 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 09:00:37 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/04 09:00:38 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[02/04 09:00:51 d2.utils.events]: [0m eta: 1:09:39  iter: 19  total_loss: 3.216  loss_cls: 1.44  loss_box_reg: 0.4789  loss_mask: 0.6903  loss_rpn_cls: 0.3186  loss_rpn_loc: 0.2651  time: 0.5583  data_time: 0.1971  lr: 9.9905e-06  max_mem: 4337M
[32m[02/04 09:01:04 d2.utils.events]: [0m eta: 1:10:42  iter: 39  total_loss: 3.182  loss_cls: 1.354  loss_box_reg: 0.4957  loss_mask: 0.6846  loss_rpn_cls: 0.3067  loss_rpn_loc: 0.2616  time: 0.6065  data_time: 0.2247  lr: 1.998e-05  max_mem: 5909M
[32m[02/04 09:01:17 d2.utils.events]: [0m eta: 1:12:30  iter: 59  total_loss: 3.021  loss_cls: 1.198  loss_box_reg: 0.5968  loss_mask: 0.673  loss_rpn_cls: 0.2879  loss_rpn_loc: 0.2574  time: 0.6170  data_time: 0.2070  lr: 2.997e-05  max_mem: 5909M
[32m[02/04 09:01:28 d2.utils.events]: [0m eta: 1:10:58  iter: 79  total_loss: 2.756  loss_cls: 0.9924  loss_box_reg: 0.6185  loss_mask: 0.6453  loss_rpn_cls: 0.2398  loss_rpn_loc: 0.2479  time: 0.6047  data_time: 0.1540  lr: 3.9961e-05  max_me

In [6]:
# Increasing the number of top scoring RPN proposals to keep only before applying NMS
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.3")

[32m[02/04 10:59:12 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 10:59:13 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 10:59:14 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/04 10:59:15 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[02/04 10:59:29 d2.utils.events]: [0m eta: 1:09:58  iter: 19  total_loss: 3.01  loss_cls: 1.408  loss_box_reg: 0.2622  loss_mask: 0.6902  loss_rpn_cls: 0.329  loss_rpn_loc: 0.2716  time: 0.6791  data_time: 0.2939  lr: 9.9905e-06  max_mem: 6558M
[32m[02/04 10:59:40 d2.utils.events]: [0m eta: 1:05:52  iter: 39  total_loss: 2.906  loss_cls: 1.294  loss_box_reg: 0.3404  loss_mask: 0.6842  loss_rpn_cls: 0.3146  loss_rpn_loc: 0.2536  time: 0.6129  data_time: 0.1707  lr: 1.998e-05  max_mem: 6558M
[32m[02/04 10:59:50 d2.utils.events]: [0m eta: 1:05:36  iter: 59  total_loss: 2.679  loss_cls: 1.081  loss_box_reg: 0.3684  loss_mask: 0.6678  loss_rpn_cls: 0.2654  loss_rpn_loc: 0.2777  time: 0.5834  data_time: 0.1522  lr: 2.997e-05  max_mem: 6558M
[32m[02/04 10:59:59 d2.utils.events]: [0m eta: 1:06:43  iter: 79  total_loss: 2.475  loss_cls: 0.9116  loss_box_reg: 0.4195  loss_mask: 0.6451  loss_rpn_cls: 0.

In [14]:
# mean pixel values in the training dataset
from PIL import Image
_, _, filenames = next(os.walk("../train/"), (None, None, []))
s=0.
for filename in filenames:
    imtest = Image.open(f'../train/{filename}')
    imtest = np.asarray(imtest)
    s+=np.mean(imtest)
s/len(filenames)

127.96482685978931

In [15]:
# correcting the mean pixel values used for normalisation and unfreezing the second layer of the backbone (making it trainable)
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.4")

[32m[02/04 13:49:58 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 13:49:59 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 13:50:00 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/04 13:50:01 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride


[32m[02/04 13:50:13 d2.utils.events]: [0m eta: 1:16:43  iter: 19  total_loss: 3.2  loss_cls: 1.368  loss_box_reg: 0.4795  loss_mask: 0.6948  loss_rpn_cls: 0.3864  loss_rpn_loc: 0.2618  time: 0.6026  data_time: 0.1502  lr: 9.9905e-06  max_mem: 6670M
[32m[02/04 13:50:25 d2.utils.events]: [0m eta: 1:17:49  iter: 39  total_loss: 3.111  loss_cls: 1.296  loss_box_reg: 0.5804  loss_mask: 0.6872  loss_rpn_cls: 0.3017  loss_rpn_loc: 0.2378  time: 0.6167  data_time: 0.1657  lr: 1.998e-05  max_mem: 6670M
[32m[02/04 13:50:36 d2.utils.events]: [0m eta: 1:17:01  iter: 59  total_loss: 2.9  loss_cls: 1.121  loss_box_reg: 0.6241  loss_mask: 0.6702  loss_rpn_cls: 0.2519  loss_rpn_loc: 0.2299  time: 0.5797  data_time: 0.0699  lr: 2.997e-05  max_mem: 6670M
[32m[02/04 13:50:49 d2.utils.events]: [0m eta: 1:17:19  iter: 79  total_loss: 2.726  loss_cls: 0.9239  loss_box_reg: 0.6438  loss_mask: 0.6529  loss_rpn_cls: 0.232  loss_rpn_loc: 0.2218  time: 0.6061  data_time: 0.2224  lr: 3.9961e-05  max_mem: 

In [17]:
# unfreezing the first layer of the backbone
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 0
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.5")

[32m[02/04 15:55:49 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 15:55:50 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 15:55:51 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/04 15:55:52 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride


[32m[02/04 15:56:10 d2.utils.events]: [0m eta: 1:28:48  iter: 19  total_loss: 3.332  loss_cls: 1.386  loss_box_reg: 0.5705  loss_mask: 0.6899  loss_rpn_cls: 0.3127  loss_rpn_loc: 0.2975  time: 0.9220  data_time: 0.4318  lr: 9.9905e-06  max_mem: 7043M
[32m[02/04 15:56:24 d2.utils.events]: [0m eta: 1:26:04  iter: 39  total_loss: 3.177  loss_cls: 1.312  loss_box_reg: 0.5733  loss_mask: 0.6848  loss_rpn_cls: 0.3146  loss_rpn_loc: 0.2766  time: 0.8035  data_time: 0.2189  lr: 1.998e-05  max_mem: 7043M
[32m[02/04 15:56:36 d2.utils.events]: [0m eta: 1:22:54  iter: 59  total_loss: 2.993  loss_cls: 1.143  loss_box_reg: 0.6084  loss_mask: 0.6706  loss_rpn_cls: 0.2776  loss_rpn_loc: 0.2583  time: 0.7205  data_time: 0.1268  lr: 2.997e-05  max_mem: 7043M
[32m[02/04 15:56:48 d2.utils.events]: [0m eta: 1:22:44  iter: 79  total_loss: 2.725  loss_cls: 0.9296  loss_box_reg: 0.6844  loss_mask: 0.6465  loss_rpn_cls: 0.2357  loss_rpn_loc: 0.2403  time: 0.6950  data_time: 0.1549  lr: 3.9961e-05  max_

In [19]:
# Increasing the RPN batch size per image and decreasing the ROI heads minimum score threshold
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .3
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.6")

[32m[02/04 19:02:28 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 19:02:29 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 19:02:30 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/04 19:02:31 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride


[32m[02/04 19:02:46 d2.utils.events]: [0m eta: 1:17:16  iter: 19  total_loss: 3.095  loss_cls: 1.284  loss_box_reg: 0.529  loss_mask: 0.6923  loss_rpn_cls: 0.3269  loss_rpn_loc: 0.2261  time: 0.7143  data_time: 0.3056  lr: 9.9905e-06  max_mem: 7240M
[32m[02/04 19:03:00 d2.utils.events]: [0m eta: 1:26:30  iter: 39  total_loss: 3.001  loss_cls: 1.221  loss_box_reg: 0.5494  loss_mask: 0.686  loss_rpn_cls: 0.3178  loss_rpn_loc: 0.2218  time: 0.7154  data_time: 0.2364  lr: 1.998e-05  max_mem: 7240M
[32m[02/04 19:03:15 d2.utils.events]: [0m eta: 1:21:49  iter: 59  total_loss: 2.856  loss_cls: 1.069  loss_box_reg: 0.653  loss_mask: 0.6751  loss_rpn_cls: 0.2841  loss_rpn_loc: 0.2426  time: 0.7175  data_time: 0.2398  lr: 2.997e-05  max_mem: 7240M
[32m[02/04 19:03:26 d2.utils.events]: [0m eta: 1:22:58  iter: 79  total_loss: 2.631  loss_cls: 0.8948  loss_box_reg: 0.6729  loss_mask: 0.6469  loss_rpn_cls: 0.2247  loss_rpn_loc: 0.1936  time: 0.6800  data_time: 0.0878  lr: 3.9961e-05  max_mem

In [71]:
# changing the anchor generator sizes and aspect ratios
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .3
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[24, 40, 80, 128, 256]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 1.0, 3.0]]
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.7")

[32m[02/04 21:55:08 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 21:55:09 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/04 21:55:10 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'proposal_generator.rpn_head.objectness_logits.weight' to the model due to incompatible shapes: (3, 256, 1, 1) in the checkpoint but (15, 256, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'proposal_generator.rpn_head.objectness_logits.bias' to the model due to incompatible shapes: (3,) in the checkpoint but (15,) in the model! You might want to double check if this is expected.
Skip loading parameter 'proposal_generator.rpn_head.anchor_deltas.weight' to the model due to incompatible shapes: (12, 256, 1, 1) in the checkpoint but (60, 256, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'proposal_generator.rpn_head.anchor_deltas.bias' to the model due to incompatible shapes: (12,) in the checkpoint but (60,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible

[32m[02/04 21:55:11 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride


[32m[02/04 21:55:15 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/04 21:55:21 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/04 21:55:28 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/04 21:55:33 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/04 21:55:38 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/04 21:55:44 d2.utils.events]: [0m eta: 1:27:25  iter: 19  total_loss: 3.087  loss_cls: 1.35  loss_box_reg: 0.05116  loss_mask: 0.6923  loss_rpn_cls: 0.6956  loss_rpn_loc: 0.2911  time: 1.7304  data_time: 0.1277  lr: 9.9905e-06  max_mem: 9210M
[32m[02/04 21:55:44 d2.utils.memory]: [0mAttempting to copy 

In [73]:
# changing only the anchor generator aspect ratios
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .3
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 1.0, 3.0]]
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.8")

[32m[02/05 11:02:00 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 11:02:01 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 11:02:02 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/05 11:02:03 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[02/05 11:02:17 d2.utils.events]: [0m eta: 1:22:24  iter: 19  total_loss: 3.285  loss_cls: 1.383  loss_box_reg: 0.5793  loss_mask: 0.6971  loss_rpn_cls: 0.3415  loss_rpn_loc: 0.2482  time: 0.7281  data_time: 0.2516  lr: 9.9905e-06  max_mem: 9526M
[32m[02/05 11:02:31 d2.utils.events]: [0m eta: 1:22:14  iter: 39  total_loss: 3.136  loss_cls: 1.287  loss_box_reg: 0.5804  loss_mask: 0.6898  loss_rpn_cls: 0.2998  loss_rpn_loc: 0.241  time: 0.7004  data_time: 0.2197  lr: 1.998e-05  max_mem: 9526M
[32m[02/05 11:02:43 d2.utils.events]: [0m eta: 1:21:31  iter: 59  total_loss: 2.939  loss_cls: 1.107  loss_box_reg: 0.5904  loss_mask: 0.6756  loss_rpn_cls: 0.2893  loss_rpn_loc: 0.2273  time: 0.6633  data_time: 0.1438  lr: 2.997e-05  max_mem: 9526M
[32m[02/05 11:02:55 d2.utils.events]: [0m eta: 1:19:52  iter: 79  total_loss: 2.786  loss_cls: 0.9529  loss_box_reg: 0.7385  loss_mask: 0.6531  loss_rpn_cls: 0

In [83]:
# changing only the anchor generator sizes
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .3
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16], [32], [64], [128], [256]]
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.9")

[32m[02/05 13:41:35 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 13:41:36 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 13:41:37 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/05 13:41:38 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[02/05 13:41:54 d2.utils.events]: [0m eta: 1:16:35  iter: 19  total_loss: 2.925  loss_cls: 1.395  loss_box_reg: 0.2373  loss_mask: 0.6913  loss_rpn_cls: 0.3704  loss_rpn_loc: 0.1941  time: 0.6930  data_time: 0.3306  lr: 9.9905e-06  max_mem: 9526M
[32m[02/05 13:42:06 d2.utils.events]: [0m eta: 1:15:46  iter: 39  total_loss: 2.633  loss_cls: 1.239  loss_box_reg: 0.2358  loss_mask: 0.687  loss_rpn_cls: 0.3196  loss_rpn_loc: 0.1787  time: 0.6595  data_time: 0.1953  lr: 1.998e-05  max_mem: 9526M
[32m[02/05 13:42:18 d2.utils.events]: [0m eta: 1:15:39  iter: 59  total_loss: 2.387  loss_cls: 0.9911  loss_box_reg: 0.3298  loss_mask: 0.6743  loss_rpn_cls: 0.2766  loss_rpn_loc: 0.16  time: 0.6251  data_time: 0.1238  lr: 2.997e-05  max_mem: 9526M
[32m[02/05 13:42:31 d2.utils.events]: [0m eta: 1:15:40  iter: 79  total_loss: 2.164  loss_cls: 0.7837  loss_box_reg: 0.3739  loss_mask: 0.6556  loss_rpn_cls: 0.

In [None]:
# changing the anchor generator sizes and aspect ratios (other values)
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .3
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16], [32], [64], [128], [256]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 0.5, 1.0, 2.0, 4.0]]
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.10")

[32m[02/05 15:52:21 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 15:52:22 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 15:52:23 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'proposal_generator.rpn_head.objectness_logits.weight' to the model due to incompatible shapes: (3, 256, 1, 1) in the checkpoint but (5, 256, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'proposal_generator.rpn_head.objectness_logits.bias' to the model due to incompatible shapes: (3,) in the checkpoint but (5,) in the model! You might want to double check if this is expected.
Skip loading parameter 'proposal_generator.rpn_head.anchor_deltas.weight' to the model due to incompatible shapes: (12, 256, 1, 1) in the checkpoint but (20, 256, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'proposal_generator.rpn_head.anchor_deltas.bias' to the model due to incompatible shapes: (12,) in the checkpoint but (20,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible s

[32m[02/05 15:52:24 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[02/05 15:52:32 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/05 15:52:35 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/05 15:52:43 d2.utils.events]: [0m eta: 1:21:07  iter: 19  total_loss: 2.919  loss_cls: 1.315  loss_box_reg: 0.01587  loss_mask: 0.6953  loss_rpn_cls: 0.6952  loss_rpn_loc: 0.1944  time: 0.9617  data_time: 0.2748  lr: 9.9905e-06  max_mem: 9526M
[32m[02/05 15:52:54 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/05 15:52:57 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/05 15:53:00 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> t

In [89]:
# changing the anchor generator sizes and aspect ratios (other values)
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .3
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[24], [40], [80], [128], [256]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 3.0]]
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.11")

[32m[02/05 18:29:30 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 18:29:31 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 18:29:32 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/05 18:29:33 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[02/05 18:29:47 d2.utils.events]: [0m eta: 1:16:03  iter: 19  total_loss: 3.282  loss_cls: 1.349  loss_box_reg: 0.5794  loss_mask: 0.6912  loss_rpn_cls: 0.3576  loss_rpn_loc: 0.3019  time: 0.5957  data_time: 0.2176  lr: 9.9905e-06  max_mem: 9526M
[32m[02/05 18:29:57 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/05 18:30:04 d2.utils.events]: [0m eta: 1:16:44  iter: 39  total_loss: 2.976  loss_cls: 1.252  loss_box_reg: 0.477  loss_mask: 0.6865  loss_rpn_cls: 0.3261  loss_rpn_loc: 0.2597  time: 0.7398  data_time: 0.3207  lr: 1.998e-05  max_mem: 9526M
[32m[02/05 18:30:17 d2.utils.events]: [0m eta: 1:17:11  iter: 59  total_loss: 2.746  loss_cls: 1.049  loss_box_reg: 0.5973  loss_mask: 0.6702  loss_rpn_cls: 0.2752  loss_rpn_loc: 0.2091  time: 0.7098  data_time: 0.1944  lr: 2.997e-05  max_mem: 9526M
[32m[02/05 18:30:31 d2.

In [92]:
# changing the RPN IOU thresholds and the NMS threshold
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 4000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .3
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[24], [40], [80], [128], [256]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 3.0]]
cfg.MODEL.RPN.IOU_THRESHOLDS = [0.4, 0.7]
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.NMS_THRESH = 0.6
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.12")

[32m[02/05 20:54:21 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 20:54:22 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 20:54:23 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/05 20:54:24 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[02/05 20:54:39 d2.utils.events]: [0m eta: 1:17:52  iter: 19  total_loss: 3.192  loss_cls: 1.335  loss_box_reg: 0.4647  loss_mask: 0.6943  loss_rpn_cls: 0.4021  loss_rpn_loc: 0.2733  time: 0.7555  data_time: 0.3069  lr: 9.9905e-06  max_mem: 9526M
[32m[02/05 20:54:52 d2.utils.events]: [0m eta: 1:14:56  iter: 39  total_loss: 2.95  loss_cls: 1.228  loss_box_reg: 0.488  loss_mask: 0.687  loss_rpn_cls: 0.3336  loss_rpn_loc: 0.2124  time: 0.6876  data_time: 0.1944  lr: 1.998e-05  max_mem: 9526M
[32m[02/05 20:55:03 d2.utils.events]: [0m eta: 1:15:02  iter: 59  total_loss: 2.89  loss_cls: 1.058  loss_box_reg: 0.6111  loss_mask: 0.6681  loss_rpn_cls: 0.2997  loss_rpn_loc: 0.2471  time: 0.6410  data_time: 0.1133  lr: 2.997e-05  max_mem: 9526M
[32m[02/05 20:55:17 d2.utils.events]: [0m eta: 1:15:00  iter: 79  total_loss: 2.57  loss_cls: 0.8471  loss_box_reg: 0.6056  loss_mask: 0.6426  loss_rpn_cls: 0.265

In [93]:
# changing the RPN IOU thresholds and the NMS threshold
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 4000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .3
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[24], [40], [80], [128], [256]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 3.0]]
cfg.MODEL.RPN.IOU_THRESHOLDS = [0.2, 0.7]
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.NMS_THRESH = 0.75
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.13")

[32m[02/05 23:23:16 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 23:23:17 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/05 23:23:18 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/05 23:23:19 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[02/05 23:23:34 d2.utils.events]: [0m eta: 1:13:40  iter: 19  total_loss: 2.99  loss_cls: 1.337  loss_box_reg: 0.4009  loss_mask: 0.6913  loss_rpn_cls: 0.3282  loss_rpn_loc: 0.2408  time: 0.7408  data_time: 0.2973  lr: 9.9905e-06  max_mem: 9526M
[32m[02/05 23:23:46 d2.utils.events]: [0m eta: 1:18:46  iter: 39  total_loss: 3.177  loss_cls: 1.268  loss_box_reg: 0.5786  loss_mask: 0.6846  loss_rpn_cls: 0.3271  loss_rpn_loc: 0.2844  time: 0.6605  data_time: 0.1172  lr: 1.998e-05  max_mem: 9526M
[32m[02/05 23:24:03 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7f9b0687f1f0> to CPU due to CUDA OOM
[32m[02/05 23:24:05 d2.utils.events]: [0m eta: 1:24:57  iter: 59  total_loss: 2.886  loss_cls: 1.091  loss_box_reg: 0.6299  loss_mask: 0.6733  loss_rpn_cls: 0.2977  loss_rpn_loc: 0.272  time: 0.7638  data_time: 0.3763  lr: 2.997e-05  max_mem: 9526M
[32m[02/05 23:24:17 d2.u

In [4]:
# Increasing the maximum number of detections per image
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
cfg.SOLVER.CHECKPOINT_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 4000
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.PIXEL_MEAN = [127.965, 127.965, 127.965]
cfg.MODEL.BACKBONE.FREEZE_AT = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .3
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[24], [40], [80], [128], [256]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 3.0]]
cfg.MODEL.RPN.IOU_THRESHOLDS = [0.2, 0.7]
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.RPN.NMS_THRESH = 0.75
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.03
cfg.TEST.DETECTIONS_PER_IMAGE = 700
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()
os.rename("output", "output_9.14")

[32m[02/06 18:27:20 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/06 18:27:21 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/06 18:27:23 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/06 18:27:25 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[02/06 18:27:39 d2.utils.events]: [0m eta: 1:21:41  iter: 19  total_loss: 3.311  loss_cls: 1.469  loss_box_reg: 0.5768  loss_mask: 0.6958  loss_rpn_cls: 0.3359  loss_rpn_loc: 0.2579  time: 0.6966  data_time: 0.2034  lr: 9.9905e-06  max_mem: 5557M
[32m[02/06 18:27:50 d2.utils.events]: [0m eta: 1:17:58  iter: 39  total_loss: 3.098  loss_cls: 1.343  loss_box_reg: 0.5051  loss_mask: 0.6889  loss_rpn_cls: 0.3425  loss_rpn_loc: 0.2068  time: 0.6358  data_time: 0.1388  lr: 1.998e-05  max_mem: 5557M
[32m[02/06 18:28:05 d2.utils.events]: [0m eta: 1:22:20  iter: 59  total_loss: 2.91  loss_cls: 1.107  loss_box_reg: 0.5708  loss_mask: 0.6718  loss_rpn_cls: 0.3166  loss_rpn_loc: 0.2534  time: 0.6757  data_time: 0.2392  lr: 2.997e-05  max_mem: 5557M
[32m[02/06 18:28:18 d2.utils.events]: [0m eta: 1:23:18  iter: 79  total_loss: 2.836  loss_cls: 0.8743  loss_box_reg: 0.817  loss_mask: 0.6459  loss_rpn_cls: 0.2498  loss_rpn_loc: 0.2402  time: 0.6603  data_time: 0.1177  lr: 3.9961e-05  max_me

In [5]:
%load_ext tensorboard
%tensorboard --logdir output_9.1

In [7]:
%load_ext tensorboard
%tensorboard --logdir output_9.2

In [8]:
%tensorboard --logdir output_9.3

In [16]:
%tensorboard --logdir output_9.4

In [18]:
%tensorboard --logdir output_9.5

In [95]:
%tensorboard --logdir output_9.13

In [9]:
import json
def print_metrics(output):
    with open(f"output_{output}/metrics.json",'r') as f:
        metrics = [json.loads(line) for line in f]
    print("mAP :", np.mean([metrics[i]["mAP IoU"] for i in range(len(metrics)) if 'mAP IoU' in metrics[i]][-10:]))
    print("False negatives :", np.mean([metrics[i]["mask_rcnn/false_negative"] for i in range(len(metrics)) if 'mask_rcnn/false_negative' in metrics[i]][-100:]))
    print("False positives :", np.mean([metrics[i]["mask_rcnn/false_positive"] for i in range(len(metrics)) if 'mask_rcnn/false_positive' in metrics[i]][-100:]))
i=1
while True:
    try:
        print("Experiment ",i)
        print_metrics("9."+str(i))
        i+=1
    except:
        break

Experiment  1
mAP : 0.26894040640600936
False negatives : 0.113113852888519
False positives : 0.16123373938854985
Experiment  2
mAP : 0.2701370579642064
False negatives : 0.11342911120800112
False positives : 0.16442133602449874
Experiment  3
mAP : 0.26494994681832634
False negatives : 0.10975767042699404
False positives : 0.16906982250560273
Experiment  4
mAP : 0.27252464439209206
False negatives : 0.11170060394335944
False positives : 0.16533865012292878
Experiment  5
mAP : 0.27114199741654127
False negatives : 0.11149395340595372
False positives : 0.1638460054617235
Experiment  6
mAP : 0.2721525248472835
False negatives : 0.11033692143454119
False positives : 0.1658628270748274
Experiment  7
mAP : 0.27581772601010607
False negatives : 0.11140320612418314
False positives : 0.1543423591246918
Experiment  8
mAP : 0.2705608873056785
False negatives : 0.10605228047482015
False positives : 0.17166389912850488
Experiment  9
mAP : 0.2718862787511309
False negatives : 0.11195976208029558
Fal