### Notebook 8: Solver (learning rate) tuning

In [6]:
import os
import detectron2
from pathlib import Path
import cv2
import numpy as np
import torch
import pycocotools.mask as mask_util
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.logger import setup_logger
from detectron2.evaluation.evaluator import DatasetEvaluator
setup_logger()

<Logger detectron2 (DEBUG)>

In [None]:
# Directory handed to register_coco_instances as the image root for both splits.
dataDir=Path('../')
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
# Registering a name twice raises inside detectron2, so guard each registration to
# keep this cell safe to re-run without restarting the kernel.
for _split_name, _annotation_file in (
    ('sartorius_train', '../sartorius-annotations-coco-format/annotations_train.json'),
    ('sartorius_val', '../sartorius-annotations-coco-format/annotations_val.json'),
):
    if _split_name not in DatasetCatalog.list():
        register_coco_instances(_split_name, {}, _annotation_file, dataDir)

In [12]:
# Per-class post-processing parameters used by score(); both lists are indexed by
# the predicted class id (the most frequent class among an image's predictions).
# Minimum confidence a predicted instance needs in order to be kept.
score_threshold = [0.204, 0.386, 0.568]
# Minimum number of pixels a (de-overlapped) predicted mask needs in order to be kept.
min_mask_area = [75, 180, 75]
def precision_at(threshold, iou):
    """Count matches in a 2-D IoU matrix at a single IoU threshold.

    Args:
        threshold: IoU value an entry must strictly exceed to count as a match.
        iou: 2-D array of pairwise IoU values.

    Returns:
        Tuple ``(true_positives, false_positives, false_negatives)`` where
        true_positives is the number of rows with exactly one match,
        false_positives the number of columns with no match, and
        false_negatives the number of rows with no match.

    Note:
        When the matrix is laid out as predictions x targets (as score() builds it),
        the "false positive" count is really unmatched targets and the
        "false negative" count unmatched predictions. Callers that only use the sum
        TP + FP + FN are unaffected by that naming.
    """
    hit = iou > threshold
    hits_per_row = np.sum(hit, axis=1)
    hits_per_col = np.sum(hit, axis=0)
    n_tp = np.sum(hits_per_row == 1)
    n_fp = np.sum(hits_per_col == 0)
    n_fn = np.sum(hits_per_row == 0)
    return n_tp, n_fp, n_fn

def score(pred, targ):
    """Mean precision over IoU thresholds 0.50..0.95 for a single image.

    Args:
        pred: model output for one image; ``pred['instances']`` must expose
            ``pred_classes``, ``scores`` and ``pred_masks`` tensors.
        targ: list of ground-truth annotation dicts; each ``'segmentation'`` entry is
            passed straight to ``pycocotools.mask.iou`` (presumably RLE, given the
            'bitmask' mask format used by this notebook - confirm against the dataset).

    Returns:
        The mean over the ten thresholds of TP / (TP + FP + FN). Returns 0.0 when no
        prediction survives the score / area filters or when there is no ground truth.
    """
    # The image-level class is the most frequent predicted class; it selects the
    # per-class score threshold and minimum mask area.
    pred_class = torch.mode(pred['instances'].pred_classes)[0]
    take = pred['instances'].scores >= score_threshold[pred_class]
    pred_masks = pred['instances'].pred_masks[take].cpu().numpy()
    if len(pred_masks) == 0:
        return 0.

    enc_preds = []
    # Pixels already claimed by an earlier mask; later masks lose any overlap.
    used = np.zeros(pred_masks[0].shape, dtype=int)
    for mask in pred_masks:
        mask = (mask * (1-used)).astype(bool)
        if mask.sum() >= min_mask_area[pred_class]:
            used += mask
            enc_preds.append(mask_util.encode(np.asarray(mask, order='F')))
    enc_targs = [annotation['segmentation'] for annotation in targ]

    # mask_util.iou returns an empty list (not a 2-D array) when either side is empty,
    # which would make precision_at fail; nothing can be matched, so the score is 0.
    if not enc_preds or not enc_targs:
        return 0.

    ious = mask_util.iou(enc_preds, enc_targs, [0]*len(enc_targs))
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, ious)
        total = tp + fp + fn
        # Guard the degenerate case where nothing is counted, to avoid a NaN that
        # would poison the averaged metric.
        prec.append(tp / total if total > 0 else 0.)
    return np.mean(prec)

class MAPIOUEvaluator(DatasetEvaluator):
    """Dataset evaluator reporting the mean per-image score() as "mAP IoU"."""

    def __init__(self, dataset_name):
        """Cache ground-truth annotations of ``dataset_name`` keyed by image id."""
        dataset_dicts = DatasetCatalog.get(dataset_name)
        self.annotations_cache = {item['image_id']:item['annotations'] for item in dataset_dicts}
        # Initialise here as well, so process()/evaluate() do not raise AttributeError
        # when the evaluator is used before reset() has been called.
        self.scores = []

    def reset(self):
        """Discard the scores accumulated so far."""
        self.scores = []

    def process(self, inputs, outputs):
        """Score each (input, output) pair and append the result to ``self.scores``.

        Images for which the model returned no instances score 0.
        """
        for inp, out in zip(inputs, outputs):
            if len(out['instances']) == 0:
                self.scores.append(0)
            else:
                targ = self.annotations_cache[inp['image_id']]
                self.scores.append(score(out, targ))

    def evaluate(self):
        """Return the mean of all accumulated per-image scores."""
        return {"mAP IoU": np.mean(self.scores)}

class Trainer(DefaultTrainer):
    """DefaultTrainer variant whose periodic evaluation uses MAPIOUEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Build the evaluator for ``dataset_name``.

        ``cfg`` and ``output_folder`` are accepted for interface compatibility
        but are not used.
        """
        evaluator = MAPIOUEvaluator(dataset_name)
        return evaluator

In [4]:
# learning rate = 0.001
# Start from a fresh config: merge_from_file only overrides keys present in the YAML,
# so options set by other experiment cells (e.g. AMP, GAMMA, STEPS) would otherwise
# leak into this run when cells are executed out of order.
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT = 'bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = []
# Iterations per epoch; computed once because every DatasetCatalog.get call reloads
# the annotation file.
iters_per_epoch = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.SOLVER.CHECKPOINT_PERIOD = iters_per_epoch
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = iters_per_epoch  # Once per epoch

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
# Archive this run's artifacts under an experiment-specific directory name.
os.rename(cfg.OUTPUT_DIR, "output_8.1")

[32m[02/01 19:38:05 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/01 19:38:06 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/01 19:38:09 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/01 19:38:10 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[02/01 19:38:24 d2.utils.events]: [0m eta: 1:00:42  iter: 19  total_loss: 3.078  loss_cls: 1.381  loss_box_reg: 0.3886  loss_mask: 0.6933  loss_rpn_cls: 0.3497  loss_rpn_loc: 0.2589  time: 0.6826  data_time: 0.3387  lr: 1.9981e-05  max_mem: 5115M
[32m[02/01 19:38:36 d2.utils.events]: [0m eta: 0:58:15  iter: 39  total_loss: 2.901  loss_cls: 1.239  loss_box_reg: 0.4499  loss_mask: 0.6791  loss_rpn_cls: 0.2923  loss_rpn_loc: 0.2585  time: 0.6259  data_time: 0.2776  lr: 3.9961e-05  max_mem: 5115M
[32m[02/01 19:38:47 d2.utils.events]: [0m eta: 0:56:31  iter: 59  total_loss: 2.596  loss_cls: 0.9934  loss_box_reg: 0.5051  loss_mask: 0.6508  loss_rpn_cls: 0.2319  loss_rpn_loc: 0.2518  time: 0.6024  data_time: 0.2445  lr: 5.9941e-05  max_mem: 5115M
[32m[02/01 19:38:56 d2.utils.events]: [0m eta: 0:53:36  iter: 79  total_loss: 2.371  loss_cls: 0.7882  loss_box_reg: 0.5218  loss_mask: 0.594  loss_rpn_cls: 0.2065  loss_rpn_loc: 0.2304  time: 0.5712  data_time: 0.1790  lr: 7.9921e-05  ma

In [4]:
# learning rate = 0.0005
# Start from a fresh config: merge_from_file only overrides keys present in the YAML,
# so options set by other experiment cells (e.g. AMP, GAMMA, STEPS) would otherwise
# leak into this run when cells are executed out of order.
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT = 'bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = []
# Iterations per epoch; computed once because every DatasetCatalog.get call reloads
# the annotation file.
iters_per_epoch = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.SOLVER.CHECKPOINT_PERIOD = iters_per_epoch
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = iters_per_epoch  # Once per epoch

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
# Archive this run's artifacts under an experiment-specific directory name.
os.rename(cfg.OUTPUT_DIR, "output_8.2")

[32m[02/01 21:45:36 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/01 21:45:37 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/01 21:45:39 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/01 21:45:40 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[02/01 21:45:53 d2.utils.events]: [0m eta: 0:55:37  iter: 19  total_loss: 3.118  loss_cls: 1.448  loss_box_reg: 0.328  loss_mask: 0.6905  loss_rpn_cls: 0.353  loss_rpn_loc: 0.2658  time: 0.6283  data_time: 0.2861  lr: 9.9905e-06  max_mem: 4775M
[32m[02/01 21:46:05 d2.utils.events]: [0m eta: 0:56:37  iter: 39  total_loss: 3.039  loss_cls: 1.367  loss_box_reg: 0.402  loss_mask: 0.6856  loss_rpn_cls: 0.3087  loss_rpn_loc: 0.2504  time: 0.6039  data_time: 0.2556  lr: 1.998e-05  max_mem: 4775M
[32m[02/01 21:46:18 d2.utils.events]: [0m eta: 0:58:04  iter: 59  total_loss: 2.88  loss_cls: 1.226  loss_box_reg: 0.3949  loss_mask: 0.6678  loss_rpn_cls: 0.2798  loss_rpn_loc: 0.2832  time: 0.6321  data_time: 0.3609  lr: 2.997e-05  max_mem: 4775M
[32m[02/01 21:46:30 d2.utils.events]: [0m eta: 0:58:29  iter: 79  total_loss: 2.645  loss_cls: 1.042  loss_box_reg: 0.4243  loss_mask: 0.6509  loss_rpn_cls: 0.2782  loss_rpn_loc: 0.2497  time: 0.6229  data_time: 0.2703  lr: 3.9961e-05  max_mem: 

In [4]:
# learning rate = 0.0001
# Start from a fresh config: merge_from_file only overrides keys present in the YAML,
# so options set by other experiment cells (e.g. AMP, GAMMA, STEPS) would otherwise
# leak into this run when cells are executed out of order.
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT = 'bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0001
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = []
# Iterations per epoch; computed once because every DatasetCatalog.get call reloads
# the annotation file.
iters_per_epoch = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.SOLVER.CHECKPOINT_PERIOD = iters_per_epoch
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = iters_per_epoch  # Once per epoch

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
# Archive this run's artifacts under an experiment-specific directory name.
os.rename(cfg.OUTPUT_DIR, "output_8.3")

[32m[02/02 09:16:59 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/02 09:16:59 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/02 09:17:03 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/02 09:17:04 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[02/02 09:17:17 d2.utils.events]: [0m eta: 0:58:58  iter: 19  total_loss: 3.105  loss_cls: 1.473  loss_box_reg: 0.3957  loss_mask: 0.6937  loss_rpn_cls: 0.3369  loss_rpn_loc: 0.2583  time: 0.5612  data_time: 0.2377  lr: 1.9981e-06  max_mem: 4664M
[32m[02/02 09:17:28 d2.utils.events]: [0m eta: 0:55:27  iter: 39  total_loss: 3.152  loss_cls: 1.463  loss_box_reg: 0.3548  loss_mask: 0.692  loss_rpn_cls: 0.331  loss_rpn_loc: 0.2538  time: 0.5611  data_time: 0.2547  lr: 3.9961e-06  max_mem: 4664M
[32m[02/02 09:17:40 d2.utils.events]: [0m eta: 0:55:06  iter: 59  total_loss: 3.099  loss_cls: 1.421  loss_box_reg: 0.4539  loss_mask: 0.6878  loss_rpn_cls: 0.2877  loss_rpn_loc: 0.2332  time: 0.5710  data_time: 0.2770  lr: 5.9941e-06  max_mem: 4664M
[32m[02/02 09:17:52 d2.utils.events]: [0m eta: 0:54:30  iter: 79  total_loss: 3  loss_cls: 1.364  loss_box_reg: 0.3917  loss_mask: 0.6829  loss_rpn_cls: 0.3409  loss_rpn_loc: 0.2737  time: 0.5776  data_time: 0.2926  lr: 7.9921e-06  max_mem: 

In [9]:
# learning rate = 0.0007
# Start from a fresh config: merge_from_file only overrides keys present in the YAML,
# so options set by other experiment cells (e.g. AMP, GAMMA, STEPS) would otherwise
# leak into this run when cells are executed out of order.
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT = 'bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0007
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = []
# Iterations per epoch; computed once because every DatasetCatalog.get call reloads
# the annotation file.
iters_per_epoch = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.SOLVER.CHECKPOINT_PERIOD = iters_per_epoch
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = iters_per_epoch  # Once per epoch

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
# Archive this run's artifacts under an experiment-specific directory name.
os.rename(cfg.OUTPUT_DIR, "output_8.4")

[32m[02/02 13:16:33 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/02 13:16:34 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/02 13:16:35 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/02 13:16:36 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride


[32m[02/02 13:16:51 d2.utils.events]: [0m eta: 1:52:52  iter: 19  total_loss: 3.11  loss_cls: 1.369  loss_box_reg: 0.4056  loss_mask: 0.6925  loss_rpn_cls: 0.3542  loss_rpn_loc: 0.2703  time: 0.7189  data_time: 0.4181  lr: 1.3987e-05  max_mem: 6565M
[32m[02/02 13:17:01 d2.utils.events]: [0m eta: 1:01:12  iter: 39  total_loss: 2.943  loss_cls: 1.279  loss_box_reg: 0.4505  loss_mask: 0.6801  loss_rpn_cls: 0.2729  loss_rpn_loc: 0.2363  time: 0.6147  data_time: 0.2078  lr: 2.7973e-05  max_mem: 6565M
[32m[02/02 13:17:15 d2.utils.events]: [0m eta: 0:57:42  iter: 59  total_loss: 2.686  loss_cls: 1.083  loss_box_reg: 0.3748  loss_mask: 0.6633  loss_rpn_cls: 0.2923  loss_rpn_loc: 0.2472  time: 0.6449  data_time: 0.3741  lr: 4.1959e-05  max_mem: 6565M
[32m[02/02 13:17:27 d2.utils.events]: [0m eta: 0:57:15  iter: 79  total_loss: 2.523  loss_cls: 0.8916  loss_box_reg: 0.497  loss_mask: 0.6369  loss_rpn_cls: 0.2177  loss_rpn_loc: 0.249  time: 0.6333  data_time: 0.2838  lr: 5.5945e-05  max_m

In [25]:
# learning rate = 0.0005 with AMP enabled (Automatic Mixed Precision)
# Start from a fresh config: merge_from_file only overrides keys present in the YAML,
# so options set by other experiment cells (e.g. GAMMA, STEPS) would otherwise leak
# into this run when cells are executed out of order.
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT = 'bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.AMP.ENABLED = True
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = []
# Iterations per epoch; computed once because every DatasetCatalog.get call reloads
# the annotation file.
iters_per_epoch = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.SOLVER.CHECKPOINT_PERIOD = iters_per_epoch
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = iters_per_epoch  # Once per epoch

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
# Archive this run's artifacts under an experiment-specific directory name.
os.rename(cfg.OUTPUT_DIR, "output_8.5")

[32m[02/02 15:45:50 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/02 15:45:51 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/02 15:45:52 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/02 15:45:53 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride


[32m[02/02 15:46:02 d2.utils.events]: [0m eta: 0:38:54  iter: 19  total_loss: 3.116  loss_cls: 1.411  loss_box_reg: 0.4753  loss_mask: 0.694  loss_rpn_cls: 0.3239  loss_rpn_loc: 0.2377  time: 0.4016  data_time: 0.2358  lr: 9.9905e-06  max_mem: 6664M
[32m[02/02 15:46:13 d2.utils.events]: [0m eta: 0:39:36  iter: 39  total_loss: 3.003  loss_cls: 1.329  loss_box_reg: 0.4428  loss_mask: 0.6866  loss_rpn_cls: 0.2815  loss_rpn_loc: 0.2584  time: 0.4883  data_time: 0.3380  lr: 1.998e-05  max_mem: 6664M
[32m[02/02 15:46:26 d2.utils.events]: [0m eta: 0:39:36  iter: 59  total_loss: 2.861  loss_cls: 1.216  loss_box_reg: 0.425  loss_mask: 0.6702  loss_rpn_cls: 0.2852  loss_rpn_loc: 0.2561  time: 0.5371  data_time: 0.3998  lr: 2.997e-05  max_mem: 6664M
[32m[02/02 15:46:40 d2.utils.events]: [0m eta: 0:42:14  iter: 79  total_loss: 2.616  loss_cls: 1.04  loss_box_reg: 0.4201  loss_mask: 0.6448  loss_rpn_cls: 0.2713  loss_rpn_loc: 0.2399  time: 0.5777  data_time: 0.4618  lr: 3.9961e-05  max_mem:

In [30]:
# learning rate = 0.0005 with gamma=0.1 and lr reduction every 1000 iterations starting from iteration 2000
# A fresh config keeps this run independent of whatever other experiment cells set.
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
# The learning rate is multiplied by GAMMA at every iteration listed in STEPS.
cfg.SOLVER.GAMMA = 0.1
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
# Iterations per epoch; computed once because every DatasetCatalog.get call reloads
# the annotation file.
iters_per_epoch = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.SOLVER.CHECKPOINT_PERIOD = iters_per_epoch
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = iters_per_epoch  # Once per epoch

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
# Archive this run's artifacts under an experiment-specific directory name.
os.rename(cfg.OUTPUT_DIR, "output_8.6")

[32m[02/02 19:15:13 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/02 19:15:14 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/02 19:15:15 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/02 19:15:16 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride


[32m[02/02 19:15:29 d2.utils.events]: [0m eta: 0:53:36  iter: 19  total_loss: 3.041  loss_cls: 1.378  loss_box_reg: 0.3663  loss_mask: 0.6953  loss_rpn_cls: 0.3455  loss_rpn_loc: 0.2661  time: 0.5935  data_time: 0.3090  lr: 9.9905e-06  max_mem: 6664M
[32m[02/02 19:15:37 d2.utils.events]: [0m eta: 0:52:47  iter: 39  total_loss: 3.02  loss_cls: 1.322  loss_box_reg: 0.5111  loss_mask: 0.6873  loss_rpn_cls: 0.2562  loss_rpn_loc: 0.22  time: 0.4970  data_time: 0.1128  lr: 1.998e-05  max_mem: 6664M
[32m[02/02 19:15:46 d2.utils.events]: [0m eta: 0:52:43  iter: 59  total_loss: 2.82  loss_cls: 1.193  loss_box_reg: 0.4594  loss_mask: 0.6696  loss_rpn_cls: 0.2498  loss_rpn_loc: 0.2324  time: 0.4883  data_time: 0.1691  lr: 2.997e-05  max_mem: 6664M
[32m[02/02 19:15:59 d2.utils.events]: [0m eta: 0:52:54  iter: 79  total_loss: 2.654  loss_cls: 0.9854  loss_box_reg: 0.3739  loss_mask: 0.6468  loss_rpn_cls: 0.2959  loss_rpn_loc: 0.2916  time: 0.5195  data_time: 0.3065  lr: 3.9961e-05  max_mem:

In [17]:
# learning rate = 0.0005 with gamma=0.8 and lr reduction every 1000 iterations starting from iteration 2000
# A fresh config keeps this run independent of whatever other experiment cells set.
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = 'output_1.1/best_model.pth'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
# The learning rate is multiplied by GAMMA at every iteration listed in STEPS.
cfg.SOLVER.GAMMA = 0.8
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = list(range(2000,10000,1000))
# Iterations per epoch; computed once because every DatasetCatalog.get call reloads
# the annotation file.
iters_per_epoch = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH
cfg.SOLVER.CHECKPOINT_PERIOD = iters_per_epoch
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
cfg.TEST.EVAL_PERIOD = iters_per_epoch  # Once per epoch

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
# Archive this run's artifacts under an experiment-specific directory name.
os.rename(cfg.OUTPUT_DIR, "output_8.7")

[32m[02/03 14:38:56 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/03 14:38:57 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ../sartorius-annotations-coco-format/annotations_train.json
[32m[02/03 14:38:58 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (9, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (9,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (32, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (32,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (8, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) in th

[32m[02/03 14:38:59 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride


[32m[02/03 14:39:11 d2.utils.events]: [0m eta: 0:55:52  iter: 19  total_loss: 3.033  loss_cls: 1.345  loss_box_reg: 0.4651  loss_mask: 0.6917  loss_rpn_cls: 0.3185  loss_rpn_loc: 0.2383  time: 0.5531  data_time: 0.2877  lr: 9.9905e-06  max_mem: 6514M
[32m[02/03 14:39:23 d2.utils.events]: [0m eta: 0:55:15  iter: 39  total_loss: 2.989  loss_cls: 1.275  loss_box_reg: 0.3904  loss_mask: 0.6853  loss_rpn_cls: 0.322  loss_rpn_loc: 0.2452  time: 0.5804  data_time: 0.2821  lr: 1.998e-05  max_mem: 6514M
[32m[02/03 14:39:36 d2.utils.events]: [0m eta: 0:54:45  iter: 59  total_loss: 2.794  loss_cls: 1.16  loss_box_reg: 0.4584  loss_mask: 0.6686  loss_rpn_cls: 0.2149  loss_rpn_loc: 0.2261  time: 0.5976  data_time: 0.3071  lr: 2.997e-05  max_mem: 6909M
[32m[02/03 14:39:48 d2.utils.events]: [0m eta: 0:54:03  iter: 79  total_loss: 2.596  loss_cls: 0.9762  loss_box_reg: 0.4754  loss_mask: 0.6427  loss_rpn_cls: 0.2733  loss_rpn_loc: 0.2543  time: 0.5984  data_time: 0.2946  lr: 3.9961e-05  max_me

In [10]:
import json

def print_metrics(output):
    """Print summary metrics read from ``output_{output}/metrics.json``.

    The file is parsed as one JSON object per line. For each metric only the records
    that contain its key are used:

    * "mAP IoU": mean of the last 10 values (one value per evaluation, i.e. per epoch).
    * "mask_rcnn/false_negative" / "mask_rcnn/false_positive": mean of the last 100 values.

    A metric with no recorded values is printed as nan.

    Args:
        output: experiment suffix, e.g. ``"8.1"``.

    Raises:
        FileNotFoundError: if the metrics file does not exist.
    """
    with open(f"output_{output}/metrics.json",'r') as f:
        metrics = [json.loads(line) for line in f]

    def tail_mean(key, last):
        # Mean of the final `last` recorded values for `key`.
        values = [record[key] for record in metrics if key in record][-last:]
        return np.mean(values) if values else float('nan')

    print("mAP :", tail_mean("mAP IoU", 10)) # Average mAP in the last 10 epochs
    print("False negatives :", tail_mean("mask_rcnn/false_negative", 100))
    print("False positives :", tail_mean("mask_rcnn/false_positive", 100))

# Report every consecutively numbered experiment that has a metrics file. Stopping on
# a missing file (rather than a bare `except`) lets genuine errors, such as a corrupt
# metrics file, surface instead of silently ending the loop.
experiment_idx = 1
while os.path.isfile(f"output_8.{experiment_idx}/metrics.json"):
    print("Experiment ", experiment_idx)
    print_metrics("8." + str(experiment_idx))
    experiment_idx += 1

Experiment  1
mAP : 0.26041674840123386
False negatives : 0.1091769381932369
False positives : 0.15870824222011362
Experiment  2
mAP : 0.26435589723958464
False negatives : 0.11097223966023297
False positives : 0.15866990742949222
Experiment  3
mAP : 0.26081937610254613
False negatives : 0.11307294932245454
False positives : 0.1617883726075458
Experiment  4
mAP : 0.2636338649191153
False negatives : 0.10976414966407033
False positives : 0.1585849269005686
Experiment  5
mAP : 0.2668117103727
False negatives : 0.10974907050829233
False positives : 0.1589980911079324
Experiment  6
mAP : 0.26120385696089043
False negatives : 0.11449647063904518
False positives : 0.16376188109298417
Experiment  7
mAP : 0.2668593156088135
False negatives : 0.11234782869593239
False positives : 0.16108722956894572
Experiment  8


In [1]:
# Load the TensorBoard notebook extension so the %tensorboard magic can be used.
%load_ext tensorboard

In [2]:
# Open a TensorBoard dashboard on one experiment's output directory (one cell per run).
%tensorboard --logdir output_8.1

In [8]:
%tensorboard --logdir output_8.2

In [11]:
%tensorboard --logdir output_8.3

In [14]:
%tensorboard --logdir output_8.4

In [9]:
%tensorboard --logdir output_8.5

In [13]:
%tensorboard --logdir output_8.6

In [12]:
%tensorboard --logdir output_8.7