In [None]:
import os
import wandb
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog, build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data.datasets import register_coco_instances

def register_datasets():
    """Register the train/val/test COCO-format splits with detectron2.

    Each split is registered as ``my_dataset_<split>`` from its annotation
    JSON and image directory, with empty extra metadata.

    Names that are already present in ``DatasetCatalog`` are skipped, so the
    function can be called more than once (for example when a notebook cell
    is re-run) without hitting the catalog's duplicate-registration check.
    """
    # (dataset name, COCO annotation file, image root directory)
    splits = (
        ("my_dataset_train", "train.json", "train"),
        ("my_dataset_val", "validation.json", "val"),
        ("my_dataset_test", "test.json", "test"),
    )
    already_registered = set(DatasetCatalog.list())
    for name, json_file, image_root in splits:
        if name in already_registered:
            continue
        register_coco_instances(name, {}, json_file, image_root)


import torch
from detectron2.solver.build import get_default_optimizer_params
from detectron2.solver.build import maybe_add_gradient_clipping
class MyTrainer(DefaultTrainer):
    """``DefaultTrainer`` variant that uses AdamW and a COCO evaluator."""

    @classmethod
    def build_optimizer(cls, cfg, model):
        """Create the AdamW optimizer used for training.

        The parameter groups come from ``get_default_optimizer_params(model)``.
        The optimizer class is ``torch.optim.AdamW`` passed through
        ``maybe_add_gradient_clipping`` together with ``cfg``. Learning rate
        and weight decay are read from ``cfg.SOLVER.BASE_LR`` and
        ``cfg.SOLVER.WEIGHT_DECAY``.
        """
        param_groups = get_default_optimizer_params(model)
        optimizer_cls = maybe_add_gradient_clipping(cfg, torch.optim.AdamW)
        solver = cfg.SOLVER
        return optimizer_cls(
            param_groups,
            lr=solver.BASE_LR,
            weight_decay=solver.WEIGHT_DECAY,
        )

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Create a ``COCOEvaluator`` for ``dataset_name``.

        When ``output_folder`` is not given, the ``coco_eval`` directory is
        created if needed and used instead.
        """
        target_dir = output_folder
        if target_dir is None:
            target_dir = "coco_eval"
            os.makedirs(target_dir, exist_ok=True)
        return COCOEvaluator(dataset_name, cfg, False, target_dir)

class CocoTrainer(DefaultTrainer):
    """``DefaultTrainer`` that evaluates with ``COCOEvaluator``."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a ``COCOEvaluator`` for ``dataset_name``.

        Falls back to the ``coco_eval`` directory (created on demand) when the
        caller does not supply ``output_folder``.
        """
        results_dir = output_folder
        if results_dir is None:
            results_dir = "coco_eval"
            os.makedirs(results_dir, exist_ok=True)
        return COCOEvaluator(dataset_name, cfg, False, results_dir)

def train():
    """Fine-tune a COCO-pretrained Mask R-CNN (R50-FPN, 1x) on ``my_dataset_train``.

    Builds the config, creates ``cfg.OUTPUT_DIR`` if needed, and runs
    ``MyTrainer`` from the model-zoo checkpoint (no resume). Evaluation on
    ``my_dataset_val`` is requested every ``cfg.TEST.EVAL_PERIOD`` iterations.
    The datasets must already be registered under the names used here.
    """
    config_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_name))

    # Data
    cfg.DATASETS.TRAIN = ("my_dataset_train",)
    cfg.DATASETS.TEST = ("my_dataset_val",)
    cfg.DATALOADER.NUM_WORKERS = 4

    # Model: start from the COCO checkpoint, single foreground class.
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

    # Solver. BASE_LR used to be assigned twice (0.00306982, then 8e-4); only
    # the last assignment ever took effect, so only that value is kept.
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 8e-4
    cfg.SOLVER.WEIGHT_DECAY = 0.0001
    cfg.SOLVER.MAX_ITER = 1000
    cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupCosineLR"
    # Warm up over the first 20% of the schedule.
    cfg.SOLVER.WARMUP_ITERS = int(0.2 * cfg.SOLVER.MAX_ITER)
    # NOTE(review): GAMMA is presumably only consumed by step-based
    # schedulers, not by WarmupCosineLR -- confirm before relying on it.
    cfg.SOLVER.GAMMA = 0.05
    cfg.SOLVER.CLIP_GRADIENTS.ENABLED = True
    cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"
    cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
    cfg.SOLVER.AMP.ENABLED = True

    # Periodic evaluation on cfg.DATASETS.TEST.
    cfg.TEST.EVAL_PERIOD = 500

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = MyTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

def test():
    """Evaluate the fine-tuned model on ``my_dataset_test`` and print the results.

    Loads ``model_final.pth`` from ``cfg.OUTPUT_DIR``, runs inference over
    the test split with a ``COCOEvaluator``, and prints whatever
    ``inference_on_dataset`` returns. The test dataset must already be
    registered and the weights file must exist.
    """
    # Same model-zoo config that train() uses, so the architecture being
    # evaluated is built from the same file as the one that was trained.
    config_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_name))
    # Read weights from the directory train() creates (cfg.OUTPUT_DIR)
    # instead of a separately hard-coded "output" path.
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    # Only detections scoring at least 0.85 are kept at inference time.
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.85
    cfg.DATASETS.TEST = ("my_dataset_test",)
    predictor = DefaultPredictor(cfg)

    evaluator = COCOEvaluator(
        "my_dataset_test", cfg, False, output_dir=cfg.OUTPUT_DIR
    )
    test_loader = build_detection_test_loader(cfg, "my_dataset_test")
    results = inference_on_dataset(predictor.model, test_loader, evaluator)
    print(results)

# Register the train/val/test splits first: train() and test() refer to the
# datasets by the names registered here.
register_datasets()

# Fine-tune the model on "my_dataset_train" (runs MyTrainer inside train()).
train()

# Evaluate on "my_dataset_test". test() loads "model_final.pth" from the
# output directory, so it is run after train() has finished.
test()

[32m[03/04 17:32:52 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[03/04 17:32:52 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[03/04 17:33:01 d2.utils.events]: [0m eta: 0:05:51  iter: 19  total_loss: 2.057  loss_cls: 0.3429  loss_box_reg: 0.9732  loss_mask: 0.6837  loss_rpn_cls: 0.0007385  loss_rpn_loc: 0.004262    time: 0.3630  last_time: 0.3724  data_time: 0.0169  last_data_time: 0.0034   lr: 6.9467e-05  max_mem: 1639M




[32m[03/04 17:33:12 d2.utils.events]: [0m eta: 0:05:52  iter: 39  total_loss: 1.201  loss_cls: 0.1037  loss_box_reg: 0.6719  loss_mask: 0.3558  loss_rpn_cls: 0.0004404  loss_rpn_loc: 0.006151    time: 0.3710  last_time: 0.3787  data_time: 0.0035  last_data_time: 0.0035   lr: 0.00014175  max_mem: 1639M
[32m[03/04 17:33:19 d2.utils.events]: [0m eta: 0:05:46  iter: 59  total_loss: 0.6734  loss_cls: 0.06037  loss_box_reg: 0.4166  loss_mask: 0.1944  loss_rpn_cls: 0.0001401  loss_rpn_loc: 0.004081    time: 0.3678  last_time: 0.3683  data_time: 0.0040  last_data_time: 0.0036   lr: 0.00021403  max_mem: 1639M
[32m[03/04 17:33:26 d2.utils.events]: [0m eta: 0:05:38  iter: 79  total_loss: 0.6853  loss_cls: 0.07796  loss_box_reg: 0.4367  loss_mask: 0.1628  loss_rpn_cls: 0.0004354  loss_rpn_loc: 0.004412    time: 0.3656  last_time: 0.3525  data_time: 0.0034  last_data_time: 0.0031   lr: 0.00028631  max_mem: 1655M
[32m[03/04 17:33:33 d2.utils.events]: [0m eta: 0:05:28  iter: 99  total_loss: 0