In [1]:
import os

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# Register datasets
# Registering a name that is already in the catalog raises, so guard each
# registration to keep this cell re-runnable within the same kernel session.
for _name, _json_file, _image_root in (
    ("coco_train", "annotations/instances_train2017.json", "train2017"),
    ("coco_val", "annotations/instances_val2017.json", "val2017"),
):
    if _name not in DatasetCatalog.list():
        register_coco_instances(_name, {}, _json_file, _image_root)

# Set configuration
cfg = get_cfg()
# Start from the model-zoo Mask R-CNN R101-FPN (3x schedule) config and its
# matching pretrained checkpoint.
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("coco_train",)
cfg.DATASETS.TEST = ("coco_val",)
cfg.DATALOADER.NUM_WORKERS = 4
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.MAX_ITER = 70000
cfg.SOLVER.STEPS = [30000, 50000]
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 80
cfg.OUTPUT_DIR = "./output"
# Create the output directory up front (as the official Detectron2 tutorial
# does) so metric and checkpoint writes do not fail on a fresh run.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Custom Trainer class to add evaluator
class CocoTrainer(DefaultTrainer):
    """DefaultTrainer subclass that supplies a COCO-style evaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Build a ``COCOEvaluator`` for ``dataset_name``.

        Args:
            cfg: Config node; ``cfg.OUTPUT_DIR`` is the fallback output
                location and ``cfg.TEST.KEYPOINT_OKS_SIGMAS`` is forwarded
                to the evaluator.
            dataset_name: Name of the registered dataset to evaluate on.
            output_folder: Directory handed to the evaluator as its output
                directory. Defaults to ``cfg.OUTPUT_DIR`` when ``None``.

        Returns:
            A ``COCOEvaluator`` created with ``distributed=False``.
        """
        if output_folder is None:
            output_folder = cfg.OUTPUT_DIR
        # Passing ``cfg`` as the second positional argument is deprecated in
        # current Detectron2 (that slot is now ``tasks``); use keywords and
        # forward the keypoint sigmas explicitly, as the deprecated path did.
        return COCOEvaluator(
            dataset_name,
            distributed=False,
            output_dir=output_folder,
            kpt_oks_sigmas=cfg.TEST.KEYPOINT_OKS_SIGMAS,
        )

# Train the model
trainer = CocoTrainer(cfg)
# resume=False: load cfg.MODEL.WEIGHTS instead of resuming from a checkpoint
# previously written to cfg.OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()

# If validation or testing is needed
evaluator = CocoTrainer.build_evaluator(cfg, "coco_val")
val_loader = build_detection_test_loader(cfg, "coco_val")
# Keep the returned metrics and print them explicitly; otherwise they are
# only shown when this call happens to be the last expression of a notebook
# cell, and are lost entirely when the code runs as a script.
eval_results = inference_on_dataset(trainer.model, val_loader, evaluator)
print(eval_results)


[32m[05/27 13:57:30 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[05/27 13:58:07 d2.utils.events]: [0m eta: 12:56:28  iter: 19  total_loss: 0.8001  loss_cls: 0.2153  loss_box_reg: 0.2893  loss_mask: 0.1942  loss_rpn_cls: 0.01537  loss_rpn_loc: 0.0468    time: 0.6579  last_time: 0.6358  data_time: 0.0160  last_data_time: 0.0057   lr: 1.9981e-05  max_mem: 6139M


2024-05-27 13:58:07.586742: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[32m[05/27 13:58:22 d2.utils.events]: [0m eta: 12:58:36  iter: 39  total_loss: 0.8321  loss_cls: 0.2175  loss_box_reg: 0.3018  loss_mask: 0.2096  loss_rpn_cls: 0.01733  loss_rpn_loc: 0.04987    time: 0.6726  last_time: 0.6516  data_time: 0.0064  last_data_time: 0.0048   lr: 3.9961e-05  max_mem: 6573M
[32m[05/27 13:58:35 d2.utils.events]: [0m eta: 12:54:26  iter: 59  total_loss: 0.7174  loss_cls: 0.1633  loss_box_reg: 0.2699  loss_mask: 0.2089  loss_rpn_cls: 0.009401  loss_rpn_loc: 0.03703    time: 0.6700  last_time: 0.7046  data_time: 0.0063  last_data_time: 0.0025   lr: 5.9941e-05  max_mem: 6575M
[32m[05/27 13:58:49 d2.utils.events]: [0m eta: 12:58:56  iter: 79  total_loss: 0.7109  loss_cls: 0.1852  loss_box_reg: 0.2746  loss_mask: 0.196  loss_rpn_cls: 0.01482  loss_rpn_loc: 0.04299    time: 0.6712  last_time: 0.6789  data_time: 0.0054  last_data_time: 0.0017   lr: 7.9921e-05  max_mem: 6575M
[32m[05/27 13:59:02 d2.utils.events]: [0m eta: 12:57:56  iter: 99  total_loss: 0.804  

KeyboardInterrupt: 