In [1]:
import json

def make_annotation_ids_unique(json_file):
    """Rewrite a COCO-style JSON file in place so every annotation ``id`` is unique.

    Annotations are visited in file order. The first annotation to use an id
    keeps it; a later annotation whose id is already taken receives the
    smallest id >= its own that no earlier annotation holds, so the ids
    ``[1, 1, 2]`` become ``[1, 2, 3]``. The rest of the loaded document is
    written back together with the updated annotations.

    Args:
        json_file: Path of the JSON file to read and then overwrite. The
            document must contain an ``'annotations'`` list whose items each
            carry an ``'id'``.

    Raises:
        KeyError: If ``'annotations'`` or an annotation's ``'id'`` is missing.
        TypeError: If a duplicated id does not support ``+ 1``.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    taken = set()
    # skip_to[i] = j means: every id i, i+1, ..., j-1 is already taken.
    # Later collisions jump over runs that were scanned before instead of
    # re-walking them one id at a time (quadratic when ids repeat heavily).
    skip_to = {}
    for ann in data['annotations']:
        ann_id = ann['id']
        probed = []
        while ann_id in taken:
            probed.append(ann_id)
            ann_id = skip_to.get(ann_id, ann_id + 1)
        # ann_id is about to be taken, so the next free candidate for every
        # id we stepped over is at least ann_id + 1.
        for earlier in probed:
            skip_to[earlier] = ann_id + 1
        taken.add(ann_id)
        ann['id'] = ann_id

    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(data, f)

# Fix annotation files for training and validation sets
# Each file is rewritten in place, training split first, then validation.
for annotation_file in ('train.json', 'val.json'):
    make_annotation_ids_unique(annotation_file)


In [2]:
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# Register custom dataset
# Register a split only when its name is not in the catalog yet: registering
# the same name twice raises, which made this cell fail whenever it was re-run
# in the same kernel. Restart the kernel to pick up a changed annotation file
# or image directory for an already-registered name.
for dataset_name, annotation_json, image_root in (
    ("custom_train", "train.json", "train"),
    ("custom_val", "val.json", "val"),
):
    if dataset_name not in DatasetCatalog.list():
        register_coco_instances(dataset_name, {}, annotation_json, image_root)

# Set configuration
cfg = get_cfg()
# Start from the model-zoo Mask R-CNN R101-FPN config and its checkpoint,
# then override the dataset, solver and head settings below.
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("custom_train",)
cfg.DATASETS.TEST = ("custom_val",)
cfg.DATALOADER.NUM_WORKERS = 4
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.MAX_ITER = 70000
cfg.SOLVER.STEPS = [30000, 50000]
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 80  # Adjust according to the number of classes in your dataset
# Disable mask head: only boxes are trained and evaluated. The checkpoint's
# roi_heads.mask_head.* weights are then reported as unused when it is loaded.
cfg.MODEL.MASK_ON = False
cfg.OUTPUT_DIR = "./output"

# Custom Trainer class to add evaluator
class CocoTrainer(DefaultTrainer):
    """DefaultTrainer whose evaluator is a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Build a non-distributed COCOEvaluator for ``dataset_name``.

        Args:
            cfg: Config node. ``cfg.OUTPUT_DIR`` is used when ``output_folder``
                is not given, and ``cfg.TEST.KEYPOINT_OKS_SIGMAS`` is forwarded
                to the evaluator.
            dataset_name: Name of the registered dataset to evaluate.
            output_folder: Directory handed to the evaluator as its output
                directory; defaults to ``cfg.OUTPUT_DIR``.

        Returns:
            The constructed ``COCOEvaluator``.
        """
        if output_folder is None:
            output_folder = cfg.OUTPUT_DIR
        # Pass explicit keyword arguments: handing the config object to the
        # evaluator positionally is the deprecated calling form. The keypoint
        # sigmas are forwarded because that form used to read them from cfg.
        return COCOEvaluator(
            dataset_name,
            distributed=False,
            output_dir=output_folder,
            kpt_oks_sigmas=cfg.TEST.KEYPOINT_OKS_SIGMAS,
        )

# Train the model
trainer = CocoTrainer(cfg)
# resume=False is passed explicitly, so the run starts at iteration 0
# (see the "Starting training from iteration 0" line in the cell output).
trainer.resume_or_load(resume=False)
trainer.train()

# Score the trained model on the validation split; the metrics returned by
# the last call are what the cell displays.
evaluator = CocoTrainer.build_evaluator(cfg, dataset_name="custom_val")
val_loader = build_detection_test_loader(cfg, "custom_val")
inference_on_dataset(trainer.model, val_loader, evaluator)


[32m[05/23 22:41:59 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

The checkpoint state_dict contains keys that are not used by the model:
  [35mroi_heads.mask_head.mask_fcn1.{bias, weight}[0m
  [35mroi_heads.mask_head.mask_fcn2.{bias, weight}[0m
  [35mroi_heads.mask_head.mask_fcn3.{bias, weight}[0m
  [35mroi_heads.mask_head.mask_fcn4.{bias, weight}[0m
  [35mroi_heads.mask_head.deconv.{bias, weight}[0m
  [35mroi_heads.mask_head.predictor.{bias, weight}[0m


[32m[05/23 22:42:00 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[05/23 22:42:15 d2.utils.events]: [0m eta: 13:17:02  iter: 19  total_loss: 1.741  loss_cls: 1.322  loss_box_reg: 0.3404  loss_rpn_cls: 0.04781  loss_rpn_loc: 0.009895    time: 0.6743  last_time: 0.6866  data_time: 0.0137  last_data_time: 0.0052   lr: 1.9981e-05  max_mem: 6068M


2024-05-23 22:42:25.068257: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[32m[05/23 22:42:53 d2.utils.events]: [0m eta: 13:17:07  iter: 39  total_loss: 0.9661  loss_cls: 0.552  loss_box_reg: 0.2806  loss_rpn_cls: 0.02333  loss_rpn_loc: 0.007459    time: 0.6725  last_time: 0.7110  data_time: 0.0056  last_data_time: 0.0059   lr: 3.9961e-05  max_mem: 6068M
[32m[05/23 22:43:06 d2.utils.events]: [0m eta: 13:18:02  iter: 59  total_loss: 0.4497  loss_cls: 0.1687  loss_box_reg: 0.2444  loss_rpn_cls: 0.01076  loss_rpn_loc: 0.00543    time: 0.6782  last_time: 0.7062  data_time: 0.0058  last_data_time: 0.0058   lr: 5.9941e-05  max_mem: 6068M
[32m[05/23 22:43:20 d2.utils.events]: [0m eta: 13:18:42  iter: 79  total_loss: 0.4059  loss_cls: 0.1332  loss_box_reg: 0.2555  loss_rpn_cls: 0.009434  loss_rpn_loc: 0.006144    time: 0.6793  last_time: 0.6849  data_time: 0.0058  last_data_time: 0.0052   lr: 7.9921e-05  max_mem: 6068M
[32m[05/23 22:43:34 d2.utils.events]: [0m eta: 13:18:20  iter: 99  total_loss: 0.3678  loss_cls: 0.1082  loss_box_reg: 0.2404  loss_rpn_cls: 

OrderedDict([('bbox',
              {'AP': 80.02775032093258,
               'AP50': 96.03405275225569,
               'AP75': 92.8848652930717,
               'APs': nan,
               'APm': nan,
               'APl': 80.05826114518666})])