In [None]:
import os

os.environ['CLEARML_API_ACCESS_KEY'] = os.getenv("CLEARML_API_ACCESS_KEY")
os.environ['CLEARML_API_SECRET_KEY'] = os.getenv("CLEARML_API_SECRET_KEY")
os.environ["CLEARML_WEB_HOST"]  = "https://app.clear.ml/"
os.environ["CLEARML_API_HOST"]  = "https://api.clear.ml"
os.environ["CLEARML_FILES_HOST"] = "https://files.clear.ml"

In [3]:
import os
from detectron2.engine import DefaultTrainer, HookBase
from detectron2.config import get_cfg
from detectron2.engine.hooks import BestCheckpointer
from detectron2 import model_zoo
from detectron2.data.datasets import register_pascal_voc
from detectron2.evaluation import PascalVOCDetectionEvaluator
from detectron2.utils.events import get_event_storage
from clearml import Task
from detectron2.data import transforms as T
from detectron2.data import DatasetMapper
from detectron2.data import DatasetCatalog
import math

from numbers import Number

In [4]:
epochs=12
cfg = get_cfg()

cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_1x.yaml"))


voc_root = "VOCdevkit"
year = 2012

for split in ["train", "val"]:
    register_pascal_voc(
        f"voc_{str(split)}",
        os.path.join(voc_root, f"VOC{str(year)}"),
        split,
        year
    )

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
    )
)

cfg.DATASETS.TRAIN = ("voc_train",)
cfg.DATASETS.TEST = ("voc_val",)


dataset_len = len(DatasetCatalog.get(cfg.DATASETS.TRAIN[0]))

iters_per_epoch = math.ceil(dataset_len / cfg.SOLVER.IMS_PER_BATCH)
epochs = 20


cfg.SOLVER.MAX_ITER = epochs * iters_per_epoch
cfg.TEST.EVAL_PERIOD = iters_per_epoch


cfg.INPUT.MIN_SIZE_TRAIN    = (640, 672, 704, 736, 768, 800)
cfg.INPUT.MAX_SIZE_TRAIN    = 1333
cfg.INPUT.MIN_SIZE_TEST     = 800
cfg.INPUT.MAX_SIZE_TEST     = 1333
cfg.INPUT.RANDOM_FLIP       = "horizontal"


cfg.DATALOADER.NUM_WORKERS = 8

cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR      = 0.01 * (cfg.SOLVER.IMS_PER_BATCH / 16)
cfg.SOLVER.WEIGHT_DECAY = 0.0001
cfg.SOLVER.STEPS = (
    int(0.7 * cfg.SOLVER.MAX_ITER),
    int(0.9 * cfg.SOLVER.MAX_ITER),
)
cfg.SOLVER.GAMMA = 0.1
cfg.SOLVER.WARMUP_METHOD       = "linear"
cfg.SOLVER.WARMUP_FACTOR       = 1.0 / 1e3
cfg.SOLVER.WARMUP_ITERS        = 1000
cfg.SOLVER.WARMUP_START_LR     = 0.0

cfg.MODEL.RETINANET.NUM_CLASSES = 20
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/retinanet_R_50_FPN_1x.yaml"
)
cfg.OUTPUT_DIR = "output"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)



Loading config /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


In [5]:
class ClearMLHook(HookBase):
    def __init__(self):
        self.logger = Task.current_task().get_logger()
    def after_step(self):
        storage = get_event_storage()
        for name, value in storage.latest().items():
            if isinstance(value, Number):
                self.logger.report_scalar(name, "train", iteration=storage.iter, value=float(value))


class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        os.makedirs(output_folder, exist_ok=True)
        return PascalVOCDetectionEvaluator(dataset_name)



In [6]:
from detectron2.data import MetadataCatalog

meta = MetadataCatalog.get("voc_val")
print("dirname:",    meta.dirname)
print("split:",      meta.split)
print("thing_classes:", meta.thing_classes)
print("year:",       meta.year, type(meta.year))

dirname: VOCdevkit/VOC2012
split: val
thing_classes: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
year: 2012 <class 'int'>


In [7]:
!ls

RetinaNet-Detectron2.ipynb   downloads
RetinaNetTraining.ipynb      output
VOCdevkit		     retinanet-detectron2.ipynb.amltmp
VOCtrainval_11-May-2012.tar  retinanettraining.ipynb.amltmp
data			     runs


In [8]:
experiment_tags = [
    "model_name:retinanet",
    "dataset:voc2012",
    "platform:azureml-notebook",
    "author:hussain",
    "account:hussainsyed.dev@outlook.com",
    "studio:obj-detect-3xc",
    "detectron2"
]

task = Task.init(
    project_name="CMT318-Object-Detection",
    task_name="RetinaNet-Detectron2-Training",
    tags=experiment_tags
)

task.connect(cfg)

ClearML Task: overwriting (reusing) task id=3a544a46e86b4ea9ac6367c59cf60493
2025-05-02 23:52:36,714 - clearml.Task - INFO - No repository found, storing script code instead
ClearML results page: https://app.clear.ml/projects/31ab205b5fdb489d9ad1b4ed44a65563/experiments/3a544a46e86b4ea9ac6367c59cf60493/output/log


{'VERSION': 2,
 'MODEL': {'LOAD_PROPOSALS': False,
  'MASK_ON': False,
  'KEYPOINT_ON': False,
  'DEVICE': 'cuda',
  'META_ARCHITECTURE': 'GeneralizedRCNN',
  'WEIGHTS': 'https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/190397773/model_final_bfca0b.pkl',
  'PIXEL_MEAN': [103.53, 116.28, 123.675],
  'PIXEL_STD': [1.0, 1.0, 1.0],
  'BACKBONE': {'NAME': 'build_resnet_fpn_backbone', 'FREEZE_AT': 2},
  'FPN': {'IN_FEATURES': ['res2', 'res3', 'res4', 'res5'],
   'OUT_CHANNELS': 256,
   'NORM': '',
   'FUSE_TYPE': 'sum'},
  'PROPOSAL_GENERATOR': {'NAME': 'RPN', 'MIN_SIZE': 0},
  'ANCHOR_GENERATOR': {'NAME': 'DefaultAnchorGenerator',
   'SIZES': [[32], [64], [128], [256], [512]],
   'ASPECT_RATIOS': [[0.5, 1.0, 2.0]],
   'ANGLES': [[-90, 0, 90]],
   'OFFSET': 0.0},
  'RPN': {'HEAD_NAME': 'StandardRPNHead',
   'IN_FEATURES': ['p2', 'p3', 'p4', 'p5', 'p6'],
   'BOUNDARY_THRESH': -1,
   'IOU_THRESHOLDS': [0.3, 0.7],
   'IOU_LABELS': [0, -1, 1],
   'BATCH_SIZE_PER_IMA

In [None]:
trainer = Trainer(cfg)

trainer.register_hooks([
    BestCheckpointer(
        eval_period=cfg.TEST.EVAL_PERIOD,
        checkpointer=trainer.checkpointer,
        val_metric="bbox/AP50",
        mode="max",
        file_prefix="model_best"
    ),
    ClearMLHook()
])
trainer.resume_or_load(resume=False)

[32m[05/02 23:52:38 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Some model parameters or buffers are not found in the checkpoint:
[34mbackbone.fpn_lateral2.{bias, weight}[0m
[34mbackbone.fpn_output2.{bias, weight}[0m
[34mproposal_generator.rpn_head.anchor_deltas.{bias, weight}[0m
[34mproposal_generator.rpn_head.conv.{bias, weight}[0m
[34mproposal_generator.rpn_head.objectness_logits.{bias, weight}[0m
[34mroi_heads.box_head.fc1.{bias, weight}[0m
[34mroi_heads.box_head.fc2.{bias, weight}[0m
[34mroi_heads.box_predictor.bbox_pred.{bias, weight}[0m
[34mroi_heads.box_predictor.cls_score.{bias, weight}[0m
The checkpoint state_dict contains keys that are not used by the model:
  [35mpixel_mean[0m
  [35mpixel_std[0m
  [35mhead.cls_subnet.0.{bias, weight}[0m
  [35mhead.cls_subnet.2.{bias, weight}[0m
  [35mhead.cls_subnet.4.{bias, weight}[0m
  [35mhead.cls_subnet.6.{bias, weight}[0m
  [35mhead.bbox_subnet.0.{bias, weight}[0m
  [35mhead.bbox_subnet.2.{bias, weight}[0m
  [35mhead.bbox_subnet.4.{bias, weight}[0m
  [35mhead.bb

In [10]:
trainer.train()

[32m[05/02 23:53:50 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[05/02 23:54:07 d2.utils.events]: [0m eta: 1:33:45  iter: 19  total_loss: 4.858  loss_cls: 4.123  loss_box_reg: 0.002125  loss_rpn_cls: 0.6728  loss_rpn_loc: 0.04768    time: 0.7896  last_time: 0.8347  data_time: 0.0219  last_data_time: 0.0084   lr: 4.9952e-05  max_mem: 4873M
[32m[05/02 23:54:24 d2.utils.events]: [0m eta: 1:39:16  iter: 39  total_loss: 1.069  loss_cls: 0.4115  loss_box_reg: 0.01702  loss_rpn_cls: 0.6158  loss_rpn_loc: 0.03535    time: 0.8253  last_time: 0.7611  data_time: 0.0082  last_data_time: 0.0068   lr: 9.9903e-05  max_mem: 4873M
[32m[05/02 23:54:42 d2.utils.events]: [0m eta: 1:41:22  iter: 59  total_loss: 0.9351  loss_cls: 0.3369  loss_box_reg: 0.05776  loss_rpn_cls: 0.4821  loss_rpn_loc: 0.04997    time: 0.8444  last_time: 0.8657  data_time: 0.0082  last_data_time: 0.0079   lr: 0.00014985  max_mem: 4873M
[32m[05/02 23:55:00 d2.utils.events]: [0m eta: 1:42:03  iter: 79 

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [None]:
output_folder = "./output"

for root, _, files in os.walk(output_folder):
    for fname in files:
        file_path = os.path.join(root, fname)
        artifact_name = os.path.relpath(file_path, output_folder)
        task.upload_artifact(name=artifact_name, artifact_object=file_path)
        print(f"Uploaded {artifact_name}")


In [None]:
task.close()