In [None]:
import os
import warnings


def configure_clearml_env(environ=os.environ):
    """Point ClearML at the hosted clear.ml endpoints and check credentials.

    The access/secret keys are expected to be supplied from outside the
    notebook (never hard-coded here).  They are only validated, not rewritten:
    assigning ``os.getenv(key)`` back into ``os.environ`` is a no-op when the
    key is set and raises ``TypeError`` when it is not.

    Args:
        environ: Mutable mapping to update; defaults to ``os.environ``.

    Returns:
        list[str]: Names of the credential variables that are unset or empty.
    """
    missing = [
        key
        for key in ("CLEARML_API_ACCESS_KEY", "CLEARML_API_SECRET_KEY")
        if not environ.get(key)
    ]
    if missing:
        # Warn instead of crashing: credentials may still come from another
        # source (e.g. a clearml.conf file); Task.init() reports the final error.
        warnings.warn(
            "ClearML credentials not found in the environment: "
            + ", ".join(missing)
            + ". Task.init() will only succeed if they are provided elsewhere."
        )

    # Host endpoints are always (re)set, exactly as before.
    environ["CLEARML_WEB_HOST"] = "https://app.clear.ml/"
    environ["CLEARML_API_HOST"] = "https://api.clear.ml"
    environ["CLEARML_FILES_HOST"] = "https://files.clear.ml"
    return missing


configure_clearml_env()

In [None]:
import os
from detectron2.engine import DefaultTrainer, HookBase
from detectron2.config import get_cfg
from detectron2.engine.hooks import BestCheckpointer
from detectron2 import model_zoo
from detectron2.data.datasets import register_pascal_voc
from detectron2.evaluation import PascalVOCDetectionEvaluator
from detectron2.utils.events import get_event_storage
from clearml import Task
from detectron2.data import transforms as T
from detectron2.data import DatasetMapper
from detectron2.data import DatasetCatalog
import math

from numbers import Number

In [None]:
import random
import numpy as np
import torch

# Single source of truth for every random number generator seeded below.
SEED = 42

# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
# assignment is only seen by processes launched after this point.
os.environ["PYTHONHASHSEED"] = str(SEED)

# Seed Python, NumPy and PyTorch (CPU, then CUDA) in that order.
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
):
    _seed_fn(SEED)

# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# ---------------------------------------------------------------------------
# Dataset registration
# ---------------------------------------------------------------------------
voc_root = "VOCdevkit"
# Kept as an int. NOTE(review): PascalVOCDetectionEvaluator is understood to
# compare the registered year against 2007/2012 -- confirm before changing type.
year = 2012

for split in ["train", "val"]:
    dataset_name = f"voc_{split}"
    # Registering the same name twice raises, so skip names that already
    # exist; this keeps the cell re-runnable without restarting the kernel.
    if dataset_name in DatasetCatalog.list():
        continue
    register_pascal_voc(
        dataset_name,
        os.path.join(voc_root, f"VOC{year}"),
        split,
        year
    )

# ---------------------------------------------------------------------------
# Base config: COCO-pretrained RetinaNet R50-FPN
# ---------------------------------------------------------------------------
cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-Detection/retinanet_R_50_FPN_1x.yaml"
    )
)

cfg.DATASETS.TRAIN = ("voc_train",)
cfg.DATASETS.TEST = ("voc_val",)

# ---------------------------------------------------------------------------
# Batch size and schedule length
# ---------------------------------------------------------------------------
# The batch size MUST be set before the schedule is derived from it; otherwise
# iters_per_epoch is computed from the batch size inherited from the base
# config and every "epoch" covers only a fraction of the training set.
cfg.SOLVER.IMS_PER_BATCH = 4
# Linear LR scaling relative to a reference batch size of 16.
cfg.SOLVER.BASE_LR      = 0.01 * (cfg.SOLVER.IMS_PER_BATCH / 16)

epochs = 20
dataset_len = len(DatasetCatalog.get(cfg.DATASETS.TRAIN[0]))
iters_per_epoch = math.ceil(dataset_len / cfg.SOLVER.IMS_PER_BATCH)

cfg.SOLVER.MAX_ITER = epochs * iters_per_epoch
# Evaluate once per pass over the training set.
cfg.TEST.EVAL_PERIOD = iters_per_epoch

# ---------------------------------------------------------------------------
# Input pipeline
# ---------------------------------------------------------------------------
cfg.INPUT.MIN_SIZE_TRAIN    = (640, 672, 704, 736, 768, 800)
cfg.INPUT.MAX_SIZE_TRAIN    = 1333
cfg.INPUT.MIN_SIZE_TEST     = 800
cfg.INPUT.MAX_SIZE_TEST     = 1333
cfg.INPUT.RANDOM_FLIP       = "horizontal"

cfg.DATALOADER.NUM_WORKERS = 8

# ---------------------------------------------------------------------------
# Optimiser and LR schedule
# ---------------------------------------------------------------------------
cfg.SOLVER.WEIGHT_DECAY = 0.0001
# Decay the LR by GAMMA at 70% and 90% of the (now correctly sized) schedule.
cfg.SOLVER.STEPS = (
    int(0.7 * cfg.SOLVER.MAX_ITER),
    int(0.9 * cfg.SOLVER.MAX_ITER),
)
cfg.SOLVER.GAMMA = 0.1
cfg.SOLVER.WARMUP_METHOD       = "linear"
cfg.SOLVER.WARMUP_FACTOR       = 1.0 / 1e3
cfg.SOLVER.WARMUP_ITERS        = 1000
# NOTE(review): WARMUP_START_LR does not appear to be a key Detectron2's
# solver reads; it is stored on the config (and so sent to ClearML) but
# presumably has no effect on training -- confirm or remove.
cfg.SOLVER.WARMUP_START_LR     = 0.0

# ---------------------------------------------------------------------------
# Model head, initial weights and output location
# ---------------------------------------------------------------------------
cfg.MODEL.RETINANET.NUM_CLASSES = 20
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/retinanet_R_50_FPN_1x.yaml"
)
cfg.OUTPUT_DIR = "output"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)



In [None]:
class ClearMLHook(HookBase):
    """Trainer hook that forwards Detectron2 event-storage scalars to ClearML.

    Each scalar is reported with its storage name as the ClearML title and
    ``"train"`` as the series.  A ClearML task must already be active when
    the hook is constructed.
    """

    def __init__(self):
        """Bind to the current ClearML task's logger.

        Raises:
            RuntimeError: If no ClearML task is active (``Task.init`` was not
                called before constructing the hook).
        """
        task = Task.current_task()
        if task is None:
            raise RuntimeError(
                "ClearMLHook requires an active ClearML task; "
                "call Task.init() before creating the hook."
            )
        self.logger = task.get_logger()
        # name -> iteration of the last point sent, so that scalars which were
        # not updated this step (e.g. evaluation metrics) are not re-sent.
        self._last_reported = {}

    def after_step(self):
        """Report every numeric scalar that changed since the previous report."""
        storage = get_event_storage()
        for name, entry in storage.latest().items():
            # Recent Detectron2 releases store ``(value, iteration)`` pairs in
            # ``latest()``; a bare ``isinstance(entry, Number)`` check would
            # reject every entry and log nothing.  Plain numbers are still
            # accepted for releases that store the value directly.
            if isinstance(entry, tuple) and len(entry) == 2:
                value, recorded_iter = entry
            else:
                value, recorded_iter = entry, storage.iter

            if not isinstance(value, Number):
                continue
            if self._last_reported.get(name) == recorded_iter:
                continue  # this point was already sent

            self.logger.report_scalar(
                name, "train", iteration=recorded_iter, value=float(value)
            )
            self._last_reported[name] = recorded_iter


class Trainer(DefaultTrainer):
    """DefaultTrainer subclass that evaluates with the Pascal VOC detection evaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Create the evaluator used for ``dataset_name``.

        Args:
            cfg: Config node; ``cfg.OUTPUT_DIR`` supplies the default folder.
            dataset_name: Name of the registered dataset to evaluate.
            output_folder: Directory to create; when ``None`` it defaults to
                ``<cfg.OUTPUT_DIR>/inference``.

        Returns:
            PascalVOCDetectionEvaluator: Evaluator built from ``dataset_name``
            only; the folder is created but not passed to the evaluator.
        """
        inference_dir = (
            os.path.join(cfg.OUTPUT_DIR, "inference")
            if output_folder is None
            else output_folder
        )
        os.makedirs(inference_dir, exist_ok=True)
        return PascalVOCDetectionEvaluator(dataset_name)



In [None]:
from detectron2.data import MetadataCatalog

# Sanity check: print the metadata stored for the validation split.
meta = MetadataCatalog.get("voc_val")
for _label, _attr in (
    ("dirname:", "dirname"),
    ("split:", "split"),
    ("thing_classes:", "thing_classes"),
):
    print(_label, getattr(meta, _attr))
# The year is printed together with its Python type.
print("year:", meta.year, type(meta.year))

In [None]:
# Shell escape: list the contents of the kernel's current working directory.
!ls

In [None]:
# Free-form tags passed to Task.init for this run.
# NOTE(review): one tag embeds a personal e-mail address -- confirm it is
# acceptable to store that in experiment metadata.
experiment_tags = [
    "model_name:retinanet",
    "dataset:voc2012",
    "platform:azureml-notebook",
    "author:hussain",
    "account:hussainsyed.dev@outlook.com",
    "studio:obj-detect-3xc",
    "detectron2",
]

# Gather the Task.init arguments in one place, then create the task.
_task_options = {
    "project_name": "CMT318-Object-Detection",
    "task_name": "RetinaNet-Detectron2-Training",
    "tags": experiment_tags,
}
task = Task.init(**_task_options)

# Attach the training config to the task.
task.connect(cfg)

In [None]:
trainer = Trainer(cfg)

# Saves a "model_best" checkpoint whenever the validation "bbox/AP50" metric
# reaches a new maximum; checked on the same period as evaluation.
best_checkpoint_hook = BestCheckpointer(
    eval_period=cfg.TEST.EVAL_PERIOD,
    checkpointer=trainer.checkpointer,
    val_metric="bbox/AP50",
    mode="max",
    file_prefix="model_best",
)
# Forwards training scalars to the active ClearML task.
clearml_hook = ClearMLHook()

trainer.register_hooks([best_checkpoint_hook, clearml_hook])

# Start from cfg.MODEL.WEIGHTS rather than resuming a previous run.
trainer.resume_or_load(resume=False)

In [None]:
# Run training with the trainer and hooks configured in the previous cell.
trainer.train()

In [None]:
# Upload every file the run wrote to disk as a ClearML artifact.
# The folder is taken from the config instead of a second hard-coded path, so
# changing cfg.OUTPUT_DIR cannot leave this cell walking a stale directory.
output_folder = cfg.OUTPUT_DIR

for root, _, files in os.walk(output_folder):
    for fname in files:
        file_path = os.path.join(root, fname)
        # Name each artifact by its path relative to the output folder so
        # files in sub-directories stay distinguishable.
        artifact_name = os.path.relpath(file_path, output_folder)
        task.upload_artifact(name=artifact_name, artifact_object=file_path)
        print(f"Uploaded {artifact_name}")


In [None]:
# Close the ClearML task created by Task.init.
task.close()