# Steps to follow

- Clone the mmdetection repo: https://github.com/open-mmlab/mmdetection
- Run `pip install -e .` inside the mmdetection directory
- Change the file: mmdetection/configs/htc/htc_r50_fpn_1x_artifact.py


python tools/train.py configs/htc/htc_r50_fpn_1x_artifact.py --cfg-options device=mps


In [3]:
from pycocotools.coco import COCO

# One COCO annotation file per dataset split
train_anno = '/Users/jbm/Documents/DSAN_6500/WatchdogAI/data_artifacts/train/_annotations.coco.json'
valid_anno = '/Users/jbm/Documents/DSAN_6500/WatchdogAI/data_artifacts/valid/_annotations.coco.json'
test_anno = '/Users/jbm/Documents/DSAN_6500/WatchdogAI/data_artifacts/test/_annotations.coco.json'

# Parse the three files in train / valid / test order
coco_train, coco_valid, coco_test = map(COCO, (train_anno, valid_anno, test_anno))

# Report the image count of every split, one line each
print(
    f"Train images: {len(coco_train.getImgIds())}",
    f"Validation images: {len(coco_valid.getImgIds())}",
    f"Test images: {len(coco_test.getImgIds())}",
    sep="\n",
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Train images: 69
Validation images: 25
Test images: 10


In [2]:
import json
import random
from pathlib import Path

def _append_with_fresh_ids(dest, images, annotations):
    """Append ``images`` and their annotations to ``dest`` under new ids.

    Ids copied verbatim from another annotation file may collide with ids
    that already exist in ``dest``, which would silently attach annotations
    to the wrong image. Every appended image and annotation therefore gets
    an id above the current maximum in ``dest``.

    Args:
        dest: COCO dict (with ``'images'`` and ``'annotations'`` lists) that
            is extended in place.
        images: image dicts to append; the originals are not modified.
        annotations: candidate annotation dicts; only those whose
            ``image_id`` belongs to ``images`` are appended.

    Assumes integer ids — TODO confirm for non-standard exports.
    """
    if not images:
        return

    next_image_id = max((img['id'] for img in dest['images']), default=-1) + 1
    next_ann_id = max((ann['id'] for ann in dest['annotations']), default=-1) + 1

    # Old image id -> new image id, used to re-point the annotations.
    new_image_id = {}
    for img in images:
        new_image_id[img['id']] = next_image_id
        dest['images'].append({**img, 'id': next_image_id})
        next_image_id += 1

    for ann in annotations:
        if ann['image_id'] not in new_image_id:
            continue
        dest['annotations'].append(
            {**ann, 'id': next_ann_id, 'image_id': new_image_id[ann['image_id']]}
        )
        next_ann_id += 1


def redistribute_test_split(train_data, valid_data, test_data,
                            keep=10, n_val=6, seed=42):
    """Shrink the test split to ``keep`` images and hand the rest to valid/train.

    The test images are shuffled with the global ``random`` module seeded by
    ``seed``. The first ``keep`` images stay in the test split, the next
    ``n_val`` go to validation and everything after that goes to training.
    All three dicts are modified in place.

    Args:
        train_data: COCO dict of the training split.
        valid_data: COCO dict of the validation split.
        test_data: COCO dict of the test split.
        keep: number of images that remain in the test split.
        n_val: number of surplus images given to the validation split.
        seed: seed passed to ``random.seed`` before shuffling.

    Returns:
        ``{'valid': [...], 'train': [...]}`` with the image dicts (as they
        appeared in the test split) that were moved to each destination.
    """
    random.seed(seed)
    random.shuffle(test_data['images'])

    kept = test_data['images'][:keep]
    surplus = test_data['images'][keep:]
    moved = {'valid': surplus[:n_val], 'train': surplus[n_val:]}

    source_annotations = test_data['annotations']
    _append_with_fresh_ids(train_data, moved['train'], source_annotations)
    _append_with_fresh_ids(valid_data, moved['valid'], source_annotations)

    kept_ids = {img['id'] for img in kept}
    test_data['images'] = kept
    test_data['annotations'] = [
        ann for ann in source_annotations if ann['image_id'] in kept_ids
    ]
    return moved


if __name__ == "__main__":
    # Paths
    base_dir = Path('/Users/jbm/Documents/DSAN_6500/WatchdogAI/data_artifacts/')
    train_path = base_dir / 'train/_annotations.coco.json'
    valid_path = base_dir / 'valid/_annotations.coco.json'
    test_path = base_dir / 'test/_annotations.coco.json'

    # Load annotation files
    with open(train_path, encoding='utf-8') as f:
        train_data = json.load(f)
    with open(valid_path, encoding='utf-8') as f:
        valid_data = json.load(f)
    with open(test_path, encoding='utf-8') as f:
        test_data = json.load(f)

    moved = redistribute_test_split(train_data, valid_data, test_data)

    # Save back to disk
    for path, data in (
        (train_path, train_data),
        (valid_path, valid_data),
        (test_path, test_data),
    ):
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f)

    # The annotations only carry file names, so the image files have to sit
    # next to the annotation file that now references them. Files that were
    # already moved (or are missing) are left alone.
    for split, images in moved.items():
        for img in images:
            src = base_dir / 'test' / img['file_name']
            dst = base_dir / split / img['file_name']
            if src.exists() and not dst.exists():
                src.rename(dst)

    print(
        f"✅ Split complete. Test now has {len(test_data['images'])} images. "
        "Others redistributed to train and valid."
    )

✅ Split complete. Test now has 10 images. Others redistributed to train and valid.


# Modified htc_r50_fpn_1x_artifact.py:

In [None]:
_base_ = './htc_r50_fpn_1x_coco.py'

# 1. Modify dataset classes
classes = ('artefact',)

# 2. Update dataset paths
# Every split lives in its own sub-directory of data_root, with the images
# stored next to that split's _annotations.coco.json.
data_root = '/Users/jbm/Documents/DSAN_6500/WatchdogAI/data_artifacts/'

train_dataloader = dict(
    batch_size=16,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        ann_file='train/_annotations.coco.json',
        data_prefix=dict(img='train/'),
        metainfo=dict(classes=classes),
        pipeline=[
            dict(type='LoadImageFromFile'),
            # NOTE(review): only boxes are loaded (with_mask=False) although
            # the base config is HTC — confirm the mask branches of the base
            # model are meant to run without mask annotations.
            dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
            dict(type='RandomFlip', prob=0.5),

            # AutoAugment with scale changes — simulates variable resolution
            dict(
                type='AutoAugment',
                policies=[
                    [dict(type='Resize', scale=(1333, 640), keep_ratio=True)],
                    [dict(type='Resize', scale=(1333, 800), keep_ratio=True)],
                    [dict(type='Resize', scale=(1333, 960), keep_ratio=True)]
                ]
            ),

            # Mild brightness/contrast jitter — simulates different lighting
            dict(
                type='PhotoMetricDistortion',
                brightness_delta=16,
                contrast_range=(0.9, 1.1),
                saturation_range=(0.95, 1.05),
                hue_delta=4
            ),

            dict(type='PackDetInputs')
        ]
    )
)

# Deterministic pipeline shared by validation and testing
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        ann_file='valid/_annotations.coco.json',
        data_prefix=dict(img='valid/'),
        metainfo=dict(classes=classes),
        pipeline=test_pipeline
    )
)

# Testing reads the held-out test split. Reusing val_dataloader here would
# report "test" numbers on the same images that drive early stopping and
# best-checkpoint selection.
test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        ann_file='test/_annotations.coco.json',
        data_prefix=dict(img='test/'),
        metainfo=dict(classes=classes),
        pipeline=test_pipeline
    )
)


val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'valid/_annotations.coco.json',
    metric=['bbox']
)

# Must point at the same annotation file as test_dataloader
test_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'test/_annotations.coco.json',
    metric=['bbox']
)

# 3. Adjust model to match 1 class
# Only the keys listed here are set by this file; everything else is left to
# the base config named in _base_.
model = dict(
    backbone=dict(
        frozen_stages=1  # maybe 2 later (for me to check after training)
    ),
    # One box head per cascade stage, each predicting the single class
    roi_head=dict(
        bbox_head=[
            dict(type='Shared2FCBBoxHead', num_classes=1),
            dict(type='Shared2FCBBoxHead', num_classes=1),
            dict(type='Shared2FCBBoxHead', num_classes=1)
        ]
    ),

    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        test_cfg=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0
        )
    ),

    test_cfg=dict(
        rpn=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0
        ),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100
        )
    ),

    # This file used to spell out a complete train_cfg (assigners, samplers,
    # proposal settings, three rcnn stages) and then replace the whole entry
    # with the dict below, so none of those values ever reached the model.
    # Only the value that actually took effect is kept.
    # NOTE(review): if the per-stage assigner/sampler overrides were meant to
    # apply, they need to be added back here explicitly.
    train_cfg=dict(
        rpn=dict(
            nms=dict(type='nms', iou_threshold=0.7)
        )
    )
)

# 4. Shorten training (for Mac)
# Iteration-based loop: train for at most max_iters iterations and validate
# every val_interval iterations.
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=10000, val_interval=150
)

val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# Log by iteration so the log lines match the iteration-based loop
log_processor = dict(by_epoch=False)

# Early stopping, counted in validation rounds (not epochs)
custom_hooks = [
    dict(
        type='EarlyStoppingHook',
        monitor='coco/bbox_mAP',  # or 'bbox_mAP_50', check your val_evaluator logs
        rule='greater',  # because higher mAP is better
        patience=30  # stops training if no improvement in X val intervals
    )
]

default_hooks = dict(
    checkpoint=dict(
        type='CheckpointHook',
        by_epoch=False,  # count `interval` in iterations, like the train loop
        interval=1000,  # How often to save
        save_best='coco/bbox_mAP',  # Metric to track for best
        rule='greater',  # Maximize the metric
        max_keep_ckpts=1  # Keep a single periodic checkpoint to save space
    ),
    logger=dict(type='LoggerHook', interval=50),
    # Detection result drawing is configured through the 'visualization'
    # default hook; 'VisualizationHook' is not a hook type of MMDetection 3.x.
    # NOTE(review): during validation the hook is expected to draw when
    # (iter + batch_idx) % interval == 0, so interval=0 would divide by zero.
    # An interval far above max_iters keeps validation drawing off while test
    # results are still drawn — confirm against the installed version.
    # NOTE(review): test_out_dir may be resolved relative to work_dir by the
    # hook; check where the images actually land.
    visualization=dict(
        type='DetVisualizationHook',
        draw=True,
        interval=1000000,
        test_out_dir='work_dirs/htc_r50_artifact/vis_results'  # saves images here
    )
)

visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')],
    name='vis'
)

# 5. Set working directory
work_dir = './work_dirs/htc_r50_artifact'
# Pretrained weights used to initialise the model before fine-tuning
load_from = 'checkpoints/htc_r50.pth'