In [1]:
# 모듈 import
import pandas as pd
from collections import Counter
from IPython.display import display

from mmengine.config import Config
from mmengine.runner import Runner
from mmdet.registry import DATASETS
from mmdet.utils import register_all_modules

In [2]:
from mmengine.hooks import Hook
from mmdet.registry import HOOKS

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import torch

In [None]:
@HOOKS.register_module()
class PrintCocoMapHook(Hook):
    """Hook that prints COCO metrics after every validation epoch.

    After validation it first prints the ``coco/bbox_mAP`` and
    ``coco/bbox_mAP_50`` entries of ``metrics`` (when both are present), then
    runs the model over ``runner.val_dataloader`` again, converts the
    predictions into COCO result dicts and prints the
    ``COCOeval.summarize()`` table.

    Args:
        iou_type (str): ``iouType`` handed to ``COCOeval``. Predictions are
            only collected when this is ``'bbox'``.
        score_thr (float): Predictions with a score below this value are
            dropped before evaluation.
        ann_file (str, optional): Path of the COCO ground-truth annotation
            file. When ``None`` it is looked up in
            ``runner.cfg.val_evaluator``.
    """

    def __init__(self, iou_type='bbox', score_thr=0.0, ann_file=None):
        super().__init__()
        self.iou_type = iou_type
        self.score_thr = score_thr
        self.ann_file = ann_file

    def after_val_epoch(self, runner, metrics=None):
        """Print the headline mAP values and the full COCOeval summary.

        Args:
            runner: The runner; its ``epoch``, ``model``, ``val_dataloader``
                and ``cfg`` attributes are read.
            metrics (dict, optional): Metrics of the validation epoch.
        """
        if metrics is not None:
            mAP = metrics.get('coco/bbox_mAP', None)
            mAP50 = metrics.get('coco/bbox_mAP_50', None)
            if mAP is not None and mAP50 is not None:
                epoch = runner.epoch
                print(f'Epoch {epoch} - mAP: {mAP:.3f} mAP50: {mAP50:.3f}')

        model = runner.model
        model.eval()
        try:
            self._run_coco_eval(runner, model)
        finally:
            # Switch back to training mode on every exit path, including
            # when inference or evaluation raises.
            model.train()

    def _resolve_ann_file(self, runner):
        """Return the annotation file path, or ``None`` when none is configured."""
        if self.ann_file is not None:
            return self.ann_file

        evaluator_cfg = getattr(runner.cfg, 'val_evaluator', None)
        if evaluator_cfg is None:
            return None

        # Accept both a single evaluator config and a list of them.
        if isinstance(evaluator_cfg, (list, tuple)):
            candidates = evaluator_cfg
        else:
            candidates = [evaluator_cfg]
        for candidate in candidates:
            found = candidate.get('ann_file', None)
            if found is not None:
                return found
        return None

    @staticmethod
    def _resolve_cat_ids(val_loader, coco_gt):
        """Return the category ids used to translate label indices.

        NOTE(review): presumably the validation dataset exposes ``cat_ids``
        ordered by label index (as MMDetection's ``CocoDataset`` does) —
        confirm for custom dataset classes. Without that attribute the ids of
        the ground-truth file are used in the order ``getCatIds()`` returns.
        """
        dataset = getattr(val_loader, 'dataset', None)
        cat_ids = getattr(dataset, 'cat_ids', None)
        if cat_ids:
            return list(cat_ids)
        return list(coco_gt.getCatIds())

    def _collect_detections(self, model, val_loader, cat_ids):
        """Run inference over ``val_loader`` and build COCO result dicts.

        Returns:
            tuple[list[dict], list[int]]: The detection dicts and the ids of
            all evaluated images (including images without detections).
        """
        results = []
        img_ids = []

        with torch.no_grad():
            for data in val_loader:
                for result in model.val_step(data):
                    img_id = int(result.img_id)
                    img_ids.append(img_id)

                    if self.iou_type != 'bbox' or 'pred_instances' not in result:
                        continue

                    pred = result.pred_instances
                    bboxes = pred.bboxes.cpu().numpy()
                    scores = pred.scores.cpu().numpy()
                    labels = pred.labels.cpu().numpy()

                    for box, score, label in zip(bboxes, scores, labels):
                        if score < self.score_thr:
                            continue

                        label_idx = int(label)
                        if 0 <= label_idx < len(cat_ids):
                            category_id = int(cat_ids[label_idx])
                        else:
                            # Legacy fallback: 1-based contiguous category ids.
                            category_id = label_idx + 1

                        # COCO expects [x, y, width, height].
                        x1, y1, x2, y2 = box.tolist()
                        results.append({
                            'image_id': img_id,
                            'category_id': category_id,
                            'bbox': [float(x1), float(y1),
                                     float(x2 - x1), float(y2 - y1)],
                            'score': float(score),
                        })

        return results, img_ids

    def _run_coco_eval(self, runner, model):
        """Collect predictions and print ``COCOeval.summarize()``."""
        # Locate the ground-truth annotation file.
        ann_file = self._resolve_ann_file(runner)
        if ann_file is None:
            print('[Warn] ann_file을 찾지 못해 COCOeval.summarize()를 건너뜁니다.')
            return

        val_loader = runner.val_dataloader
        coco_gt = COCO(ann_file)
        cat_ids = self._resolve_cat_ids(val_loader, coco_gt)
        results, img_ids = self._collect_detections(model, val_loader, cat_ids)

        if len(results) == 0:
            print('[Info] No detections for COCOeval.summarize().')
            return

        # COCO evaluation restricted to the images that were actually run.
        coco_dt = coco_gt.loadRes(results)
        coco_eval = COCOeval(coco_gt, coco_dt, iouType=self.iou_type)
        coco_eval.params.imgIds = img_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

In [None]:
import os
from mmengine.config import Config
from mmengine.runner import Runner
from mmdet.utils import register_all_modules

# ---------------------------------------------------
# 0. Basic settings
# ---------------------------------------------------
# Class names of the dataset; the tuple length is used as the number of classes.
classes = (
    "General trash", "Paper", "Paper pack", "Metal", "Glass",
    "Plastic", "Styrofoam", "Plastic bag", "Battery", "Clothing"
)
num_classes = len(classes)

# Dataset root directory and annotation file names.
root = "../../dataset/"
train_ann = "train_split.json"
val_ann = "val_split.json"
test_ann = "test.json"

# Start from the stock Cascade R-CNN config and customise it in place.
cfg = Config.fromfile("configs/cascade_rcnn/cascade-rcnn_r50_fpn_1x_coco.py")
register_all_modules(init_default_scope=True)
cfg.default_scope = "mmdet"

# Replace the backbone with a pretrained ResNeXt-101 64x4d.
cfg.model.backbone = dict(
    type='ResNeXt',
    depth=101,
    groups=64,
    base_width=4,
    num_stages=4,
    out_indices=(0, 1, 2, 3),
    frozen_stages=1,
    norm_cfg=dict(type='BN', requires_grad=True),
    style='pytorch',
    init_cfg=dict(
        type='Pretrained',
        checkpoint='open-mmlab://resnext101_64x4d'
    )
)

# Box heads: a cascade model has a list of heads, other models a single head.
# Every head must predict `num_classes` classes; previously this was only set
# in the single-head branch, so the cascade heads kept the base config's value.
bbox_head = cfg.model.roi_head.bbox_head
if isinstance(bbox_head, list):
    for head in bbox_head:
        head.num_classes = num_classes

    # Per-stage regression target stds.
    cascade_target_stds = [
        [0.1, 0.1, 0.2, 0.2],
        [0.05, 0.05, 0.1, 0.1],
        [0.033, 0.033, 0.067, 0.067],
    ]
    for head, target_stds in zip(bbox_head, cascade_target_stds):
        head.bbox_coder.target_stds = target_stds
else:
    bbox_head.num_classes = num_classes
    # IoU-based losses are computed on decoded boxes, so the head has to
    # decode its regression output before the loss is applied.
    bbox_head.reg_decoded_bbox = True
    bbox_head.loss_bbox = dict(type='CIoULoss', loss_weight=10.0)


# RPN proposal limits during training (only if the config has this entry).
if 'rpn_proposal' in cfg.model.train_cfg:
    cfg.model.train_cfg.rpn_proposal.nms_pre = 2500
    cfg.model.train_cfg.rpn_proposal.max_per_img = 2000


# RPN settings at test time (only if the config has this entry).
if 'rpn' in cfg.model.test_cfg:
    cfg.model.test_cfg.rpn.nms = dict(type='nms', iou_threshold=0.7)
    cfg.model.test_cfg.rpn.nms_pre = 2000
    cfg.model.test_cfg.rpn.max_per_img = 2000

# Final detection filtering at test time.
cfg.model.test_cfg.rcnn.score_thr = 0.05
cfg.model.test_cfg.rcnn.nms = dict(
    type='nms',
    iou_threshold=0.5
)
cfg.model.test_cfg.rcnn.max_per_img = 1000


# Albumentations augmentations applied by the 'Albu' step of the training pipeline.
albu_train_transforms = [
    {
        'type': 'ShiftScaleRotate',
        'shift_limit': 0.0625,
        'scale_limit': 0.0,
        'rotate_limit': 0,
        'interpolation': 1,
        'p': 0.5,
    },
    {
        'type': 'RandomBrightnessContrast',
        'brightness_limit': [0.1, 0.3],
        'contrast_limit': [0.1, 0.3],
        'p': 0.2,
    },
    {
        # Exactly one of the colour perturbations is chosen when this fires.
        'type': 'OneOf',
        'transforms': [
            {
                'type': 'RGBShift',
                'r_shift_limit': 10,
                'g_shift_limit': 10,
                'b_shift_limit': 10,
                'p': 1.0,
            },
            {
                'type': 'HueSaturationValue',
                'hue_shift_limit': 20,
                'sat_shift_limit': 30,
                'val_shift_limit': 20,
                'p': 1.0,
            },
        ],
        'p': 0.1,
    },
    {'type': 'JpegCompression', 'quality_lower': 85, 'quality_upper': 95, 'p': 0.2},
    {'type': 'ChannelShuffle', 'p': 0.1},
    {
        # Exactly one of the blur variants is chosen when this fires.
        'type': 'OneOf',
        'transforms': [
            {'type': 'Blur', 'blur_limit': 3, 'p': 1.0},
            {'type': 'MedianBlur', 'blur_limit': 3, 'p': 1.0},
        ],
        'p': 0.1,
    },
]

backend_args = None

# Training pipeline: load -> resize -> Albu augmentations -> flip -> pack.
train_pipeline = [
    {'type': 'LoadImageFromFile', 'backend_args': backend_args},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {'type': 'Resize', 'scale': (1024, 1024), 'keep_ratio': True},
    {
        'type': 'Albu',
        'transforms': albu_train_transforms,
        'bbox_params': {
            'type': 'BboxParams',
            'format': 'pascal_voc',
            'label_fields': ['gt_bboxes_labels', 'gt_ignore_flags'],
            'min_visibility': 0.1,
            'filter_lost_elements': True,
        },
        # Pipeline result key -> name used on the augmentation side.
        'keymap': {
            'img': 'image',
            'gt_masks': 'masks',
            'gt_bboxes': 'bboxes',
        },
    },
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PackDetInputs'},
]

# Validation / test pipeline: no augmentation, explicit meta keys when packing.
test_pipeline = [
    {'type': 'LoadImageFromFile', 'backend_args': backend_args},
    {'type': 'Resize', 'scale': (1024, 1024), 'keep_ratio': True},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {
        'type': 'PackDetInputs',
        'meta_keys': ('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'),
    },
]

# Point every dataloader that exists in the config at the custom dataset.
for ds_key in ["train_dataloader", "val_dataloader", "test_dataloader"]:
    if ds_key not in cfg:
        continue

    # Use the nested `dataset` entry when there is one, else the dataloader entry itself.
    ds = cfg[ds_key].get("dataset", cfg[ds_key])
    ds.metainfo = dict(classes=classes)
    ds.data_root = root
    ds.data_prefix = dict(img="")

    if ds_key == "train_dataloader":
        ds.ann_file = train_ann
        ds.pipeline = train_pipeline
        cfg.train_dataloader.sampler = dict(type='ClassAwareSampler')
    elif ds_key == "val_dataloader":
        ds.ann_file = val_ann
        ds.pipeline = test_pipeline
    else:
        ds.ann_file = test_ann
        ds.pipeline = test_pipeline

cfg.train_dataloader.batch_size = 4
cfg.train_dataloader.num_workers = 4
cfg.val_dataloader.batch_size = 2
cfg.val_dataloader.num_workers = 2


# Plain optimizer wrapper; drop `loss_scale` if the base config carried one.
cfg.optim_wrapper.type = 'OptimWrapper'
if hasattr(cfg.optim_wrapper, 'loss_scale'):
    cfg.optim_wrapper.pop('loss_scale')

cfg.optim_wrapper.optimizer = dict(
    type='SGD',
    lr=0.0025,
    momentum=0.9,
    weight_decay=0.0001
)

# Linear warm-up over the first 500 iterations, then per-epoch cosine annealing.
# NOTE(review): the cosine schedule spans 12 epochs while `max_epochs` below
# is 3 — confirm whether the short run is intentional.
cfg.param_scheduler = [
    dict(type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(type='CosineAnnealingLR', T_max=12, by_epoch=True, begin=0, end=12)
]

cfg.train_cfg.max_epochs = 3
cfg.train_cfg.val_interval = 1

cfg.val_evaluator.ann_file = root + val_ann
cfg.val_evaluator.metric = 'bbox'
cfg.val_evaluator.classwise = True

# Give the test evaluator its own config (not an alias of the validation one)
# and point it at the annotation file the test dataloader reads.
# NOTE(review): assumes test.json can be loaded as ground truth by the
# evaluator — confirm before calling runner.test().
cfg.test_evaluator = dict(cfg.val_evaluator, ann_file=root + test_ann)

# Checkpoint every epoch, keep at most 3, and track the best bbox mAP.
cfg.default_hooks.checkpoint = dict(
    type='CheckpointHook',
    interval=1,
    max_keep_ckpts=3,
    save_best='coco/bbox_mAP',
    rule='greater'
)

# Clear any custom hooks carried by the base config.
if 'custom_hooks' in cfg:
    cfg.custom_hooks = []

cfg.work_dir = './work_dirs/cascade_resnext101_albu_nms'
cfg.randomness = dict(seed=2025)

print("Config Setup Completed!")

Config Setup Completed!


In [6]:
def summarize_dataset(ds):
    """Display a table with the number of annotated instances per class.

    The dataset is fully initialised first. Every instance returned by
    ``ds.get_data_info`` that carries a ``bbox_label`` is counted, and one row
    per entry of ``ds.metainfo["classes"]`` is shown; classes without any
    instance get a count of 0.

    Args:
        ds: Dataset providing ``full_init()``, ``__len__``, ``metainfo`` and
            ``get_data_info(index)``.
    """
    ds.full_init()
    image_total = len(ds)
    class_names = list(ds.metainfo.get("classes", []))

    # Lazily walk all instances of all images and pull out their labels.
    all_labels = (
        instance.get("bbox_label", None)
        for index in range(image_total)
        for instance in ds.get_data_info(index).get("instances", [])
    )
    label_counts = Counter(label for label in all_labels if label is not None)

    category_column = [
        f"{index} [{name}]" for index, name in enumerate(class_names)
    ]
    # Counter lookups yield 0 for labels that never occurred.
    count_column = [label_counts[index] for index in range(len(class_names))]

    table = pd.DataFrame({"category": category_column, "count": count_column})
    display(table)


# Build the training dataset from its config entry and display its per-class instance counts.
train_ds_cfg = cfg.train_dataloader.dataset
train_ds = DATASETS.build(train_ds_cfg)
summarize_dataset(train_ds)

loading annotations into memory...
Done (t=0.06s)
creating index...
index created!


Unnamed: 0,category,count
0,0 [General trash],3233
1,1 [Paper],5139
2,2 [Paper pack],748
3,3 [Metal],765
4,4 [Glass],780
5,5 [Plastic],2384
6,6 [Styrofoam],983
7,7 [Plastic bag],4123
8,8 [Battery],100
9,9 [Clothing],333


In [7]:
# Set the validation loop config explicitly before the runner is built.
cfg.val_cfg = dict(type='ValLoop')

# Build the runner from the assembled config and start training.
runner = Runner.from_cfg(cfg)
runner.train()

12/11 05:13:19 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 2025
    GPU 0: Tesla V100-SXM2-32GB
    CUDA_HOME: None
    GCC: n/a
    PyTorch: 2.1.0+cu118
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX512
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,co

/bin/sh: 1: gcc: not found


12/11 05:13:20 - mmengine - [4m[97mINFO[0m - Config:
auto_scale_lr = dict(base_batch_size=16, enable=False)
backend_args = None
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
default_hooks = dict(
    checkpoint=dict(
        interval=1,
        max_keep_ckpts=3,
        rule='greater',
        save_best='coco/bbox_mAP',
        type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
model = dict(
    backbone=dict(
        base_width=4,
        depth=101,
        frozen_stages=1,
        groups=64,

CascadeRCNN(
  (data_preprocessor): DetDataPreprocessor()
  (backbone): ResNeXt(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu