# Disclaimer:
**baseline from naver boostcamp, edited by cuffyluv**

In [1]:
# 모듈 import
import pandas as pd
from collections import Counter
from IPython.display import display

from mmengine.config import Config
from mmengine.runner import Runner
from mmdet.registry import DATASETS
from mmdet.utils import register_all_modules

In [2]:
# custom 설정
classes = ("General trash", "Paper", "Paper pack", "Metal", "Glass",
           "Plastic", "Styrofoam", "Plastic bag", "Battery", "Clothing")

root = "../../../dataset/"
train_ann = "train.json"
test_ann  = "test.json"

# config file 들고오기
cfg = Config.fromfile("configs/faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py")
register_all_modules(init_default_scope=True)
cfg.default_scope = "mmdet"

# dataset config 수정
for ds_key in ["train_dataloader", "test_dataloader"]:
    if ds_key not in cfg:
        continue
    ds = cfg[ds_key]["dataset"] if "dataset" in cfg[ds_key] else cfg[ds_key]
    ds.metainfo = dict(classes=classes)
    ds.data_root = root
    ds.ann_file = train_ann if ds_key == "train_dataloader" else test_ann
    ds.data_prefix = dict(img="")

cfg.train_dataloader.batch_size = 4
cfg.train_dataloader.num_workers = max(2, cfg.train_dataloader.get("num_workers", 2))
cfg.train_dataloader.dataset.pipeline[2]["scale"] = (512, 512)

cfg.test_dataloader.batch_size = 1
cfg.test_dataloader.num_workers = max(2, cfg.test_dataloader.get("num_workers", 2))
cfg.test_dataloader.dataset.pipeline[1]["scale"] = (512, 512)

# validate 비활성화
for k in ("val_dataloader", "val_evaluator", "val_cfg", "val_loop"):
    cfg.pop(k, None)
cfg.train_cfg = cfg.get("train_cfg", {})
cfg.train_cfg["val_interval"] = 0

# 학습 config 수정
cfg.device = "cuda"
cfg.gpu_ids = [0]
cfg.randomness = dict(seed=2025, deterministic=False)
cfg.work_dir = "./work_dirs/faster_rcnn_r50_fpn_1x_trash"

cfg.model.roi_head.bbox_head.num_classes = len(classes)
cfg.optim_wrapper = {**cfg.get("optim_wrapper", {}), "clip_grad": dict(max_norm=35, norm_type=2)}

cfg.train_cfg.max_epochs = 12
cfg.default_hooks["checkpoint"]["max_keep_ckpts"] = 3
cfg.default_hooks["checkpoint"]["interval"] = 1

In [3]:
# dataset summarization 확인
train_ds_cfg = cfg.train_dataloader.dataset
train_ds = DATASETS.build(train_ds_cfg)

def summarize_dataset(ds):
    ds.full_init()
    num_images = len(ds)
    classes = list(ds.metainfo.get("classes", []))

    counts = Counter()
    for i in range(num_images):
        info = ds.get_data_info(i)
        for inst in info.get("instances", []):
            lbl = inst.get("bbox_label", None)
            if lbl is not None:
                counts[lbl] += 1

    df = pd.DataFrame({
        "category": [f"{i} [{c}]" for i, c in enumerate(classes)],
        "count": [counts.get(i, 0) for i in range(len(classes))]
    })

    print(f"\n [Info] CocoDataset Train dataset with number of images {num_images}, and instance counts:")
    display(df)

summarize_dataset(train_ds)

loading annotations into memory...
Done (t=0.08s)
creating index...
index created!

 [Info] CocoDataset Train dataset with number of images 4883, and instance counts:


Unnamed: 0,category,count
0,0 [General trash],3965
1,1 [Paper],6352
2,2 [Paper pack],897
3,3 [Metal],936
4,4 [Glass],982
5,5 [Plastic],2943
6,6 [Styrofoam],1263
7,7 [Plastic bag],5178
8,8 [Battery],159
9,9 [Clothing],468


In [4]:
train_ds_cfg = cfg.train_dataloader.dataset
train_ds = DATASETS.build(train_ds_cfg)

# dataset 크기 확인
print(f"[DEBUG] Train dataset length: {len(train_ds)}")

# batch_size 확인
batch_size = cfg.train_dataloader.batch_size
print(f"[DEBUG] train_dataloader.batch_size: {batch_size}")

# drop_last, shuffle 확인
drop_last = cfg.train_dataloader.get('drop_last', False)
shuffle = cfg.train_dataloader.get('shuffle', True)
print(f"[DEBUG] train_dataloader.drop_last: {drop_last}")
print(f"[DEBUG] train_dataloader.shuffle: {shuffle}")

# iteration 수 계산 (epoch 당)
num_samples = len(train_ds)
if drop_last:
    num_iters = num_samples // batch_size
else:
    num_iters = (num_samples + batch_size - 1) // batch_size
print(f"[DEBUG] Expected iterations per epoch: {num_iters}")

# test dataset도 확인
test_ds_cfg = cfg.test_dataloader.dataset
test_ds = DATASETS.build(test_ds_cfg)
print(f"[DEBUG] Test dataset length: {len(test_ds)}")
print(f"[DEBUG] test_dataloader.batch_size: {cfg.test_dataloader.batch_size}")


loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
[DEBUG] Train dataset length: 4883
[DEBUG] train_dataloader.batch_size: 4
[DEBUG] train_dataloader.drop_last: False
[DEBUG] train_dataloader.shuffle: True
[DEBUG] Expected iterations per epoch: 1221
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
[DEBUG] Test dataset length: 4871
[DEBUG] test_dataloader.batch_size: 1


In [5]:
# 모델 학습
runner = Runner.from_cfg(cfg)
runner.train()

12/02 22:17:59 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 2025
    GPU 0: Tesla V100-SXM2-32GB
    CUDA_HOME: None
    GCC: n/a
    PyTorch: 2.1.0+cu118
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX512
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,co

/bin/sh: 1: gcc: not found


12/02 22:17:59 - mmengine - [4m[97mINFO[0m - Config:
auto_scale_lr = dict(base_batch_size=16, enable=False)
backend_args = None
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
default_hooks = dict(
    checkpoint=dict(interval=1, max_keep_ckpts=3, type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
device = 'cuda'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
gpu_ids = [
    0,
]
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
model = dict(
    backbone=dict(
        depth=50,
        frozen_stages=1,
        init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'),
        n

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /data/ephemeral/home/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth



unexpected key in source state_dict: fc.weight, fc.bias

12/02 22:18:08 - mmengine - [4m[97mINFO[0m - Checkpoints will be saved to /data/ephemeral/home/jsw/mmdetection/work_dirs/faster_rcnn_r50_fpn_1x_trash.
12/02 22:18:18 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][  50/1221]  lr: 1.9820e-03  eta: 0:49:07  time: 0.2018  data_time: 0.0102  memory: 2019  grad_norm: 6.8811  loss: 1.1449  loss_rpn_cls: 0.4784  loss_rpn_bbox: 0.0626  loss_cls: 0.5121  acc: 93.9453  loss_bbox: 0.0919
12/02 22:18:28 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][ 100/1221]  lr: 3.9840e-03  eta: 0:47:43  time: 0.1917  data_time: 0.0074  memory: 2019  grad_norm: 2.6527  loss: 0.7509  loss_rpn_cls: 0.1522  loss_rpn_bbox: 0.0488  loss_cls: 0.3183  acc: 94.7754  loss_bbox: 0.2316
12/02 22:18:37 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][ 150/1221]  lr: 5.9860e-03  eta: 0:46:55  time: 0.1890  data_time: 0.0068  memory: 2020  grad_norm: 2.6263  loss: 0.7265  loss_rpn_cls: 0.1265  loss_rpn_bbo

FasterRCNN(
  (data_preprocessor): DetDataPreprocessor()
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=Tru