In [1]:
# Import modules
from mmengine.config import Config
from mmengine.runner import Runner
from mmdet.registry import DATASETS
from mmdet.utils import register_all_modules

In [2]:
# Register all modules so that string type names used in the config
# (e.g. 'CocoDataset', 'CocoMetric') can be looked up when the Runner is built.
register_all_modules()

In [4]:
# Category names of the trash-detection dataset, passed to the datasets
# through `metainfo`.
classes = ("General trash", "Paper", "Paper pack", "Metal", "Glass",
           "Plastic", "Styrofoam", "Plastic bag", "Battery", "Clothing")

# Load the base Faster R-CNN (ResNet-50 + FPN, 1x schedule) config file.
cfg = Config.fromfile('/data/ephemeral/home/kjh/mmdetection/configs/faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py')

root='../dataset/'

# Override the dataset settings.
cfg.dataset_type = 'CocoDataset'
cfg.data_root = root

# Override the train dataloader.
cfg.train_dataloader = dict(
    batch_size=4,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type=cfg.dataset_type,
        data_root=cfg.data_root,
        # Relative to data_root.
        ann_file='train.json',
        data_prefix=dict(img=''),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=cfg.train_pipeline,
        metainfo=dict(classes=classes)
    )
)

# Override the validation dataloader.
# NOTE(review): validation runs on test.json; the bbox metric is presumably
# only meaningful if that file contains ground-truth annotations -- confirm.
cfg.val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=cfg.dataset_type,
        data_root=cfg.data_root,
        # Relative to data_root, exactly like the train split. Prefixing it
        # with `root` again made the dataset resolve
        # '../dataset/../dataset/test.json', which only worked by accident.
        ann_file='test.json',
        data_prefix=dict(img=''),
        test_mode=True,
        pipeline=cfg.test_pipeline,
        metainfo=dict(classes=classes)
    )
)

# The test dataloader reuses the validation dataloader settings.
cfg.test_dataloader = cfg.val_dataloader

# Train / val / test evaluators.
# Unlike the dataset entries above, the evaluators are given the full
# annotation path (data_root + file name).
# NOTE(review): `train_evaluator` does not appear to be a field the Runner
# consumes; it is kept so the dumped config stays unchanged -- confirm and drop.
cfg.train_evaluator = dict(
    type='CocoMetric',
    ann_file=cfg.data_root + 'train.json',
    metric='bbox',
    format_only=False
)

cfg.val_evaluator = dict(
    type='CocoMetric',
    ann_file=cfg.data_root + 'test.json',
    metric='bbox',
    format_only=False
)

cfg.test_evaluator = cfg.val_evaluator

# Loop settings: 12 training epochs, validating after every epoch.
cfg.train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
cfg.val_cfg = dict(type='ValLoop')
cfg.test_cfg = dict(type='TestLoop')

# Checkpoint (.pth) and log saving.
# logger     : writes a log entry every `interval` iterations.
# checkpoint : saves every `interval` epochs and keeps only the most recent
#              `max_keep_ckpts` checkpoints.
cfg.default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook')
)

cfg.env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)

cfg.work_dir = './work_dirs/faster_rcnn_r50_fpn_1x_trash'

# Size the classification head from the class list so the two cannot drift
# apart if a category is added or removed.
cfg.model.roi_head.bbox_head.num_classes = len(classes)

# NOTE(review): lr=0.02 is presumably the base config's value for a total
# batch size of 16; with batch_size=4 on one GPU the linear scaling rule would
# suggest a smaller value (or enabling auto_scale_lr) -- confirm.
cfg.optim_wrapper.optimizer.lr = 0.02
cfg.optim_wrapper.clip_grad = dict(max_norm=35, norm_type=2)

# Build the Runner from the config and start training.
runner = Runner.from_cfg(cfg)
runner.train()

10/10 17:09:17 - mmengine - INFO - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 1126022317
    GPU 0: Tesla V100-SXM2-32GB
    CUDA_HOME: None
    GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0
    PyTorch: 1.12.1+cu116
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.6
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=c

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /data/ephemeral/home/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth



unexpected key in source state_dict: fc.weight, fc.bias

10/10 17:09:22 - mmengine - INFO - Checkpoints will be saved to /data/ephemeral/home/kjh/mmdetection/work_dirs/faster_rcnn_r50_fpn_1x_trash.
10/10 17:09:39 - mmengine - INFO - Epoch(train)  [1][  50/1221]  lr: 1.9820e-03  eta: 1:23:04  time: 0.3413  data_time: 0.0106  memory: 4462  grad_norm: 6.8595  loss: 1.1815  loss_rpn_cls: 0.4343  loss_rpn_bbox: 0.0515  loss_cls: 0.5871  acc: 97.2656  loss_bbox: 0.1086
10/10 17:09:54 - mmengine - INFO - Epoch(train)  [1][ 100/1221]  lr: 3.9840e-03  eta: 1:18:37  time: 0.3071  data_time: 0.0074  memory: 4463  grad_norm: 2.3773  loss: 0.7926  loss_rpn_cls: 0.1450  loss_rpn_bbox: 0.0512  loss_cls: 0.3489  acc: 86.7676  loss_bbox: 0.2475
10/10 17:10:10 - mmengine - INFO - Epoch(train)  [1][ 150/1221]  lr: 5.9860e-03  eta: 1:16:46  time: 0.3045  data_time: 0.0071  memory: 4462  grad_norm: 2.1223  loss: 0.7252  loss_rpn_cls: 0.1317  loss_rpn_bbo

FasterRCNN(
  (data_preprocessor): DetDataPreprocessor()
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=Tru