In [1]:
import os
import time
import datetime
import logging
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
import random

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.evaluation import COCOEvaluator
from detectron2.utils.logger import setup_logger

import timm

# 추가적으로 필요한 import
from detectron2.evaluation import DatasetEvaluators
import logging
logging.getLogger("detectron2").setLevel(logging.WARNING)
logging.getLogger("detectron2").disabled = True

@BACKBONE_REGISTRY.register()
class TimmBackbone(Backbone):
    def __init__(self, cfg, input_shape):
        super().__init__()
        model_name = cfg.MODEL.BACKBONE.TIMM_MODEL
        self.model = timm.create_model(model_name, features_only=True, pretrained=True)
        feature_info = self.model.feature_info.get_dicts(keys=['num_chs', 'reduction'])
        
        self.out_channels = 256
        self.convs = nn.ModuleDict()
        for i, info in enumerate(feature_info):
            self.convs[f"p{i+2}"] = nn.Conv2d(info['num_chs'], self.out_channels, kernel_size=1)
        
        # P6, P7 레벨 추가 (RetinaNet용)
        self.p6 = nn.Conv2d(feature_info[-1]['num_chs'], self.out_channels, kernel_size=3, stride=2, padding=1)
        self.p7 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=2, padding=1)
        
        self._out_features = ["p2", "p3", "p4", "p5", "p6", "p7"]
        self._out_feature_channels = {name: self.out_channels for name in self._out_features}
        self._out_feature_strides = {f"p{i+2}": info['reduction'] for i, info in enumerate(feature_info)}
        self._out_feature_strides["p6"] = self._out_feature_strides["p5"] * 2
        self._out_feature_strides["p7"] = self._out_feature_strides["p6"] * 2

    def forward(self, x):
        features = self.model(x)
        out = {f"p{i+2}": self.convs[f"p{i+2}"](feature) for i, feature in enumerate(features)}
        p6 = self.p6(features[-1])
        p7 = self.p7(F.relu(p6))
        out["p6"] = p6
        out["p7"] = p7
        return out

    def output_shape(self):
        return {
            name: ShapeSpec(
                channels=self._out_feature_channels[name], stride=self._out_feature_strides[name]
            )
            for name in self._out_features
        }

def setup_cfg(model_type):
    cfg = get_cfg()
    if model_type == "Cascade R-CNN":
        cfg.merge_from_file(model_zoo.get_config_file("Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.FPN.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6"]
    elif model_type == "RetinaNet":
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_3x.yaml"))
        cfg.MODEL.FPN.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6", "p7"]
    else:  # FPN-based Faster R-CNN
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.FPN.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6"]
    
    cfg.DATASETS.TRAIN = ("my_dataset_train",)
    cfg.DATASETS.TEST = ("my_dataset_val",)

    cfg.MODEL.BACKBONE.NAME = "TimmBackbone"
    cfg.MODEL.BACKBONE.TIMM_MODEL = "resnet50"    
    
    cfg.MODEL.FPN.OUT_CHANNELS = 256     

    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32], [64], [128], [256], [512]]
    
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
    cfg.MODEL.RPN.IN_FEATURES = cfg.MODEL.FPN.IN_FEATURES

    cfg.MODEL.WEIGHTS = ""
    
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10
    cfg.INPUT.MIN_SIZE_TRAIN = (1024,)
    cfg.INPUT.MAX_SIZE_TRAIN = 1024
    cfg.INPUT.MIN_SIZE_TEST = 1024
    cfg.INPUT.MAX_SIZE_TEST = 1024
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    # epoch 설정
    dataset_size = 4883
    num_epochs = 3  # 원하는 epoch 수
    iterations_per_epoch = dataset_size // cfg.SOLVER.IMS_PER_BATCH
    cfg.SOLVER.MAX_ITER = iterations_per_epoch //num_epochs#* num_epochs
    cfg.TEST.EVAL_PERIOD = num_epochs

    # cuDNN 벤치마크 비활성화
    # torch.backends.cudnn.enabled = False
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128

    # 학습률 스케줄 조정 (선택사항)
    #cfg.SOLVER.STEPS = (iterations_per_epoch * 30, iterations_per_epoch * 40) 

    cfg.MODEL.MASK_ON = False

    return cfg
    
image_dir = '../../dataset'
json_file_path = '../../dataset/train.json'

# 데이터셋 등록 (COCO 형식 가정)
register_coco_instances("my_dataset_train", {}, image_dir + "/train.json", image_dir)
register_coco_instances("my_dataset_val", {}, image_dir + "/test.json", image_dir)



In [2]:
from detectron2.engine import DefaultTrainer
from detectron2.utils.events import EventStorage

def train_model(cfg, model_name):
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    
    with EventStorage() as storage:
        with tqdm(total=cfg.SOLVER.MAX_ITER, desc=f"Training {model_name}") as pbar:
            for _ in range(cfg.SOLVER.MAX_ITER):
                trainer.run_step()
                pbar.update(1)

    print(f"{model_name} 학습 완료")


# Set up configurations and train models
cfg_fpn = setup_cfg("FPN-based Faster R-CNN")
train_model(cfg_fpn, "FPN-based Faster R-CNN")

cfg_cascade = setup_cfg("Cascade R-CNN")
train_model(cfg_cascade, "Cascade R-CNN")

cfg_retinanet = setup_cfg("RetinaNet")
train_model(cfg_retinanet, "RetinaNet")




[32m[10/11 01:51:24 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): TimmBackbone(
    (model): FeatureListNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (drop_block): Identity()
          (act2): ReLU(inplace=True)
          (aa): Identity()
  

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Training FPN-based Faster R-CNN: 100%|██████████| 406/406 [03:48<00:00,  1.78it/s]


FPN-based Faster R-CNN 학습 완료
[32m[10/11 01:55:14 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): TimmBackbone(
    (model): FeatureListNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (drop_block): Identity()
          (act2): ReLU(inplace=True)


Training Cascade R-CNN: 100%|██████████| 406/406 [04:03<00:00,  1.67it/s]
Loading config /opt/conda/lib/python3.10/site-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


Cascade R-CNN 학습 완료
[32m[10/11 01:59:40 d2.engine.defaults]: [0mModel:
RetinaNet(
  (backbone): TimmBackbone(
    (model): FeatureListNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (drop_block): Identity()
          (act2): ReLU(inplace=True)
          (aa):

Training RetinaNet: 100%|██████████| 406/406 [05:09<00:00,  1.31it/s]

RetinaNet 학습 완료



