In [1]:
import os
import time
import datetime
import logging
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
import random

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer, hooks
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec, build_model
from detectron2.utils.logger import setup_logger
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.utils.events import EventStorage
from detectron2.utils.comm import synchronize

import timm

# 추가적으로 필요한 import
from detectron2.engine.defaults import DefaultPredictor
from detectron2.structures import Instances
from detectron2.evaluation import DatasetEvaluators
from detectron2.utils.env import seed_all_rng

# inference_context를 위한 import
from contextlib import contextmanager

import logging
logging.getLogger("detectron2").setLevel(logging.WARNING)
logging.getLogger("detectron2").disabled = True

@BACKBONE_REGISTRY.register()
class TimmBackbone(Backbone):
    def __init__(self, cfg, input_shape):
        super().__init__()
        model_name = cfg.MODEL.BACKBONE.TIMM_MODEL
        self.model = timm.create_model(model_name, features_only=True, pretrained=True)
        feature_info = self.model.feature_info.get_dicts(keys=['num_chs', 'reduction'])
        
        self.out_channels = 256
        self.convs = nn.ModuleDict()
        for i, info in enumerate(feature_info):
            self.convs[f"p{i+2}"] = nn.Conv2d(info['num_chs'], self.out_channels, kernel_size=1)
        
        # P6, P7 레벨 추가 (RetinaNet용)
        self.p6 = nn.Conv2d(feature_info[-1]['num_chs'], self.out_channels, kernel_size=3, stride=2, padding=1)
        self.p7 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=2, padding=1)
        
        self._out_features = ["p2", "p3", "p4", "p5", "p6", "p7"]
        self._out_feature_channels = {name: self.out_channels for name in self._out_features}
        self._out_feature_strides = {f"p{i+2}": info['reduction'] for i, info in enumerate(feature_info)}
        self._out_feature_strides["p6"] = self._out_feature_strides["p5"] * 2
        self._out_feature_strides["p7"] = self._out_feature_strides["p6"] * 2

    def forward(self, x):
        features = self.model(x)
        out = {f"p{i+2}": self.convs[f"p{i+2}"](feature) for i, feature in enumerate(features)}
        p6 = self.p6(features[-1])
        p7 = self.p7(F.relu(p6))
        out["p6"] = p6
        out["p7"] = p7
        return out

    def output_shape(self):
        return {
            name: ShapeSpec(
                channels=self._out_feature_channels[name], stride=self._out_feature_strides[name]
            )
            for name in self._out_features
        }

    
image_dir = '../../dataset'
json_file_path = '../../dataset/train.json'

# 데이터셋 등록 (COCO 형식 가정)
register_coco_instances("my_dataset_train", {}, image_dir + "/train.json", image_dir)
register_coco_instances("my_dataset_val", {}, image_dir + "/test.json", image_dir)
# 'version' 속성 추가
MetadataCatalog.get("my_dataset_train").set(version="1.0")
MetadataCatalog.get("my_dataset_val").set(version="1.0")


namespace(name='my_dataset_val',
          json_file='../../dataset/test.json',
          image_root='../../dataset',
          evaluator_type='coco',
          version='1.0')

In [2]:
def setup_cfg(model_type):
    cfg = get_cfg()
    if model_type == "Cascade R-CNN":
        cfg.merge_from_file(model_zoo.get_config_file("Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.FPN.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6"]
    elif model_type == "RetinaNet":
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_3x.yaml"))
        cfg.MODEL.FPN.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6", "p7"]
    else:  # FPN-based Faster R-CNN
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.FPN.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6"]
    
    cfg.DATASETS.TRAIN = ("my_dataset_train",)
    cfg.DATASETS.TEST = ("my_dataset_val",)

    cfg.MODEL.BACKBONE.NAME = "TimmBackbone"
    cfg.MODEL.BACKBONE.TIMM_MODEL = "resnet50"    
    
    cfg.MODEL.FPN.OUT_CHANNELS = 256     

    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32], [64], [128], [256], [512]]
    
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
    cfg.MODEL.RPN.IN_FEATURES = cfg.MODEL.FPN.IN_FEATURES

    cfg.MODEL.WEIGHTS = ""
    
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10
    cfg.INPUT.MIN_SIZE_TRAIN = (1024,)
    cfg.INPUT.MAX_SIZE_TRAIN = 1024
    cfg.INPUT.MIN_SIZE_TEST = 1024
    cfg.INPUT.MAX_SIZE_TEST = 1024
    cfg.SOLVER.IMS_PER_BATCH = 128
    cfg.SOLVER.BASE_LR = 0.00025
    # epoch 설정
    dataset_size = 4883
    num_epochs = 3  # 원하는 epoch 수
    iterations_per_epoch = dataset_size // cfg.SOLVER.IMS_PER_BATCH
    cfg.SOLVER.MAX_ITER = iterations_per_epoch * num_epochs
    cfg.TEST.EVAL_PERIOD = num_epochs
    # cuDNN 벤치마크 비활성화
    torch.backends.cudnn.benchmark = False
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128

    # 학습률 스케줄 조정 (선택사항)
    #cfg.SOLVER.STEPS = (iterations_per_epoch * 30, iterations_per_epoch * 40) 

    cfg.MODEL.MASK_ON = False

    return cfg


In [3]:
class TqdmTrainer(DefaultTrainer):
    def __init__(self, cfg):
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        super().__init__(cfg)
        self.pbar = None
        self.eval_dataset_name = create_eval_dataset(cfg, cfg.DATASETS.TRAIN[0])

    def train(self):
        self.pbar = tqdm(total=self.max_iter, desc="Training")
        super().train()

    def after_step(self):
        super().after_step()
        self.iter += 1  # iteration 증가
        if self.pbar is not None:
            self.pbar.update(1)
        
        # 주기적으로 모델 저장 및 평가 (예: 매 1000 iteration마다)
        if self.iter % 1000 == 0:
            checkpointer = DetectionCheckpointer(self.model, self.cfg.OUTPUT_DIR)
            checkpointer.save(f"model_{self.iter:07d}")
            
            # 평가 데이터로 평가 수행
            self.test(self.cfg, self.model)

    def after_train(self):
        super().after_train()
        if self.pbar is not None:
            self.pbar.close()
        
        # 최종 모델 저장 및 평가
        checkpointer = DetectionCheckpointer(self.model, self.cfg.OUTPUT_DIR)
        checkpointer.save("model_final")
        self.test(self.cfg, self.model)

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        return COCOEvaluator(dataset_name, cfg, False, output_folder)

    def build_test_loader(self, cfg, dataset_name):
        print(f"Building test loader for dataset: {dataset_name}")
        return build_detection_test_loader(cfg, dataset_name)

    def test(self, cfg, model):
        evaluator = self.build_evaluator(cfg, self.eval_dataset_name)
        data_loader = self.build_test_loader(cfg, self.eval_dataset_name)
        results = inference_on_dataset(model, data_loader, evaluator)
        
        # 입력 데이터를 연속적으로 만들기
        def ensure_contiguous(data):
            if isinstance(data, torch.Tensor):
                return data.contiguous()
            elif isinstance(data, dict):
                return {k: ensure_contiguous(v) for k, v in data.items()}
            elif isinstance(data, list):
                return [ensure_contiguous(v) for v in data]
            return data
        
        contiguous_results = ensure_contiguous(results)
        return contiguous_results
    
def train_model(cfg, model_name):
    cfg.OUTPUT_DIR = f"./output_{model_name}"
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    
    # 데이터셋 메타데이터 확인
    from detectron2.data import MetadataCatalog
    dataset_metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0])
    print("Dataset structure:", dataset_metadata)
    
    trainer = TqdmTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
    print(f"{model_name} training completed.")
    print(f"Model saved in {cfg.OUTPUT_DIR}")
    
    # 최종 평가 수행
    final_results = trainer.test(cfg, trainer.model)
    print(f"Final evaluation results: {final_results}")

def check_category_mapping(predictions, dataset_metadata):
    pred_classes = set([pred["category_id"] for pred in predictions])
    dataset_classes = set(range(len(dataset_metadata.thing_classes)))
    if pred_classes != dataset_classes:
        print(f"Warning: Mismatch in category IDs. Predictions: {pred_classes}, Dataset: {dataset_classes}")

def validate_coco_results(results):
    required_keys = ["image_id", "category_id", "bbox", "score"]
    for result in results:
        if not all(key in result for key in required_keys):
            print(f"Invalid result format: {result}")
            return False
    return True

@contextmanager
def inference_context(model):
    """
    A context where the model is temporarily changed to eval mode,
    and restored to previous mode afterwards.

    Args:
        model: a torch Module
    """
    training_mode = model.training
    model.eval()
    yield
    model.train(training_mode)

def inference_on_dataset(model, data_loader, evaluator, tqdm_desc="Evaluating"):
    """
    Run model on the data_loader and evaluate the metrics with evaluator.
    Also benchmark the inference speed of `model.forward` accurately.
    The model will be used in eval mode.

    Args:
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.

            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.
        evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want
            to benchmark, but don't want to do any evaluation.

    Returns:
        The return value of `evaluator.evaluate()`
    """
    num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # create a no-op evaluator
        evaluator = DatasetEvaluators([])
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0
    # with torch.no_grad():
    #     for inputs in tqdm(data_loader, total=total, desc=tqdm_desc):
    #         # Ensure inputs are contiguous and on the correct device
    #         if isinstance(inputs, list):
    #             inputs = [{k: v.contiguous().cuda() if isinstance(v, torch.Tensor) else v 
    #                        for k, v in d.items()} for d in inputs]
    #         elif isinstance(inputs, dict):
    #             inputs = {k: v.contiguous().cuda() if isinstance(v, torch.Tensor) else v 
    #                       for k, v in inputs.items()}

    #         outputs = model(inputs)
    #         evaluator.process(inputs, outputs)
    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(tqdm(data_loader, total=total, desc=tqdm_desc)):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0

            start_compute_time = time.perf_counter()
            outputs = model(inputs)
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            evaluator.process(inputs, outputs)

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / total, num_devices
        )
    )
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_compute_time_str, total_compute_time * num_devices / total, num_devices
        )
    )

    results = evaluator.evaluate()
    # An evaluator may return None when not in main process.
    # Replace it by an empty dict instead to make it easier for downstream code to handle
    if results is None:
        results = {}
    return results

def create_eval_dataset(cfg, dataset_name, percentage=0.02):
    # 전체 데이터셋 로드
    dataset_dicts = DatasetCatalog.get(dataset_name)
    
    # 클래스별로 데이터 분류
    class_data = {}
    for data in dataset_dicts:
        for ann in data['annotations']:
            category_id = ann['category_id']
            if category_id not in class_data:
                class_data[category_id] = []
            class_data[category_id].append(data)
    
    # 각 클래스에서 20% 샘플링
    eval_data = []
    for category, items in class_data.items():
        sample_size = max(1, int(len(items) * percentage))
        eval_data.extend(random.sample(items, sample_size))
    
    # 중복 제거
    eval_data = list({d['image_id']: d for d in eval_data}.values())
    
    # 평가 데이터셋 등록
    eval_dataset_name = f"{dataset_name}_eval"
    DatasetCatalog.register(eval_dataset_name, lambda: eval_data)
    MetadataCatalog.get(eval_dataset_name).set(thing_classes=MetadataCatalog.get(dataset_name).thing_classes)
    
    return eval_dataset_name
def train_model(cfg, model_name):
    cfg.OUTPUT_DIR = f"./output_{model_name}"
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    
    trainer = TqdmTrainer(cfg)
    trainer.resume_or_load(resume=False)
    
    # 데이터셋 메타데이터 확인
    dataset_metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0])
    print(f"Dataset version: {dataset_metadata.version}")
    
    trainer.train()
    print(f"{model_name} training completed.")
    print(f"Model saved in {cfg.OUTPUT_DIR}")
    
    # 최종 평가 수행
    final_results = trainer.test(cfg, trainer.model)
    print(f"Final evaluation results: {final_results}")


cfg_fpn = setup_cfg("FPN-based Faster R-CNN")
train_model(cfg_fpn, "FPN-based Faster R-CNN")
# 각 모델 설정 및 학습
cfg_cascade = setup_cfg("Cascade R-CNN")
train_model(cfg_cascade, "Cascade R-CNN")

cfg_retinanet = setup_cfg("RetinaNet")
train_model(cfg_retinanet, "RetinaNet")



[32m[10/11 01:15:18 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): TimmBackbone(
    (model): FeatureListNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (drop_block): Identity()
          (act2): ReLU(inplace=True)
          (aa): Identity()
  

Training:   0%|          | 0/114 [00:00<?, ?it/s]

[32m[10/11 01:15:19 d2.engine.train_loop]: [0mStarting training from iteration 0
[4m[5m[31mERROR[0m [32m[10/11 01:15:23 d2.engine.train_loop]: [0mException during training:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/detectron2/engine/train_loop.py", line 155, in train
    self.run_step()
  File "/opt/conda/lib/python3.10/site-packages/detectron2/engine/defaults.py", line 496, in run_step
    self._trainer.run_step()
  File "/opt/conda/lib/python3.10/site-packages/detectron2/engine/train_loop.py", line 310, in run_step
    loss_dict = self.model(data)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/detectron2/modeling/meta_arch/rcnn.py", line 158, in forward
    features = self.backbone(images.tensor)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_i

Training:   0%|          | 0/114 [00:03<?, ?it/s]


[32m[10/11 01:15:24 d2.evaluation.coco_evaluation]: [0mFast COCO eval is not built. Falling back to official COCO eval.
[32m[10/11 01:15:24 d2.evaluation.coco_evaluation]: [0mTrying to convert 'my_dataset_train_eval' to COCO format ...
Building test loader for dataset: my_dataset_train_eval
[32m[10/11 01:15:24 d2.data.build]: [0mDistribution of instances among all 10 categories:
[36m|   category    | #instances   |  category   | #instances   |  category  | #instances   |
|:-------------:|:-------------|:-----------:|:-------------|:----------:|:-------------|
| General trash | 745          |    Paper    | 1663         | Paper pack | 162          |
|     Metal     | 220          |    Glass    | 248          |  Plastic   | 692          |
|   Styrofoam   | 280          | Plastic bag | 1103         |  Battery   | 32           |
|   Clothing    | 81           |             |              |            |              |
|     total     | 5226         |             |              |      

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Evaluating: 100%|██████████| 414/414 [00:30<00:00, 13.36it/s]

[32m[10/11 01:15:55 d2.evaluation.coco_evaluation]: [0mPreparing results for COCO format ...
[32m[10/11 01:15:55 d2.evaluation.coco_evaluation]: [0mSaving results to ./output_FPN-based Faster R-CNN/inference/coco_instances_results.json





[32m[10/11 01:15:55 d2.evaluation.coco_evaluation]: [0mEvaluating predictions with official COCO API...
Loading and preparing results...


AssertionError: Results do not correspond to current coco set