# Library Import

In [1]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np
import cv2
import os

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch
# torchvision ships the Faster R-CNN model used below
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from torch.utils.data import DataLoader, Dataset
import pandas as pd
from tqdm import tqdm

# Dataset creation

In [None]:
class CustomDataset(Dataset):
    """COCO-annotated detection dataset producing torchvision-style targets.

    Each item is ``(image, target, image_id)``. ``target`` is a dict with
    ``boxes`` (Pascal VOC ``[x_min, y_min, x_max, y_max]``), ``labels``
    (``category_id + 1`` so that 0 stays free for background),
    ``image_id``, ``area`` and ``iscrowd`` tensors.

    Args:
        coco: COCO object already built from the annotation file.
        img_ids: Image ids served by this dataset, in index order.
        data_dir: Directory that the annotation ``file_name`` values are
            joined onto.
        transforms: Optional Albumentations ``Compose``; it is called with
            ``image``, ``bboxes`` and ``labels`` keyword arguments.
        min_area: Boxes whose clipped area is below this value are dropped.
    """

    def __init__(self, coco, img_ids, data_dir, transforms=None,
                 min_area: float = 16.0):
        super().__init__()
        self.coco = coco
        self.img_ids = img_ids
        self.data_dir = data_dir
        self.transforms = transforms
        self.min_area = min_area

        # Container for prediction results (kept from the existing code).
        self.predictions = {
            "images": self.coco.dataset["images"].copy(),
            "categories": self.coco.dataset["categories"].copy(),
            "annotations": None
        }

    def __len__(self) -> int:
        return len(self.img_ids)

    @staticmethod
    def _filter_annotations(anns, width, height, min_area):
        """Convert COCO annotations to clipped Pascal VOC boxes.

        Returns four parallel lists ``(boxes, labels, areas, is_crowds)``
        containing only the annotations that survive every check.
        """
        boxes, labels, areas, is_crowds = [], [], [], []

        for ann in anns:
            x, y, w, h = ann['bbox']

            # 1) Drop boxes with non-positive width or height.
            if w <= 0 or h <= 0:
                continue

            # 2) COCO (x, y, w, h) -> Pascal VOC corners, clipped to the
            #    last valid pixel index of the image.
            x_min = max(0, min(x, width - 1))
            y_min = max(0, min(y, height - 1))
            x_max = max(0, min(x + w, width - 1))
            y_max = max(0, min(y + h, height - 1))

            # 3) Clipping can collapse a box; re-validate.
            if x_max <= x_min or y_max <= y_min:
                continue

            # 4) Drop boxes that are too small after clipping.
            box_area = (x_max - x_min) * (y_max - y_min)
            if box_area < min_area:
                continue

            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(ann['category_id'] + 1)
            areas.append(box_area)
            is_crowds.append(int(ann.get('iscrowd', 0)))

        return boxes, labels, areas, is_crowds

    def _load_image(self, file_name):
        """Read an image as float32 RGB scaled to [0, 1].

        Raises:
            FileNotFoundError: If OpenCV cannot read the file
                (``cv2.imread`` signals failure by returning ``None``).
        """
        path = os.path.join(self.data_dir, file_name)
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

    def __getitem__(self, index: int):
        image_id = self.img_ids[index]
        image_info = self.coco.loadImgs(image_id)[0]
        image = self._load_image(image_info['file_name'])

        ann_ids = self.coco.getAnnIds(imgIds=image_info['id'])
        anns = self.coco.loadAnns(ann_ids)

        height, width = image.shape[:2]
        boxes, labels, areas, is_crowds = self._filter_annotations(
            anns, width, height, self.min_area
        )
        # reshape keeps the (N, 4) layout even when no box survived.
        boxes = np.asarray(boxes, dtype=np.float32).reshape(-1, 4)

        # Images without boxes bypass the transform pipeline (as before)
        # and are converted to a CHW tensor by hand.
        if self.transforms and len(boxes) > 0:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            box_tensor = torch.as_tensor(
                np.asarray(sample['bboxes'], dtype=np.float32).reshape(-1, 4),
                dtype=torch.float32,
            )
            label_tensor = torch.as_tensor(sample['labels'],
                                           dtype=torch.int64)
            # The transform may rescale boxes, so the pre-transform areas
            # no longer describe them; recompute from the final boxes.
            area_tensor = ((box_tensor[:, 2] - box_tensor[:, 0])
                           * (box_tensor[:, 3] - box_tensor[:, 1]))
            # NOTE(review): iscrowd is not re-aligned if a transform drops
            # boxes; confirm the pipeline in use keeps every box.
        else:
            image = torch.tensor(image).permute(2, 0, 1).float()
            box_tensor = torch.as_tensor(boxes, dtype=torch.float32)
            label_tensor = torch.as_tensor(labels, dtype=torch.int64)
            area_tensor = torch.as_tensor(areas, dtype=torch.float32)

        target = {
            'boxes': box_tensor,
            'labels': label_tensor,
            'image_id': torch.tensor([image_id]),
            'area': area_tensor,
            'iscrowd': torch.as_tensor(is_crowds, dtype=torch.int64)
        }

        return image, target, image_id


In [3]:
def get_train_transform():
    """Build the training pipeline: resize to 1024x1024, random flip, to tensor.

    Boxes are expected in Pascal VOC format with their classes supplied
    through the ``labels`` field.
    """
    steps = [
        A.Resize(1024, 1024),
        A.Flip(p=0.5),
        ToTensorV2(p=1.0),
    ]
    box_config = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose(steps, bbox_params=box_config)


def get_valid_transform():
    """Build the validation pipeline: tensor conversion only, no augmentation.

    Boxes are expected in Pascal VOC format with their classes supplied
    through the ``labels`` field.
    """
    steps = [ToTensorV2(p=1.0)]
    box_config = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose(steps, bbox_params=box_config)

# Util Functions

In [4]:
class Averager:
    """Running mean of the values handed to ``send``."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything accumulated so far."""
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of the observations, or 0 when nothing has been sent."""
        if not self.iterations:
            return 0
        return self.current_total / self.iterations


def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples."""
    return (*zip(*batch),)

In [None]:
def evaluate(model, data_loader, coco_gt, device):
    """Run COCO bbox evaluation and put the model back into training mode.

    NOTE: another ``evaluate`` is defined further down in this file; when
    the cells run in order, that later definition is the one in effect.

    Args:
        model: Detection model; called with a list of image tensors and
            expected to return one dict per image with ``boxes``
            (``[x_min, y_min, x_max, y_max]``), ``scores`` and ``labels``.
        data_loader: Iterable of ``(images, targets, image_ids)`` batches.
        coco_gt: Ground-truth COCO object used for ``loadRes`` / ``COCOeval``.
        device: Device the images are moved to before inference.

    Returns:
        ``coco_eval.stats`` after ``summarize()``, or ``None`` when the
        model produced no detections at all.
    """
    model.eval()
    results = []

    with torch.no_grad():
        for images, _targets, image_ids in data_loader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for out, img_id in zip(outputs, image_ids):
                boxes = out['boxes'].cpu().numpy()
                scores = out['scores'].cpu().numpy()
                labels = out['labels'].cpu().numpy()

                for box, score, label in zip(boxes, scores, labels):
                    x_min, y_min, x_max, y_max = box
                    results.append({
                        "image_id": int(img_id),
                        # Model labels are 1~10; COCO category ids are 0~9.
                        "category_id": int(label - 1),
                        # COCO results use [x, y, width, height].
                        "bbox": [float(x_min), float(y_min),
                                 float(x_max - x_min), float(y_max - y_min)],
                        "score": float(score),
                    })

    stats = None
    if results:
        coco_dt = coco_gt.loadRes(results)
        coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()       # prints mAP, mAP50, ...
        stats = coco_eval.stats
    else:
        # loadRes() indexes the first result and fails on an empty list,
        # so there is nothing to score in this case.
        print("No detections produced; skipping COCO evaluation.")

    model.train()
    return stats

# Trainer

In [None]:
def train_fn(num_epochs, train_loader, val_loader, coco_val,
             optimizer, model, device):
    """Train for ``num_epochs`` epochs, evaluating after each one.

    After every epoch the mean training loss is printed, ``evaluate`` is
    run on the validation loader, and the model weights are written to
    ``./checkpoints/faster_rcnn_best.pth`` whenever the mean training loss
    improves on the best value seen so far.
    """
    lowest_loss = float("inf")
    running_loss = Averager()

    for epoch in range(num_epochs):
        running_loss.reset()
        model.train()

        for images, targets, image_ids in tqdm(train_loader):
            device_images = [img.to(device) for img in images]
            device_targets = [
                {key: tensor.to(device) for key, tensor in target.items()}
                for target in targets
            ]

            # In training mode the model returns a dict of loss terms.
            loss_terms = model(device_images, device_targets)
            total_loss = sum(loss_terms.values())
            running_loss.send(total_loss.item())

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        epoch_loss = running_loss.value
        print(f"Epoch #{epoch+1} train loss: {epoch_loss:.4f}")

        # Validation mAP.
        evaluate(model, val_loader, coco_val, device)

        # Keep the best model (by training loss; could be switched to mAP).
        if epoch_loss < lowest_loss:
            os.makedirs("./checkpoints", exist_ok=True)
            torch.save(model.state_dict(),
                       "./checkpoints/faster_rcnn_best.pth")
            lowest_loss = epoch_loss

In [None]:
## Image weighting for rare class sampling

from collections import defaultdict
from torch.utils.data import WeightedRandomSampler

def make_image_weights(annotation_path, rare_ratio=0.1,
                       rare_weight=3.0, normal_weight=1.0):
    """Compute per-image sampling weights that favour rare classes.

    A class is rare when its box count is below ``rare_ratio`` times the
    box count of the most frequent class. An image gets ``rare_weight``
    if it contains at least one rare class, otherwise ``normal_weight``.

    NOTE: a second ``make_image_weights`` is defined further down in this
    file; when the cells run in order, that later definition is in effect.

    Args:
        annotation_path: Path to a COCO annotation JSON file.
        rare_ratio: Fraction of the maximum class count below which a
            class counts as rare.
        rare_weight: Weight for images containing a rare class.
        normal_weight: Weight for all other images.

    Returns:
        ``(coco, img_ids, weights)`` where ``weights`` is a
        ``torch.DoubleTensor`` aligned with ``img_ids``.
    """
    coco = COCO(annotation_path)
    anns = coco.dataset.get("annotations", [])

    # One pass: boxes per class and the set of classes per image.
    cls_count = defaultdict(int)
    img_to_cats = defaultdict(set)
    for ann in anns:
        cid = ann["category_id"]
        cls_count[cid] += 1
        img_to_cats[ann["image_id"]].add(cid)

    # default=0 keeps an annotation file without boxes from raising;
    # in that case no class is rare and every image gets normal_weight.
    max_cnt = max(cls_count.values(), default=0)
    rare_classes = {c for c, n in cls_count.items()
                    if n < rare_ratio * max_cnt}

    img_ids = coco.getImgIds()
    img_weights = [
        rare_weight if img_to_cats.get(img_id, set()) & rare_classes
        else normal_weight
        for img_id in img_ids
    ]

    return coco, img_ids, torch.DoubleTensor(img_weights)


In [None]:
def make_image_weights(annotation_path,
                       rare_ratio=0.1,
                       rare_weight=3.0,
                       normal_weight=1.0):
    """Read a COCO annotation file and derive per-image sampling weights.

    Computes the box count per class, marks as rare every class whose
    count is below ``rare_ratio`` times the largest class count, and
    assigns ``rare_weight`` to images containing a rare class and
    ``normal_weight`` to the rest.

    Args:
        annotation_path: Path to a COCO annotation JSON file.
        rare_ratio: Fraction of the maximum class count below which a
            class counts as rare.
        rare_weight: Weight for images containing a rare class.
        normal_weight: Weight for all other images.

    Returns:
        ``(coco, img_ids, img_weights)`` where ``img_weights`` is a
        ``torch.DoubleTensor`` aligned with ``img_ids``.
    """
    coco = COCO(annotation_path)
    anns = coco.dataset.get("annotations", [])

    cls_count = defaultdict(int)
    img_to_cats = defaultdict(set)
    for ann in anns:
        category = ann["category_id"]
        cls_count[category] += 1
        img_to_cats[ann["image_id"]].add(category)

    # With no annotations there is no maximum to compare against; treat
    # that as "no rare classes" instead of letting max() raise.
    if cls_count:
        threshold = rare_ratio * max(cls_count.values())
        rare_classes = {c for c, n in cls_count.items() if n < threshold}
    else:
        rare_classes = set()

    img_ids = coco.getImgIds()

    img_weights = []
    for img_id in img_ids:
        has_rare = bool(img_to_cats.get(img_id, set()) & rare_classes)
        img_weights.append(rare_weight if has_rare else normal_weight)

    img_weights = torch.DoubleTensor(img_weights)
    return coco, img_ids, img_weights


def create_model(num_classes):
    """Return a Faster R-CNN (ResNet-50 FPN) with a fresh box predictor.

    The detector is loaded with ``weights="DEFAULT"`` and its box
    predictor is replaced by a ``FastRCNNPredictor`` that outputs
    ``num_classes`` classes.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        weights="DEFAULT"
    )
    old_head = detector.roi_heads.box_predictor
    feature_dim = old_head.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(feature_dim,
                                                         num_classes)
    return detector

In [None]:
def build_loaders(train_ann='../../dataset/train_split.json',
                  val_ann='../../dataset/val_split.json',
                  data_dir='../../dataset',
                  train_batch_size=16,
                  val_batch_size=8,
                  num_workers=0):
    """Create the training and validation data loaders.

    The training loader draws images with a ``WeightedRandomSampler``
    (with replacement) using the weights from ``make_image_weights``; the
    validation loader iterates every image once, in order.

    Args:
        train_ann: Path to the training split annotation JSON.
        val_ann: Path to the validation split annotation JSON.
        data_dir: Directory the annotation file names are relative to.
        train_batch_size: Batch size of the training loader.
        val_batch_size: Batch size of the validation loader.
        num_workers: Worker processes used by both loaders.

    Returns:
        ``(train_loader, val_loader, coco_val)`` where ``coco_val`` is the
        COCO object of the validation split.
    """
    # ---- train loader (with weighted sampling) ----
    coco_train, train_ids, img_weights = make_image_weights(
        annotation_path=train_ann,
        rare_ratio=0.1,
        rare_weight=3.0,
        normal_weight=1.0
    )

    train_dataset = CustomDataset(
        coco=coco_train,
        img_ids=train_ids,
        data_dir=data_dir,
        transforms=get_train_transform()
    )

    # One epoch draws as many samples as there are training images.
    sampler = WeightedRandomSampler(
        weights=img_weights,
        num_samples=len(img_weights),
        replacement=True
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        shuffle=False,          # ordering is delegated to the sampler
        sampler=sampler,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    # ---- val loader (no sampling) ----
    coco_val = COCO(val_ann)
    val_ids = coco_val.getImgIds()

    val_dataset = CustomDataset(
        coco=coco_val,
        img_ids=val_ids,
        data_dir=data_dir,
        transforms=get_valid_transform()
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=val_batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    return train_loader, val_loader, coco_val

In [None]:
from pycocotools.cocoeval import COCOeval

def evaluate(model, data_loader, coco_gt, device):
    """Score the model's detections against ``coco_gt`` with COCO bbox metrics.

    The model is switched to eval mode and left there; callers that keep
    training must call ``model.train()`` themselves.

    Args:
        model: Detection model; called with a list of image tensors and
            expected to return one dict per image with ``boxes``
            (``[x_min, y_min, x_max, y_max]``), ``scores`` and ``labels``.
        data_loader: Iterable of ``(images, targets, image_ids)`` batches.
        coco_gt: Ground-truth COCO object used for ``loadRes`` / ``COCOeval``.
        device: Device the images are moved to before inference.

    Returns:
        ``coco_eval.stats`` after ``summarize()``, or ``None`` when there
        were no detections to score.
    """
    model.eval()
    results = []

    with torch.no_grad():
        for images, _targets, image_ids in data_loader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for out, img_id in zip(outputs, image_ids):
                boxes = out['boxes'].cpu().numpy()
                scores = out['scores'].cpu().numpy()
                labels = out['labels'].cpu().numpy()

                # Labels 1~10 are mapped back to COCO category ids 0~9.
                for box, score, label in zip(boxes, scores, labels):
                    x_min, y_min, x_max, y_max = box
                    w = x_max - x_min
                    h = y_max - y_min
                    results.append({
                        'image_id': int(img_id),
                        'category_id': int(label - 1),
                        'bbox': [float(x_min), float(y_min), float(w), float(h)],
                        'score': float(score)
                    })

    if not results:
        # loadRes() indexes the first result and fails on an empty list
        # (e.g. an untrained model or an empty loader), so stop here.
        print("No detections produced; skipping COCO evaluation.")
        return None

    coco_dt = coco_gt.loadRes(results)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()   # prints mAP@[0.5:0.95] and related metrics
    return coco_eval.stats

# Main

In [None]:
def main():
    """Build the loaders, model and optimizer, then train for 12 epochs."""
    # 1) Train/val data loaders.
    train_loader, val_loader, coco_val = build_loaders()

    # 2) Device, model, optimizer.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)

    num_classes = 11  # 10 classes + background
    model = create_model(num_classes=num_classes)
    model.to(device)

    trainable = [param for param in model.parameters()
                 if param.requires_grad]
    optimizer = torch.optim.SGD(
        trainable,
        lr=0.005,
        momentum=0.9,
        weight_decay=0.0005,
    )

    # 3) Training; train_fn also runs validation after every epoch.
    num_epochs = 12
    train_fn(num_epochs, train_loader, val_loader, coco_val,
             optimizer, model, device)

In [7]:
# Script entry point: run the full training pipeline when executed directly.
if __name__ == '__main__':
    main()

loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
cuda


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /data/ephemeral/home/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:02<00:00, 69.7MB/s] 
100%|██████████| 306/306 [04:34<00:00,  1.11it/s]


Epoch #1 loss: 0.654836126353616


100%|██████████| 306/306 [04:33<00:00,  1.12it/s]


Epoch #2 loss: 0.5142101999980951


100%|██████████| 306/306 [04:34<00:00,  1.11it/s]


Epoch #3 loss: 0.4757283618247587


100%|██████████| 306/306 [04:33<00:00,  1.12it/s]


Epoch #4 loss: 0.4529518393519657


100%|██████████| 306/306 [04:34<00:00,  1.12it/s]


Epoch #5 loss: 0.43425681473675115


100%|██████████| 306/306 [04:33<00:00,  1.12it/s]


Epoch #6 loss: 0.41960378923739483


100%|██████████| 306/306 [04:32<00:00,  1.12it/s]


Epoch #7 loss: 0.40160997638028434


100%|██████████| 306/306 [04:32<00:00,  1.12it/s]


Epoch #8 loss: 0.3920603515762909


100%|██████████| 306/306 [04:33<00:00,  1.12it/s]


Epoch #9 loss: 0.3810855712668568


100%|██████████| 306/306 [04:34<00:00,  1.11it/s]


Epoch #10 loss: 0.36799731767839855


100%|██████████| 306/306 [04:33<00:00,  1.12it/s]


Epoch #11 loss: 0.35614126367993604


100%|██████████| 306/306 [04:32<00:00,  1.12it/s]


Epoch #12 loss: 0.34828214505626487
