## 라이브러리

In [1]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

import numpy as np
import cv2
import os

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch
import torch.nn.functional as F
from torch import Tensor
from torch.utils.data import DataLoader, Dataset

# faster rcnn model이 포함된 library
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.roi_heads import fastrcnn_loss
from torchvision.models.detection.rpn import concat_box_prediction_layers

import pandas as pd
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import CosineAnnealingLR

import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from typing import Tuple, List, Dict, Optional
from collections import OrderedDict

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import wandb
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.loggers import WandbLogger

wandb.init(project='SEO_project_01')

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33maforalex98[0m ([33msihari1115-chung-ang-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


## 데이터셋 생성

In [3]:
import random
from torchvision import transforms
from pycocotools.coco import COCO

class CustomDataset(Dataset):
    '''
      data_dir: data가 존재하는 폴더 경로
      transforms: data transform (resize, crop, Totensor, etc,,,)
      train: True일 경우 훈련 데이터, False일 경우 검증 데이터
    '''

    def __init__(self, annotation, data_dir, transforms=None, train=True, split_ratio=0.8):
        super().__init__()
        self.data_dir = data_dir
        self.coco = COCO(annotation)
        
        # 이미지 IDs 가져오기
        self.img_ids = self.coco.getImgIds()

        # 데이터 분할
        random.shuffle(self.img_ids)  # 무작위로 섞기
        split_idx = int(len(self.img_ids) * split_ratio)
        if train:
            self.img_ids = self.img_ids[:split_idx]  # 훈련 데이터
        else:
            self.img_ids = self.img_ids[split_idx:]  # 검증 데이터

        self.transforms = transforms

    def __getitem__(self, index: int):
        image_id = self.img_ids[index]  # 분할된 이미지 ID 사용
        image_info = self.coco.loadImgs(image_id)[0]
        
        image = cv2.imread(os.path.join(self.data_dir, image_info['file_name']))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        ann_ids = self.coco.getAnnIds(imgIds=image_info['id'])
        anns = self.coco.loadAnns(ann_ids)

        boxes = np.array([x['bbox'] for x in anns])

        # boxes (x_min, y_min, x_max, y_max)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        
        # class_id를 1~10으로 수정 
        labels = np.array([x['category_id'] + 1 for x in anns]) 
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        areas = np.array([x['area'] for x in anns])
        areas = torch.as_tensor(areas, dtype=torch.float32)
                                  
        is_crowds = np.array([x['iscrowd'] for x in anns])
        is_crowds = torch.as_tensor(is_crowds, dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([image_id]),
            'area': areas,
            'iscrowd': is_crowds
        }

        # transform
        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': labels
            }
            sample = self.transforms(**sample)
            image = sample['image']
            target['boxes'] = torch.tensor(sample['bboxes'], dtype=torch.float32)

        return image, target, image_id
    
    def __len__(self) -> int:
        return len(self.img_ids)  # 나눠진 데이터의 길이 반환


## 데이터 증강 정의

In [4]:
# 학습 시 적용할 데이터 증강 기법 정의
def get_train_transform():
    return A.Compose([
        A.Resize(1024, 1024),
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

# 검증 시 적용할 전처리 정의
def get_valid_transform():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

## 유틸 함수 정의

In [5]:
# 손실 계산하며 손실 최소값에 대한 체크포인트를 차후에 만드려는 함수인 듯
# 라이트닝에서 자동으로 체크포인트 저장해주니까 안써도 될 것 같음
class Averager:
    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        if self.iterations == 0:
            return 0
        else:
            return 1.0 * self.current_total / self.iterations

    def reset(self):
        self.current_total = 0.0
        self.iterations = 0.0

In [6]:
# dataloader에 전달하여 batch 생성 방법론 정의 (현재 기본 형태)
def custom_collate_fn(batch):
    return tuple(zip(*batch))

## 모델

In [7]:
class FasterRCNN(pl.LightningModule):
    def __init__(self, train_dataset, val_dataset, batch_size=16, lr=5e-3, lr_backbone=5e-5, weight_decay=5e-4):
        super().__init__()
        self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) # model load
        self.num_classes = 11 # 10 classes + 1 (background)

        # get number of input features for the classifier
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        # classifier 재정의 (위에서 계산한 in_features, num_classes)
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, self.num_classes)

        self.model.train()

        self.param_dicts = {
            "etc": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad],
            "backbone": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad]
            }


        # hyperparameter
        self.batch_size = batch_size
        self.lr = lr
        self.lr_backbone = lr_backbone
        self.weight_decay = weight_decay

        # dataset
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset

    def forward(self, images, targets=None):
        if targets:  # 모델이 학습 중일 때
            return self.model(images, targets)  # 손실을 포함한 결과 반환
        else:  # 모델이 추론 중일 때
            return self.model(images)  # 예측 결과 반환

    # 이해 굳이 안해도 되는 부분
    # faster r-cnn이 eval 모드에서 loss를 출력하지 않아 eval 모드에서 loss_dict을 함께 output으로 출력하는 코드 가져옴
    def eval_forward(self, images, targets=None):
        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """
        Args:
            images (list[Tensor]): images to be processed
            targets (list[Dict[str, Tensor]]): ground-truth boxes present in the image (optional)
        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                It returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).
        """
        model = self.model
        model.eval()

        original_image_sizes: List[Tuple[int, int]] = []
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2
            original_image_sizes.append((val[0], val[1]))

        images, targets = model.transform(images, targets)

        # Check for degenerate boxes
        # TODO: Move this to a function
        if targets is not None:
            for target_idx, target in enumerate(targets):
                boxes = target["boxes"]
                degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
                if degenerate_boxes.any():
                    # print the first degenerate box
                    bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                    degen_bb: List[float] = boxes[bb_idx].tolist()
                    raise ValueError(
                        "All bounding boxes should have positive height and width."
                        f" Found invalid box {degen_bb} for target at index {target_idx}."
                    )

        features = model.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([("0", features)])
        model.rpn.training=True
        #model.roi_heads.training=True


        #####proposals, proposal_losses = model.rpn(images, features, targets)
        features_rpn = list(features.values())
        objectness, pred_bbox_deltas = model.rpn.head(features_rpn)
        anchors = model.rpn.anchor_generator(images, features_rpn)

        num_images = len(anchors)
        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
        num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]
        objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness, pred_bbox_deltas)
        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN do not backprop through
        # the proposals
        proposals = model.rpn.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)
        proposals, scores = model.rpn.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)

        proposal_losses = {}
        assert targets is not None
        labels, matched_gt_boxes = model.rpn.assign_targets_to_anchors(anchors, targets)
        regression_targets = model.rpn.box_coder.encode(matched_gt_boxes, anchors)
        loss_objectness, loss_rpn_box_reg = model.rpn.compute_loss(
            objectness, pred_bbox_deltas, labels, regression_targets
        )
        proposal_losses = {
            "loss_objectness": loss_objectness,
            "loss_rpn_box_reg": loss_rpn_box_reg,
        }

        #####detections, detector_losses = model.roi_heads(features, proposals, images.image_sizes, targets)
        image_shapes = images.image_sizes
        proposals, matched_idxs, labels, regression_targets = model.roi_heads.select_training_samples(proposals, targets)
        box_features = model.roi_heads.box_roi_pool(features, proposals, image_shapes)
        box_features = model.roi_heads.box_head(box_features)
        class_logits, box_regression = model.roi_heads.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        detector_losses = {}
        loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
        detector_losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
        boxes, scores, labels = model.roi_heads.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
        num_images = len(boxes)
        for i in range(num_images):
            result.append(
                {
                    "boxes": boxes[i],
                    "labels": labels[i],
                    "scores": scores[i],
                }
            )
        detections = result
        detections = model.transform.postprocess(detections, images.image_sizes, original_image_sizes)  # type: ignore[operator]
        model.rpn.training=False
        model.roi_heads.training=False
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses, detections

    def training_step(self, batch, batch_idx):
        # 배치에서 이미지를 가져오기
        # image: torch.Size([3, 1024, 1024])
        # target: {
        # 'boxes': list of tensor, 
        # 'labels': list of int, 
        # 'image_id': int, 
        # 'area': list of tensor, 
        # 'iscrowd': list of int (0 or 1)
        # }
        # image_id: int
        images, targets, image_ids = batch

        # 모델의 forward pass
        loss_dict = self.model(images=images, targets=targets)
        
        #print(f'{loss_dict}')

        # 총 학습 손실 계산
        losses = sum(loss for loss in loss_dict.values())

        #print(f'{losses}')

        # 학습 손실 로깅
        self.log('train_loss', losses, on_epoch=True, prog_bar=True)

        # 각 학습 손실 로깅
        for k, v in loss_dict.items():
            self.log("train_" + k, v.item())

        return losses

    def validation_step(self, batch, batch_idx):
        # training_step과 동일한 batch

        images, targets, image_ids = batch

        # 모델의 forward pass
        loss_dict, _ = self.eval_forward(images=images, targets=targets)

        losses = sum(loss for loss in loss_dict.values())  # 손실 합산
        
        # 검증 손실 로깅
        self.log('val_loss', losses, on_step=True, on_epoch=True, prog_bar=True)

        # 각 검증 손실 로깅
        for k, v in loss_dict.items():
            self.log("val_" + k, v.item())

        return losses

    def configure_optimizers(self):
        # Optimizer와 scheduler 설정
        optimizer = torch.optim.SGD(
            [
                {'params': self.param_dicts['backbone'], 'lr': self.lr_backbone, 'weight_decay': self.weight_decay},  # Classifier에 대한 설정
                {'params': self.param_dicts['etc'], 'lr': self.lr, 'weight_decay': self.weight_decay}  # Swin에 대한 설정
            ]
        )
        
        # 학습률 스케줄러 설정
        lr_scheduler = CosineAnnealingLR(optimizer, T_max=4, eta_min=1e-6)

        return [optimizer], [lr_scheduler]

    def train_dataloader(self):
        # Define train_loader
        return DataLoader(
            self.train_dataset, 
            batch_size=self.batch_size, 
            shuffle=True, 
            collate_fn=custom_collate_fn,
            num_workers=7
            )

    def val_dataloader(self):
        # Define val_loader
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            collate_fn=custom_collate_fn,
            num_workers=7
            )

## 학습

In [8]:
def train_fn(model, max_steps=20000):
    # checkpoint 콜백 함수 정의
    checkpoint_callback = ModelCheckpoint(
        dirpath="checkpoints/fasterrcnn",  # 체크포인트 저장 경로
        filename="{epoch:02d}-{val_loss:.2f}",  # 저장될 파일명 포맷
        save_top_k=3,  # 상위 몇 개의 모델만 저장할지
        monitor="val_loss",  # 검증 손실을 모니터링하여 체크포인트 저장
        mode="min",  # 손실이 가장 적을 때 저장 (최소화)
        save_weights_only=True  # 전체 모델을 저장 (가중치만 저장하려면 True)
    )

    # earlystop 콜백 함수 정의
    early_stop_callback = EarlyStopping(
        monitor='val_loss',  # 모니터링할 지표 (예: val_loss)
        patience=3,          # 개선이 없으면 학습을 멈추기까지 대기할 epoch 수
        verbose=False,        # 로그 출력 여부
        mode='min'           # 지표를 최소화할지('min') 또는 최대화할지('max')
    )

    # Wandb 로거 생성
    wandb_logger = WandbLogger()

    trainer = Trainer(max_steps=max_steps, gradient_clip_val=3, callbacks=[checkpoint_callback, early_stop_callback], accelerator='gpu', logger=wandb_logger)
    trainer.fit(model)

## 메인

In [9]:
def main():
    # 데이터셋 불러오기
    annotation = '../../../dataset/train.json' # annotation 경로
    data_dir = '../../../dataset' # data_dir 경로

    # CustomDataset 인스턴스 생성
    train_dataset = CustomDataset(annotation, data_dir, transforms=get_train_transform(), train=True)
    val_dataset = CustomDataset(annotation, data_dir, transforms=get_valid_transform(), train=False)
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(device)
    
    # load model
    model = FasterRCNN(train_dataset, val_dataset, batch_size=16, lr=5e-3, lr_backbone=5e-5, weight_decay=5e-4)
    model.to(device)
    
    # training
    train_fn(model)

In [10]:
if __name__ == '__main__':
    main()

loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
loading annotations into memory...
Done (t=0.25s)
creating index...
index created!
cuda


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/conda/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:389: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
/opt/conda/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /data/ephemeral/level2-objectdetection-cv-15/jseo/baseline/faster_rcnn/checkpoints/fasterrcnn exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | FasterRCNN | 41.3 M
-------------------------------------
41.1 M    Trainable params
222 K     Non-trainable params
41.3 M    Total params
165.381   Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:02<00:02,  0.47it/s]

/opt/conda/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 3. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 3:  26%|██▌       | 63/245 [01:01<02:56,  1.03it/s, v_num=zijo, train_loss_step=0.619, val_loss_step=0.561, val_loss_epoch=0.671, train_loss_epoch=0.666] 

/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
