In [1]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

import numpy as np
import cv2
import os

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch
import torch.nn.functional as F
from torch import Tensor
from torch.utils.data import DataLoader, Dataset

# faster rcnn model이 포함된 library
import torchvision
from torchvision.models.detection.retinanet import retinanet_resnet50_fpn
from torchvision.models.detection.roi_heads import fastrcnn_loss
from torchvision.models.detection.rpn import concat_box_prediction_layers

import pandas as pd
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import CosineAnnealingLR

import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from typing import Tuple, List, Dict, Optional
from collections import OrderedDict

import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import wandb
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.loggers import WandbLogger

# Start the Weights & Biases run for this notebook under the 'SEO_project_01' project.
# NOTE(review): this executes at cell-run time and needs W&B credentials/network;
# a WandbLogger created while this run is still active appears to attach to it — confirm.
wandb.init(project='SEO_project_01')

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33maforalex98[0m ([33msihari1115-chung-ang-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
# Custom Dataset Class


class CustomDataset(Dataset):
    """COCO-format detection dataset with a reproducible train/validation split.

    The image ids of the annotation file are sorted, shuffled with a dedicated
    seeded RNG and cut at ``split_ratio``. Because the shuffle depends only on
    ``seed``, a ``train=True`` instance and a ``train=False`` instance built
    from the same annotation file are disjoint and together cover every image.

    Args:
        annotation: Path to the COCO annotation JSON file.
        data_dir: Directory that each image's ``file_name`` is joined onto.
        transforms: Optional callable (e.g. an albumentations ``Compose``)
            invoked with ``image``, ``bboxes`` and ``labels`` keyword arguments.
        train: ``True`` keeps the first ``split_ratio`` share of the shuffled
            ids, ``False`` keeps the remainder.
        split_ratio: Fraction of the images assigned to the training split.
        seed: Seed of the split shuffle; use the same value for both splits.
    """

    def __init__(self, annotation, data_dir, transforms=None, train=True, split_ratio=0.8, seed=42):
        super().__init__()
        self.data_dir = data_dir
        self.coco = COCO(annotation)
        self.img_ids = self._split_ids(self.coco.getImgIds(), train, split_ratio, seed)
        self.transforms = transforms

    @staticmethod
    def _split_ids(img_ids, train, split_ratio, seed):
        """Return the train or validation share of ``img_ids`` for ``seed``."""
        # Sort first so the result does not depend on the incoming order, and use a
        # private RNG so the global ``random`` state neither affects nor is affected.
        ordered = sorted(img_ids)
        random.Random(seed).shuffle(ordered)
        split_idx = int(len(ordered) * split_ratio)
        return ordered[:split_idx] if train else ordered[split_idx:]

    @staticmethod
    def _xywh_to_xyxy(anns):
        """Convert COCO ``[x_min, y_min, width, height]`` boxes to an ``(N, 4)`` xyxy array."""
        # reshape keeps the array 2-D even when the image has no annotations.
        boxes = np.array([x['bbox'] for x in anns], dtype=np.float64).reshape(-1, 4)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        return boxes

    def __getitem__(self, index: int):
        """Load one image together with its detection target.

        Returns:
            ``(image, target, image_id)``. ``target`` is a dict with ``boxes``
            (float32 tensor, ``(N, 4)``, xyxy), ``labels`` (int64 tensor,
            ``category_id + 1``), ``image_id``, ``area`` and ``iscrowd``.
            ``area`` and ``iscrowd`` are taken from the raw annotations and are
            not recomputed after ``transforms``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        image_id = self.img_ids[index]
        image_info = self.coco.loadImgs(image_id)[0]

        image_path = os.path.join(self.data_dir, image_info['file_name'])
        image = cv2.imread(image_path)

        # cv2.imread signals an unreadable file by returning None, not by raising.
        if image is None:
            raise FileNotFoundError(f"Image not found at path: {image_path}")

        # BGR -> RGB, then scale the pixel values by 1/255.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        ann_ids = self.coco.getAnnIds(imgIds=image_info['id'])
        anns = self.coco.loadAnns(ann_ids)

        boxes = self._xywh_to_xyxy(anns)
        labels = [x['category_id'] + 1 for x in anns]  # +1 to class_id
        areas = torch.as_tensor([x['area'] for x in anns], dtype=torch.float32)
        is_crowds = torch.as_tensor([x['iscrowd'] for x in anns], dtype=torch.int64)

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            # Take boxes AND labels from the transform output so they stay aligned
            # if the transform drops or reorders boxes.
            boxes = np.asarray(sample['bboxes'], dtype=np.float32)
            labels = np.asarray(sample['labels'])

        target = {
            # Always a tensor of shape (N, 4), including N == 0 and the no-transform path.
            'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64).reshape(-1),
            'image_id': torch.tensor([image_id]),
            'area': areas,
            'iscrowd': is_crowds
        }

        return image, target, image_id

    def __len__(self) -> int:
        """Number of images in this split."""
        return len(self.img_ids)

# Transform for training
def get_train_transform():
    """Build the training pipeline: resize to 1024x1024, flip with p=0.5, convert to tensor.

    Bounding boxes are handled in 'pascal_voc' format and 'labels' is the label
    field kept in sync with them.
    """
    bbox_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    steps = [
        A.Resize(1024, 1024),
        A.Flip(p=0.5),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, bbox_params=bbox_settings)

# Transform for validation
def get_valid_transform():
    """Build the validation pipeline: tensor conversion only, no augmentation.

    Bounding boxes are handled in 'pascal_voc' format and 'labels' is the label
    field kept in sync with them.
    """
    bbox_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps, bbox_params=bbox_settings)

# Custom collate function for the dataloader
def custom_collate_fn(batch):
    """Transpose a batch of per-sample tuples into one tuple per field.

    ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``; nothing is stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [4]:
class RetinaNetModel(pl.LightningModule):
    """LightningModule wrapping torchvision's RetinaNet (ResNet-50 FPN).

    The module keeps references to its datasets and builds both dataloaders
    itself, so ``Trainer.fit(model)`` needs no extra arguments.

    Args:
        train_dataset: Dataset yielding ``(image, target, image_id)`` for training.
        val_dataset: Dataset yielding the same triples for validation.
        batch_size: Batch size of both dataloaders.
        lr: Learning rate of every non-backbone parameter.
        lr_backbone: Learning rate of the backbone parameters.
        weight_decay: Weight decay applied to both parameter groups.
        num_workers: Worker processes used by each dataloader.
    """

    def __init__(self, train_dataset, val_dataset, batch_size=16, lr=5e-3, lr_backbone=5e-5,
                 weight_decay=5e-4, num_workers=7):
        super().__init__()
        # Initialise RetinaNet with the number of output classes.
        num_classes = 11  # 10 classes + 1 (background)
        self.model = retinanet_resnet50_fpn(pretrained=False, num_classes=num_classes)

        self.model.train()

        # Split trainable parameters by name so the backbone can use its own learning rate.
        self.param_dicts = {
            "etc": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad],
            "backbone": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad]
        }

        # hyperparameter
        self.batch_size = batch_size
        self.lr = lr
        self.lr_backbone = lr_backbone
        self.weight_decay = weight_decay
        self.num_workers = num_workers

        # dataset
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset

    def forward(self, images, targets=None):
        """Run the detector; ``targets`` are forwarded only when they are given and non-empty."""
        if targets:
            return self.model(images, targets)
        else:
            return self.model(images)

    def _log_losses(self, prefix, loss_dict, total, batch_size, **total_kwargs):
        """Log the summed loss as ``<prefix>_loss`` and each component as ``<prefix>_<name>``."""
        # The batch is a tuple of tuples, so the batch size is passed explicitly
        # instead of letting Lightning guess it from the batch structure.
        self.log(prefix + '_loss', total, prog_bar=True, batch_size=batch_size, **total_kwargs)
        for k, v in loss_dict.items():
            self.log(prefix + "_" + k, v.detach(), batch_size=batch_size)

    def training_step(self, batch, batch_idx):
        """Compute the summed detection loss for one training batch and log it."""
        images, targets, image_ids = batch

        loss_dict = self.model(images=images, targets=targets)
        losses = sum(loss_dict.values())

        self._log_losses('train', loss_dict, losses, len(images), on_epoch=True)

        return losses

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss for one batch.

        torchvision detection models only return the loss dict in training mode;
        in eval mode (which Lightning applies during validation) they return
        detections instead. The wrapped model is therefore switched to training
        mode for the forward pass, without gradients, and restored afterwards.
        """
        images, targets, image_ids = batch

        was_training = self.model.training
        # NOTE(review): training mode would also update running statistics of any
        # non-frozen BatchNorm layers — confirm the network has none.
        self.model.train()
        try:
            with torch.no_grad():
                loss_dict = self.model(images=images, targets=targets)
        finally:
            self.model.train(was_training)

        losses = sum(loss_dict.values())

        self._log_losses('val', loss_dict, losses, len(images), on_step=True, on_epoch=True)

        return losses

    def configure_optimizers(self):
        """SGD with separate backbone/head learning rates and cosine annealing."""
        optimizer = torch.optim.SGD(
            [
                {'params': self.param_dicts['backbone'], 'lr': self.lr_backbone, 'weight_decay': self.weight_decay},
                {'params': self.param_dicts['etc'], 'lr': self.lr, 'weight_decay': self.weight_decay}
            ]
        )

        # Learning-rate scheduler.
        lr_scheduler = CosineAnnealingLR(optimizer, T_max=4, eta_min=1e-6)

        return [optimizer], [lr_scheduler]

    def train_dataloader(self):
        """Shuffled loader over the training dataset."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            collate_fn=custom_collate_fn,
            num_workers=self.num_workers
        )

    def val_dataloader(self):
        """Sequential loader over the validation dataset."""
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            collate_fn=custom_collate_fn,
            num_workers=self.num_workers
        )

In [5]:
# Training function
def train_fn(model, max_steps=20000):
    """Fit ``model`` with checkpointing, early stopping and W&B logging.

    Args:
        model: LightningModule that provides its own dataloaders.
        max_steps: Upper bound on the number of optimisation steps.
    """
    # Keep the three checkpoints with the lowest validation loss (weights only).
    checkpoint_callback = ModelCheckpoint(
        dirpath="checkpoints/retinanet",
        filename="{epoch:02d}-{val_loss:.2f}",
        save_top_k=3,
        monitor="val_loss",
        mode="min",
        save_weights_only=True
    )

    # Stop once val_loss has not improved for 3 consecutive checks.
    early_stop_callback = EarlyStopping(
        monitor='val_loss',
        patience=3,
        verbose=False,
        mode='min'
    )

    # W&B logger.
    wandb_logger = WandbLogger()

    # Use the GPU when one is present, otherwise fall back to CPU instead of
    # failing at Trainer construction on a CPU-only machine.
    accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'

    trainer = Trainer(
        max_steps=max_steps,
        gradient_clip_val=3,
        callbacks=[checkpoint_callback, early_stop_callback],
        accelerator=accelerator,
        logger=wandb_logger,
    )
    trainer.fit(model)


In [6]:
# Main function
def main(annotation='../../../dataset/train.json', data_dir='../../../dataset', seed=42):
    """Build the train/validation datasets and the model, then start training.

    Args:
        annotation: Path to the COCO annotation JSON file.
        data_dir: Directory the annotation's image file names are relative to.
        seed: Seed used for the data split and for model initialisation.
    """
    train_transform = get_train_transform()
    valid_transform = get_valid_transform()

    # Reset the global RNG to the same state right before each dataset is built,
    # so both instances shuffle the image ids identically and the two splits
    # never share an image.
    random.seed(seed)
    train_dataset = CustomDataset(annotation, data_dir, transforms=train_transform, train=True)
    random.seed(seed)
    val_dataset = CustomDataset(annotation, data_dir, transforms=valid_transform, train=False)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    # Seed the Python, NumPy and torch RNGs before the model weights are created.
    pl.seed_everything(seed)

    # Initialise the model.
    model = RetinaNetModel(train_dataset, val_dataset, batch_size=16, lr=5e-3, lr_backbone=5e-5, weight_decay=5e-4)
    model.to(device)

    # Start training.
    train_fn(model)

if __name__ == '__main__':
    main()


loading annotations into memory...
Done (t=0.08s)
creating index...
index created!
loading annotations into memory...
Done (t=0.08s)
creating index...
index created!
cuda


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/conda/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:389: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | model | RetinaNet | 32.4 M
------------------------------------
32.1 M    Trainable params
222 K     Non-trainable params
32.4 M    Total params
129.422   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

[ WARN:0@7.336] global loadsave.cpp:248 findDecoder imread_('../../../dataset/train/train/2189.jpg'): can't open/read file: check file path/integrity
[ WARN:0@7.336] global loadsave.cpp:248 findDecoder imread_('../../../dataset/train/train/4681.jpg'): can't open/read file: check file path/integrity
[ WARN:0@7.342] global loadsave.cpp:248 findDecoder imread_('../../../dataset/train/train/0851.jpg'): can't open/read file: check file path/integrity
[ WARN:0@7.342] global loadsave.cpp:248 findDecoder imread_('../../../dataset/train/train/2821.jpg'): can't open/read file: check file path/integrity
[ WARN:0@7.342] global loadsave.cpp:248 findDecoder imread_('../../../dataset/train/train/3281.jpg'): can't open/read file: check file path/integrity
[ WARN:0@7.344] global loadsave.cpp:248 findDecoder imread_('../../../dataset/train/train/3018.jpg'): can't open/read file: check file path/integrity
[ WARN:0@7.344] global loadsave.cpp:248 findDecoder imread_('../../../dataset/train/train/2601.jpg')

FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_363843/1205715645.py", line 32, in __getitem__
    raise FileNotFoundError(f"Image not found at path: {image_path}")
FileNotFoundError: Image not found at path: ../../../dataset/train/train/2189.jpg
