In [1]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

import numpy as np
import cv2
import os

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch
import torch.nn.functional as F
from torch import Tensor
from torch.utils.data import DataLoader, Dataset

# faster rcnn model이 포함된 library
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.roi_heads import fastrcnn_loss
from torchvision.models.detection.rpn import concat_box_prediction_layers

import pandas as pd
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import CosineAnnealingLR

import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from typing import Tuple, List, Dict, Optional
from collections import OrderedDict

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import wandb
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.loggers import WandbLogger

wandb.init(project='SEO_project_01')

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mtjwlssla1[0m ([33msihari1115-chung-ang-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
import random
from torchvision import transforms
from pycocotools.coco import COCO

class CustomDataset(Dataset):
    '''
      data_dir: data가 존재하는 폴더 경로
      transforms: data transform (resize, crop, Totensor, etc,,,)
      train: True일 경우 훈련 데이터, False일 경우 검증 데이터
    '''

    def __init__(self, annotation, data_dir, transforms=None, train=True, split_ratio=0.8):
        super().__init__()
        self.data_dir = data_dir
        self.coco = COCO(annotation)
        
        # 이미지 IDs 가져오기
        self.img_ids = self.coco.getImgIds()

        # 데이터 분할
        random.shuffle(self.img_ids)  # 무작위로 섞기
        split_idx = int(len(self.img_ids) * split_ratio)
        if train:
            self.img_ids = self.img_ids[:split_idx]  # 훈련 데이터
        else:
            self.img_ids = self.img_ids[split_idx:]  # 검증 데이터

        self.transforms = transforms

    def __getitem__(self, index: int):
        image_id = self.img_ids[index]  # 분할된 이미지 ID 사용
        image_info = self.coco.loadImgs(image_id)[0]
        
        image = cv2.imread(os.path.join(self.data_dir, image_info['file_name']))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        ann_ids = self.coco.getAnnIds(imgIds=image_info['id'])
        anns = self.coco.loadAnns(ann_ids)

        boxes = np.array([x['bbox'] for x in anns])

        # boxes (x_min, y_min, x_max, y_max)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        
        # class_id를 1~10으로 수정 
        labels = np.array([x['category_id'] + 1 for x in anns]) 
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        areas = np.array([x['area'] for x in anns])
        areas = torch.as_tensor(areas, dtype=torch.float32)
                                  
        is_crowds = np.array([x['iscrowd'] for x in anns])
        is_crowds = torch.as_tensor(is_crowds, dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([image_id]),
            'area': areas,
            'iscrowd': is_crowds
        }

        # transform
        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': labels
            }
            sample = self.transforms(**sample)
            image = sample['image']
            target['boxes'] = torch.tensor(sample['bboxes'], dtype=torch.float32)

        return image, target, image_id
    
    def __len__(self) -> int:
        return len(self.img_ids)  # 나눠진 데이터의 길이 반환


In [4]:
# 학습 시 적용할 데이터 증강 기법 정의
def get_train_transform():
    return A.Compose([
        A.Resize(1024, 1024),
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

# 검증 시 적용할 전처리 정의
def get_valid_transform():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

In [5]:
from torchvision.models.detection.retinanet import RetinaNetHead
from torchvision.models.detection import retinanet_resnet50_fpn

class RetinaNetLightning(pl.LightningModule):
    def __init__(self, num_classes):
        super().__init__()
        # 사전 학습된 RetinaNet 모델을 불러옴
        self.model = retinanet_resnet50_fpn(pretrained=True)
        in_features = self.model.head.classification_head.cls_logits.in_channels
        num_anchors = self.model.head.classification_head.num_anchors
        
        # 새로운 classification head로 교체 (num_classes를 맞춤)
        self.model.head.classification_head.cls_logits = torch.nn.Conv2d(in_features, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)

    def forward(self, images, targets=None):
        return self.model(images, targets)

    def training_step(self, batch, batch_idx):
        images, targets = batch
        loss_dict = self.model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        self.log_dict(loss_dict)
        return losses

    def validation_step(self, batch, batch_idx):
        images, targets = batch
        loss_dict = self.model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        self.log_dict({'val_loss': losses})
        return losses

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
        return optimizer


In [6]:
class COCODataModule(pl.LightningDataModule):
    def __init__(self, train_dir, train_annotations, test_dir, test_annotations, batch_size=4, num_workers=4):
        super().__init__()
        self.train_dir = train_dir
        self.train_annotations = train_annotations
        self.test_dir = test_dir
        self.test_annotations = test_annotations
        self.batch_size = batch_size
        self.num_workers = num_workers

    def setup(self, stage=None):
        self.train_dataset = COCODataset(self.train_dir, self.train_annotations, transforms=get_transforms())
        self.test_dataset = COCODataset(self.test_dir, self.test_annotations, transforms=get_transforms())

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, collate_fn=self.collate_fn)

    def val_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers, collate_fn=self.collate_fn)

    @staticmethod
    def collate_fn(batch):
        return tuple(zip(*batch))

In [7]:
wandb_logger = WandbLogger(project='object_detection')

coco_data = COCODataModule(
    train_dir='/data/ephemeral/level2-objectdetection-cv-15/dataset',
    train_annotations='/data/ephemeral/level2-objectdetection-cv-15/dataset/train.json',
    test_dir='/data/ephemeral/level2-objectdetection-cv-15/dataset',
    test_annotations='/data/ephemeral/level2-objectdetection-cv-15/dataset/test.json',
    batch_size=4
)

model = RetinaNetLightning(num_classes=10)  # 11 classes (including background)

trainer = pl.Trainer(
    max_epochs=10,
    logger=wandb_logger,
    devices=1,  # GPU 사용
    accelerator='gpu'  # 'gpu' 가속기를 사용
)


trainer.fit(model, coco_data)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/conda/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:389: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | model | RetinaNet | 32.3 M
------------------------------------
32.1 M    Trainable params
222 K     Non-trainable params
32.3 M    Total params
129.339   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

IndexError: Caught IndexError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_664946/240978618.py", line 31, in __getitem__
    boxes[:, 2:] += boxes[:, :2]  # Convert width/height to x_max, y_max
IndexError: too many indices for tensor of dimension 1
