## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2
from matplotlib import pyplot as plt
import glob
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm import tqdm
import timm
from torchsummaryX import summary

from torch.utils.data import Dataset
from pycocotools.coco import COCO

## Config

In [2]:
from dotenv import load_dotenv
from setup import get_package_root_path
from src.global_exception_handler.v1 import GlobalExceptionHandler
from src.webhook.v1 import TeamsWebhook

# Load environment variables (e.g. from a local .env file) before reading them.
load_dotenv()

# NOTE: "pakage_name" is a misspelling of "package_name"; the name is left
# unchanged in case other cells or modules read it.
pakage_name = os.environ.get("PACKAGE_NAME")
root_path = get_package_root_path()

# Webhook notification URL. Falls back to an empty string when the variable is
# not set, instead of passing None to the webhook client.
webhook_url = os.environ.get("WEBHOOK_URL", "")
webhook = TeamsWebhook(webhook_url)

# Exception types that the global handler should process
except_tuple = (Exception,)
GlobalExceptionHandler(except_tuple=except_tuple, sender=webhook, name="dacon_cars")

<src.global_exception_handler.v1.GlobalExceptionHandler at 0x7f939412bee0>

In [3]:
!nvidia-smi

Wed Jun 14 15:00:25 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    On   | 00000000:81:00.0 Off |                  Off |
| 35%   55C    P0    89W / 300W |      1MiB / 49140MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Hyperparameter Setting

In [4]:
# Central run configuration: filesystem layout and training hyperparameters.
class CFG:
    root_path = root_path
    # Job id (usually kept identical to the notebook file name)
    job_id = "DETECTION_2"

    # Location of the source data
    data_path = f"{root_path}/data"

    # Where the artifacts of this training run are written
    outputs_path = f"{root_path}/outputs/{job_id}"
    predict_dir = f"{outputs_path}/predict"
    recorder_dir = f"{outputs_path}/recorder"

    # NOTE: "learning_late" is a misspelling of "learning rate"; the name is kept
    # because the optimizer setup reads CFG.learning_late.
    learning_late = 0.0001
    batch_size = 8
    epoch = 10


# Show the resolved configuration as the cell output.
CFG.__dict__

mappingproxy({'__module__': '__main__',
              'root_path': '/data/dacon_cars',
              'job_id': 'DETECTION_2',
              'data_path': '/data/dacon_cars/data',
              'outputs_path': '/data/dacon_cars/outputs/DETECTION_2',
              'predict_dir': '/data/dacon_cars/outputs/DETECTION_2/predict',
              'recorder_dir': '/data/dacon_cars/outputs/DETECTION_2/recorder',
              'learning_late': 0.0001,
              'batch_size': 8,
              'epoch': 10,
              '__dict__': <attribute '__dict__' of 'CFG' objects>,
              '__weakref__': <attribute '__weakref__' of 'CFG' objects>,
              '__doc__': None})

## CustomDataset

In [5]:
class COCODataset(Dataset):
    """Object-detection dataset backed by a COCO-format annotation file.

    Only images that have at least one annotation in the JSON *and* exist as a
    file directly inside ``data_path`` are kept, so the same annotation file can
    be shared by several image directories (e.g. one per fold).

    Each item is ``(image, target, path)`` where ``target`` holds ``boxes``
    (``[x_min, y_min, x_max, y_max]``), ``labels``, ``area`` and ``iscrowd``
    tensors.
    """

    def __init__(self, json_path, data_path, transforms=None):
        """
        Args:
            json_path: path of the COCO annotation JSON.
            data_path: directory that contains the image files.
            transforms: optional callable invoked as
                ``transforms(image=..., bboxes=..., category_ids=...)`` that
                returns a dict with the same keys. When ``None`` the raw image
                and boxes are returned unchanged.
        """
        self.coco = COCO(json_path)
        self.transforms = transforms
        self.data_path = data_path
        self.classes = [category["name"] for category in self.coco.cats.values()]

        # Drop entries that are annotated but whose image file is missing.
        # A set keeps the membership test O(1) per image.
        existing_files = set(os.listdir(f"{data_path}"))
        self.image_ids = [
            image_id
            for image_id in self.coco.imgToAnns.keys()
            if self.coco.loadImgs(image_id)[0]["file_name"] in existing_files
        ]

    def get_mean_std(self):
        """Print the per-channel mean and std averaged over all readable images.

        Pixel values are scaled to [0, 1] first. Images that OpenCV cannot read
        are skipped instead of crashing on ``None``.
        """
        channel_means = []
        channel_stds = []
        for image_id in self.image_ids:
            file_name = self.coco.loadImgs(image_id)[0]["file_name"]
            file_path = os.path.join(self.data_path, file_name)
            img = cv2.imread(file_path, cv2.IMREAD_COLOR)
            if img is None:  # unreadable or corrupt file
                continue
            img = img.astype(np.float32) / 255.0
            channel_means.append(np.mean(img, axis=(0, 1)))
            channel_stds.append(np.std(img, axis=(0, 1)))

        print(np.mean(channel_means, axis=0), np.mean(channel_stds, axis=0))

    def get_categories(self):
        """Return the COCO category mapping (``self.coco.cats``)."""
        return self.coco.cats

    def get_classes(self):
        """Return the category names in the order they appear in the COCO index."""
        return self.classes

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load one image with its annotations.

        Returns:
            ``(image, target, path)``. ``image`` is whatever the transform
            returns under ``"image"`` (or the raw OpenCV array when no transform
            is set).

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_id = self.image_ids[idx]
        file_name = self.coco.loadImgs(image_id)[0]["file_name"]
        path = os.path.join(self.data_path, file_name)
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"failed to read image: {path}")

        annot_ids = self.coco.getAnnIds(imgIds=image_id)
        annots = [x for x in self.coco.loadAnns(annot_ids) if x["image_id"] == image_id]

        # COCO stores [x, y, width, height]; convert to [x_min, y_min, x_max, y_max].
        # reshape keeps an (N, 4) layout even when there are no annotations.
        boxes = np.array([annot["bbox"] for annot in annots], dtype=np.float32)
        boxes = boxes.reshape(-1, 4)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        labels = np.array([annot["category_id"] for annot in annots], dtype=np.int32)

        # NOTE: area and iscrowd come straight from the annotations and are not
        # passed through the transform, so they are not updated if the transform
        # rescales or drops boxes.
        area = np.array([annot["area"] for annot in annots], dtype=np.float32)
        iscrowd = np.array([annot["iscrowd"] for annot in annots], dtype=np.uint8)

        if self.transforms is not None:
            transformed = self.transforms(
                image=image,
                bboxes=boxes,
                category_ids=labels,
            )
            image = transformed["image"]
            boxes = transformed["bboxes"]
            labels = transformed["category_ids"]

        target = {
            # reshape guarantees (N, 4) even if every box was removed
            "boxes": torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.as_tensor(labels, dtype=torch.int64),
            "area": torch.as_tensor(area, dtype=torch.float32),
            "iscrowd": torch.as_tensor(iscrowd, dtype=torch.uint8),
        }

        return image, target, path

## Transforms

In [6]:
from src.my_albumentations.v1 import CustomBBoxSafeRandomCrop

# Training-time augmentation pipeline. Boxes are passed in pascal_voc format
# and their labels travel in the "category_ids" field (see bbox_params below).
train_transform = A.Compose(
    [
        A.PadIfNeeded(1080, 1920, border_mode=0, value=(0, 0, 0)),
        A.ShiftScaleRotate(
            scale_limit=0.01,
            rotate_limit=5,
            border_mode=0,
            value=(0, 0, 0),
            p=1,
        ),
        A.HorizontalFlip(),
        A.ToGray(p=1),
        A.Equalize(by_channels=False),
        A.Downscale(
            interpolation=2,
        ),
        # A.ElasticTransform(
        #     border_mode=0,
        #     value=(0, 0, 0),
        # ),
        A.GaussNoise(p=0.9),
        # NOTE(review): HueSaturationValue runs after ToGray(p=1) - confirm that
        # re-introducing colour shifts on a grayscale image is intended.
        A.HueSaturationValue(p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.05, contrast_limit=0.05, p=1),
        A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), max_pixel_value=255.0),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="pascal_voc", label_fields=["category_ids"]),
)


# Validation pipeline: padding, grayscale, equalize and normalization only.
# NOTE(review): Equalize is left at its default probability here, so it is
# presumably not applied to every validation image - confirm whether p=1 (or
# removing it) is wanted for deterministic evaluation.
val_transform = A.Compose(
    [
        A.PadIfNeeded(1080, 1920, border_mode=0, value=(0, 0, 0)),
        A.ToGray(p=1),
        A.Equalize(by_channels=False),
        A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), max_pixel_value=255.0),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="pascal_voc", label_fields=["category_ids"]),
)

## 파일 랜덤 fold 나누기

In [7]:
import shutil
from src.image_eda.v1 import divide_list_evenly

# Build the per-fold train/val image directories once. The whole step is
# skipped when the first fold directory already exists.
if not os.path.exists(f"{CFG.data_path}/train_0"):
    data_path_list = sorted(glob.glob(f"{CFG.data_path}/train/*.png"))

    print(len(data_path_list))

    fold_count = 5

    # This cell runs before the global seed is set, so shuffle with a dedicated
    # seeded RNG to keep the fold assignment reproducible.
    random.Random(0).shuffle(data_path_list)
    list_chunked = divide_list_evenly(data_path_list, fold_count)

    for fold_index in range(fold_count):
        train_dir = f"{CFG.data_path}/train_{fold_index}"
        val_dir = f"{CFG.data_path}/val_{fold_index}"
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(val_dir, exist_ok=True)

        # Every chunk except the current one forms the training split.
        train_image_list = []
        for i in range(fold_count):
            if i == fold_index:
                continue
            train_image_list += list_chunked[i]
        print(len(train_image_list))
        for data_path in train_image_list:
            # Build the destination from the file name only; a plain
            # str.replace("train", ...) would also rewrite any other "train"
            # occurring in the directory path or the file name.
            shutil.copy(data_path, os.path.join(train_dir, os.path.basename(data_path)))

        val_image_list = list_chunked[fold_index]
        print(len(val_image_list))
        for data_path in val_image_list:
            shutil.copy(data_path, os.path.join(val_dir, os.path.basename(data_path)))

## Valid

In [8]:
from src.image_eda.v1 import tensor2im, apply_bbox, put_text
from src.rcnn_utils.v1 import eval_forward


def valid(model, data_loader, device):
    """Compute the mean validation loss over ``data_loader``.

    Every 10th batch, the first image of the batch is written to
    ``{CFG.root_path}/temp/valid_img.jpg`` with the predictions whose score is
    above 0.7 drawn on it, as a quick visual sanity check.

    Args:
        model: detection model handed to ``eval_forward``.
        data_loader: loader yielding ``(images, targets, paths)`` tuples.
        device: device the images and targets are moved to.

    Returns:
        Mean of the summed per-batch losses (``np.mean`` over all batches).

    NOTE(review): the model's train/eval mode is not changed here; presumably
    ``eval_forward`` takes care of producing both losses and detections -
    confirm against its implementation.
    """
    score_threshold = 0.7
    debug_dir = f"{CFG.root_path}/temp"
    # cv2.imwrite fails silently when the target directory is missing.
    os.makedirs(debug_dir, exist_ok=True)

    val_loss = []
    with torch.no_grad():
        for batch_index, (images, targets, paths) in enumerate(tqdm(data_loader)):
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            losses_dict, detections = eval_forward(model, images, targets)

            if batch_index % 10 == 0:
                temp_img = tensor2im(images[0].detach().cpu())

                boxes = detections[0]["boxes"].detach().cpu().int().numpy()
                labels = detections[0]["labels"].detach().cpu().int().numpy()
                scores = detections[0]["scores"].detach().cpu().tolist()

                # Select confident predictions by position, so the result does
                # not depend on the detections being sorted by score.
                keep = [i for i, score in enumerate(scores) if score > score_threshold]

                if keep:
                    temp_img = apply_bbox(
                        temp_img,
                        labels[keep],
                        boxes[keep].astype(np.int32).tolist(),
                        data_loader.dataset.get_classes(),
                        [scores[i] for i in keep],
                    )

                temp_img = put_text(
                    temp_img, os.path.basename(paths[0]), (0, 0), [0, 0, 255]
                )
                cv2.imwrite(f"{debug_dir}/valid_img.jpg", temp_img)

            losses = sum(loss for loss in losses_dict.values())

            val_loss.append(losses.item())

    validation_loss = np.mean(val_loss)
    return validation_loss

## Train

In [9]:
def train(model, optimizer, data_loader, device, grad_scaler=None):
    """Run one training epoch and return the mean batch loss.

    Every 10th batch, the first image of the batch is written to
    ``{CFG.root_path}/temp/train_img.jpg`` with its ground-truth boxes drawn on
    it, so the augmentation output can be inspected while training.

    Args:
        model: detection model; called as ``model(images, targets)`` and
            expected to return a dict of losses.
        optimizer: optimizer stepped once per batch.
        data_loader: loader yielding ``(images, targets, paths)`` tuples.
        device: device the model, images and targets are moved to.
        grad_scaler: optional ``torch.cuda.amp.GradScaler``. When given, the
            forward pass runs under autocast and the scaler drives the
            backward pass and optimizer step.

    Returns:
        Mean of the summed per-batch losses (``np.mean`` over all batches).
    """
    debug_dir = f"{CFG.root_path}/temp"
    # cv2.imwrite fails silently when the target directory is missing.
    os.makedirs(debug_dir, exist_ok=True)

    model.to(device)
    model.train()
    train_loss = []
    for batch_index, (images, targets, paths) in enumerate(tqdm(data_loader)):
        if batch_index % 10 == 0:
            # Done before the batch is moved to the device, so the tensors can
            # be converted to numpy directly.
            temp_img = tensor2im(images[0].detach().cpu())
            debug_labels = targets[0]["labels"].int().numpy()
            debug_boxes = targets[0]["boxes"].int().numpy()

            temp_img = apply_bbox(
                temp_img,
                debug_labels,
                debug_boxes,
                data_loader.dataset.get_classes(),
            )

            temp_img = put_text(
                temp_img, os.path.basename(paths[0]), (0, 0), [0, 0, 255]
            )
            cv2.imwrite(f"{debug_dir}/train_img.jpg", temp_img)

        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        if grad_scaler is None:
            losses = model(images, targets)
            loss: torch.Tensor = sum(loss for loss in losses.values())
            loss.backward()
            optimizer.step()
        else:
            with torch.cuda.amp.autocast():
                losses = model(images, targets)
                loss: torch.Tensor = sum(loss for loss in losses.values())

            grad_scaler.scale(loss).backward()
            grad_scaler.step(optimizer)
            grad_scaler.update()

        train_loss.append(loss.item())

    _train_loss = np.mean(train_loss)
    return _train_loss

## Model Define

In [10]:
from src.rcnn_utils.v1 import CustomRCNNTransform
from torchvision.models.detection.faster_rcnn import (
    FastRCNNPredictor,
    FasterRCNN,
    AnchorGenerator,
)
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone


def create_model(num_classes: int):
    """Assemble a Faster R-CNN detector on a ResNeXt-50 (32x4d) FPN backbone.

    Args:
        num_classes: number of output classes passed to both ``FasterRCNN`` and
            the replacement ``FastRCNNPredictor`` head.

    Returns:
        The configured ``FasterRCNN`` model, with its input transform replaced
        by the project's ``CustomRCNNTransform``.
    """
    fpn_backbone = resnet_fpn_backbone(
        backbone_name="resnext50_32x4d",
        weights=models.ResNeXt50_32X4D_Weights.DEFAULT,
        trainable_layers=5,
    )

    # Five anchor groups, one size each, all sharing the same near-square ratios.
    level_sizes = tuple((size,) for size in (230, 270, 310, 350, 390))
    level_ratios = ((0.8, 0.9, 1, 1.1),) * len(level_sizes)
    anchors = AnchorGenerator(sizes=level_sizes, aspect_ratios=level_ratios)

    # IoU thresholds for the RPN and the box head; the positive-fraction
    # arguments are intentionally not passed.
    iou_thresholds = {
        "rpn_fg_iou_thresh": 0.85,
        "rpn_bg_iou_thresh": 0.4,
        "box_fg_iou_thresh": 0.85,
        "box_bg_iou_thresh": 0.4,
    }
    detector = FasterRCNN(
        fpn_backbone,
        rpn_anchor_generator=anchors,
        num_classes=num_classes,
        **iou_thresholds,
    )

    detector.transform = CustomRCNNTransform()

    # Swap in a fresh box predictor sized from the existing classification layer.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

## Snapshot Notebook

In [11]:
import shutil

# Locate this notebook's file so that a copy can be stored next to the outputs.
try:
    import IPython

    # "__vsc_ipynb_file__" is presumably injected by the VS Code Jupyter
    # extension; it is absent in other front ends.
    notebook_path = IPython.extract_module_locals()[1]["__vsc_ipynb_file__"]
except Exception:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit still propagate. Fall back to the conventional
    # "<job_id>.ipynb" in the current working directory.
    notebook_path = f"{os.getcwd()}/{CFG.job_id}.ipynb"


os.makedirs(CFG.outputs_path, exist_ok=True)
shutil.copy(notebook_path, f"{CFG.outputs_path}/{os.path.split(notebook_path)[1]}")

'/data/dacon_cars/outputs/DETECTION_2/DETECTION_2.ipynb'

## Run!!

In [12]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [13]:
from src.random_seed.v1 import seed_everything, seed_worker

# Seed with 0 before any loader or model is created (seed_everything is a
# project helper; the training loop calls it again with the epoch index).
seed_everything(0)

In [14]:
# Takes longer, so the AMP GradScaler is left disabled (train() then uses the
# plain full-precision path).
grad_scaler = None  # torch.cuda.amp.GradScaler()

In [15]:
from src.recorder.v1 import Recorder
from time import time


def collate_fn(batch):
    """Transpose a batch of per-sample tuples into one tuple per field.

    ``[(img0, tgt0, p0), (img1, tgt1, p1)]`` becomes
    ``((img0, img1), (tgt0, tgt1), (p0, p1))``, so samples are grouped by
    field without being stacked.
    """
    fields = zip(*batch)
    return tuple(fields)


# Train one model per fold. Each fold has its own image directories but shares
# the single train.json annotation file (the dataset filters by existing files).
for fold_index in range(5):
    train_dataset = COCODataset(
        f"{CFG.data_path}/annotations/train.json",
        f"{CFG.data_path}/train_{fold_index}",
        train_transform,
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=CFG.batch_size,
        shuffle=True,
        num_workers=16,
        pin_memory=True,
        drop_last=False,
        worker_init_fn=seed_worker,
        collate_fn=collate_fn,
    )

    val_dataset = COCODataset(
        f"{CFG.data_path}/annotations/train.json",
        f"{CFG.data_path}/val_{fold_index}",
        val_transform,
    )
    # NOTE(review): shuffle=True on the validation loader does not change the
    # mean loss but does change which images valid() writes as debug output -
    # confirm this is wanted.
    val_loader = DataLoader(
        val_dataset,
        batch_size=CFG.batch_size,
        shuffle=True,
        num_workers=16,
        pin_memory=True,
        drop_last=False,
        worker_init_fn=seed_worker,
        collate_fn=collate_fn,
    )

    # One extra class on top of the dataset categories (presumably background).
    model = create_model(len(train_dataset.get_categories()) + 1)
    # Only move to the selected device; an unconditional model.cuda() would
    # break the CPU fallback chosen when `device` was created.
    model.to(device)
    optimizer = torch.optim.AdamW(params=model.parameters(), lr=CFG.learning_late)
    scheduler = None

    recorder = Recorder(
        f"{CFG.recorder_dir}/fold_{fold_index}", model, optimizer, scheduler
    )
    print(f"fold_{fold_index} start")
    # Resume from the last checkpoint of this fold when one exists.
    if recorder.load_checkpoint(device, "checkpoint.pt"):
        print(f"loaded current_epoch: {recorder.current_epoch}")

    for epoch_index in range(recorder.current_epoch, CFG.epoch):
        # Re-seed per epoch so that a resumed run follows the same random stream.
        seed_everything(epoch_index)

        train_start_timestamp = time()
        train_loss = train(model, optimizer, train_loader, device, grad_scaler)
        train_elapsed_time = time() - train_start_timestamp

        val_start_timestamp = time()
        val_loss = valid(model, val_loader, device)
        val_elapsed_time = time() - val_start_timestamp

        recorder.update_row_dict("epoch", epoch_index + 1)
        recorder.update_row_dict("train_loss", train_loss)
        recorder.update_row_dict("val_loss", val_loss)
        recorder.update_row_dict("train_elapsed_time", train_elapsed_time)
        recorder.update_row_dict("val_elapsed_time", val_elapsed_time)
        recorder.flush_row_dict(is_print=True)
        recorder.save_line_plot(["loss"], [0, 0.1])

        if recorder.is_best_score(val_loss, "min"):
            print(f"best epoch: {epoch_index + 1}")
            recorder.save_checkpoint(epoch_index, "best_model.pt")

        recorder.save_checkpoint(epoch_index)

loading annotations into memory...
Done (t=0.18s)
creating index...
index created!
loading annotations into memory...
Done (t=0.04s)
creating index...
index created!
fold_0 start


 16%|█▌        | 102/648 [26:18<2:20:52, 15.48s/it] 


KeyboardInterrupt: 

## Test

In [None]:
# Inference-time preprocessing: the same steps as val_transform but without
# bbox_params, since test images carry no boxes.
# NOTE(review): Equalize is left at its default probability, so it is presumably
# not applied to every test image - confirm whether that is intended.
test_transform = A.Compose(
    [
        A.PadIfNeeded(1080, 1920, border_mode=0, value=(0, 0, 0)),
        A.ToGray(p=1),
        A.Equalize(by_channels=False),
        A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), max_pixel_value=255.0),
        ToTensorV2(),
    ],
)

In [None]:
class InferencePatchDataset(Dataset):
    """Dataset over pre-cut image patches and their ``(x, y)`` positions.

    ``X_list`` holds ``(image, x, y)`` triples. ``transforms`` is called as
    ``transforms(image=...)`` and must return a mapping with an ``"image"`` key.
    """

    def __init__(self, X_list, transforms):
        self.transforms = transforms
        self.X_list = X_list

    def __len__(self) -> int:
        return len(self.X_list)

    def __getitem__(self, index: int):
        """Return the transformed patch together with its untouched x and y."""
        patch, x, y = self.X_list[index]
        transformed = self.transforms(image=patch)
        return transformed["image"], x, y

In [None]:
def crop(img, x, y, w, h):
    """Cut a ``w`` x ``h`` patch whose top-left corner is at ``(x, y)``.

    When the requested window extends past the right or bottom edge of ``img``,
    the missing area is filled with zeros so the patch always has the requested
    size.

    Args:
        img: image array indexed as ``img[row, col, ...]``.
        x: left column of the patch.
        y: top row of the patch.
        w: patch width.
        h: patch height.

    Returns:
        ``(patch, x, y)``. Both code paths return the coordinates in this
        order. When the window lies fully inside the image, ``patch`` is a view
        into ``img``; otherwise it is a new zero-padded array with the dtype
        and trailing dimensions of ``img``.
    """
    # Slicing clamps at the image border, so this covers every overflow case.
    window = img[y : y + h, x : x + w]

    if window.shape[0] == h and window.shape[1] == w:
        # Window lies fully inside the image.
        return window, x, y

    # Zero padding for the part of the window that falls outside the image.
    padded = np.zeros((h, w) + img.shape[2:], dtype=img.dtype)
    padded[: window.shape[0], : window.shape[1]] = window

    return padded, x, y

In [None]:
# Build a throwaway dataset only to read the category list from the annotation
# file; no samples are drawn from it in this cell.
temp_dataset = COCODataset(
    f"{CFG.data_path}/annotations/train.json",
    f"{CFG.data_path}/train_0",
    train_transform,
)


# Same "+ 1" convention as used when the model is created for training.
num_classes = len(temp_dataset.get_categories()) + 1
classes = temp_dataset.get_classes()

In [None]:
# Rebuild the architecture and restore the fold-0 weights for inference.
model = create_model(num_classes)
# Only move to the selected device; an unconditional model.cuda() would break
# the CPU fallback chosen when `device` was created.
model.to(device)

# NOTE(review): this loads the latest checkpoint ("checkpoint.pt"), not
# "best_model.pt" - confirm which one inference should use.
check_point = torch.load(
    f"{CFG.recorder_dir}/fold_0/checkpoint.pt",
    map_location=device,
)
model.load_state_dict(check_point["model"])

model.eval()

In [None]:
from src.my_nms.v1 import non_max_suppression

# Run the detector on every test image and save one visualisation per image.
test_image_path = f"{CFG.data_path}/test"
test_image_list = sorted(glob.glob(f"{test_image_path}/*.png"))

result_save_path = f"{CFG.predict_dir}"
os.makedirs(result_save_path, exist_ok=True)


# A separate loop variable keeps `test_image_path` pointing at the directory.
for image_path in test_image_list:
    print(image_path)
    img = cv2.imread(image_path)
    if img is None:
        print(f"skip unreadable image: {image_path}")
        continue
    img = test_transform(image=img)["image"].to(device)

    # The model is called with a list of images, as in training, and returns
    # one result dict per image; no gradients are needed for inference.
    with torch.no_grad():
        result = model([img])[0]

    boxes = result["boxes"].detach().cpu().int().numpy().tolist()
    labels = result["labels"].detach().cpu().int().numpy()
    scores = result["scores"].detach().cpu().tolist()

    # Draw on the transformed (padded) image so that the predicted box
    # coordinates line up with the pixels, as valid() does.
    test_img = tensor2im(img.detach().cpu())
    test_img = apply_bbox(
        test_img,
        labels,
        boxes,
        classes,
        scores,
        [0, 0, 255],
    )

    # Swap only the extension; str.replace('png', 'jpg') would also rewrite any
    # other "png" inside the file name.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    cv2.imwrite(f"{result_save_path}/{stem}.jpg", test_img)