### import 및 설정값

In [2]:
import os
os.environ["WANDB_DISABLED"] = "True"

import pandas as pd
import numpy as np
import cv2
import shutil
import yaml
import warnings
warnings.filterwarnings("ignore")

from ultralytics import YOLO
from glob import glob
from tqdm import tqdm
from IPython.display import clear_output
from sklearn.model_selection import train_test_split

In [3]:
# Random seed; passed as `random_state` to train_test_split when the
# train/valid split is made.
SEED: int = 42
# NOTE(review): not referenced in the visible cells -- presumably the training
# batch size used further down the notebook; confirm.
BATCH_SIZE: int = 8
# NOTE(review): not referenced in the visible cells -- presumably a tag for the
# model/run variant; confirm.
MODEL: str = "v2"

### 라벨링 데이터 -> YOLO 라벨링 데이터 변경 & 데이터셋 나누기

In [4]:
# Start from a clean YOLO dataset tree so files left over from a previous run
# cannot leak into the freshly generated train/valid/test splits.
if os.path.exists("data/yolo"):
    shutil.rmtree("data/yolo")

# exist_ok=True makes creation idempotent and avoids the check-then-create
# race; "results" is intentionally kept across runs if it already exists.
for directory in (
    "data/yolo/train",
    "data/yolo/valid",
    "data/yolo/test",
    "results",
):
    os.makedirs(directory, exist_ok=True)

In [8]:
def _quad_to_yolo_label(line, image_width, image_height):
    """Convert one "class x1 y1 x2 y2 x3 y3 x4 y4" line to a YOLO label line.

    Args:
        line: Raw text line holding a class id followed by four (x, y) corner
            points in pixel coordinates, separated by whitespace.
        image_width: Width of the image in pixels, used to normalise x and w.
        image_height: Height of the image in pixels, used to normalise y and h.

    Returns:
        The string "class x_center y_center width height" with coordinates
        normalised by the image size, or None when the line is blank.

    Raises:
        ValueError: If the line has fewer than nine fields or a field is not
            numeric.
    """
    fields = line.split()
    if not fields:
        return None
    if len(fields) < 9:
        raise ValueError(f"Expected a class id and 8 coordinates, got: {line!r}")

    values = [float(field) for field in fields]
    class_name = int(values[0])

    # Take the extremes over all four corners so the box stays correct
    # whatever the corner order is and for rotated quadrilaterals.
    xs = values[1:9:2]
    ys = values[2:9:2]
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    x = ((x_min + x_max) / 2) / image_width
    y = ((y_min + y_max) / 2) / image_height
    w = (x_max - x_min) / image_width
    h = (y_max - y_min) / image_height
    return f"{class_name} {x} {y} {w} {h}"


def make_yolo_dataset(image_paths, txt_paths, type="train"):
    """Copy images into data/yolo/<type>/ and write YOLO-format label files.

    Each image is re-encoded with OpenCV into ``data/yolo/<type>/``. Unless
    ``type`` is "test", the label file paired with the image is converted from
    four-corner pixel coordinates to normalised YOLO boxes and written next to
    the image under the label file's own base name.

    Args:
        image_paths: Paths of the source images.
        txt_paths: Paths of the label files, in the same order as
            ``image_paths``. Ignored (may be None) when ``type`` is "test".
        type: Name of the split sub-directory: "train", "valid" or "test".

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the images.
        ValueError: If a label line is malformed.
    """
    is_test = type == "test"
    # The test split has no labels; pair every image with itself so the loop
    # shape is the same for all splits (the placeholder is never opened).
    label_paths = image_paths if is_test else txt_paths

    for image_path, txt_path in tqdm(zip(image_paths, label_paths), total=len(image_paths)):
        source_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if source_image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image_height, image_width = source_image.shape[:2]

        target_image_path = f"data/yolo/{type}/{os.path.basename(image_path)}"
        cv2.imwrite(target_image_path, source_image)

        if is_test:
            continue

        yolo_labels = []
        with open(txt_path, "r") as reader:
            for line in reader:
                yolo_label = _quad_to_yolo_label(line, image_width, image_height)
                if yolo_label is not None:
                    yolo_labels.append(yolo_label)

        target_label_txt = f"data/yolo/{type}/{os.path.basename(txt_path)}"
        with open(target_label_txt, "w") as writer:
            writer.writelines(f"{yolo_label}\n" for yolo_label in yolo_labels)

In [9]:
# Gather the training images and their label files. The two sorted lists are
# paired by position, which assumes every image has a same-named .txt file.
image_paths = sorted(glob("datasets/train/*.png"))
txt_paths = sorted(glob("datasets/train/*.txt"))

# Hold out 10% of the labelled data for validation, seeded by SEED.
(
    train_images_paths,
    valid_images_paths,
    train_txt_paths,
    valid_txt_paths,
) = train_test_split(
    image_paths,
    txt_paths,
    test_size=0.1,
    random_state=SEED,
)

# Materialise each split under data/yolo/<split>/; the test split has no labels.
for split_name, split_images, split_labels in (
    ("train", train_images_paths, train_txt_paths),
    ("valid", valid_images_paths, valid_txt_paths),
    ("test", sorted(glob("datasets/test/*.png")), None),
):
    make_yolo_dataset(split_images, split_labels, split_name)

100%|██████████| 5832/5832 [22:24<00:00,  4.34it/s]
100%|██████████| 649/649 [02:30<00:00,  4.31it/s]
100%|██████████| 3400/3400 [11:02<00:00,  5.13it/s]


### yaml 파일 생성

In [10]:
# Read the class names: every non-blank line of data/classes.txt is
# comma-separated and its second field is taken as the class name.
with open("data/classes.txt", "r") as reader:
    lines = reader.readlines()
    classes = [line.strip().split(",")[1] for line in lines if line.strip()]

# Dataset description written to data/yolo/custom.yaml. "path" is resolved
# from the same relative data/yolo directory the earlier cells populate, so
# the notebook is not tied to one machine's absolute path.
yaml_data = {
    "names": classes,
    "nc": len(classes),
    "path": os.path.abspath("data/yolo"),
    "train": "train",
    "val": "valid",
    "test": "test",
}

with open("data/yolo/custom.yaml", "w") as writer:
    yaml.dump(yaml_data, writer)