In [1]:
import os
import glob
import shutil
from tqdm import tqdm

# import pandas as pd
import numpy as np
import yaml
import cv2

from sklearn.model_selection import StratifiedGroupKFold

In [2]:
# Name of the project root folder. The project root path is the part of the
# current working directory up to and including this folder.
project_dir_name = 'synthesis-car-od'
dir_len = len(project_dir_name)

# Normalise Windows separators so the substring search and the slice below
# work on the same kind of string on every platform.
path = os.getcwd().replace('\\', '/')
index = path.find(project_dir_name)

# str.find returns -1 when the folder name is absent; slicing with that value
# would silently produce a meaningless prefix of the cwd, so fail loudly.
if index == -1:
    raise RuntimeError(
        f"'{project_dir_name}' was not found in the current working directory "
        f"'{path}'; start the notebook from inside the project folder."
    )

# The +1 keeps the '/' that follows the folder name when the cwd is deeper
# than the project root.
working_dir = path[:index + dir_len + 1]
working_dir

'/Users/a16/Desktop/JJ/self_study/projects/synthesis-car-od/'

In [3]:
# All dataset folders live under <working_dir>/data/.
DATA_PATH = os.path.join(working_dir, 'data/')

# Input folders.
TRAIN_PATH, TEST_PATH = (
    os.path.join(DATA_PATH, sub_dir) for sub_dir in ('train/', 'test/')
)

# Output folders for the converted dataset.
DEST_PATH = os.path.join(DATA_PATH, 'yolo/')
DEST_IMAGE_PATH, DEST_LABEL_PATH = (
    os.path.join(DEST_PATH, sub_dir) for sub_dir in ('images/', 'labels/')
)

# Create the output folders up front; exist_ok makes the cell safe to re-run.
for dest_dir in (DEST_IMAGE_PATH, DEST_LABEL_PATH):
    os.makedirs(dest_dir, exist_ok=True)

# Make YOLO Data Structure

In [4]:
def _polygon_line_to_yolo(line, img_w, img_h):
    """Convert one ``class x1 y1 x2 y2 x3 y3 x4 y4`` line to a YOLO label line.

    The four points are reduced to their axis-aligned bounding box, which is
    returned as ``class x_center y_center width height`` with every value
    divided by the image width/height.
    """
    # split() with no argument tolerates repeated spaces and tabs.
    values = [float(value) for value in line.split()]

    label = int(values[0])
    bbox = values[1:]

    # Even positions hold x coordinates, odd positions hold y coordinates.
    xs = [bbox[i] for i in range(0, 8, 2)]
    ys = [bbox[i] for i in range(1, 8, 2)]

    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    normalized_x = (x_min + x_max) / (2 * img_w)
    normalized_y = (y_min + y_max) / (2 * img_h)

    normalized_box_w = (x_max - x_min) / img_w
    normalized_box_h = (y_max - y_min) / img_h

    return f'{label} {normalized_x} {normalized_y} {normalized_box_w} {normalized_box_h}'


def make_yolo_dataset(image_paths, txt_paths):
    """Write a YOLO-format label file for every (image, label) pair.

    Each image is read only to obtain its height and width. Every non-blank
    line of the matching label file is converted from a 4-point polygon to a
    normalised centre/size box, and the result is written to
    ``DEST_LABEL_PATH`` under the label file's original name.

    Args:
        image_paths: Image file paths, in the same order as ``txt_paths``.
        txt_paths: Label file paths, one per image.

    Raises:
        OSError: If OpenCV cannot read one of the images.
    """
    for image_path, txt_path in zip(tqdm(image_paths), txt_paths):
        img = cv2.imread(image_path)

        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            raise OSError(f'cv2.imread could not read image: {image_path}')

        # Only the first two dimensions are needed, so do not assume a
        # channel axis is present.
        img_h, img_w = img.shape[:2]

        with open(txt_path, 'r') as txt_file:
            # Blank lines (e.g. a trailing empty line) carry no annotation.
            yolo_labels = [
                _polygon_line_to_yolo(line, img_w, img_h)
                for line in txt_file
                if line.strip()
            ]

        dest_label_path = os.path.join(DEST_LABEL_PATH, os.path.basename(txt_path))

        with open(dest_label_path, 'w') as label_file:
            for yolo_label in yolo_labels:
                label_file.write(f'{yolo_label}\n')

In [5]:
# Images and label files are paired purely by their position in these two
# sorted listings.
train_imgs = sorted(glob.glob(os.path.join(TRAIN_PATH, '*.png')))
train_txts = sorted(glob.glob(os.path.join(TRAIN_PATH, '*.txt')))

# A missing or extra file would shift the pairing and silently attach labels
# to the wrong images, so verify that the file stems line up one-to-one.
train_img_stems = [os.path.splitext(os.path.basename(p))[0] for p in train_imgs]
train_txt_stems = [os.path.splitext(os.path.basename(p))[0] for p in train_txts]
assert train_img_stems == train_txt_stems, (
    'Images and label files in TRAIN_PATH do not pair up one-to-one by name'
)

In [6]:
# Copy every image in train_imgs into DEST_IMAGE_PATH, keeping its file name.
for src_image in tqdm(train_imgs):
    shutil.copy(
        src_image,
        os.path.join(DEST_IMAGE_PATH, os.path.basename(src_image)),
    )

100%|██████████| 6481/6481 [00:38<00:00, 169.96it/s]


In [7]:
# Convert every label file in train_txts; the images are only used for their size.
make_yolo_dataset(image_paths=train_imgs, txt_paths=train_txts)

100%|██████████| 6481/6481 [05:08<00:00, 20.98it/s]


# Make StratifiedGroupKFold (SGKF) Image Path TXT

In [8]:
# One entry per annotated object: the image it belongs to and its class id.
# The two lists stay index-aligned.
img_names = []
labels = []

for img, txt in zip(train_imgs, train_txts):
    # Same basename logic that names the copied image in DEST_IMAGE_PATH.
    img_name = os.path.basename(img)

    with open(txt, 'r') as t:
        for line in t:
            fields = line.split()

            # Blank lines carry no annotation and would crash the int/float parse.
            if not fields:
                continue

            # The class id is the first field and may be written as a float (e.g. "3.0").
            img_names.append(img_name)
            labels.append(int(float(fields[0])))

In [10]:
# Fixed seed so the shuffled fold assignment is the same on every run.
SEED = 41
# Number of folds to generate.
N_SPLITS = 4

# Splitter only; the actual indices are produced when sgkf.split(...) is called.
sgkf = StratifiedGroupKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

In [14]:
# Use fresh names for the array and the per-fold lists. Rebinding `img_names`
# or `train_imgs` here would overwrite variables that earlier cells read,
# so re-running those cells afterwards would operate on the wrong data.
group_names = np.asarray(img_names)


def _fold_image_paths(names):
    """Return the sorted, de-duplicated image names as DATA_PATH-relative paths."""
    return [
        os.path.join(DEST_IMAGE_PATH, name).replace(DATA_PATH, '')
        for name in np.unique(names)
    ]


# Stratify on the per-object class id and group by image name, so all objects
# of one image land on the same side of a fold.
for i, (train_idx, val_idx) in enumerate(sgkf.split(group_names, labels, group_names)):
    fold_lists = (
        ('train', _fold_image_paths(group_names[train_idx])),
        ('val', _fold_image_paths(group_names[val_idx])),
    )

    # Writes train_<i>.txt and val_<i>.txt, one image path per line.
    for split_name, fold_imgs in fold_lists:
        with open(os.path.join(DEST_PATH, f'{split_name}_{i}.txt'), 'w') as path_file:
            for img_path in fold_imgs:
                path_file.write(f'{img_path}\n')

In [15]:
# List every test image as a path with the DATA_PATH prefix stripped.
# NOTE(review): if the globbed paths are absolute, os.path.join discards
# DEST_IMAGE_PATH, so the entries point into TEST_PATH rather than
# DEST_IMAGE_PATH - confirm this is the intended location.
test_imgs = [
    os.path.join(DEST_IMAGE_PATH, img_path).replace(DATA_PATH, '')
    for img_path in sorted(glob.glob(os.path.join(TEST_PATH, '*.png')))
]

with open(os.path.join(DEST_PATH, 'test.txt'), 'w') as path_file:
    path_file.writelines(f'{img_path}\n' for img_path in test_imgs)

# Make YAML Config

In [16]:
# Class names in file order; the position in this list becomes the class index.
category_names = []

with open(os.path.join(DATA_PATH, 'classes.txt'), 'r') as cls_file:
    for line in cls_file:
        line = line.strip()

        # A blank line would otherwise be stored as an empty class name and
        # inflate the class count written to the YAML config.
        if not line:
            continue

        # The class name is whatever follows the last comma on the line.
        category_names.append(line.split(',')[-1])

In [17]:
# Map each class index (position in category_names) to its class name.
categories = dict(enumerate(category_names))

In [18]:
def _relative_to_data(filename):
    """Return the path of a file in DEST_PATH with the DATA_PATH prefix stripped."""
    return os.path.join(DEST_PATH, filename).replace(DATA_PATH, '')


# Write one dataset config per fold: fold_<n>.yaml referencing train_<n>.txt / val_<n>.txt.
for fold in range(N_SPLITS):
    fold_config = {
        'path': DATA_PATH,
        'train': _relative_to_data(f'train_{fold}.txt'),
        'val': _relative_to_data(f'val_{fold}.txt'),
        'test': _relative_to_data('test.txt'),
        'nc': len(category_names),
        'names': categories,
    }

    # sort_keys=False keeps the keys in the order written above.
    with open(os.path.join(DEST_PATH, f'fold_{fold}.yaml'), 'w') as config_file:
        yaml.dump(fold_config, config_file, sort_keys=False)