In [2]:
import os
import yaml
import shutil
from tqdm import tqdm
from glob import glob
from sklearn.model_selection import train_test_split
#pip freeze > requirements.txt

In [3]:
IMG_DIR = r"..\data\archive\images"      # your images folder
LBL_DIR = r"..\data\archive\folder_structure\yolov5\labels"      # your YOLO txt annotations folder
OUT_DIR = r"..\data"  # output dataset in YOLO format

'''
dataset/
    images/
        train/
        val/
        test/
    labels/
        train/
        val/
        test/
    .yaml
'''

for split in ["train", "val", "test"]:
    os.makedirs(f"{OUT_DIR}/images/{split}", exist_ok=True)
    os.makedirs(f"{OUT_DIR}/labels/{split}", exist_ok=True)

In [4]:
#train test val split
img_files = []
img_files.extend(glob(os.path.join(IMG_DIR, "*.png")))

img_files = sorted(img_files)

train_imgs, test_imgs = train_test_split(img_files, test_size=0.2, random_state=42)
test_imgs, val_imgs  = train_test_split(test_imgs, test_size=0.5, random_state=42)
print('train', len(train_imgs), 'val', len(val_imgs), 'test', len(test_imgs))

def move_files(image_list, split):
    for img_path in tqdm(image_list):
        filename = os.path.basename(img_path)
        name, _ = os.path.splitext(filename)
        
        label_path = os.path.join(LBL_DIR, name + ".txt")
        
        shutil.copy(img_path, f"{OUT_DIR}/images/{split}/{filename}")
        shutil.copy(label_path, f"{OUT_DIR}/labels/{split}/{name}.txt")


move_files(train_imgs, "train")
move_files(val_imgs,   "val")
move_files(test_imgs,  "test")


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [5]:
# create yaml file
yaml_path = os.path.join(OUT_DIR, "data.yaml")

def rel(p):
    return os.path.relpath(p, OUT_DIR).replace("\\", "/")

yaml_data = {
    "path": rel(OUT_DIR),
    "train": "images/train",
    "val": "images/val",
    "test": "images/test",
    "nc": 9,
    "names": ["boneanomaly", "bonelesion", "foreignbody", "fracture", "metal", "periostealreaction", "pronatorsign", "softtissue", "text"]
}

with open(yaml_path, "w") as f:
    yaml.dump(yaml_data, f, sort_keys=False)