In [None]:
import os
import yaml
import shutil
from tqdm import tqdm
from glob import glob
from sklearn.model_selection import train_test_split
#pip freeze > requirements.txt

In [None]:
# Source and destination folders, relative to the notebook's working directory.
# They are built with os.path.join so the cell also runs on Linux/macOS: a
# literal "..\data\images" is only a path on Windows. On Windows the resulting
# strings are identical to the previous raw-string values.
IMG_DIR = os.path.join("..", "data", "images")  # your images folder
LBL_DIR = os.path.join("..", "data", "folder_structure", "yolov5", "labels")  # your YOLO txt annotations folder
OUT_DIR = os.path.join("..", "dataset_final")  # dedicated folder for the final ready-to-train dataset

# Folder layout created below ("dataset/" stands for OUT_DIR; the .yaml file
# is written by a later cell):
#
# dataset/
#     images/
#         train/
#         val/
#         test/
#     labels/
#         train/
#         val/
#         test/
#     .yaml

for split in ["train", "val", "test"]:
    for kind in ("images", "labels"):
        # exist_ok=True keeps the cell re-runnable on an existing dataset folder.
        os.makedirs(os.path.join(OUT_DIR, kind, split), exist_ok=True)

In [None]:
# Train / val / test split (roughly 80 % / 10 % / 10 %) over every PNG in IMG_DIR.
# glob returns files in arbitrary order; sorting first means the fixed
# random_state yields the same split for the same set of files.
img_files = sorted(glob(os.path.join(IMG_DIR, "*.png")))

# Fail early with a readable message: with an empty list train_test_split
# raises a ValueError that does not mention the real cause (wrong folder or
# file extension).
if not img_files:
    raise FileNotFoundError(f"No .png images found in {os.path.abspath(IMG_DIR)!r}")

# First hold out 20 % of the images, then cut that hold-out in half.
train_imgs, holdout_imgs = train_test_split(img_files, test_size=0.2, random_state=42)
test_imgs, val_imgs = train_test_split(holdout_imgs, test_size=0.5, random_state=42)
print('train', len(train_imgs), 'val', len(val_imgs), 'test', len(test_imgs))

def move_files(image_list, split):
    """Copy images and their YOLO label files into one split of OUT_DIR.

    Despite the name, files are copied, not moved; the sources stay in place.

    For every image path the label is looked up in LBL_DIR as
    ``<image stem>.txt``. When that file does not exist, an empty ``.txt`` is
    written instead of raising FileNotFoundError, so the unlabeled image ends
    up in the dataset as a background image (no objects).

    Args:
        image_list: Iterable of image file paths to copy.
        split: Name of the subset folder, e.g. "train", "val" or "test".

    Returns:
        None. Files are written to ``OUT_DIR/images/<split>`` and
        ``OUT_DIR/labels/<split>``.
    """
    img_out_dir = os.path.join(OUT_DIR, "images", split)
    lbl_out_dir = os.path.join(OUT_DIR, "labels", split)

    # Create the targets here as well, so the function does not depend on
    # another cell having prepared the folder tree for this split name.
    os.makedirs(img_out_dir, exist_ok=True)
    os.makedirs(lbl_out_dir, exist_ok=True)

    for img_path in tqdm(image_list):
        filename = os.path.basename(img_path)
        name, _ = os.path.splitext(filename)

        # 1. Copy the image.
        shutil.copy(img_path, os.path.join(img_out_dir, filename))

        # 2. Copy the label, or create an empty placeholder.
        label_path = os.path.join(LBL_DIR, name + ".txt")
        dest_label_path = os.path.join(lbl_out_dir, name + ".txt")

        if os.path.exists(label_path):
            shutil.copy(label_path, dest_label_path)
        else:
            # Missing label (healthy image): write an empty file so YOLO
            # treats the image as background.
            # NOTE(review): a wrong LBL_DIR would silently turn every image
            # into a background image - check the label counts after a run.
            with open(dest_label_path, "w"):
                pass


# Populate the three subsets in the order train -> val -> test.
for split_name, split_imgs in (
    ("train", train_imgs),
    ("val", val_imgs),
    ("test", test_imgs),
):
    move_files(split_imgs, split_name)


In [None]:
# Create the dataset YAML that describes the split folders and the classes.
yaml_path = os.path.join(OUT_DIR, "data.yaml")

def rel(p):
    """Return ``p`` relative to OUT_DIR, written with forward slashes.

    NOTE(review): not called in the visible cells; kept in case another cell
    relies on it.
    """
    return os.path.relpath(p, OUT_DIR).replace("\\", "/")

# Order matters: presumably each name's list index is the class id used in the
# label files - verify against the annotations before reordering.
CLASS_NAMES = [
    "boneanomaly",
    "bonelesion",
    "foreignbody",
    "fracture",
    "metal",
    "periostealreaction",
    "pronatorsign",
    "softtissue",
    "text",
]

yaml_data = {
    # Absolute dataset root: avoids a dependency on the notebook location and
    # prevents YOLO from failing because of the folder structure.
    "path": os.path.abspath(OUT_DIR),
    # Split folders, relative to "path".
    "train": "images/train",
    "val": "images/val",
    "test": "images/test",
    # Derived from the list so the count and the names cannot drift apart.
    "nc": len(CLASS_NAMES),
    "names": CLASS_NAMES,
}

with open(yaml_path, "w") as f:
    # sort_keys=False keeps the keys in the order written above.
    yaml.dump(yaml_data, f, sort_keys=False)

### Check

In [None]:
# Report where the dataset was written and which YAML path to pass to training.
# The last line shows the path as built from OUT_DIR (it is not made absolute).
dataset_root = os.path.abspath(OUT_DIR)
summary_lines = [
    f"✅ YAML created with ABSOLUTE path: {dataset_root}",
    f"✅ Success! Dataset created at: {dataset_root}",
    f"Use this path in your train_fracture.ipynb: {os.path.join(OUT_DIR, 'data.yaml')}",
]
print("\n".join(summary_lines))