In [5]:
import csv
import random
import shutil
import os
from pathlib import Path

# ---------------------------------------------------------------------------
# LARD -> YOLO segmentation dataset conversion
#
# 1) Reads a semicolon-separated LARD CSV with columns:
#      image, height, width, x_A, y_A, x_B, y_B, x_C, y_C, x_D, y_D
# 2) Splits the rows into train/test/val (80%, 10%, 10%).
# 3) Symlinks the images into 'datasets/LARD-yolo/images/<split>'.
# 4) Writes YOLOv11 segmentation label files with one line per image:
#      class x1 y1 x2 y2 x3 y3 x4 y4
#    where xN, yN are pixel coordinates divided by image width/height.
#    Class is always 0 (runway).
# 5) Writes 'dataset.yaml' with a single class: runway.
# ---------------------------------------------------------------------------

# Configuration
csv_path = "datasets/LARD/LARD_train.csv"
output_dir = "datasets/LARD-yolo"
train_ratio = 0.8
test_ratio = 0.1
val_ratio = 0.1
split_seed = 42  # fixed seed: re-running the cell reproduces the same split

# The validation split receives whatever is left after train and test, so
# val_ratio only takes effect if the three ratios really add up to 1.
if abs(train_ratio + test_ratio + val_ratio - 1.0) > 1e-9:
    raise ValueError(
        "train_ratio + test_ratio + val_ratio must equal 1.0, got "
        f"{train_ratio + test_ratio + val_ratio}"
    )

csv_file = Path(csv_path).resolve()
out_dir = Path(output_dir).resolve()

# Columns that are parsed from text into floats.
numeric_fields = (
    "height", "width",
    "x_A", "y_A", "x_B", "y_B",
    "x_C", "y_C", "x_D", "y_D",
)

rows = []
with csv_file.open("r", encoding="utf-8", newline="") as f:
    for r in csv.DictReader(f, delimiter=";"):
        for field in numeric_fields:
            r[field] = float(r[field])
        rows.append(r)

# Shuffle with a dedicated seeded generator so the split is reproducible
# (for an unchanged CSV row order) and the global `random` state is untouched.
random.Random(split_seed).shuffle(rows)
total = len(rows)
train_end = int(train_ratio * total)
test_end = int((train_ratio + test_ratio) * total)

train_rows = rows[:train_end]
test_rows = rows[train_end:test_end]
val_rows = rows[test_end:]  # remainder

# Output layout: <out_dir>/{images,labels}/{train,test,val}
images_dir = out_dir / "images"
labels_dir = out_dir / "labels"
for split in ["train", "test", "val"]:
    (images_dir / split).mkdir(parents=True, exist_ok=True)
    (labels_dir / split).mkdir(parents=True, exist_ok=True)

def process_split(split_rows, split_name, source_dir=None):
    """
    Populate one split of the YOLO dataset.

    For each row, symlink the source image into images/<split_name> and write
    labels/<split_name>/<image stem>.txt holding one polygon line in the YOLO
    segmentation format:
       class x1 y1 x2 y2 x3 y3 x4 y4
    The class is always 0 and the corners A, B, C, D are divided by the image
    width (x) and height (y).

    Relies on the module-level ``images_dir`` and ``labels_dir`` paths; the
    per-split sub-directories are expected to exist already.

    Args:
        split_rows: iterable of row dicts with the keys "image", "width",
            "height" and "x_A" ... "y_D"; the numeric values must already be
            numbers, not strings.
        split_name: name of the split sub-directory (e.g. "train").
        source_dir: directory the "image" column is relative to. Defaults to
            "datasets/LARD" resolved against the current working directory.

    Rows whose image file is missing, or whose width/height is not positive,
    are skipped with a warning. If the symlink cannot be created a warning is
    printed and the label file is still written.
    """
    # Resolved once: the base directory does not change between rows.
    base_dir = Path("datasets/LARD" if source_dir is None else source_dir).resolve()

    for r in split_rows:
        img_path = base_dir / r["image"]
        if not img_path.is_file():
            print(f"[WARNING] Missing image: {img_path}, skipping.")
            continue

        w = r["width"]
        h = r["height"]
        if w <= 0 or h <= 0:
            # Normalizing would divide by zero (or flip signs).
            print(f"[WARNING] Invalid image size {w}x{h} for {img_path}, skipping.")
            continue

        dest_img = images_dir / split_name / img_path.name
        # exists() follows symlinks and is False for a dangling link, so also
        # test is_symlink(); otherwise os.symlink fails with FileExistsError.
        if dest_img.is_symlink() or dest_img.exists():
            dest_img.unlink()  # remove any existing file or symlink

        try:
            os.symlink(img_path, dest_img)
        except OSError as e:
            print(f"[WARNING] Unable to create symlink {dest_img}: {e}")

        # Normalize corners
        xA = r["x_A"] / w
        yA = r["y_A"] / h
        xB = r["x_B"] / w
        yB = r["y_B"] / h
        xC = r["x_C"] / w
        yC = r["y_C"] / h
        xD = r["x_D"] / w
        yD = r["y_D"] / h

        # class xA yA xB yB xC yC xD yD
        label_line = (
            f"0 {xA:.6f} {yA:.6f} "
            f"{xB:.6f} {yB:.6f} "
            f"{xC:.6f} {yC:.6f} "
            f"{xD:.6f} {yD:.6f}"
        )

        label_file = labels_dir / split_name / (img_path.stem + ".txt")
        with label_file.open("w", encoding="utf-8") as lf:
            lf.write(label_line + "\n")

# Populate the three splits (same order as before: train, test, val).
for split_name, split_rows in (
    ("train", train_rows),
    ("test", test_rows),
    ("val", val_rows),
):
    process_split(split_rows, split_name)

# `path` is derived from the output directory name so that dataset.yaml stays
# in sync if `output_dir` is changed in the configuration above.
dataset_yaml = f"""# YOLOv11 segmentation dataset
path: {out_dir.name}
train: images/train
val: images/val
test: images/test

names:
    0: runway
"""

(out_dir / "dataset.yaml").write_text(dataset_yaml, encoding="utf-8")

# Summary. The counts are the split sizes taken from the CSV; rows skipped by
# process_split (e.g. missing image files) are still included in them.
print(f"[DONE] Created dataset in: {out_dir.resolve()}")
print("[INFO] Splits:")
print(f"  train = {len(train_rows)} images")
print(f"  test  = {len(test_rows)} images")
print(f"  val   = {len(val_rows)} images")
print("[INFO] Symlinks created in:")
print(f"  {images_dir}/train, {images_dir}/test, {images_dir}/val")


[DONE] Created dataset in: /home/gustavo-depaula/SyntheticRunwayDataset/datasets/LARD-yolo
[INFO] Splits:
  train = 11546 images
  test  = 1443 images
  val   = 1444 images
[INFO] Symlinks created in:
  /home/gustavo-depaula/SyntheticRunwayDataset/datasets/LARD-yolo/images/train, /home/gustavo-depaula/SyntheticRunwayDataset/datasets/LARD-yolo/images/test, /home/gustavo-depaula/SyntheticRunwayDataset/datasets/LARD-yolo/images/val


In [10]:
import csv
import os
from pathlib import Path

def create_lard_val_dataset(csv_path, output_dir):
    """
    Build an evaluation-only YOLO segmentation dataset from a LARD-style CSV.

    Reads a semicolon-separated CSV containing the columns
        image, height, width, x_A, y_A, x_B, y_B, x_C, y_C, x_D, y_D
    and, for every row, symlinks the image into <output_dir>/images/val and
    writes <output_dir>/labels/val/<image stem>.txt with one polygon line:
        0 xA yA xB yB xC yC xD yD
    where each corner is divided by the image width (x) and height (y).

    A dataset.yaml is also created in <output_dir>; its train, val and test
    entries all reference the 'val' folder.

    Args:
        csv_path: path to the CSV file. Image paths in the "image" column are
            taken relative to the directory containing this file.
        output_dir: directory in which the dataset is created.

    Rows whose image file is missing, or whose width/height is not positive,
    are skipped with a warning. If a symlink cannot be created a warning is
    printed and the label file is still written. The final summary reports the
    number of images that were actually linked.
    """

    csv_file = Path(csv_path).resolve()
    out_dir = Path(output_dir).resolve()
    source_dir = csv_file.parent  # image paths are relative to the CSV

    # Columns that are parsed from text into floats.
    numeric_fields = (
        "height", "width",
        "x_A", "y_A", "x_B", "y_B",
        "x_C", "y_C", "x_D", "y_D",
    )

    rows = []
    with csv_file.open("r", encoding="utf-8", newline="") as f:
        for r in csv.DictReader(f, delimiter=";"):
            for field in numeric_fields:
                r[field] = float(r[field])
            rows.append(r)

    images_val_dir = out_dir / "images" / "val"
    labels_val_dir = out_dir / "labels" / "val"
    images_val_dir.mkdir(parents=True, exist_ok=True)
    labels_val_dir.mkdir(parents=True, exist_ok=True)

    placed = 0  # images successfully linked into images/val
    for r in rows:
        img_path = source_dir / r["image"]
        if not img_path.is_file():
            print(f"[WARNING] Missing image: {img_path}, skipping.")
            continue

        w = r["width"]
        h = r["height"]
        if w <= 0 or h <= 0:
            # Normalizing would divide by zero (or flip signs).
            print(f"[WARNING] Invalid image size {w}x{h} for {img_path}, skipping.")
            continue

        dest_img = images_val_dir / img_path.name
        # exists() follows symlinks and is False for a dangling link, so also
        # test is_symlink(); otherwise os.symlink fails with FileExistsError.
        if dest_img.is_symlink() or dest_img.exists():
            dest_img.unlink()

        # Symlink
        try:
            os.symlink(img_path, dest_img)
        except OSError as e:
            print(f"[WARNING] Unable to create symlink {dest_img}: {e}")
        else:
            placed += 1

        # Normalize the corners
        xA = r["x_A"] / w
        yA = r["y_A"] / h
        xB = r["x_B"] / w
        yB = r["y_B"] / h
        xC = r["x_C"] / w
        yC = r["y_C"] / h
        xD = r["x_D"] / w
        yD = r["y_D"] / h

        label_line = (
            f"0 {xA:.6f} {yA:.6f} "
            f"{xB:.6f} {yB:.6f} "
            f"{xC:.6f} {yC:.6f} "
            f"{xD:.6f} {yD:.6f}"
        )

        label_file = labels_val_dir / (img_path.stem + ".txt")
        with label_file.open("w", encoding="utf-8") as lf:
            lf.write(label_line + "\n")

    dataset_yaml = f"""# YOLOv11 segmentation dataset
path: {out_dir.name}
train: images/val
val: images/val
test: images/val

names:
  0: runway
"""
    (out_dir / "dataset.yaml").write_text(dataset_yaml, encoding="utf-8")

    print(f"[DONE] Created all-val dataset in: {out_dir.resolve()}")
    # Report what was really linked, not the number of CSV rows.
    print(f"[INFO] {placed} images placed in 'val'")
    print(f"[INFO] Symlinks (or copies) created in: {images_val_dir}")


In [11]:
# Build the two evaluation-only datasets from the LARD real-image test CSVs:
# each entry is (source CSV, output directory).
real_test_sets = [
    (
        'datasets/LARD/LARD_test_real/LARD_test_real_nominal/Test_Real_Nominal.csv',
        "datasets/LARD-real-nominal",
    ),
    (
        'datasets/LARD/LARD_test_real/LARD_test_real_edge_cases/Test_Real_Edge_Cases.csv',
        "datasets/LARD-real-edge-cases",
    ),
]
for real_csv, real_out_dir in real_test_sets:
    create_lard_val_dataset(real_csv, real_out_dir)

[DONE] Created all-val dataset in: /home/gustavo-depaula/SyntheticRunwayDataset/datasets/LARD-real-nominal
[INFO] 1500 images placed in 'val'
[INFO] Symlinks (or copies) created in: /home/gustavo-depaula/SyntheticRunwayDataset/datasets/LARD-real-nominal/images/val
[DONE] Created all-val dataset in: /home/gustavo-depaula/SyntheticRunwayDataset/datasets/LARD-real-edge-cases
[INFO] 311 images placed in 'val'
[INFO] Symlinks (or copies) created in: /home/gustavo-depaula/SyntheticRunwayDataset/datasets/LARD-real-edge-cases/images/val
