In [1]:
import os
import shutil
import random

# ================================================
# CONFIG
# ================================================
# Root folder of the merged dataset (relative to the working directory).
OUT_DIR = "merged_yolo"
# Probability that a QR image is assigned to the training subset.
TRAIN_SPLIT_QR = 0.9

# Source dataset locations (machine-specific absolute paths).
QR_DIR = "C:\\Users\\user\\Desktop\\My_Projects\\innovatex2025_armeta_solution\\datasets\\qr_yolo9"
SIG_DIR = "C:\\Users\\user\\Desktop\\My_Projects\\innovatex2025_armeta_solution\\datasets\\signature_yolov9"
STAMP_DIR = "C:\\Users\\user\\Desktop\\My_Projects\\innovatex2025_armeta_solution\\datasets\\stamp_yolov9_filtered"

# Class ids used in the merged dataset.
CLS_SIGNATURE, CLS_STAMP, CLS_QR = 0, 1, 2

# ================================================
# Create the images/ and labels/ trees, each with train/ and val/ inside.
for _kind in ("images", "labels"):
    for _split in ("train", "val"):
        os.makedirs(f"{OUT_DIR}/{_kind}/{_split}", exist_ok=True)


def copy_and_remap(img_path, lbl_path, subset, remap_fn):
    """Copy an image into the merged dataset and rewrite its label classes.

    The image is copied to ``{OUT_DIR}/images/{subset}/`` and the rewritten
    label goes to ``{OUT_DIR}/labels/{subset}/``, both under their original
    base names. The destination directories must already exist.

    Args:
        img_path: Path of the source image file.
        lbl_path: Path of the source label file. Each line is split on
            whitespace; the first field is the class id and the remaining
            fields are copied through unchanged.
        subset: Name of the destination sub-directory (callers in this
            file pass "train" or "val").
        remap_fn: Callable that receives the source class id as an int and
            returns the new class id, or None to drop that annotation.

    Raises:
        ValueError: If the first field of a label line is not an integer.

    Notes:
        Lines with fewer than five fields are skipped. The image is always
        copied, but the label file is only written when at least one
        annotation survives the remapping.
    """
    new_img = f"{OUT_DIR}/images/{subset}/{os.path.basename(img_path)}"
    new_lbl = f"{OUT_DIR}/labels/{subset}/{os.path.basename(lbl_path)}"

    shutil.copy(img_path, new_img)

    out_lines = []
    # Use a context manager so the label file handle is closed promptly
    # instead of being left for the garbage collector.
    with open(lbl_path) as src:
        for line in src:
            parts = line.strip().split()
            if len(parts) < 5:
                # Blank or malformed line: not a complete annotation.
                continue

            cls, *coords = parts
            new_cls = remap_fn(int(cls))
            if new_cls is None:
                continue

            out_lines.append(f"{new_cls} {' '.join(coords)}")

    if out_lines:
        with open(new_lbl, "w") as f:
            f.write("\n".join(out_lines))



# ================================================
# 1) SIGNATURE DATASET (train/valid/test)
# ================================================
# Source "train" goes to the merged "train" subset; "valid" and "test"
# both go to the merged "val" subset.

print("\n=== Normalizing SIGNATURE dataset ===")


def only_signature(c):
    """Keep source class 0 as CLS_SIGNATURE; drop every other class."""
    if c == 0:
        return CLS_SIGNATURE
    return None


for subset in ("train", "valid", "test"):
    img_dir = os.path.join(SIG_DIR, subset, "images")
    if not os.path.exists(img_dir):
        # This subset is absent from the source dataset.
        continue

    lbl_dir = os.path.join(SIG_DIR, subset, "labels")
    out_subset = "val" if subset != "train" else "train"

    for fn in os.listdir(img_dir):
        if not fn.lower().endswith((".jpg", ".png", ".jpeg")):
            continue

        # The label file shares the image's stem and uses a .txt extension.
        stem = fn.rpartition(".")[0]
        img_path = os.path.join(img_dir, fn)
        lbl_path = os.path.join(lbl_dir, stem + ".txt")

        # Images without a label file are left out of the merged dataset.
        if os.path.exists(lbl_path):
            copy_and_remap(img_path, lbl_path, out_subset, only_signature)



# ================================================
# 2) STAMP DATASET (train/valid/test)
# ================================================
# Source "train" goes to the merged "train" subset; "valid" and "test"
# both go to the merged "val" subset.

print("\n=== Normalizing STAMP dataset ===")


def _always_stamp(c):
    """Relabel every annotation as CLS_STAMP, whatever its source class."""
    return CLS_STAMP


for subset in ("train", "valid", "test"):
    img_dir = os.path.join(STAMP_DIR, subset, "images")
    if not os.path.exists(img_dir):
        # This subset is absent from the source dataset.
        continue

    lbl_dir = os.path.join(STAMP_DIR, subset, "labels")
    out_subset = "val" if subset != "train" else "train"

    for fn in os.listdir(img_dir):
        if not fn.lower().endswith((".jpg", ".png", ".jpeg")):
            continue

        # The label file shares the image's stem and uses a .txt extension.
        stem = fn.rpartition(".")[0]
        img_path = os.path.join(img_dir, fn)
        lbl_path = os.path.join(lbl_dir, stem + ".txt")

        # Images without a label file are left out of the merged dataset.
        if os.path.exists(lbl_path):
            copy_and_remap(img_path, lbl_path, out_subset,
                           remap_fn=_always_stamp)



# ================================================
# 3) QR DATASET (NO split -> random split)
# ================================================
# The QR source has a flat images/ + labels/ layout with no predefined
# subsets, so each image is assigned to train or val here. About
# TRAIN_SPLIT_QR of the images are expected to land in train.

print("\n=== Normalizing QR dataset ===")

qr_img_dir = os.path.join(QR_DIR, "images")
qr_lbl_dir = os.path.join(QR_DIR, "labels")

for fn in os.listdir(qr_img_dir):
    if not fn.lower().endswith((".jpg", ".png", ".jpeg")):
        continue

    img_path = os.path.join(qr_img_dir, fn)
    lbl_path = os.path.join(qr_lbl_dir, fn.rsplit(".", 1)[0] + ".txt")

    # Images without a label file are left out of the merged dataset.
    if not os.path.exists(lbl_path):
        continue

    # Seed a private RNG with the file name so a given image always lands in
    # the same subset. With the unseeded global RNG, re-running this cell
    # against an existing OUT_DIR could copy the same image into both train
    # and val, leaking training data into validation.
    subset = "train" if random.Random(fn).random() < TRAIN_SPLIT_QR else "val"

    # Every annotation in this dataset is relabelled as CLS_QR.
    copy_and_remap(img_path, lbl_path, subset,
                   remap_fn=lambda c: CLS_QR)



# ================================================
# 4) Create final data.yaml
# ================================================
# Dataset description: root path, image sub-folders for the train and val
# subsets, and the class-id -> name table matching the CLS_* constants.

yaml = "\n".join([
    f"path: {OUT_DIR}",
    "",
    "train: images/train",
    "val: images/val",
    "",
    "names:",
    "  0: signature",
    "  1: stamp",
    "  2: qr",
    "",  # trailing empty item makes the file end with a newline
])

with open(f"{OUT_DIR}/data.yaml", "w") as f:
    f.write(yaml)

print("\n=== DONE! Normalized dataset created:", OUT_DIR)



=== Normalizing SIGNATURE dataset ===

=== Normalizing STAMP dataset ===

=== Normalizing QR dataset ===

=== DONE! Normalized dataset created: merged_yolo
