In [1]:
import glob
import json
import random
from pathlib import Path

In [2]:
# Absolute path of the directory the notebook is run from; the dataset
# directories named in PAIRS are looked up underneath it.
ROOT = Path(".").resolve()

# Seed passed to random.seed() so the train/val split is reproducible.
SEED = 20251011

# Fraction of each dataset assigned to validation; train = 1 - VAL_RATIO.
VAL_RATIO = 0.2

# (image directory, label directory) name pairs, both relative to ROOT.
PAIRS = [
    ("AA_half", "AA_half_txt"),
    ("QS", "QS_txt"),
    ("T", "T_txt"),
]

# File suffixes treated as images (compared against the lower-cased suffix).
IMG_EXTS = {
    ".jpg", ".jpeg", ".png",
    ".bmp", ".tif", ".tiff", ".webp",
}

def find_label(lbl_dir: Path, stem: str):
    """Locate the label file named ``stem`` + ``.txt`` under ``lbl_dir``.

    ``stem.txt`` and ``stem.TXT`` directly inside ``lbl_dir`` are tried first.
    If neither exists, ``lbl_dir`` is searched recursively (subdirectories
    included) and the ``.txt`` suffix is compared case-insensitively.

    Args:
        lbl_dir: Directory that holds the label files.
        stem: File name of the image without its extension.

    Returns:
        Path of the matching label file, or ``None`` if there is no match.
    """
    # Fast path: the common case of a label sitting directly in lbl_dir.
    for name in (f"{stem}.txt", f"{stem}.TXT"):
        candidate = lbl_dir / name
        if candidate.is_file():
            return candidate

    # Escape glob metacharacters so a stem such as "img[1]" is matched
    # literally instead of being read as a character class.
    pattern = glob.escape(stem) + ".*"

    # Sort so the result is deterministic when several subdirectories match.
    for q in sorted(lbl_dir.rglob(pattern)):
        # Everything after the stem must be exactly ".txt" (any case); this
        # rejects files like "stem.bak.txt", whose stem is different.
        if q.is_file() and q.name[len(stem):].lower() == ".txt":
            return q
    return None

In [3]:
# Build train/val list files: for every (image dir, label dir) pair, match
# images to labels by stem, shuffle, split by VAL_RATIO, and write one
# "<image path> <label path>" line per sample (paths relative to ROOT).

# Seed the global RNG so random.shuffle below yields the same split each run.
random.seed(SEED)
train_lines, val_lines = [], []

for img_dir_name, lbl_dir_name in PAIRS:
    img_dir = ROOT / img_dir_name
    lbl_dir = ROOT / lbl_dir_name
    assert img_dir.exists(), f"缺少图片目录: {img_dir}"
    assert lbl_dir.exists(), f"缺少标签目录: {lbl_dir}"

    # Keep only images that have a label file with the same stem.
    # Sorting first makes the pre-shuffle order independent of the order in
    # which the filesystem lists entries.
    pairs = []
    for img in sorted(img_dir.rglob("*")):
        if not (img.is_file() and img.suffix.lower() in IMG_EXTS):
            continue
        lab = find_label(lbl_dir, img.stem)
        if lab is not None:
            pairs.append((img.relative_to(ROOT), lab.relative_to(ROOT)))

    n = len(pairs)
    if n == 0:
        print(f"[WARN] {img_dir_name}: 没有匹配上的样本。")
        continue

    # Split each dataset separately so every dataset contributes to both
    # lists in roughly the VAL_RATIO proportion.
    random.shuffle(pairs)
    n_train = int(round(n * (1 - VAL_RATIO)))
    tr, va = pairs[:n_train], pairs[n_train:]

    train_lines += [f"{a.as_posix()} {b.as_posix()}" for a, b in tr]
    val_lines   += [f"{a.as_posix()} {b.as_posix()}" for a, b in va]

    print(f"[{img_dir_name}] matched={n}  train={len(tr)}  val={len(va)}")

# The listed paths are relative to ROOT, so the list files are written into
# ROOT as well; they then stay resolvable even if the working directory has
# changed since ROOT was computed.
(ROOT / "train_list.txt").write_text("\n".join(train_lines), encoding="utf-8")
(ROOT / "val_list.txt").write_text("\n".join(val_lines), encoding="utf-8")
print("OK -> 生成 train_list.txt / val_list.txt")

[AA_half] matched=164  train=131  val=33
[QS] matched=82  train=66  val=16
[T] matched=35  train=28  val=7
OK -> 生成 train_list.txt / val_list.txt
