這段會：

掃描 images/ 與 labels/，

以「影像是否有標註」做分層五折（若無法分層則退回普通 KFold），

為每折產生 fold{k}_train.txt / fold{k}_val.txt 與 data_fold{k}.yaml。

In [1]:
# kfold_prepare.py
import os, glob, random, pathlib, yaml
from sklearn.model_selection import StratifiedKFold, KFold

# ====== Adjust these settings for your project ======
ROOT_DIR = "/NFS/celine/aicup/yoloNew"  # project root folder
IMG_DIR  = os.path.join(ROOT_DIR, "images")   # scanned recursively for images
LBL_DIR  = os.path.join(ROOT_DIR, "labels")   # searched for per-image .txt annotation files
NC       = 1                # written to the "nc" entry of every data_fold{k}.yaml
NAMES    = ["heart_valve"]  # written to the "names" entry of every data_fold{k}.yaml
KFOLDS   = 5                # number of folds to generate
SEED     = 42               # seed for `random` and for the fold splitter's shuffle
# =================================

random.seed(SEED)

# 1) Collect every image under IMG_DIR (jpg/png/jpeg, lower- and upper-case extensions).
exts = ("*.jpg", "*.png", "*.jpeg", "*.JPG", "*.PNG", "*.JPEG")
# Gather into a set first: where glob matches case-insensitively (e.g. Windows),
# "*.jpg" and "*.JPG" both return the same file, and a duplicated path could end
# up in the train AND val list of one fold. Sorting keeps the order reproducible.
imgs = sorted({
    p
    for ext in exts
    for p in glob.glob(os.path.join(IMG_DIR, "**", ext), recursive=True)
})

# Explicit raise instead of `assert`, which is skipped when Python runs with -O.
if not imgs:
    raise FileNotFoundError("找不到影像，請確認 IMG_DIR 路徑。")

# 2) Build a binary "has annotation" label per image; it is used as the
#    stratification target (a good fit for a single-class dataset).
y = []
for ip in imgs:
    stem = os.path.splitext(os.path.basename(ip))[0]
    # Images are collected recursively, so first look for the label at the same
    # relative location under LBL_DIR (images/a/b.jpg -> labels/a/b.txt).
    rel_stem = os.path.splitext(os.path.relpath(ip, IMG_DIR))[0]
    lp = os.path.join(LBL_DIR, f"{rel_stem}.txt")
    if not os.path.exists(lp):
        # Fall back to the flat layout: LBL_DIR/<stem>.txt.
        lp = os.path.join(LBL_DIR, f"{stem}.txt")
    # An image counts as annotated only when its label file exists and is non-empty.
    y.append(1 if os.path.exists(lp) and os.path.getsize(lp) > 0 else 0)

# If every label is 1 or every label is 0 there is nothing to stratify on,
# so fall back to a plain KFold.
use_stratified = (min(y) != max(y))

if use_stratified:
    splitter = StratifiedKFold(n_splits=KFOLDS, shuffle=True, random_state=SEED)
    splits = list(splitter.split(imgs, y))
else:
    splitter = KFold(n_splits=KFOLDS, shuffle=True, random_state=SEED)
    splits = list(splitter.split(imgs))

# All fold outputs are written under <ROOT_DIR>/kfold_<KFOLDS>.
out_dir = os.path.join(ROOT_DIR, f"kfold_{KFOLDS}")
os.makedirs(out_dir, exist_ok=True)

def write_lines(path, lines):
    """Write every string in *lines* to *path*, one per line.

    The file is created or truncated, and each entry is followed by a single
    newline. It is always written as UTF-8 so that paths containing non-ASCII
    characters are stored the same way regardless of the system locale.

    Args:
        path: Destination file path.
        lines: Iterable of strings to write.
    """
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(x + "\n" for x in lines)

# 3) For each fold, write the train/val image lists and the matching data_fold{k}.yaml.
for k, (train_idx, val_idx) in enumerate(splits):
    fold_dir = os.path.join(out_dir, f"fold{k}")
    os.makedirs(fold_dir, exist_ok=True)

    # Write the train list first, then the val list, remembering each file's
    # path so the YAML below can point at it.
    list_paths = {}
    for subset, indices in (("train", train_idx), ("val", val_idx)):
        list_paths[subset] = os.path.join(fold_dir, f"fold{k}_{subset}.txt")
        write_lines(list_paths[subset], [imgs[i] for i in indices])

    # Keyword order is the key order in the dumped file (sort_keys=False).
    data_yaml = dict(
        path=ROOT_DIR,
        train=list_paths["train"],
        val=list_paths["val"],
        test=os.path.join(ROOT_DIR, "test_images"),
        nc=NC,
        names=NAMES,
    )
    yaml_path = os.path.join(fold_dir, f"data_fold{k}.yaml")
    with open(yaml_path, "w") as f:
        yaml.safe_dump(data_yaml, f, sort_keys=False)

print(f"完成！已在 {out_dir} 內產生 {KFOLDS} 折的 train/val 清單與 data_fold*.yaml")


完成！已在 /NFS/celine/aicup/yoloNew/kfold_5 內產生 5 折的 train/val 清單與 data_fold*.yaml
