In [3]:
# ==== Cell 1: parameters and automatic path detection (keep at the top of the notebook) ====
from pathlib import Path, PureWindowsPath
import os
import platform

# Dataset location written as a Windows path; edit this to point elsewhere.
WIN_PATH = r"C:\Users\anywhere4090\Desktop\0902 finalcode\dataset\mvtec3d"

# Derive the WSL mount path from the Windows path.
# PureWindowsPath understands drive letters and both "\" and "/" separators on
# every OS, so "X:\a\b" becomes "/mnt/x/a/b" even when this runs on Linux
# (where a plain Path would keep the backslashes inside a single name).
_win_pure = PureWindowsPath(WIN_PATH)
_drive = _win_pure.drive
if len(_drive) == 2 and _drive[1] == ":":
    wsl_candidate = Path("/mnt", _drive[0].lower(), *_win_pure.parts[1:])
else:
    # No drive letter: keep the historical "/mnt/c" prefix and only
    # normalise the separators.
    wsl_candidate = Path("/mnt/c") / _win_pure.as_posix()
win_candidate = Path(WIN_PATH)

# Pick whichever candidate exists, preferring the WSL form.
if wsl_candidate.exists():
    ORIG_ROOT = wsl_candidate
    RUN_ENV = "WSL"
elif win_candidate.exists():
    ORIG_ROOT = win_candidate
    RUN_ENV = "Windows"
else:
    # Neither exists: default to the WSL form; the check at the bottom of
    # this cell reports the problem.
    ORIG_ROOT = wsl_candidate
    RUN_ENV = "Unknown"

# Output folder: a sibling of ORIG_ROOT named "mvtec3dfusion".
OUT_ROOT = ORIG_ROOT.parent / "mvtec3dfusion"

# Blend weights (any scale; Cell 2 normalises them).
W_RGB = 0.5
W_XYZ = 0.5

# Dry-run mode: True = only check and list, write nothing; False = create and write files.
DRY_RUN = False

# On a size mismatch, resize XYZ to the RGB size (True) or skip that image (False).
RESIZE_ON_MISMATCH = False

# Print detailed progress information.
VERBOSE = True

# To override any parameter, edit the variable above and re-run this cell.
print("執行環境偵測：", RUN_ENV)
print("來源 ORIG_ROOT =", ORIG_ROOT)
print("輸出 OUT_ROOT  =", OUT_ROOT)
print(f"權重（待正規化）: W_RGB={W_RGB}, W_XYZ={W_XYZ}")
print("DRY_RUN =", DRY_RUN, " RESIZE_ON_MISMATCH =", RESIZE_ON_MISMATCH, " VERBOSE =", VERBOSE)
print()

if ORIG_ROOT.exists():
    cats = sorted([p.name for p in ORIG_ROOT.iterdir() if p.is_dir()])
    # The label now matches the slice: up to the first 10 category names are shown.
    print(f"已偵測到來源資料夾，category 數量：{len(cats)}，前 10 個：{cats[:10]}")
else:
    print("❌ 找不到來源資料夾！請確認路徑或在此 cell 修改 WIN_PATH/ORIG_ROOT。")
    print("若你在 WSL，請確認使用 /mnt/c/... 的路徑；若在 Windows kernel，請使用 C:\\... 或 C:/... 樣式。")


執行環境偵測： Windows
來源 ORIG_ROOT = C:\Users\anywhere4090\Desktop\0902 finalcode\dataset\mvtec3d
輸出 OUT_ROOT  = C:\Users\anywhere4090\Desktop\0902 finalcode\dataset\mvtec3dfusion
權重（待正規化）: W_RGB=0.5, W_XYZ=0.5
DRY_RUN = False  RESIZE_ON_MISMATCH = False  VERBOSE = True

已偵測到來源資料夾，category 數量：10，樣本前三：['bagel', 'cable_gland', 'carrot', 'cookie', 'dowel', 'foam', 'peach', 'potato', 'rope', 'tire']


In [4]:
# ==== Cell 2: run the main program (honours the DRY_RUN flag set in Cell 1) ====
import cv2
import numpy as np
import shutil
from pathlib import Path
from tqdm import tqdm

# Reference every Cell 1 variable so that a missing one fails fast with a
# clear message instead of a NameError deep inside the processing loop.
try:
    ORIG_ROOT, OUT_ROOT, W_RGB, W_XYZ, DRY_RUN, RESIZE_ON_MISMATCH, VERBOSE
except NameError:
    raise RuntimeError("請先執行參數 cell（Cell 1），並確認變數已設定。")

# Normalise the two blend weights so that they sum to 1 (up to rounding).
s = float(W_RGB) + float(W_XYZ)
if s == 0:
    raise ValueError("W_RGB + W_XYZ 不能同時為 0。")
wr, wx = float(W_RGB) / s, float(W_XYZ) / s

# The output root is only created when files will really be written.
if not DRY_RUN:
    print("** 實際執行並寫檔到 OUT_ROOT **")
    OUT_ROOT.mkdir(parents=True, exist_ok=True)
else:
    print("** 注意：目前為 DRY_RUN（僅檢查、不寫檔）**")

def find_xyz_for_rgb(rgb_path: Path, xyz_dir: Path):
    """Locate the XYZ file in *xyz_dir* that shares its stem with *rgb_path*.

    Lookup order: ``<stem>.png``, ``.tiff``, ``.tif``, then the upper-case
    variants ``.PNG``, ``.TIFF``, ``.TIF``; if none of those is a regular
    file, the alphabetically first regular file in *xyz_dir* whose stem
    matches is used.

    Args:
        rgb_path: Path of the RGB image; only its stem is used.
        xyz_dir: Directory expected to hold the matching XYZ file.

    Returns:
        The matching ``Path``, or ``None`` when *xyz_dir* is not a directory
        or contains no regular file with that stem.
    """
    stem = rgb_path.stem
    for ext in (".png", ".tiff", ".tif", ".PNG", ".TIFF", ".TIF"):
        cand = xyz_dir / (stem + ext)
        # is_file() rather than exists(): a sub-directory that happens to be
        # named like the candidate must not be returned as an image.
        if cand.is_file():
            return cand
    if not xyz_dir.is_dir():
        return None
    # iterdir() yields entries in arbitrary order; sort so the fallback is
    # deterministic across runs and platforms.
    matches = sorted(p for p in xyz_dir.iterdir() if p.is_file() and p.stem == stem)
    return matches[0] if matches else None

def read_to_3ch(p: Path):
    """Load an image with OpenCV and return it with at most three channels.

    The file is decoded with ``cv2.IMREAD_UNCHANGED``. A 2-D (single-channel)
    result is replicated into three identical channels; a result with more
    than three channels keeps only the first three. No dtype conversion is
    performed here, so the array keeps whatever dtype OpenCV decoded.

    Returns:
        The image array, or ``None`` when ``cv2.imread`` returns ``None``.
    """
    loaded = cv2.imread(str(p), cv2.IMREAD_UNCHANGED)
    if loaded is None:
        return None
    if loaded.ndim == 2:
        # Grayscale: copy the single plane into three channels.
        return np.dstack((loaded, loaded, loaded))
    if loaded.shape[-1] > 3:
        # Drop any channel beyond the third (e.g. alpha).
        return loaded[..., :3]
    return loaded

def combine_and_save(rgb_path: Path, xyz_path: Path, out_path: Path):
    """Blend an RGB image with its XYZ image and write the result.

    Both inputs are loaded through ``read_to_3ch`` and mixed as
    ``rgb * wr + xyz * wx`` in float32, clipped to [0, 255] and stored as
    uint8. When ``DRY_RUN`` is true every check is performed but nothing is
    written.

    Args:
        rgb_path: Path of the RGB image.
        xyz_path: Path of the XYZ image.
        out_path: Destination file; its parent directory is created if needed.

    Returns:
        ``(True, "ok")`` on success, otherwise ``(False, reason)`` where
        *reason* is ``"read_fail"``, ``"size_mismatch ..."`` or
        ``"write_fail"``.
    """
    a = read_to_3ch(rgb_path)
    b = read_to_3ch(xyz_path)
    if a is None or b is None:
        return False, "read_fail"
    if a.shape[:2] != b.shape[:2]:
        if not RESIZE_ON_MISMATCH:
            return False, f"size_mismatch {a.shape[:2]} vs {b.shape[:2]}"
        # cv2.resize takes (width, height), i.e. (cols, rows).
        b = cv2.resize(b, (a.shape[1], a.shape[0]), interpolation=cv2.INTER_LINEAR)
    comb = a.astype(np.float32) * wr + b.astype(np.float32) * wx
    comb = np.clip(comb, 0, 255).astype(np.uint8)
    if not DRY_RUN:
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # cv2.imwrite signals failure through its return value instead of
        # raising; surface it so the caller counts it as an error.
        if not cv2.imwrite(str(out_path), comb):
            return False, "write_fail"
    return True, "ok"

# Running counters reported at the end of the run.
stats = {
    "categories": 0,
    "train_ok": 0, "train_skipped": 0,
    "test_ok": 0, "test_skipped": 0,
    "gt_copied": 0, "errors": 0, "warnings": 0
}

# Fail early when the source dataset is missing.
if not ORIG_ROOT.exists():
    raise FileNotFoundError(f"來源資料夾不存在：{ORIG_ROOT}")

categories = sorted([p for p in ORIG_ROOT.iterdir() if p.is_dir()])
# The label matches the slice: up to the first 10 category names are shown.
print(f"開始處理 {len(categories)} 個 category（前 10 個）：{[c.name for c in categories[:10]]}{'...' if len(categories)>10 else ''}")

for cat in categories:
    stats["categories"] += 1
    cat_dst = OUT_ROOT / cat.name

    if DRY_RUN and VERBOSE:
        print(f"\n[DRY] 將在 {cat_dst} 建立 MVTec 結構 (train/good, test, ground_truth)")

    # List the test/<defect> folders once; both the ground-truth copy and the
    # test fusion below iterate over the same set.
    test_src = cat / "test"
    has_test = test_src.exists()
    defect_dirs = sorted(d for d in test_src.iterdir() if d.is_dir()) if has_test else []

    # ---------- ground truth: test/<defect>/gt -> OUT_ROOT/<cat>/ground_truth/<defect>/ ----------
    if has_test:
        for defect in defect_dirs:
            gt_src = defect / "gt"
            if gt_src.exists():
                dst_gt_folder = cat_dst / "ground_truth" / defect.name
                if not DRY_RUN:
                    dst_gt_folder.mkdir(parents=True, exist_ok=True)
                cnt = 0
                for f in sorted([x for x in gt_src.iterdir() if x.is_file()]):
                    if not DRY_RUN:
                        shutil.copy2(str(f), str(dst_gt_folder / f.name))
                    cnt += 1
                    stats["gt_copied"] += 1
                if VERBOSE:
                    print(f"  ground_truth: {cat.name}/{defect.name} -> {cnt} files")
    else:
        # Counted as a warning, like a test/<defect> folder lacking rgb/xyz.
        stats["warnings"] += 1
        if VERBOSE:
            print(f"  ⚠ {cat.name} 沒有 test 資料夾，跳過 ground_truth")

    # ---------- train/good fusion ----------
    rgb_train = cat / "train" / "good" / "rgb"
    xyz_train = cat / "train" / "good" / "xyz"
    dst_train = cat_dst / "train" / "good"

    if rgb_train.exists() and xyz_train.exists():
        rgb_list = sorted([p for p in rgb_train.iterdir() if p.is_file() and p.suffix.lower()==".png"])
        for rf in rgb_list:
            xf = find_xyz_for_rgb(rf, xyz_train)
            if xf is None:
                stats["train_skipped"] += 1
                if VERBOSE:
                    print(f"  ⚠ train {cat.name} 找不到 xyz：{rf.name}")
                continue
            out_path = dst_train / rf.name
            ok, msg = combine_and_save(rf, xf, out_path)
            if ok:
                stats["train_ok"] += 1
                # Only the first five successes of the whole run are echoed.
                if VERBOSE and stats["train_ok"] <= 5:
                    print(f"  train 合成: {cat.name} {rf.name} <- {xf.name}")
            else:
                stats["errors"] += 1
                if VERBOSE:
                    print(f"  ❌ train 合成失敗 {rf.name} <- {xf.name} : {msg}")
    else:
        # Counted as a warning, like a test/<defect> folder lacking rgb/xyz.
        stats["warnings"] += 1
        if VERBOSE:
            print(f"  ⚠ train/good 欄位缺失： rgb?{rgb_train.exists()} xyz?{xyz_train.exists()}")

    # ---------- test/<defect> fusion (covers "good" and every defect type) ----------
    for defect in defect_dirs:
        rgb_def = defect / "rgb"
        xyz_def = defect / "xyz"
        dst_def = cat_dst / "test" / defect.name
        if not DRY_RUN:
            dst_def.mkdir(parents=True, exist_ok=True)
        if rgb_def.exists() and xyz_def.exists():
            for rf in sorted([p for p in rgb_def.iterdir() if p.is_file() and p.suffix.lower()==".png"]):
                xf = find_xyz_for_rgb(rf, xyz_def)
                if xf is None:
                    stats["test_skipped"] += 1
                    if VERBOSE:
                        print(f"  ⚠ test/{defect.name} 找不到 xyz：{rf.name}")
                    continue
                out_path = dst_def / rf.name
                ok, msg = combine_and_save(rf, xf, out_path)
                if ok:
                    stats["test_ok"] += 1
                    # Only the first five successes of the whole run are echoed.
                    if VERBOSE and stats["test_ok"] <= 5:
                        print(f"  test/{defect.name} 合成: {rf.name} <- {xf.name}")
                else:
                    stats["errors"] += 1
                    if VERBOSE:
                        print(f"  ❌ test/{defect.name} 合成失敗 {rf.name} <- {xf.name} : {msg}")
        else:
            if VERBOSE:
                print(f"  ⚠ test/{defect.name} 缺 rgb 或 xyz")
            stats["warnings"] += 1

# ---------- final summary ----------
print("\n=== 完成統計 ===")
for k,v in stats.items():
    print(f"{k}: {v}")
print(f"輸出根目錄：{OUT_ROOT}")
if DRY_RUN:
    print("提示：目前為 DRY_RUN 模式（未寫檔）。如要寫檔請把參數 cell（Cell 1）中的 DRY_RUN 改成 False，並重新執行 Cell 2。")
else:
    print("已實際寫入檔案到輸出資料夾。")


** 實際執行並寫檔到 OUT_ROOT **
開始處理 10 個 category（樣本前三）：['bagel', 'cable_gland', 'carrot', 'cookie', 'dowel', 'foam', 'peach', 'potato', 'rope', 'tire']
  ground_truth: bagel/combined -> 23 files
  ground_truth: bagel/contamination -> 22 files
  ground_truth: bagel/crack -> 22 files
  ground_truth: bagel/good -> 22 files
  ground_truth: bagel/hole -> 21 files
  train 合成: bagel 000.png <- 000.png
  train 合成: bagel 001.png <- 001.png
  train 合成: bagel 002.png <- 002.png
  train 合成: bagel 003.png <- 003.png
  train 合成: bagel 004.png <- 004.png
  test/combined 合成: 000.png <- 000.png
  test/combined 合成: 001.png <- 001.png
  test/combined 合成: 002.png <- 002.png
  test/combined 合成: 003.png <- 003.png
  test/combined 合成: 004.png <- 004.png
  ground_truth: cable_gland/bent -> 21 files
  ground_truth: cable_gland/cut -> 22 files
  ground_truth: cable_gland/good -> 21 files
  ground_truth: cable_gland/hole -> 22 files
  ground_truth: cable_gland/thread -> 22 files
  ground_truth: carrot/combined -> 27 f