In [1]:
# The Marigold source tree is not an installed package here; it lives under
# /kaggle/input, so its directory is appended to sys.path before importing.
import sys
sys.path.append("/kaggle/input/marigold_model/transformers/default/2/marigold_source")
# Pipeline class used later in the notebook to load the model and run inference.
from Marigold.marigold.marigold_depth_pipeline import MarigoldDepthPipeline

2025-08-14 15:45:34.323301: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755186334.495169      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755186334.548240      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
import os, sys, gc, shutil
import cv2
import numpy as np
import torch
from PIL import Image

# ========== 1) Load Marigold ==========
model_path = "/kaggle/input/marigold_model/transformers/default/2/marigold_model"
# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
pipe = MarigoldDepthPipeline.from_pretrained(model_path)
pipe = pipe.to(device)

# ========== 2) Directories and helpers ==========
# VAL_ROOT is walked recursively for inputs; OUT_ROOT receives every export.
VAL_ROOT, OUT_ROOT = "/kaggle/input/diode-val/val", "/kaggle/working/download"
os.makedirs(OUT_ROOT, exist_ok=True)

def mm_png_save(path, depth_m: np.ndarray):
    """Save a depth map as a single-channel 16-bit PNG, scaled by 1000 (m -> mm).

    Args:
        path: Destination file path. Missing parent directories are created.
        depth_m: Depth array; values are multiplied by 1000 before quantisation.

    Raises:
        OSError: If OpenCV reports that the image could not be written.

    Values are clamped to the uint16 range [0, 65535] and truncated towards
    zero. NaN pixels are written as 0 instead of being fed to the integer cast,
    whose result for NaN is undefined.
    """
    depth_mm = np.asarray(depth_m) * 1000.0
    # np.clip lets NaN through, so neutralise non-finite values first.
    depth_mm = np.nan_to_num(depth_mm, nan=0.0, posinf=65535.0, neginf=0.0)
    depth_mm = np.clip(depth_mm, 0, 65535).astype(np.uint16)

    # dirname is "" for a bare filename, and os.makedirs("") raises.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(path, depth_mm):
        raise OSError(f"cv2.imwrite failed for: {path}")

def npy_save(path, arr: np.ndarray):
    """Save an array to ``path`` in .npy format as float32.

    Args:
        path: Destination file path. Missing parent directories are created;
            a bare filename (no directory component) is written to the
            current working directory.
        arr: Array or array-like (e.g. nested lists); converted to float32.
    """
    # dirname is "" for a bare filename, and os.makedirs("") raises.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # np.asarray also accepts plain sequences, unlike calling .astype on them.
    np.save(path, np.asarray(arr, dtype=np.float32))

def scale_match(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray) -> float:
    """Per-image scale-only alignment: least-squares ``s`` so that ``s * pred ~ gt``.

    Args:
        pred: Predicted depth.
        gt: Ground-truth depth, same shape as ``pred``.
        mask: Validity mask, same shape; any dtype, non-zero means valid.

    Returns:
        ``sum(gt * pred) / sum(pred ** 2)`` over pixels that are masked-in,
        finite in both arrays and have ``gt > 0``. Returns 1.0 when no such
        pixel exists or when the denominator is not positive.
    """
    pred = np.asarray(pred)
    gt = np.asarray(gt)
    # Force a boolean mask: an integer 0/1 mask would make the combined array
    # an integer array, and indexing with it would pick elements 0/1 instead
    # of masking.
    valid = np.asarray(mask).astype(bool) & np.isfinite(gt) & np.isfinite(pred) & (gt > 0)
    if not valid.any():
        return 1.0

    # Accumulate in float64 to limit rounding error over many pixels.
    p = pred[valid].astype(np.float64)
    g = gt[valid].astype(np.float64)
    den = float(np.dot(p, p))
    return float(np.dot(g, p) / den) if den > 0 else 1.0

# ========== 3) Main loop: walk VAL_ROOT recursively, save predictions + GT ==========
# count_total counts every .png found; count_saved counts images fully exported.
count_total, count_saved = 0, 0

for root, dirs, files in os.walk(VAL_ROOT):
    # Sorting in place makes os.walk visit sub-directories in a stable order.
    dirs.sort()
    pngs = [f for f in sorted(files) if f.endswith(".png")]
    if not pngs:
        continue

    # Path relative to VAL_ROOT (e.g. 'outdoor/scene_00024/scan_00185')
    rel_dir = os.path.relpath(root, VAL_ROOT)

    # Output sub-directories, mirroring the relative input layout
    out_pred_raw_npy   = os.path.join(OUT_ROOT, rel_dir, "pred_raw_npy")
    out_pred_aligned   = os.path.join(OUT_ROOT, rel_dir, "pred_aligned_npy")  # optional: after scale-only alignment
    out_pred_raw_png16 = os.path.join(OUT_ROOT, rel_dir, "pred_raw_png16")
    out_gt_npy         = os.path.join(OUT_ROOT, rel_dir, "gt_npy")
    out_gt_png16       = os.path.join(OUT_ROOT, rel_dir, "gt_png16")

    for img_name in pngs:
        count_total += 1
        # Build companion paths from the file stem only. A str.replace on the
        # full path would also rewrite any ".png" inside a directory name.
        stem = os.path.splitext(img_name)[0]
        rgb_path  = os.path.join(root, img_name)
        gt_path   = os.path.join(root, f"{stem}_depth.npy")
        mask_path = os.path.join(root, f"{stem}_depth_mask.npy")

        # Each DIODE RGB has a *_depth.npy and a *_depth_mask.npy; skip if either is missing
        if not (os.path.exists(gt_path) and os.path.exists(mask_path)):
            continue

        # Load data. cv2.imread returns None (no exception) for unreadable files.
        rgb_bgr = cv2.imread(rgb_path)
        if rgb_bgr is None:
            print(f"[Marigold SAVE] skip unreadable image: {rgb_path}")
            continue
        rgb = cv2.cvtColor(rgb_bgr, cv2.COLOR_BGR2RGB)
        gt_depth = np.load(gt_path).astype(np.float32)
        if gt_depth.ndim == 3:
            gt_depth = gt_depth[..., 0]
        mask = np.load(mask_path).astype(bool)
        # Give the mask the same treatment as the GT so both stay 2-D.
        if mask.ndim == 3:
            mask = mask[..., 0]

        # Inference (Marigold relative depth)
        # NOTE(review): batch_size=0 presumably lets the pipeline choose the
        # batch size itself; confirm against the Marigold pipeline.
        with torch.no_grad():
            result = pipe(
                Image.fromarray(rgb),
                denoising_steps=4,
                ensemble_size=1,
                processing_res=768,
                match_input_res=True,
                batch_size=0,
                show_progress_bar=False,
            )
        pred_depth = np.asarray(result.depth_np, dtype=np.float32)

        # Resize the prediction to the GT resolution (cv2 takes (width, height))
        if pred_depth.shape != gt_depth.shape:
            pred_depth = cv2.resize(
                pred_depth, (gt_depth.shape[1], gt_depth.shape[0]),
                interpolation=cv2.INTER_LINEAR
            )

        # Optional: prediction after scale-only alignment to the GT
        s = scale_match(pred_depth, gt_depth, mask)
        pred_aligned = pred_depth * s

        # Save prediction (raw). The PNG goes through the same x1000 scaling as
        # the GT, so for a relative prediction it is not real millimetres.
        npy_save(os.path.join(out_pred_raw_npy,   f"{stem}.npy"), pred_depth)
        mm_png_save(os.path.join(out_pred_raw_png16, f"{stem}.png"), pred_depth)

        # Save prediction (aligned, .npy only)
        npy_save(os.path.join(out_pred_aligned,   f"{stem}.npy"), pred_aligned)

        # Save GT
        npy_save(os.path.join(out_gt_npy,     f"{stem}.npy"), gt_depth)
        mm_png_save(os.path.join(out_gt_png16, f"{stem}.png"), gt_depth)

        count_saved += 1

        # Cleanup: drop references to the per-image arrays
        del rgb_bgr, rgb, gt_depth, mask, pred_depth, pred_aligned, result
    # Once per directory: collect garbage and release cached CUDA memory.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print(f"[Marigold SAVE] scanned PNGs: {count_total}, saved pairs: {count_saved}")

# ========== 4) Package ==========
# Zip everything under OUT_ROOT into <zip_base>.zip; the archive sits next to
# OUT_ROOT rather than inside it.
zip_base = "/kaggle/working/marigold_diode_preds_and_gt"
shutil.make_archive(base_name=zip_base, format="zip", root_dir=OUT_ROOT)
print(f"打包完成: {zip_base}.zip")

The config attributes {'prediction_type': 'depth'} were passed to MarigoldDepthPipeline, but are not expected and will be ignored. Please verify your model_index.json configuration file.
Keyword arguments {'prediction_type': 'depth'} are not expected by MarigoldDepthPipeline and will be ignored.


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

[Marigold SAVE] scanned PNGs: 771, saved pairs: 771
打包完成: /kaggle/working/marigold_diode_preds_and_gt.zip
