In [1]:
import os, gc, cv2, torch, h5py, shutil
import numpy as np
from PIL import Image
from tqdm import tqdm
from transformers import AutoImageProcessor, ZoeDepthForDepthEstimation

# ===== 1) Device and model =====
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model_name = "Intel/zoedepth-nyu-kitti"
processor = AutoImageProcessor.from_pretrained(model_name)

# Load the pretrained checkpoint, move it to the chosen device, switch to eval mode.
model = ZoeDepthForDepthEstimation.from_pretrained(model_name)
model = model.to(device)
model = model.eval()

# ===== 2) Data paths and outputs =====
mat_path = "/kaggle/input/nyuv2-dataset/nyu_depth_v2_labeled.mat"
idx_list_path = "/kaggle/input/nyu2-test/nyu2_test.txt"

OUT_ROOT = "/kaggle/working/download/nyuv2_zoe"
# Output sub-directories, in order:
#   raw metric predictions, (optional) scale-only aligned predictions,
#   ground truth in metres, ground truth as 16-bit PNG in millimetres.
out_pred_raw_npy, out_pred_aligned, out_gt_npy, out_gt_png16 = (
    os.path.join(OUT_ROOT, sub_dir)
    for sub_dir in ("pred_raw_npy", "pred_aligned_npy", "gt_npy", "gt_png16")
)
for d in (out_pred_raw_npy, out_pred_aligned, out_gt_npy, out_gt_png16):
    os.makedirs(d, exist_ok=True)

# ===== 3) Load NYUv2 =====
with h5py.File(mat_path, 'r') as f:
    images = f['images'][:]   # expected (1449, 3, 640, 480)
    depths = f['depths'][:]   # expected (1449, 640, 480)
# Reorder axes to (480, 640, 3, N) and (480, 640, N) so that `[..., i]` selects sample i.
# copy=False avoids a second full-size copy when the stored dtype already matches.
images = np.transpose(images, (3, 2, 1, 0)).astype(np.uint8, copy=False)
depths = np.transpose(depths, (2, 1, 0)).astype(np.float32, copy=False)
# reshape(-1) keeps the index list 1-D: np.loadtxt yields a 0-d array for a
# single-entry file, which can be neither iterated nor passed to len().
idx_list = np.loadtxt(idx_list_path).astype(int).reshape(-1) - 1  # 1-based -> 0-based

# ===== 4) Utility functions =====
def npy_save(path, arr):
    """Save ``arr`` to ``path`` as a float32 ``.npy`` file.

    Args:
        path: Destination file path. Its parent directory is created if missing.
        arr: Array-like data; converted to ``np.float32`` before saving.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty directory part, and os.makedirs("") raises.
    if parent:
        os.makedirs(parent, exist_ok=True)
    # np.asarray also accepts plain sequences, not only ndarrays.
    np.save(path, np.asarray(arr, dtype=np.float32))

def mm_png_save(path, depth_m):
    """Write a depth map given in metres as a 16-bit PNG in millimetres.

    Args:
        path: Destination PNG path. Its parent directory is created if missing.
        depth_m: Array-like depth map in metres.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty directory part, and os.makedirs("") raises.
    if parent:
        os.makedirs(parent, exist_ok=True)

    # Scale in float64 so values such as 2.3 m do not land just below an
    # integer millimetre because of float32 representation error.
    depth_mm = np.asarray(depth_m, dtype=np.float64) * 1000.0
    # Casting NaN to an integer type is undefined; map NaN to 0 and pin
    # infinities to the ends of the uint16 range before the cast.
    depth_mm = np.nan_to_num(depth_mm, nan=0.0, posinf=65535.0, neginf=0.0)
    # Round to the nearest millimetre instead of truncating toward zero.
    depth_mm = np.clip(np.rint(depth_mm), 0, 65535).astype(np.uint16)

    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(path, depth_mm):
        raise OSError(f"cv2.imwrite failed to write {path!r}")

def scale_match(pred, gt, mask):
    """Return the per-image scale-only factor ``s`` that aligns ``pred`` to ``gt``.

    The factor is ``sum(gt * pred) / sum(pred ** 2)`` over the pixels selected by
    ``mask`` where both arrays are finite and ``gt`` is positive. When no pixel
    qualifies, or the denominator is not positive, ``1.0`` is returned.
    """
    valid = mask & np.isfinite(gt) & np.isfinite(pred) & (gt > 0)
    if np.count_nonzero(valid) == 0:
        return 1.0

    pred_valid = pred[valid]
    gt_valid = gt[valid]
    denominator = float(np.square(pred_valid).sum())
    if denominator > 0:
        return float((gt_valid * pred_valid).sum() / denominator)
    return 1.0

# ===== 5) Main loop: save predictions + GT only =====
saved = 0
failed = []  # (index, error) for every sample that could not be processed
num_samples = images.shape[-1]
for idx in tqdm(idx_list, desc="Saving NYUv2 (ZoeDepth)"):
    # Pre-bind every per-sample name so the cleanup in `finally` is valid no
    # matter where in the body a failure happens.
    rgb = gt = mask = inputs = outputs = processed = None
    pred_metric = pred_aligned = None
    try:
        # A negative index would wrap around and silently pick another sample.
        if not 0 <= idx < num_samples:
            raise IndexError(f"sample index {int(idx)} out of range [0, {num_samples})")

        rgb = images[..., idx]         # (480,640,3) uint8
        gt  = depths[..., idx]         # (480,640)   float32, metres
        mask = (gt > 0) & np.isfinite(gt)

        # Forward pass (the original author notes that Zoe outputs metric depth)
        with torch.no_grad():
            inputs = processor(images=Image.fromarray(rgb), return_tensors="pt").to(device)
            outputs = model(**inputs)
            processed = processor.post_process_depth_estimation(
                outputs, source_sizes=[(rgb.shape[0], rgb.shape[1])]
            )
            pred_metric = processed[0]["predicted_depth"].squeeze().detach().cpu().numpy().astype(np.float32)

        # Resize the prediction to the GT resolution when they differ
        if pred_metric.shape != gt.shape:
            pred_metric = cv2.resize(pred_metric, (gt.shape[1], gt.shape[0]), interpolation=cv2.INTER_LINEAR)

        # (Optional) scale-only aligned copy for quick comparison; the unified
        # evaluation uses the raw prediction with alignment="ls".
        s = scale_match(pred_metric, gt, mask)
        pred_aligned = (pred_metric * s).astype(np.float32)

        stem = f"{int(idx):04d}"

        # Save predictions (raw metric + scale-aligned metric)
        npy_save(os.path.join(out_pred_raw_npy, f"{stem}.npy"), pred_metric)
        npy_save(os.path.join(out_pred_aligned, f"{stem}.npy"), pred_aligned)  # optional

        # Save GT (metres as .npy + millimetres as 16-bit PNG)
        npy_save(os.path.join(out_gt_npy,   f"{stem}.npy"), gt)
        mm_png_save(os.path.join(out_gt_png16, f"{stem}.png"), gt)

        saved += 1

    except Exception as exc:
        # Keep going on a bad sample, but record and report it instead of
        # hiding the failure. tqdm.write prints without breaking the bar.
        failed.append((int(idx), repr(exc)))
        tqdm.write(f"[NYUv2 ZoeDepth SAVE] skipped idx={int(idx)}: {exc!r}")

    finally:
        # Cleanup runs for failed samples too, so their tensors are released.
        rgb = gt = mask = inputs = outputs = processed = None
        pred_metric = pred_aligned = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

print(f"[NYUv2 ZoeDepth SAVE] saved pairs: {saved} / {len(idx_list)}")
if failed:
    print(f"[NYUv2 ZoeDepth SAVE] failed samples ({len(failed)}): {[i for i, _ in failed]}")

# ===== 6) Package for download =====
# Zip everything under OUT_ROOT into a single archive next to the working dir.
zip_base = "/kaggle/working/zoe_nyuv2_preds_and_gt"
shutil.make_archive(base_name=zip_base, format="zip", root_dir=OUT_ROOT)
print(f"打包完成: {zip_base}.zip")

2025-08-14 21:53:39.513855: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755208419.748825      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755208419.815796      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


preprocessor_config.json:   0%|          | 0.00/723 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.38G [00:00<?, ?B/s]

Saving NYUv2 (ZoeDepth): 100%|██████████| 666/666 [06:36<00:00,  1.68it/s]


[NYUv2 ZoeDepth SAVE] saved pairs: 666 / 666
打包完成: /kaggle/working/zoe_nyuv2_preds_and_gt.zip
