In [1]:
import os, gc, cv2, torch, shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from transformers import AutoImageProcessor, ZoeDepthForDepthEstimation

# ================== Model loading ==================
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model_name = "Intel/zoedepth-nyu-kitti"
# The image processor and the depth model are loaded from the same checkpoint.
processor = AutoImageProcessor.from_pretrained(model_name)
model = ZoeDepthForDepthEstimation.from_pretrained(model_name)
model = model.to(device)

# ================== 可视化（4 连图） ==================
def save_comparison_figure(img_name, rgb, gt, pred_metric, pred_aligned, mask, vis_dir):
    """Save a 1x4 comparison figure: RGB, GT depth, metric and aligned predictions.

    The three depth panels share one colour range, taken as the 2nd-98th
    percentile of the valid GT / prediction values, so they can be compared
    visually. Pixels where ``mask`` is False are drawn as NaN.

    Args:
        img_name: File name of the source image; the figure is written as
            ``<stem>_compare.png`` inside ``vis_dir``.
        rgb: Image array shown in the first panel.
        gt: Ground-truth depth array.
        pred_metric: Predicted depth array (unscaled).
        pred_aligned: Predicted depth array after scale alignment.
        mask: Boolean array selecting the pixels to display and to use for
            the colour range.
        vis_dir: Output directory; created if it does not exist.
    """
    os.makedirs(vis_dir, exist_ok=True)
    # Replace only the extension; str.replace would rewrite every ".png" in the name.
    save_path = os.path.join(vis_dir, os.path.splitext(img_name)[0] + "_compare.png")

    gt_ok = mask & (gt > 0) & np.isfinite(gt)
    valid_m = gt_ok & np.isfinite(pred_metric)
    valid_a = gt_ok & np.isfinite(pred_aligned)

    pool = []
    if np.any(valid_m):
        pool += [gt[valid_m], pred_metric[valid_m]]
    if np.any(valid_a):
        pool += [gt[valid_a], pred_aligned[valid_a]]
    if not pool:
        # No pixel has both a valid GT and a finite prediction: fall back to
        # GT alone, restricted to finite values so the percentiles are never NaN.
        pool = [gt[gt_ok] if np.any(gt_ok) else gt[np.isfinite(gt)]]
    pool = np.concatenate(pool)

    if pool.size:
        vmin, vmax = (float(v) for v in np.percentile(pool, [2, 98]))
    else:
        # Nothing finite to measure; let matplotlib pick the limits.
        vmin = vmax = None

    fig, axes = plt.subplots(1, 4, figsize=(20, 5))
    try:
        axes[0].imshow(rgb)
        axes[0].set_title("RGB")
        axes[0].axis("off")

        panels = (
            ("GT Depth", gt),
            ("Predicted (Metric)", pred_metric),
            ("Predicted (Aligned)", pred_aligned),
        )
        for ax, (title, depth) in zip(axes[1:], panels):
            im = ax.imshow(np.where(mask, depth, np.nan), cmap="viridis", vmin=vmin, vmax=vmax)
            ax.set_title(title)
            fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
            ax.axis("off")

        fig.tight_layout()
        fig.savefig(save_path, dpi=150)
    finally:
        # Always close the figure: the caller catches exceptions and keeps
        # going, so a figure left open here would accumulate over the run.
        plt.close(fig)

# ================== 指标函数 ==================
def compute_abs_rel(pred, gt, mask):
    """Return the mean absolute relative error ``|pred - gt| / gt``.

    Only pixels where ``mask`` is set, ``gt`` is positive, and both ``gt`` and
    ``pred`` are finite contribute. Returns NaN when no such pixel exists.
    """
    keep = mask & (gt > 0) & np.isfinite(gt) & np.isfinite(pred)
    target = gt[keep]
    if not target.size:
        return np.nan
    estimate = pred[keep]
    return np.mean(np.abs(estimate - target) / target)

def compute_delta1(pred, gt, mask):
    """Return the fraction of valid pixels with ``max(pred/gt, gt/pred) < 1.25``.

    A pixel is valid when ``mask`` is set, both ``gt`` and ``pred`` exceed
    1e-6, and both are finite. Returns NaN when no pixel is valid.
    """
    eps = 1e-6
    keep = mask & (gt > eps) & (pred > eps) & np.isfinite(gt) & np.isfinite(pred)
    p, g = pred[keep], gt[keep]
    if g.size == 0:
        return np.nan
    within = np.maximum(p / g, g / p) < 1.25
    return np.mean(within)

def scale_match(pred, gt, mask):
    """Return the least-squares scale ``s`` that minimises ``||s * pred - gt||^2``.

    The fit uses the same validity rule as the metric functions: ``mask`` set,
    ``gt`` positive, and both ``gt`` and ``pred`` finite. Pixels with
    non-positive GT are excluded because they are never evaluated, and
    including them would bias the scale toward zero.

    Returns:
        The scale as a Python float, or 1.0 when there is no valid pixel or
        the valid predictions are all zero.
    """
    valid = mask & (gt > 0) & np.isfinite(gt) & np.isfinite(pred)
    # Accumulate in float64 so the sums stay accurate for large float32 images.
    pv = pred[valid].astype(np.float64)
    gv = gt[valid].astype(np.float64)
    denom = np.dot(pv, pv)
    return float(np.dot(gv, pv) / denom) if denom > 0 else 1.0

# ================== 单个 scan 处理 ==================
def process_scan(scan_path, split, scene, scan, output_root, global_rows):
    """Evaluate every PNG in one scan directory, then write its CSV and zip its figures.

    For each ``<name>.png`` that has sibling ``<name>_depth.npy`` and
    ``<name>_depth_mask.npy`` files, the model is run, AbsRel and delta1 are
    computed for the raw prediction and for the scale-aligned prediction, and
    a comparison figure is saved. A failure on one image is reported and the
    loop moves on to the next image.

    Args:
        scan_path: Directory containing the images and their depth/mask files.
        split: Split label used in logs, rows and output paths.
        scene: Scene label used in logs, rows and output paths.
        scan: Scan label used in logs, rows and output paths.
        output_root: Root under which ``csv/``, ``visualizations/`` and the
            zip archive are written.
        global_rows: List that receives one row per evaluated image
            (mutated in place).
    """
    vis_dir = os.path.join(output_root, "visualizations", split, scene, scan)
    csv_path = os.path.join(output_root, "csv", split, scene)
    os.makedirs(csv_path, exist_ok=True)
    csv_file = os.path.join(csv_path, f"{scan}.csv")

    columns = [
        "split", "scene", "scan", "image",
        "AbsRel_metric", "Delta1_metric", "AbsRel_relative", "Delta1_relative",
    ]
    rows = []

    for img_file in sorted(os.listdir(scan_path)):
        if not img_file.endswith(".png"):
            continue

        tag = f"{split}/{scene}/{scan}/{img_file}"
        rgb_path = os.path.join(scan_path, img_file)
        # Build sibling paths from the stem; str.replace on the full path
        # would also rewrite a ".png" appearing in a directory name.
        stem = os.path.splitext(rgb_path)[0]
        gt_path = stem + "_depth.npy"
        mask_path = stem + "_depth_mask.npy"
        if not (os.path.exists(gt_path) and os.path.exists(mask_path)):
            print(f"[缺GT/Mask] 跳过: {tag}")
            continue

        try:
            # Load inputs
            bgr = cv2.imread(rgb_path)
            if bgr is None:
                # cv2.imread signals failure by returning None rather than raising.
                raise OSError(f"cannot read image: {rgb_path}")
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            gt = np.load(gt_path).astype(np.float32)
            if gt.ndim == 3:
                gt = gt[:, :, 0]
            mask = np.load(mask_path).astype(bool)
            if mask.ndim == 3:
                # Same treatment as gt, so both index the image plane.
                mask = mask[:, :, 0]

            # Inference
            input_image = Image.fromarray(rgb)
            inputs = processor(images=input_image, return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = model(**inputs)
            processed = processor.post_process_depth_estimation(
                outputs, source_sizes=[(rgb.shape[0], rgb.shape[1])]
            )
            pred_metric = processed[0]["predicted_depth"].squeeze().cpu().numpy()

            # Match the prediction to the GT resolution
            if pred_metric.shape != gt.shape:
                pred_metric = cv2.resize(
                    pred_metric, (gt.shape[1], gt.shape[0]), interpolation=cv2.INTER_LINEAR
                )

            # Metric (raw prediction)
            absrel_metric = compute_abs_rel(pred_metric, gt, mask)
            delta1_metric = compute_delta1(pred_metric, gt, mask)

            # Relative (scale-aligned prediction)
            s = scale_match(pred_metric, gt, mask)
            pred_aligned = pred_metric * s
            absrel_rel = compute_abs_rel(pred_aligned, gt, mask)
            delta1_rel = compute_delta1(pred_aligned, gt, mask)

            print(f"[{tag}] "
                  f"Metric: AbsRel {absrel_metric:.4f}, δ1 {delta1_metric*100:.2f}% | "
                  f"Relative: AbsRel {absrel_rel:.4f}, δ1 {delta1_rel*100:.2f}%")

            # Record the row before plotting so a plotting failure does not lose the metrics.
            row = [split, scene, scan, img_file,
                   absrel_metric, delta1_metric, absrel_rel, delta1_rel]
            rows.append(row)
            global_rows.append(row)

            # Visualization
            save_comparison_figure(img_file, rgb, gt, pred_metric, pred_aligned, mask, vis_dir)

        except Exception as e:
            # Best-effort evaluation: report the failure and continue with the next image.
            print(f"[失败] {tag}: {e}")
        finally:
            # Release per-image data on both the success and the failure path.
            bgr = rgb = gt = mask = pred_metric = pred_aligned = None
            inputs = outputs = processed = input_image = None
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    # Save the per-scan CSV, with a trailing row holding the column means
    if rows:
        df = pd.DataFrame(rows, columns=columns)
        mean_row = pd.DataFrame([[
            split, scene, scan, "mean",
            df["AbsRel_metric"].mean(),
            df["Delta1_metric"].mean(),
            df["AbsRel_relative"].mean(),
            df["Delta1_relative"].mean(),
        ]], columns=df.columns)
        df = pd.concat([df, mean_row], ignore_index=True)
        df.to_csv(csv_file, index=False)
        print(f"[保存] CSV: {csv_file}")

        # Zip the figures. Skip when none were written, since make_archive
        # raises on a missing root_dir.
        if os.path.isdir(vis_dir):
            # Build the base name directly; stripping ".zip" with str.replace
            # would corrupt an output_root that itself contains ".zip".
            archive_base = os.path.join(output_root, f"visualizations_{split}_{scene}_{scan}")
            zip_path = archive_base + ".zip"
            shutil.make_archive(base_name=archive_base, format="zip", root_dir=vis_dir)
            print(f"[打包] {zip_path}")

# ================== Main flow: evaluate every scan in one pass ==================
val_root = "/kaggle/input/diode-val/val"
output_root = "/kaggle/working"
os.makedirs(os.path.join(output_root, "csv"), exist_ok=True)


def _sorted_subdirs(parent):
    """Yield ``(name, path)`` for each sub-directory of *parent*, in sorted name order."""
    for entry in sorted(os.listdir(parent)):
        entry_path = os.path.join(parent, entry)
        if os.path.isdir(entry_path):
            yield entry, entry_path


global_rows = []
splits = ["indoors", "outdoor"]
for split in splits:
    split_root = os.path.join(val_root, split)
    # A split that is absent from the dataset is skipped.
    if os.path.isdir(split_root):
        for scene, scene_path in _sorted_subdirs(split_root):
            for scan, scan_path in _sorted_subdirs(scene_path):
                print(f"\n=== 处理 {split}/{scene}/{scan} ===")
                process_scan(scan_path, split, scene, scan, output_root, global_rows)

# ================== Summary CSV over all scans ==================
if global_rows:
    result_columns = [
        "split", "scene", "scan", "image",
        "AbsRel_metric", "Delta1_metric", "AbsRel_relative", "Delta1_relative",
    ]
    metric_columns = result_columns[4:]
    all_df = pd.DataFrame(global_rows, columns=result_columns)

    # Append one row holding the mean of each metric over every image.
    summary_values = ["ALL", "ALL", "ALL", "mean"]
    summary_values += [all_df[name].mean() for name in metric_columns]
    overall_mean = pd.DataFrame([summary_values], columns=all_df.columns)
    all_df = pd.concat([all_df, overall_mean], ignore_index=True)

    all_csv = os.path.join(output_root, "eval_results_all_scans.csv")
    all_df.to_csv(all_csv, index=False)
    print(f"\n[汇总] 全量结果保存至: {all_csv}")

# Final cleanup of host and GPU memory.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()


2025-08-12 16:51:34.569907: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755017494.763463      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755017494.821073      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


preprocessor_config.json:   0%|          | 0.00/723 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.38G [00:00<?, ?B/s]


=== 处理 indoors/scene_00019/scan_00183 ===
[indoors/scene_00019/scan_00183/00019_00183_indoors_000_010.png] Metric: AbsRel 0.5210, δ1 0.95% | Relative: AbsRel 0.0746, δ1 94.33%


  xa[xa < 0] = -1


[indoors/scene_00019/scan_00183/00019_00183_indoors_000_040.png] Metric: AbsRel 0.7531, δ1 0.00% | Relative: AbsRel 0.1095, δ1 91.68%
[indoors/scene_00019/scan_00183/00019_00183_indoors_010_000.png] Metric: AbsRel 0.4156, δ1 3.09% | Relative: AbsRel 0.0895, δ1 92.93%
[indoors/scene_00019/scan_00183/00019_00183_indoors_010_020.png] Metric: AbsRel 0.6568, δ1 0.01% | Relative: AbsRel 0.0610, δ1 97.79%
[indoors/scene_00019/scan_00183/00019_00183_indoors_020_030.png] Metric: AbsRel 0.7137, δ1 0.01% | Relative: AbsRel 0.0395, δ1 99.07%
[indoors/scene_00019/scan_00183/00019_00183_indoors_020_050.png] Metric: AbsRel 0.7144, δ1 0.02% | Relative: AbsRel 0.1143, δ1 96.85%
[indoors/scene_00019/scan_00183/00019_00183_indoors_030_010.png] Metric: AbsRel 0.4941, δ1 1.06% | Relative: AbsRel 0.0657, δ1 93.30%
[indoors/scene_00019/scan_00183/00019_00183_indoors_030_040.png] Metric: AbsRel 0.6890, δ1 0.01% | Relative: AbsRel 0.0816, δ1 92.00%
[indoors/scene_00019/scan_00183/00019_00183_indoors_040_000.pn