In [3]:
!pip install -r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt

Collecting gradio_imageslider (from -r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt (line 1))
  Downloading gradio_imageslider-0.0.20-py3-none-any.whl.metadata (10 kB)
Collecting gradio==4.36.0 (from -r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt (line 2))
  Downloading gradio-4.36.0-py3-none-any.whl.metadata (15 kB)
Collecting gradio-client==1.0.1 (from gradio==4.36.0->-r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt (line 2))
  Downloading gradio_client-1.0.1-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio==4.36.0->-r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt (line 2))
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pillow<11.0,>=8.0 (from gradio==4.36.0->-r /kaggle/input/depthanythingv2/transformers/default/1/Depth

In [1]:
import os, sys, gc
import cv2
import numpy as np
import torch

# -------------------------
# 1) Model loading (DepthAnythingV2)
# -------------------------
# Make the vendored Depth-Anything-V2 repository importable before importing from it.
sys.path.append("/kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2")
from depth_anything_v2.dpt import DepthAnythingV2

# Prefer the GPU when one is visible to torch; otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ViT-L configuration; the checkpoint below is loaded into this architecture.
model = DepthAnythingV2(
    encoder='vitl',
    features=256,
    out_channels=[256, 512, 1024, 1024],
)
ckpt_weights = torch.load(
    '/kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/checkpoints/depth_anything_v2_vitl.pth',
    map_location=device,
)
model.load_state_dict(ckpt_weights)
del ckpt_weights  # do not keep a second copy of the weights alive at module level
model.to(device).eval()

# -------------------------
# 2) Paths and helpers
# -------------------------
VAL_ROOT = '/kaggle/input/diode-val/val'
OUT_ROOT = '/kaggle/working/download'  # export directory
os.makedirs(OUT_ROOT, exist_ok=True)

def mm_png_save(path, depth_m):
    """Save a depth map as a 16-bit PNG with values expressed in millimetres.

    Args:
        path: Destination file path. Missing parent directories are created.
        depth_m: Array-like depth map; values are multiplied by 1000 before storage.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.

    Values are rounded to the nearest integer and clamped to the uint16 range
    [0, 65535]. NaN entries are stored as 0, because casting NaN to an integer
    type is undefined.
    """
    depth_mm = np.asarray(depth_m, dtype=np.float64) * 1000.0
    # Replace NaN before clipping: np.clip leaves NaN untouched.
    depth_mm = np.nan_to_num(depth_mm, nan=0.0)
    # Round instead of truncating so e.g. 289.99999 mm is stored as 290, not 289.
    depth_mm = np.clip(np.rint(depth_mm), 0, 65535).astype(np.uint16)

    parent = os.path.dirname(path)
    if parent:  # a bare filename has no directory part; os.makedirs('') would raise
        os.makedirs(parent, exist_ok=True)

    # cv2.imwrite signals failure through its return value rather than an exception.
    if not cv2.imwrite(path, depth_mm):
        raise OSError(f"cv2.imwrite failed to write {path!r}")

def npy_save(path, arr):
    """Save ``arr`` to ``path`` as a float32 ``.npy`` file.

    Args:
        path: Destination file path. Missing parent directories are created.
        arr: Array-like data; it is converted to float32 before saving.
    """
    parent = os.path.dirname(path)
    if parent:  # a bare filename has no directory part; os.makedirs('') would raise
        os.makedirs(parent, exist_ok=True)
    # np.asarray accepts lists/tuples as well as ndarrays.
    np.save(path, np.asarray(arr, dtype=np.float32))

def scale_match(pred, gt, mask):
    """Per-image scale-only alignment factor, for quick comparisons.

    Computes the least-squares scalar ``s`` minimising ``||s * pred - gt||^2``
    over valid pixels, i.e. ``s = sum(gt * pred) / sum(pred ** 2)``.

    Args:
        pred: Predicted map (array-like).
        gt: Ground-truth map with the same shape as ``pred``.
        mask: Validity mask with the same shape; any dtype, non-zero means valid.

    Returns:
        The scale as a Python float. Falls back to 1.0 when no pixel is valid,
        when the denominator is not positive, or when the result is not finite.

    A pixel is valid when the mask is set, both values are finite and ``gt > 0``.
    """
    pred = np.asarray(pred)
    gt = np.asarray(gt)
    # Coerce to bool: with an integer mask, `pred[valid]` would be fancy indexing
    # by position rather than boolean selection and silently give a wrong scale.
    valid = np.asarray(mask, dtype=bool) & np.isfinite(gt) & np.isfinite(pred) & (gt > 0)
    if not valid.any():
        return 1.0

    # Accumulate in float64 so sums over many pixels do not lose precision.
    p = pred[valid].astype(np.float64)
    g = gt[valid].astype(np.float64)
    den = float((p * p).sum())
    if not den > 0:
        return 1.0
    scale = float((g * p).sum()) / den
    return scale if np.isfinite(scale) else 1.0

# -------------------------
# 3) Walk VAL_ROOT recursively; save predictions and ground truth
# -------------------------
# NOTE(review): the loaded checkpoint is described below as producing *relative* depth.
# Presumably a scale-only fit against metric ground truth, and the millimetre PNG of the
# raw prediction, are only meant as quick-look outputs - confirm before using them for metrics.
count_total = 0   # every PNG encountered
count_kept = 0    # PNGs that had GT + mask and were exported
for root, _, files in os.walk(VAL_ROOT):
    # Only PNG files are treated as input images; sorted for a deterministic order.
    pngs = [f for f in sorted(files) if f.endswith('.png')]
    if not pngs:
        continue

    # Mirror the input directory layout under OUT_ROOT,
    # e.g. rel_dir == 'outdoor/scene_00024/scan_00185'.
    rel_dir = os.path.relpath(root, VAL_ROOT)
    out_dir_pred_raw_npy   = os.path.join(OUT_ROOT, rel_dir, 'pred_raw_npy')
    out_dir_pred_aligned   = os.path.join(OUT_ROOT, rel_dir, 'pred_aligned_npy')  # optional
    out_dir_pred_raw_png16 = os.path.join(OUT_ROOT, rel_dir, 'pred_raw_png16')
    out_dir_gt_npy         = os.path.join(OUT_ROOT, rel_dir, 'gt_npy')
    out_dir_gt_png16       = os.path.join(OUT_ROOT, rel_dir, 'gt_png16')

    for img_name in pngs:
        count_total += 1

        # Output/sidecar names share the image's stem. Building the sidecar paths from
        # the stem avoids str.replace rewriting a '.png' that occurs elsewhere in the path.
        stem = os.path.splitext(img_name)[0]
        rgb_path  = os.path.join(root, img_name)
        gt_path   = os.path.join(root, f'{stem}_depth.npy')
        mask_path = os.path.join(root, f'{stem}_depth_mask.npy')

        if not (os.path.exists(gt_path) and os.path.exists(mask_path)):
            # Each image is expected to come with *_depth.npy and *_depth_mask.npy; skip otherwise.
            continue

        # Load inputs. cv2.imread returns None (no exception) for unreadable files.
        bgr = cv2.imread(rgb_path)
        if bgr is None:
            print(f"[warn] unreadable image, skipped: {rgb_path}")
            continue
        gt_depth = np.load(gt_path).astype(np.float32)
        if gt_depth.ndim == 3:  # some arrays carry a trailing channel axis
            gt_depth = gt_depth[..., 0]
        mask = np.load(mask_path).astype(bool)
        if mask.ndim == 3:  # keep the mask's rank consistent with the squeezed GT
            mask = mask[..., 0]

        # Inference. The upstream Depth-Anything-V2 usage passes the raw cv2.imread (BGR)
        # image to infer_image, which converts to RGB itself; converting here as well
        # would swap the colour channels seen by the network.
        with torch.no_grad():
            pred_depth = model.infer_image(bgr)  # float32, relative depth

        # Resize the prediction to the GT resolution if they differ (cv2 takes (width, height)).
        if pred_depth.shape != gt_depth.shape:
            pred_depth = cv2.resize(pred_depth, (gt_depth.shape[1], gt_depth.shape[0]), interpolation=cv2.INTER_LINEAR)

        # (Optional) scale-only aligned copy of the prediction.
        s = scale_match(pred_depth, gt_depth, mask)
        pred_aligned = pred_depth * s

        # Raw prediction.
        npy_save(os.path.join(out_dir_pred_raw_npy,   f'{stem}.npy'), pred_depth)
        mm_png_save(os.path.join(out_dir_pred_raw_png16, f'{stem}.png'), pred_depth)

        # Aligned prediction (optional).
        npy_save(os.path.join(out_dir_pred_aligned,   f'{stem}.npy'), pred_aligned)

        # Ground truth.
        npy_save(os.path.join(out_dir_gt_npy,     f'{stem}.npy'), gt_depth)
        mm_png_save(os.path.join(out_dir_gt_png16, f'{stem}.png'), gt_depth)

        count_kept += 1

        # Drop references to the per-image arrays before the next iteration.
        del bgr, gt_depth, mask, pred_depth, pred_aligned

    # Once per directory: collect garbage and release cached GPU memory.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print(f"Done. Found PNGs: {count_total}, saved pairs: {count_kept}.")

Done. Found PNGs: 771, saved pairs: 771.


In [2]:
import shutil

# Bundle everything under OUT_ROOT into a single zip archive for download.
# `zip_path` is the archive base name; the ".zip" suffix is appended to it in the message below.
zip_path = "/kaggle/working/dav2_diode_preds_and_gt"
shutil.make_archive(base_name=zip_path, format='zip', root_dir=OUT_ROOT)
print(f"打包完成: {zip_path}.zip")

打包完成: /kaggle/working/dav2_diode_preds_and_gt.zip
