In [1]:
!pip install -r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt

Collecting gradio_imageslider (from -r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt (line 1))
  Downloading gradio_imageslider-0.0.20-py3-none-any.whl.metadata (10 kB)
Collecting gradio==4.36.0 (from -r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt (line 2))
  Downloading gradio-4.36.0-py3-none-any.whl.metadata (15 kB)
Collecting gradio-client==1.0.1 (from gradio==4.36.0->-r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt (line 2))
  Downloading gradio_client-1.0.1-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio==4.36.0->-r /kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2/requirements.txt (line 2))
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pillow<11.0,>=8.0 (from gradio==4.36.0->-r /kaggle/input/depthanythingv2/transformers/default/1/Depth

In [2]:
import os, gc, cv2, sys, torch, shutil
import numpy as np
from PIL import Image
import torchvision.transforms as T

# ==== 1) Model loading (DepthAnythingV2) ====
# Root of the Depth-Anything-V2 checkout; the Python package and the checkpoint
# are both resolved relative to it.
DAV2_ROOT = "/kaggle/input/depthanythingv2/transformers/default/1/Depth-Anything-V2"
# Guarded so that re-running this cell does not keep appending duplicates to sys.path.
if DAV2_ROOT not in sys.path:
    sys.path.append(DAV2_ROOT)
from depth_anything_v2.dpt import DepthAnythingV2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DepthAnythingV2(encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024])

# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code while being loaded.
# The weights are read onto the CPU and moved once with model.to(device), which
# avoids holding a second temporary copy on the accelerator.
state_dict = torch.load(
    os.path.join(DAV2_ROOT, "checkpoints", "depth_anything_v2_vitl.pth"),
    map_location="cpu",
    weights_only=True,
)
model.load_state_dict(state_dict)
del state_dict  # the model now owns its own copy of the parameters
model.to(device).eval()

# ==== 2) Paths ====
# Input: KITTI validation selection (RGB images and their ground-truth depth maps).
base_dir = "/kaggle/input/kitti-dataset/depth_selection/val_selection_cropped"
img_dir, gt_dir = (
    os.path.join(base_dir, sub) for sub in ("image", "groundtruth_depth")
)

# Output: one sub-directory per artefact type under OUT_ROOT.
OUT_ROOT = "/kaggle/working/download/kitti_dav2"
out_pred_raw_npy, out_pred_aligned, out_gt_npy, out_gt_png16 = (
    os.path.join(OUT_ROOT, leaf)
    for leaf in (
        "pred_raw_npy",      # relative depth, .npy
        "pred_aligned_npy",  # (optional) scale-only aligned prediction, .npy
        "gt_npy",            # ground truth in metres, .npy
        "gt_png16",          # ground truth in millimetres, 16-bit PNG
    )
)
for out_dir in (out_pred_raw_npy, out_pred_aligned, out_gt_npy, out_gt_png16):
    os.makedirs(out_dir, exist_ok=True)

# ==== 3) Preprocessing & helpers ====
# Depth Anything V2's reference preprocessing normalises with the ImageNet channel
# statistics; feeding inputs normalised with 0.5/0.5 shifts them away from the
# distribution the checkpoint was trained on.
# NOTE(review): the fixed 518x518 resize does not preserve the aspect ratio of the
# input; upstream's reference preprocessing appears to keep it (sides rounded to a
# multiple of 14) - confirm whether the square resize is intended.
transform = T.Compose([
    T.Resize((518, 518)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def npy_save(path, arr):
    """Write ``arr`` to ``path`` as a float32 ``.npy`` file.

    Args:
        path: Destination file path. Missing parent directories are created.
            A bare file name (no directory part) is written relative to the
            current working directory.
        arr: Array or array-like (e.g. nested lists); converted to float32.
    """
    parent = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so only create a parent
    # directory when the path actually names one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    # np.asarray accepts plain sequences as well as ndarrays.
    np.save(path, np.asarray(arr, dtype=np.float32))

def mm_png_save(path, depth_m):
    """Save a depth map given in metres as a 16-bit PNG storing millimetres.

    Values are converted to millimetres, rounded to the nearest integer and
    clamped to the uint16 range [0, 65535]; depths above 65.535 m therefore
    saturate at 65535. NaN and negative values are written as 0.

    Args:
        path: Destination file path. Missing parent directories are created.
        depth_m: Array-like depth map in metres.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    parent = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so skip it for bare file names.
    if parent:
        os.makedirs(parent, exist_ok=True)

    depth_mm = np.asarray(depth_m, dtype=np.float64) * 1000.0
    # Casting NaN/inf to an unsigned integer is undefined, so map them to
    # well-defined values before the cast.
    depth_mm = np.nan_to_num(depth_mm, nan=0.0, posinf=65535.0, neginf=0.0)
    # Round instead of truncating: a plain astype() drops the fractional
    # millimetre and biases every stored value downwards.
    depth_mm = np.clip(np.rint(depth_mm), 0, 65535).astype(np.uint16)

    # cv2.imwrite signals failure through its return value rather than raising.
    if not cv2.imwrite(path, depth_mm):
        raise OSError(f"cv2.imwrite failed to write {path!r}")

def scale_match(pred, gt, mask):
    """Per-image scale-only alignment factor between ``pred`` and ``gt``.

    Computes ``sum(gt * pred) / sum(pred ** 2)`` over the pixels that are
    selected by ``mask``, finite in both arrays and have ``gt > 0`` (the
    closed-form least-squares scale for ``s * pred ~= gt``).

    Args:
        pred: Predicted map (NumPy array).
        gt: Ground-truth map with the same shape as ``pred``.
        mask: Boolean array marking candidate pixels.

    Returns:
        The scale as a Python float, or 1.0 when no pixel qualifies or the
        denominator is not positive.
    """
    usable = mask & np.isfinite(gt) & np.isfinite(pred) & (gt > 0)
    if np.any(usable):
        pred_sel = pred[usable]
        gt_sel = gt[usable]
        sum_pp = float((pred_sel**2).sum())
        if sum_pp > 0:
            return float((gt_sel * pred_sel).sum() / sum_pp)
    # Fallback: nothing to fit against, leave the prediction unscaled.
    return 1.0

# ==== 4) Main loop: save predictions + ground truth only ====
img_files = sorted(f for f in os.listdir(img_dir) if f.endswith(".png"))
saved = 0
skipped = []  # (file name, reason) for every image that produced no output

for img_file in img_files:
    # GT file name: replace only the first "_image_" with "_groundtruth_depth_".
    gt_file = img_file.replace("_image_", "_groundtruth_depth_", 1)
    rgb_path = os.path.join(img_dir, img_file)
    gt_path  = os.path.join(gt_dir,  gt_file)
    if not os.path.exists(gt_path):
        skipped.append((img_file, "ground-truth file not found"))
        continue

    # Read RGB and GT. The GT PNG is divided by 256 to obtain metres, and
    # pixels with value 0 are treated as having no measurement.
    rgb_bgr = cv2.imread(rgb_path)
    if rgb_bgr is None:
        skipped.append((img_file, "image could not be read"))
        continue
    gt_png = cv2.imread(gt_path, cv2.IMREAD_UNCHANGED)
    if gt_png is None:
        skipped.append((img_file, "ground truth could not be read"))
        continue
    gt_depth = gt_png.astype(np.float32) / 256.0
    mask = gt_depth > 0

    # Inference (relative depth). infer_image takes the BGR array as returned by
    # cv2.imread, applies the model's own preprocessing (aspect-ratio-preserving
    # resize and normalisation) and returns an HxW NumPy map at the input
    # resolution, so the image is not squashed to a fixed square first.
    with torch.no_grad():
        pred_rel = np.asarray(model.infer_image(rgb_bgr, 518), dtype=np.float32)

    # Safety net: resample if the prediction and GT grids still differ.
    if pred_rel.shape != gt_depth.shape:
        pred_rel = cv2.resize(pred_rel, (gt_depth.shape[1], gt_depth.shape[0]), interpolation=cv2.INTER_LINEAR)

    # (Optional) scale-only alignment.
    # NOTE(review): upstream describes the relative Depth Anything V2 checkpoints
    # as predicting affine-invariant inverse depth; if that holds here, a
    # scale-only fit against metric depth is not meaningful and the fit should be
    # done in disparity space - confirm before relying on pred_aligned_npy.
    s = scale_match(pred_rel, gt_depth, mask)
    pred_aligned = pred_rel * s

    stem = os.path.splitext(img_file)[0]

    # Save predictions (raw relative depth and the aligned version, both .npy).
    npy_save(os.path.join(out_pred_raw_npy,   f"{stem}.npy"), pred_rel)
    npy_save(os.path.join(out_pred_aligned,   f"{stem}.npy"), pred_aligned)   # comment out if not needed

    # Save GT (metres as .npy + millimetres as 16-bit PNG).
    npy_save(os.path.join(out_gt_npy,     f"{stem}.npy"), gt_depth)
    mm_png_save(os.path.join(out_gt_png16, f"{stem}.png"), gt_depth)

    saved += 1

# Cleanup once after the loop: per-iteration arrays are released when their names
# are rebound, so forcing a GC pass and a CUDA cache flush on every image only
# slows the loop down.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print(f"[KITTI DepthAnythingV2 SAVE] saved pairs: {saved} / {len(img_files)}")
if skipped:
    # Surface skipped inputs instead of dropping them silently; cap the listing
    # so a systematic failure does not flood the cell output.
    print(f"[KITTI DepthAnythingV2 SAVE] skipped: {len(skipped)}")
    for skipped_name, skipped_reason in skipped[:20]:
        print(f"  {skipped_name}: {skipped_reason}")

# ==== 5) Package for download ====
# Zip everything under OUT_ROOT; make_archive appends the ".zip" suffix itself.
zip_base = "/kaggle/working/dav2_kitti_preds_and_gt"
shutil.make_archive(base_name=zip_base, format="zip", root_dir=OUT_ROOT)
print(f"打包完成: {zip_base}.zip")

[KITTI DepthAnythingV2 SAVE] saved pairs: 1000 / 1000
打包完成: /kaggle/working/dav2_kitti_preds_and_gt.zip
