In [None]:
!pip install -U transformers==4.53.0

In [None]:
import os, gc, cv2, torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import torch
from PIL import Image
from tqdm import tqdm
from transformers import AutoImageProcessor, ZoeDepthForDepthEstimation

# Model loading: run on the GPU when one is available, otherwise on the CPU.
model_name = "Intel/zoedepth-nyu-kitti"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
processor = AutoImageProcessor.from_pretrained(model_name)
model = ZoeDepthForDepthEstimation.from_pretrained(model_name)
model = model.to(device)

# ========== Path configuration ==========
# Input folders (RGB images, ground-truth depth, camera intrinsics).
base_dir = "/kaggle/input/kitti-dataset/depth_selection/val_selection_cropped"
img_dir, gt_dir, K_dir = (
    os.path.join(base_dir, sub_dir)
    for sub_dir in ("image", "groundtruth_depth", "intrinsics")
)

# Output folders; each one is created if it does not exist yet.
save_dir = "/kaggle/working/"
vis_dir = os.path.join(save_dir, "visualizations")
pred_dir = os.path.join(save_dir, "predicted_depth")
for out_dir in (save_dir, vis_dir, pred_dir):
    os.makedirs(out_dir, exist_ok=True)

# ========== 可视化 ==========
def save_vis(rgb, gt, pred, mask, save_path):
    """Save a three-panel figure (RGB, ground truth, prediction) to ``save_path``.

    Both depth panels share one colour range, taken from the 2nd and 98th
    percentiles of the valid ground-truth and predicted depths pooled
    together, so the two panels are directly comparable.

    Args:
        rgb: Image array shown in the first panel.
        gt: Ground-truth depth array.
        pred: Predicted depth array, indexed with the same mask as ``gt``.
        mask: Boolean array selecting the pixels to consider.
        save_path: Destination path of the saved figure.
    """
    # Work out the colour range before any figure exists, so a failure here
    # cannot leave an open figure behind.
    valid = mask & (gt > 0) & np.isfinite(gt) & np.isfinite(pred)
    if valid.any():
        all_depths = np.concatenate([gt[valid], pred[valid]])
        vmin = np.percentile(all_depths, 2)
        vmax = np.percentile(all_depths, 98)
    else:
        # No valid pixel: there is nothing to take percentiles of, so fall
        # back to imshow's default colour scaling (vmin/vmax of None).
        vmin = vmax = None

    fig = plt.figure(figsize=(15, 5))
    try:
        plt.subplot(1, 3, 1)
        plt.imshow(rgb)
        plt.title("RGB")
        plt.axis("off")

        plt.subplot(1, 3, 2)
        plt.imshow(gt, cmap='viridis', vmin=vmin, vmax=vmax)
        plt.title("GT Depth")
        plt.colorbar(fraction=0.046, pad=0.04)
        plt.axis("off")

        plt.subplot(1, 3, 3)
        plt.imshow(pred, cmap='viridis', vmin=vmin, vmax=vmax)
        plt.title("Predicted (Aligned)")
        plt.colorbar(fraction=0.046, pad=0.04)
        plt.axis("off")

        plt.tight_layout()
        plt.savefig(save_path)
    finally:
        # Always release the figure, even when drawing or saving fails;
        # otherwise figures accumulate over a long evaluation loop.
        plt.close(fig)

# ========== 评估函数 ==========
"""
def compute_abs_rel(pred, gt, mask):
    pred, gt = pred[mask], gt[mask]
    return np.mean(np.abs(pred - gt) / gt)
"""
# Evaluate on valid pixels only.
def compute_abs_rel(pred, gt, mask):
    """Return the mean absolute relative error ``mean(|pred - gt| / gt)``.

    Only pixels that are selected by ``mask``, have a positive ground-truth
    depth, and are finite in both arrays contribute to the mean.

    Args:
        pred: Predicted depth array.
        gt: Ground-truth depth array, indexed with the same mask as ``pred``.
        mask: Boolean array selecting the pixels to evaluate.

    Returns:
        The mean absolute relative error, or ``np.nan`` when no pixel
        qualifies.
    """
    # Non-finite values are dropped so that a single NaN/inf pixel cannot
    # turn the metric of the whole image into NaN/inf.
    valid = mask & (gt > 0) & np.isfinite(gt) & np.isfinite(pred)
    if not valid.any():
        return np.nan
    pred, gt = pred[valid], gt[valid]
    return np.mean(np.abs(pred - gt) / gt)

def compute_delta1(pred, gt, mask):
    """Return the fraction of pixels whose ``max(pred/gt, gt/pred)`` is below 1.25.

    Only pixels selected by ``mask`` where both ``gt`` and ``pred`` exceed
    1e-6 are considered. Returns ``np.nan`` when no pixel qualifies.
    """
    eps = 1e-6
    keep = mask & (gt > eps) & (pred > eps)
    p, g = pred[keep], gt[keep]
    ratio = np.maximum(p / g, g / p)
    if len(ratio) > 0:
        return np.mean(ratio < 1.25)
    return np.nan

def scale_match(pred, gt, mask):
    """Return the least-squares scale factor that aligns ``pred`` to ``gt``.

    The value is the closed-form minimiser of ``sum((s * pred - gt) ** 2)``
    over the selected pixels: ``s = sum(gt * pred) / sum(pred ** 2)``.

    Args:
        pred: Predicted depth array.
        gt: Ground-truth depth array, indexed with the same mask as ``pred``.
        mask: Boolean array selecting the pixels used for the fit.

    Returns:
        The scale factor, or ``np.nan`` when no finite pixel is selected or
        the selected predictions are all zero.
    """
    # Skip non-finite pixels so one bad value cannot poison both sums.
    valid = mask & np.isfinite(gt) & np.isfinite(pred)
    pred, gt = pred[valid], gt[valid]
    denom = np.sum(pred ** 2)
    if denom == 0:
        # Empty selection or all-zero prediction: the scale is undefined.
        # Return NaN explicitly instead of evaluating 0 / 0.
        return np.nan
    return np.sum(gt * pred) / denom

# ========== Main processing loop ==========
# For every RGB image: load it with its ground-truth depth, run the model,
# align the prediction to the ground truth with a single scale factor,
# compute the metrics, and save a visualization plus the aligned prediction.
results = []
img_files = sorted(f for f in os.listdir(img_dir) if f.endswith(".png"))

for img_file in tqdm(img_files, desc="Evaluating"):
    try:
        # Build the file paths. Only the first "image" in the file name is
        # replaced to obtain the ground-truth file name.
        rgb_path = os.path.join(img_dir, img_file)
        gt_file = img_file.replace("image", "groundtruth_depth", 1)
        gt_path = os.path.join(gt_dir, gt_file)

        # Load the RGB image. cv2.imread returns None instead of raising when
        # a file cannot be read, so check explicitly for a clear message.
        bgr = cv2.imread(rgb_path)
        if bgr is None:
            raise FileNotFoundError(f"无法读取图像：{rgb_path}")
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        input_image = Image.fromarray(rgb)

        # Load the ground truth. KITTI depth PNGs store metres multiplied by
        # 256, with 0 meaning "no measurement", so dividing by 256 yields metres.
        gt_png = cv2.imread(gt_path, cv2.IMREAD_UNCHANGED)
        if gt_png is None:
            raise FileNotFoundError(f"无法读取深度真值：{gt_path}")
        gt_depth = gt_png.astype(np.float32) / 256.0
        mask = gt_depth > 0

        # Camera intrinsics are intentionally not loaded: the model call
        # below only receives the processor outputs, so they were never used.

        # ZoeDepth inference
        inputs = processor(images=input_image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)

        processed = processor.post_process_depth_estimation(
            outputs, source_sizes=[(rgb.shape[0], rgb.shape[1])]
        )
        pred_depth = processed[0]["predicted_depth"].squeeze().cpu().numpy()

        # Bring the prediction to the ground-truth resolution if they differ
        # (cv2.resize takes the target size as (width, height)).
        if pred_depth.shape != gt_depth.shape:
            pred_depth = cv2.resize(
                pred_depth,
                (gt_depth.shape[1], gt_depth.shape[0]),
                interpolation=cv2.INTER_LINEAR,
            )

        # Align the prediction to the ground truth (scale matching)
        scale = scale_match(pred_depth, gt_depth, mask)
        pred_aligned = pred_depth * scale

        # Compute the evaluation metrics
        absrel = compute_abs_rel(pred_aligned, gt_depth, mask)
        delta1 = compute_delta1(pred_aligned, gt_depth, mask)
    except Exception as e:
        # Best effort: report the image and move on to the next one.
        print(f"跳过 {img_file}，错误：{e}")
    else:
        results.append([img_file, absrel, delta1])
        print(f"[{img_file}] AbsRel: {absrel:.4f}, δ1: {delta1 * 100:.2f}%")

        # Writing the outputs is kept separate from the evaluation: a failure
        # here must not be reported as a skipped image, because its metrics
        # are already recorded. The prediction is saved first so a plotting
        # problem cannot prevent it from being written.
        stem = os.path.splitext(img_file)[0]
        try:
            np.save(os.path.join(pred_dir, f"{stem}_pred.npy"), pred_aligned)
            save_vis(rgb, gt_depth, pred_aligned, mask,
                     os.path.join(vis_dir, f"{stem}_vis.png"))
        except Exception as e:
            print(f"[{img_file}] 输出保存失败，错误：{e}")
    finally:
        # Release the per-image arrays and tensors on every path, including
        # failures. Rebinding to None (rather than `del`) is safe even when a
        # name was never assigned in this iteration.
        bgr = rgb = gt_png = gt_depth = pred_depth = pred_aligned = None
        inputs = outputs = processed = None
        gc.collect()
        torch.cuda.empty_cache()

# ========== 保存评估结果 ==========
"""
df = pd.DataFrame(results, columns=["image", "AbsRel", "Delta1"])
df.to_csv(os.path.join(save_dir, "eval_results.csv"), index=False)
print(f"\n所有评估结果保存至：{os.path.join(save_dir, 'eval_results.csv')}")
"""
# Collect the per-image metrics in a table, append one summary row holding
# the column means, and write everything to a CSV file.
result_columns = ["image", "AbsRel", "Delta1"]
df = pd.DataFrame(results, columns=result_columns)
mean_absrel, mean_delta1 = (df[metric].mean() for metric in ("AbsRel", "Delta1"))
mean_row = pd.DataFrame(
    [["mean", mean_absrel, mean_delta1]],
    columns=result_columns,
)
df = pd.concat([df, mean_row], ignore_index=True)
csv_path = os.path.join(save_dir, "eval_results.csv")
df.to_csv(csv_path, index=False)
print(f"\n所有评估结果保存至：{csv_path}")

# Package the outputs: zip the predicted-depth and visualization folders.
import shutil

# shutil.make_archive takes the archive name WITHOUT the extension and appends
# ".zip" itself. The base name is built directly instead of stripping ".zip"
# with str.replace, which would remove every ".zip" occurring anywhere in the
# path, not only the suffix.
pred_archive_base = os.path.join(save_dir, "predicted_depth")
zip_pre_path = pred_archive_base + ".zip"
shutil.make_archive(base_name=pred_archive_base, format='zip', root_dir=pred_dir)
print(f"\n预测深度npy已打包至: {zip_pre_path}")

vis_archive_base = os.path.join(save_dir, "visualizations")
zip_vis_path = vis_archive_base + ".zip"
shutil.make_archive(base_name=vis_archive_base, format='zip', root_dir=vis_dir)
print(f"打包完成: {zip_vis_path}")