In [1]:
!pip install -U transformers==4.53.0

Collecting transformers==4.53.0
  Downloading transformers-4.53.0-py3-none-any.whl.metadata (39 kB)
Downloading transformers-4.53.0-py3-none-any.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m82.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.52.4
    Uninstalling transformers-4.52.4:
      Successfully uninstalled transformers-4.52.4
Successfully installed transformers-4.53.0


In [8]:
import os, gc
import cv2
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from transformers import AutoImageProcessor, ZoeDepthForDepthEstimation

# 模型加载
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "Intel/zoedepth-nyu-kitti"
processor = AutoImageProcessor.from_pretrained(model_name)
model = ZoeDepthForDepthEstimation.from_pretrained(model_name).to(device)

# ================== 可视化函数 ==================
def save_comparison_figure(img_name, rgb, gt_depth, pred_depth_aligned, mask, vis_dir):
    os.makedirs(vis_dir, exist_ok=True)
    save_path = os.path.join(vis_dir, img_name.replace('.png', '_compare.png'))

    # 有效像素的联合范围
    valid = mask & (gt_depth > 0) & np.isfinite(gt_depth) & np.isfinite(pred_depth_aligned)
    all_depths = np.concatenate([gt_depth[valid], pred_depth_aligned[valid]])
    vmin = np.percentile(all_depths, 2)
    vmax = np.percentile(all_depths, 98)

    plt.figure(figsize=(15, 5))
    plt.subplot(1, 3, 1)
    plt.imshow(rgb)
    plt.title('Input RGB')
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.imshow(np.where(mask, gt_depth, np.nan), cmap='viridis', vmin=vmin, vmax=vmax)
    plt.title('Ground Truth Depth')
    plt.colorbar(fraction=0.046, pad=0.04)
    plt.axis('off')

    plt.subplot(1, 3, 3)
    plt.imshow(np.where(mask, pred_depth_aligned, np.nan), cmap='viridis', vmin=vmin, vmax=vmax)
    plt.title('Predicted Depth (Aligned)')
    plt.colorbar(fraction=0.046, pad=0.04)
    plt.axis('off')

    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

# ================== 评估函数 ==================
def compute_abs_rel(pred, gt, mask):
    # mask和gt都为True且gt>0的才算有效
    valid = mask & (gt > 0)
    pred, gt = pred[valid], gt[valid]
    if len(gt) == 0:
        return np.nan  # 没有有效像素时返回nan，防止出错
    return np.mean(np.abs(pred - gt) / gt)

def compute_delta1(pred, gt, mask):
    valid_mask = mask & (gt > 1e-6) & (pred > 1e-6)
    pred, gt = pred[valid_mask], gt[valid_mask]
    thresh = np.maximum(pred / gt, gt / pred)
    return np.mean(thresh < 1.25) if len(thresh) > 0 else np.nan

def scale_match(pred, gt, mask):
    pred, gt = pred[mask], gt[mask]
    return np.sum(gt * pred) / np.sum(pred ** 2)

# ================== 主循环：遍历多个 scan 文件夹 ==================
base_folder = '/kaggle/input/diode-val/val/outdoor/scene_00024'
output_dir = '/kaggle/working'

# 获取 base_folder 下所有文件夹名称
for scan_name in sorted(os.listdir(base_folder)):
    folder = os.path.join(base_folder, scan_name)
    if not os.path.isdir(folder):
        continue  # 跳过非文件夹

    print(f"\n正在处理: {scan_name}")

    csv_path = os.path.join(output_dir, f"{scan_name}.csv")
    vis_dir = os.path.join(output_dir, f"visualizations", scan_name)

    results = []

    for img_file in sorted(os.listdir(folder)):
        if not img_file.endswith('.png'):
            continue

        try:
            rgb_path = os.path.join(folder, img_file)
            gt_path = rgb_path.replace('.png', '_depth.npy')
            mask_path = rgb_path.replace('.png', '_depth_mask.npy')

            if not (os.path.exists(gt_path) and os.path.exists(mask_path)):
                print(f"跳过无 GT/mask 的图像: {img_file}")
                continue

            rgb = cv2.imread(rgb_path)
            rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
            gt_depth = np.load(gt_path).astype(np.float32)
            if gt_depth.ndim == 3:
                gt_depth = gt_depth[:, :, 0]
            mask = np.load(mask_path).astype(bool)

            # 模型推理
            input_image = Image.fromarray(rgb)
            inputs = processor(images=input_image, return_tensors="pt").to(device)

            with torch.no_grad():
                outputs = model(**inputs)

            processed = processor.post_process_depth_estimation(
                outputs,
                source_sizes=[(rgb.shape[0], rgb.shape[1])]
            )
            pred_depth = processed[0]['predicted_depth'].squeeze().cpu().numpy()

            if pred_depth.shape != gt_depth.shape:
                pred_depth = cv2.resize(pred_depth, (gt_depth.shape[1], gt_depth.shape[0]), interpolation=cv2.INTER_LINEAR)

            scale = scale_match(pred_depth, gt_depth, mask)
            pred_aligned = pred_depth * scale

            abs_rel = compute_abs_rel(pred_aligned, gt_depth, mask)
            delta1 = compute_delta1(pred_aligned, gt_depth, mask)

            print(f"[{img_file}] AbsRel: {abs_rel:.4f}, δ1: {delta1*100:.2f}%")
            results.append([img_file, abs_rel, delta1])

            save_comparison_figure(img_file, rgb, gt_depth, pred_aligned, mask, vis_dir)

            # 清理内存和显存
            #del rgb, gt_depth, mask, pred_depth, pred_aligned
            #torch.cuda.empty_cache()
            del rgb, gt_depth, mask, pred_depth, pred_aligned
            del input_image, inputs, outputs, processed
            

        except Exception as e:
            print(f"处理图像失败: {img_file}，错误: {e}")
            continue

    # 保存 CSV
    df = pd.DataFrame(results, columns=['image', 'AbsRel', 'Delta1'])
    df.to_csv(csv_path, index=False)
    print(f"结果保存至: {csv_path}")
    print(f"可视化保存至: {vis_dir}")
    
    #打包
    import shutil
    #zip_output_path = "/kaggle/working/visualizations.zip"
    zip_output_path = os.path.join(output_dir, f"visualizations_{scan_name}.zip")
    # 打包文件夹
    shutil.make_archive(base_name=zip_output_path.replace('.zip', ''), format='zip', root_dir=vis_dir)
    print(f"打包完成: {zip_output_path}")

gc.collect()
torch.cuda.empty_cache()


正在处理: scan_00201
[00024_00201_outdoor_000_000.png] AbsRel: 1.0389, δ1: 7.31%
[00024_00201_outdoor_000_020.png] AbsRel: 1.6384, δ1: 17.06%
[00024_00201_outdoor_010_010.png] AbsRel: 1.5049, δ1: 17.23%
[00024_00201_outdoor_030_000.png] AbsRel: 0.9522, δ1: 10.86%
[00024_00201_outdoor_050_030.png] AbsRel: 1.4156, δ1: 38.23%
[00024_00201_outdoor_060_000.png] AbsRel: 0.8785, δ1: 11.53%
[00024_00201_outdoor_060_040.png] AbsRel: 0.9089, δ1: 29.97%
[00024_00201_outdoor_070_010.png] AbsRel: 0.7860, δ1: 33.19%
[00024_00201_outdoor_070_030.png] AbsRel: 0.6830, δ1: 71.38%
[00024_00201_outdoor_080_000.png] AbsRel: 0.3927, δ1: 54.03%
[00024_00201_outdoor_080_020.png] AbsRel: 0.2592, δ1: 81.31%
[00024_00201_outdoor_080_040.png] AbsRel: 0.2238, δ1: 89.11%
[00024_00201_outdoor_090_010.png] AbsRel: 0.4405, δ1: 61.58%
[00024_00201_outdoor_090_030.png] AbsRel: 0.1959, δ1: 90.09%
[00024_00201_outdoor_090_050.png] AbsRel: 0.0854, δ1: 95.88%
[00024_00201_outdoor_100_000.png] AbsRel: 0.3357, δ1: 55.42%
[00024_

In [9]:

import pandas as pd
import os

output_dir = '/kaggle/working'
csv_files = [os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith('.csv') and f.startswith('scan_')]

all_dfs = []
summary = []

for f in csv_files:
    df = pd.read_csv(f)
    all_dfs.append(df)
    summary.append({
        'FileName': os.path.basename(f),
        'NumSamples': len(df),
        'MeanAbsRel': df['AbsRel'].mean(),
        'MeanDelta1': df['Delta1'].mean()
    })

if all_dfs:
    df_all = pd.concat(all_dfs, ignore_index=True)
    # 总体均值
    total = {
        'FileName': 'ALL',
        'NumSamples': len(df_all),
        'MeanAbsRel': df_all['AbsRel'].mean(),
        'MeanDelta1': df_all['Delta1'].mean()
    }
    summary.append(total)
    # 保存
    mean_df = pd.DataFrame(summary)
    mean_df.to_csv(os.path.join(output_dir, "mean.csv"), index=False)
    print(mean_df)
    print(f"所有scan子文件统计均值已保存到: {os.path.join(output_dir, 'mean.csv')}")
else:
    print("未找到任何scan_xxxxx.csv结果文件！")

          FileName  NumSamples  MeanAbsRel  MeanDelta1
0   scan_00188.csv          25    0.244026    0.697890
1   scan_00183.csv          73    0.089119    0.936319
2   scan_00186.csv          48    0.188786    0.761202
3   scan_00196.csv          53    0.738801    0.417155
4   scan_00200.csv          40    0.521550    0.574079
5   scan_00187.csv          33    0.073646    0.939969
6   scan_00197.csv          61    0.483616    0.580195
7   scan_00198.csv          27    0.437965    0.376347
8   scan_00199.csv          38    0.495851    0.440074
9   scan_00185.csv          29    0.138684    0.828988
10  scan_00195.csv          52    0.281115    0.752675
11  scan_00201.csv          43    0.659171    0.534534
12  scan_00184.csv          27    0.073470    0.935488
13  scan_00194.csv          44    0.778473    0.484157
14  scan_00190.csv          25    0.182844    0.767955
15  scan_00191.csv          18    0.080689    0.920787
16  scan_00192.csv          25    0.191086    0.711483
17  scan_0