In [None]:
import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import torch
import transformers
from transformers import DPTImageProcessor, DPTForDepthEstimation
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
from tabulate import tabulate
import cv2
import pandas as pd
from tqdm.notebook import tqdm
from collections import defaultdict

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

print(torch.__version__)
print(transformers.__version__)
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import time
from pathlib import Path

def timeit(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped callable's return value is passed through unchanged. If the
    call raises, the exception propagates and no timing line is printed.

    Args:
        func: Callable to time.

    Returns:
        A wrapper accepting the same arguments as ``func`` that prints
        ``Function '<name>' took <seconds> seconds`` after each successful call.
    """
    # Local import: the notebook's import cell does not bring in functools.
    import functools

    @functools.wraps(func)  # keep __name__/__doc__ of the decorated callable
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and high-resolution; time.time() can jump
        # when the system clock is adjusted.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"Function '{func.__name__}' took {elapsed:.4f} seconds")
        return result
    return wrapper

In [None]:
# Resolve the compute device in priority order: CUDA first, then MPS,
# with the CPU as the universal fallback.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

print(f"The device is: {device}")

In [None]:
# Inference wrapper around a Hugging Face depth-estimation checkpoint.
class DepthModel:
    """Single-image depth inference with a pretrained depth-estimation model.

    Loads the image processor and model for ``model_name``, keeps the model on
    ``device``, and, when called, returns a depth map resized to the input
    image's resolution.
    """

    def __init__(self, model_name: str, device):
        """Load the processor and model weights.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``.
            device: Device the model is moved to and inputs are sent to.
        """
        self.image_processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)
        self.model = AutoModelForDepthEstimation.from_pretrained(model_name)
        self.model = self.model.to(device)
        # Make evaluation mode explicit; this wrapper is inference-only.
        self.model.eval()
        self.device = device

    @timeit
    def __call__(self, image: Image.Image) -> torch.Tensor:
        """Predict depth for one PIL image.

        Args:
            image: Input PIL image. Non-RGB modes are converted to RGB first.

        Returns:
            CPU tensor holding the prediction with a channel axis inserted at
            dim 1 and spatial size equal to the image's (height, width).
        """
        # Normalise palette / grayscale / RGBA inputs so callers do not have
        # to remember to convert before calling.
        if image.mode != "RGB":
            image = image.convert("RGB")

        inputs = self.image_processor(images=image, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        with torch.no_grad():
            predicted_depth = self.model(**inputs).predicted_depth

        # Resize on the CPU back to the source resolution. PIL reports size as
        # (width, height); reversing it gives the (height, width) order that
        # interpolate expects.
        prediction = torch.nn.functional.interpolate(
            predicted_depth.cpu().unsqueeze(1),
            size=image.size[::-1],
            mode="bicubic",
            align_corners=False,
        )
        # Already a CPU tensor, so no further device transfer is required.
        return prediction

# Build the wrapper around the Large indoor metric checkpoint on the chosen device.
model_name = "depth-anything/Depth-Anything-V2-Metric-Indoor-Large-hf"
depth_model = DepthModel(model_name, device)

In [None]:
# Load the sample frame as RGB. convert() returns a new image, so the file
# handle opened by Image.open can be closed as soon as the copy exists.
image_path = "../data/saved_images/real1_color.png"
with Image.open(image_path) as raw_image:
    image = raw_image.convert("RGB")

# The trailing semicolon keeps the AxesImage repr out of the cell output.
plt.imshow(image);

In [None]:
# Run the model on the sample image loaded above.
depth = depth_model(image=image)

# Build the interactive heatmap and, in the same chain, set a hover label
# that reports the depth value with an "m" suffix.
fig = px.imshow(
    depth.squeeze().numpy(),
    color_continuous_scale='viridis',
).update_traces(
    hovertemplate='x: %{x}<br>y: %{y}<br>depth: %{z:.3f} m<extra></extra>'
)

# Render inside the notebook.
fig.show()

In [None]:
def visualize_rgb_and_depth(
    rgb: np.ndarray,
    gt_depth: np.ndarray,
    pred_depth: np.ndarray,
    shared_color_scale: bool = True,
):
    """
    Visualize an RGB image with corresponding ground truth and predicted depth maps.

    Renders a 1x3 Plotly figure: the RGB image followed by the two depth maps
    as viridis heatmaps with per-pixel hover read-outs.

    Args:
        rgb (np.ndarray): RGB image as a NumPy array (H, W, 3).
        gt_depth (np.ndarray): Ground truth depth map in meters (H, W).
        pred_depth (np.ndarray): Predicted depth map in meters (H, W).
        shared_color_scale (bool): When True (default) both depth maps are
            bound to one color axis, so the single colorbar is valid for both
            panels. When False each heatmap auto-scales independently and only
            the ground-truth colorbar is drawn (the previous behaviour, where
            the visible colorbar did not describe the predicted panel).

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    hover = 'x: %{x}<br>y: %{y}<br>depth: %{z:.3f} m<extra></extra>'
    colorbar = dict(title='Depth (m)')

    if shared_color_scale:
        # Both traces reference layout.coloraxis; its range then spans both
        # maps, so equal colors mean equal depths across the two panels.
        gt_color = dict(coloraxis='coloraxis')
        pred_color = dict(coloraxis='coloraxis')
    else:
        gt_color = dict(colorscale='viridis', colorbar=colorbar)
        # Hide the second colorbar to avoid clutter.
        pred_color = dict(colorscale='viridis', showscale=False)

    fig = make_subplots(
        rows=1, cols=3,
        subplot_titles=["RGB Image", "GT Depth Map (m)", "Predicted Depth Map (m)"],
        horizontal_spacing=0.05
    )

    # RGB image (static)
    fig.add_trace(go.Image(z=rgb), row=1, col=1)

    # Ground Truth Depth Map
    fig.add_trace(
        go.Heatmap(z=gt_depth, hovertemplate=hover, **gt_color),
        row=1, col=2
    )

    # Predicted Depth Map
    fig.add_trace(
        go.Heatmap(z=pred_depth, hovertemplate=hover, **pred_color),
        row=1, col=3
    )

    # Flip y-axis for image coordinates (row 0 at the top)
    for col in range(1, 4):
        fig.update_yaxes(autorange='reversed', row=1, col=col)

    layout = dict(width=1400, height=480, margin=dict(t=40))
    if shared_color_scale:
        layout['coloraxis'] = dict(colorscale='viridis', colorbar=colorbar)
    fig.update_layout(**layout)

    fig.show()


# PREDICTED DEPTH MAPS FROM DEPTH PRO (loaded from saved .pt files)

image_ids = ["409238553452200", "409243305337100", "409547560576900", "409570843067100", "409573045097900", "409592125011600"]

for image_id in image_ids:
    # --- Load ground-truth depth map and convert to meters ---
    depth_path = f"../data/saved_images/{image_id}_depth.png"
    depth_mm = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
    if depth_mm is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        raise FileNotFoundError(f"Could not read depth map: {depth_path}")
    gt_depth = depth_mm.astype(np.float32) / 1000.0  # mm to meters

    # --- Load RGB image ---
    rgb_path = f"../data/saved_images/{image_id}_color.png"
    bgr = cv2.imread(rgb_path)
    if bgr is None:
        raise FileNotFoundError(f"Could not read RGB image: {rgb_path}")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    # --- Load the saved prediction ---
    # map_location="cpu" lets the tensor load even when the device it was
    # saved from is not available on this machine.
    pred_path = f"../data/saved_images/{image_id}_depthpro.pt"
    pred_depth = torch.load(pred_path, map_location="cpu", weights_only=True).numpy()

    visualize_rgb_and_depth(rgb=rgb, gt_depth=gt_depth, pred_depth=pred_depth)

In [None]:
image_ids = ["409238553452200", "409243305337100", "409547560576900", "409570843067100", "409573045097900", "409592125011600"]

# NOTE: this rebinds model_name / depth_model, replacing the Large checkpoint
# loaded earlier in the notebook with the Small variant.
model_name = "depth-anything/Depth-Anything-V2-Metric-Indoor-Small-hf"
depth_model = DepthModel(model_name=model_name, device=device)

for image_id in image_ids:
    # --- Load ground-truth depth map and convert to meters ---
    depth_path = f"../data/saved_images/{image_id}_depth.png"
    depth_mm = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
    if depth_mm is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        raise FileNotFoundError(f"Could not read depth map: {depth_path}")
    gt_depth = depth_mm.astype(np.float32) / 1000.0  # mm to meters

    # --- Load RGB image ---
    rgb_path = f"../data/saved_images/{image_id}_color.png"
    bgr = cv2.imread(rgb_path)
    if bgr is None:
        raise FileNotFoundError(f"Could not read RGB image: {rgb_path}")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    # --- Predict depth with the model ---
    # Convert to RGB explicitly, matching the single-image cell, and close
    # the file handle once the converted copy exists.
    with Image.open(rgb_path) as pil_image:
        model_input = pil_image.convert("RGB")
    pred_depth = depth_model(image=model_input).squeeze().cpu().numpy()

    visualize_rgb_and_depth(rgb=rgb, gt_depth=gt_depth, pred_depth=pred_depth)