# 04 Depth Estimation

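Predict dense depth maps from DINOv3 patch features. The depth head used here is untrained, so the output only demonstrates the pipeline, not meaningful depth.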

In [None]:
import torch
import matplotlib.pyplot as plt
from PIL import Image
from transformers import AutoImageProcessor
from dinov3_lab.core.backbone import build_dinov3_hf
from dinov3_lab.tasks.depth.heads import DepthHead

# 1. Setup
# Build the feature backbone and a freshly initialized (untrained) depth head.
backbone = build_dinov3_hf()
# NOTE(review): 1024 is presumably the feature width of the ViT-L/16 backbone;
# confirm it matches whatever build_dinov3_hf() returns by default.
head = DepthHead(in_channels=1024)
# A newly constructed module starts in training mode; switch to evaluation
# mode so layers such as dropout or batch norm (if the head has any) behave
# deterministically during the inference-only forward pass below.
head.eval()
# Preprocessing (resize / normalization) that belongs to the DINOv3 ViT-L/16
# checkpoint.
processor = AutoImageProcessor.from_pretrained("facebook/dinov3-vitl16-pretrain-lvd1689m")

# 2. Load Image
image_path = "../data/test_images/demo.jpg"
try:
    # convert() returns a new, fully loaded image, so the underlying file can
    # be closed as soon as the conversion is done instead of waiting for GC.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
except FileNotFoundError:
    # Fall back to a flat placeholder so the rest of the notebook still runs.
    print("Demo image not found. Using a solid gray placeholder image.")
    image = Image.new("RGB", (448, 448), color="gray")

# Preprocess the image into PyTorch tensors ("pt") for the backbone.
inputs = processor(images=image, return_tensors="pt")

# 3. Forward Pass
# Inference only: disable gradient tracking for the whole forward pass.
with torch.no_grad():
    out = backbone(inputs.pixel_values)

    # Rearrange the patch tokens into a spatial grid using the patch layout
    # reported by the backbone.
    patch_tokens, patch_hw = out.patch_tokens, out.patch_hw
    grid = backbone.tokens_to_grid(patch_tokens, patch_hw)

    # The head expects float32 input to match its weights, so cast first.
    grid_fp32 = grid.float()
    depth_pred = head(grid_fp32)

print(f"Depth prediction shape: {depth_pred.shape}")

def _show_panel(position, data, title, **imshow_kwargs):
    """Draw one panel of the 1x2 comparison figure with its axes hidden."""
    plt.subplot(1, 2, position)
    plt.imshow(data, **imshow_kwargs)
    plt.title(title)
    plt.axis("off")


# Upsample to image size
# PIL reports size as (width, height); swap it to (height, width).
img_width, img_height = image.size
image_size = (img_height, img_width)
depth_up = backbone.upsample_grid_to_image(depth_pred, image_size)

# Take element [0, 0] of the upsampled prediction and move it to a float32
# NumPy array on the CPU so matplotlib can render it.
depth_map = depth_up[0, 0].float().cpu().numpy()

# Side-by-side view: input image on the left, predicted depth on the right.
plt.figure(figsize=(10, 5))
_show_panel(1, image, "Input")
_show_panel(2, depth_map, "Predicted Depth (Untrained)", cmap="plasma")
plt.show()