If Dockerfiles have not been modified, connect to the Jupyter server with:  
- ```http://localhost:8012/tree?token=depth-segment-images-cpu``` to run Torch on CPU  

This notebook describes a pipeline to estimate the metric depth of an image with the [Depth Anything V2](https://github.com/DepthAnything/Depth-Anything-V2).  It takes an input image and saves the inference of the metric depth of each pixel as a ```.json``` and a visualisation of the estimation as a ```.jpg```.  

> Yang, L., Kang, B., Huang, Z., Zhao, Z., Xu, X., Feng, J., & Zhao, H. (2024). Depth Anything V2 (arXiv:2406.09414). arXiv. https://doi.org/10.48550/arXiv.2406.09414

In [43]:
target_dir = "data"
input_dir = "place-pulse-singapore-panos-512-1024"
output_dir = "place-pulse-singapore-depths-512-1024"
visualisation_dir = "place-pulse-singapore-depths-visualisation-512-1024"
checkpoint_file = "./depth_anything_v2_metric_vkitti_vitl.pth"

In [44]:
import cv2
import matplotlib.patheffects as path_effects
import matplotlib.pyplot as plt
import seaborn as sns
import torch

from depth_anything_v2.dpt import DepthAnythingV2

import json
import os
from pathlib import Path

In [45]:
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}
model = DepthAnythingV2(**model_configs["vitl"])
model.load_state_dict(torch.load(checkpoint_file, map_location='cpu'))
model = model.to(device).eval()

  model.load_state_dict(torch.load(checkpoint_file, map_location='cpu'))


In [46]:
images_path = []
for dirpath, dirnames, filenames in os.walk(os.path.join(target_dir, input_dir)):
    images_path.extend(filenames)
    break
images_id = {}
for image_path in images_path:
    images_id['.'.join(image_path.split('.')[:-1])] = os.path.join(target_dir, input_dir, image_path)

Path(os.path.join(target_dir, output_dir)).mkdir(parents=True, exist_ok=True)
Path(os.path.join(target_dir, visualisation_dir)).mkdir(parents=True, exist_ok=True)
for id, path in images_id.items():
    if Path(os.path.join(target_dir, output_dir, f"{id}.json")).is_file():
        continue
    image = cv2.imread(path)
    depth = model.infer_image(image)
    with open(os.path.join(target_dir, output_dir, f"{id}.json"), 'w') as fp:
        json.dump(depth.tolist(), fp)

    fig, ax = plt.subplots()
    cax = plt.imshow(depth, cmap=sns.color_palette("rocket", as_cmap=True))
    ax.axis("off")
    cbar = plt.colorbar(cax, ax=ax, orientation="horizontal", fraction=0.05, pad=-0.1)
    cbar.outline.set_edgecolor("white")
    for label in cbar.ax.get_xticklabels():
        label.set_color("white")
        label.set_path_effects([path_effects.withStroke(linewidth=1, foreground='black')])
    cbar.ax.tick_params(labelsize=8)
    plt.savefig(os.path.join(target_dir, visualisation_dir, f"{id}.png"), bbox_inches="tight")
    plt.close(fig)