# Monocular Depth Estimation

This notebook demonstrates how to perform monocular depth estimation using DeGirum PySDK. Users will learn how to:

* Load a monocular depth estimation model
* Create a custom postprocessor to transform the results
* Visualize the resulting depth map

Simply uncomment the model of your choice in the configuration cell below, and set the inference host address, model zoo URL, and token to match your setup.

## Configure and load model

In [None]:
import degirum as dg, degirum_tools as dgt

# Configuration for this notebook. For each setting, keep exactly one
# assignment active and leave the alternatives commented out.
# `model_name` and `model` defined here are used by the inference cell below.

# Choose inference host address
inference_host_address = "@cloud"
# inference_host_address = "@local"

# Choose zoo url
zoo_url = "degirum/hailo"
# zoo_url = "<path to local folder>"

# Choose model
# NOTE: the inference cell enables scdepth-specific postprocessing when
# the substring "scdepth" appears in this name.
model_name = "scdepthv3--256x320_quant_hailort_hailo8_1"
# model_name = "fastdepth--224x224_quant_hailort_hailo8_1"

# Set token
token = dgt.get_token()
# token = ''  # leave empty for local inference

# Load the selected model from the chosen zoo on the chosen inference host.
model = dg.load_model(
    model_name=model_name,
    inference_host_address=inference_host_address,
    zoo_url=zoo_url,
    token=token
)

## Create a custom postprocessor

In [None]:
import cv2, numpy as np


def dequantize(result):
    """Map a quantized output tensor back to real values.

    Computes ``(data - zero) * scale`` after casting the tensor data to
    float32.

    Args:
        result: One inference result entry. It must provide ``result['data']``
            (a NumPy array) and ``result['quantization']`` with the keys
            ``'zero'`` and ``'scale'``.
            NOTE(review): whether ``zero``/``scale`` are scalars or
            per-channel sequences is not visible here; they only need to
            broadcast against ``data``.

    Returns:
        The dequantized data as a NumPy array.
    """
    quant_params = result['quantization']
    offset, step = quant_params['zero'], quant_params['scale']

    # Cast first so the subtraction cannot wrap around in an integer dtype.
    as_float = result['data'].astype(np.float32)
    return (as_float - offset) * step


def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is computed from ``exp(-|z|)``, which never exceeds 1, so large
    negative inputs do not overflow (the direct formula evaluates ``exp(-z)``
    and emits an overflow warning for them).

    Args:
        z: A scalar or array-like of numbers.

    Returns:
        An array shaped like ``z`` (a NumPy scalar for scalar input) with
        values in [0, 1]. Floating-point input keeps its dtype.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in (0, 1]
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z). Both equal sigmoid(z).
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return out[()]  # unwrap 0-d results to a scalar, leave arrays untouched


# All custom postprocessors inherit from dg.postprocessor.InferenceResults
class DepthResults(dg.postprocessor.InferenceResults):
    """Custom postprocessor that converts a raw depth tensor into a depth map.

    On construction the first output tensor is dequantized (if it has an
    integer dtype), brought into height/width/channel order, resized to the
    original image size (when the original image is available) and written
    back to ``self._inference_results[0]['data']``. For a single-channel
    output the stored array has shape ``(1, H, W)``.

    Behavior is controlled by the class attributes below; set them on the
    class before running inference.
    """

    # Color map for visualization
    # For more color maps see:
    # https://docs.opencv.org/4.x/d3/d50/group__imgproc__colormap.html#ga9a805d8262bcbe273f16be9ea2055a65
    color_map = cv2.COLORMAP_VIRIDIS
    # Toggle for scdepth specific postprocessing
    use_scdepth = False
    # Toggle to automatically normalize results
    # This ensures the resulting depth map values fall within [0.0, 1.0]
    normalize_results = False

    # Maps the model's InputResizeMethod parameter to an OpenCV interpolation flag
    _RESIZE_MODES = {
        'nearest': cv2.INTER_NEAREST,
        'bilinear': cv2.INTER_LINEAR,
        'area': cv2.INTER_AREA,
        'bicubic': cv2.INTER_CUBIC,
        'lanczos': cv2.INTER_LANCZOS4,
    }

    def __init__(self, *args, **kwargs):
        """Run the base constructor, then postprocess the depth tensor in place.

        Raises:
            KeyError: if the model's InputResizeMethod is not a key of
                ``_RESIZE_MODES``.
            ValueError: if the leading axis of the raw tensor is not of size 1,
                or the tensor cannot be reshaped to the model input size.
        """
        super().__init__(*args, **kwargs)  # call base class constructor first

        # Retrieve the raw tensor from the inference
        # Raw tensors are elements in the list self._inference_results
        # Depth models only return one tensor, so we reference the first element
        tensor = self._inference_results[0]
        data = tensor['data']

        # Dequantize if datatype is unsigned int or int
        if data.dtype.kind in ('u', 'i'):
            data = dequantize(tensor)

        # Drop the leading size-1 axis (presumably the batch dimension)
        data = data.squeeze(0)

        # Reshape raw output to model input dimensions
        # self._model_params contains the model JSON parameters
        params = self._model_params
        data = self._to_hwc(
            data,
            params.InputH[0],
            params.InputW[0],
            params.InputTensorLayout[0] == 'NHWC',
        )

        # Resize the depth map back to the original image size
        # self.image contains the raw (no preprocessing) input image
        if self.image is not None:
            resize_mode = self._RESIZE_MODES[params.InputResizeMethod[0]]
            image_size = self.image.shape[:2][::-1]  # cv2 expects (width, height)
            data = cv2.resize(data, image_size, interpolation=resize_mode)

        # cv2.resize returns a 2-D array for single-channel input. Drop the
        # channel axis explicitly so the result has the same layout whether
        # or not a resize took place, then add the leading axis back.
        if data.ndim == 3 and data.shape[2] == 1:
            data = data[:, :, 0]
        data = np.expand_dims(data, axis=0)

        if DepthResults.use_scdepth:
            data = 1 / (sigmoid(data) * 10 + 0.009)  # hailo_model_zoo/core/postprocessing/depth_estimation_postprocessing.py

        if DepthResults.normalize_results:
            data = self.normalize_depth_map(data)

        self._inference_results[0]['data'] = data

    @staticmethod
    def _to_hwc(data, height, width, hwc_layout):
        """Return ``data`` in height/width/channel order.

        A 3-D tensor that already matches the model input size is kept
        (``hwc_layout``) or transposed from channel-first order. Anything
        else is reshaped to ``(height, width, 1)``, which is already in HWC
        order and therefore must not be transposed afterwards.
        """
        if data.ndim == 3:
            if hwc_layout and data.shape[:2] == (height, width):
                return data
            if not hwc_layout and data.shape[1:] == (height, width):
                return np.transpose(data, (1, 2, 0))
        return np.reshape(data, (height, width, 1))

    @staticmethod
    def normalize_depth_map(depth_map):
        """Linearly rescale ``depth_map`` from [min, max] to [0.0, 1.0].

        A constant map (max == min) has no range to stretch; it is mapped to
        all zeros instead of dividing by zero.
        """
        lowest = depth_map.min()
        span = depth_map.max() - lowest
        if span == 0:
            span = 1  # every value equals `lowest`, so the result is all zeros
        return (depth_map - lowest) / span

    # Function to convert a depth map into an RGB image.
    def _convert_depth_to_image(self, depth_map):
        """Render a ``(1, H, W)`` depth map as a color-mapped 8-bit image."""
        depth_map = depth_map.squeeze(0)
        if not DepthResults.normalize_results:
            # The color map needs values in [0, 1] before scaling to 8 bits
            depth_map = self.normalize_depth_map(depth_map)
        depth_map = (depth_map * 255).astype(np.uint8)
        return cv2.applyColorMap(depth_map, DepthResults.color_map)

    # Visualization results are generated from this function.
    @property
    def image_overlay(self):
        """Color-mapped visualization of the stored depth map."""
        return self._convert_depth_to_image(self._inference_results[0]['data'])

## Perform inference

In [None]:
image_source = "../assets/dark_room.jpg"

# Set custom postprocessor
# The scdepth toggle is assigned on every run (not only switched on), so that
# re-running this cell after selecting a different model does not keep a
# stale value from a previous scdepth run.
DepthResults.use_scdepth = "scdepth" in model_name
model.custom_postprocessor = DepthResults

print(f"Running inference using '{model_name}' on image source '{image_source}'")
inference_result = model(image_source)

print("Press 'x' or 'q' to stop.")

# show results of inference
with dgt.Display("AI Camera") as output_display:
    # Stack original image above image overlay.
    depth_visuals = dgt.stack_images(
        inference_result.image,
        inference_result.image_overlay,
        dimension="vertical")
    output_display.show_image(depth_visuals)

## Retrieve relative depth value

In [None]:
# Pixel to sample: x is used as the last index, y as the one before it.
pixel_x, pixel_y = 128, 256

# Depth data written by the custom postprocessor into the first result tensor.
depth_map = inference_result.results[0]['data']

# Rescale to [0.0, 1.0] here unless the postprocessor already did so.
depth_map = (
    depth_map
    if DepthResults.normalize_results
    else DepthResults.normalize_depth_map(depth_map)
)

# Index order is (leading axis, y, x).
depth_val = depth_map[0][pixel_y][pixel_x]

print(f"The relative depth value at coordinates {pixel_x}, {pixel_y} is {depth_val}.")