In [2]:
import numpy as np
import matplotlib.pyplot as plt
# import mpl_toolkits.mplot3d.Axes3D
from transformers import DPTImageProcessor, DPTForDepthEstimation
import torch
from PIL import Image
import plotly.graph_objs as go



  from .autonotebook import tqdm as notebook_tqdm


In [3]:

# Load the image from a local file path (the variable keeps its historical
# name `url`, but it is a filesystem path, not a URL).
url = "/workspaces/DepthMapExplorer-3DPointCloudLab/Images/e4360037-800px-wm.jpg"
# Force three 8-bit channels: the point-cloud colouring later formats every
# pixel as 'rgb(r,g,b)', which breaks for grayscale or palette images.
image = Image.open(url).convert("RGB")

# Initialize the DPT model and its matching image pre-processor from the
# "Intel/dpt-large" checkpoint.
processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")


Some weights of DPTForDepthEstimation were not initialized from the model checkpoint at Intel/dpt-large and are newly initialized: ['neck.fusion_stage.layers.0.residual_layer1.convolution2.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution1.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.weight', 'neck.fusion_stage.layers.0.residual_layer1.convolution1.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:

# Pre-process the image into the PyTorch tensors the model takes as input.
inputs = processor(images=image, return_tensors="pt")

# Model inference; gradients are not needed, so tracking is disabled.
with torch.no_grad():
    outputs = model(**inputs)
    predicted_depth = outputs.predicted_depth

# Interpolate to original size.
# PIL's `image.size` is (width, height); `interpolate` expects (height, width),
# hence the reversal.
prediction = torch.nn.functional.interpolate(
    predicted_depth.unsqueeze(1),
    size=image.size[::-1],
    mode="bicubic",
    align_corners=False,
)

# Convert to a numpy array and rescale so the largest value maps to 255.
depth_raw = prediction.squeeze().cpu().numpy()
# Bicubic resampling can overshoot below zero; a negative float cast to uint8
# would wrap around to a large bogus depth, so clamp before casting.
depth_raw = np.clip(depth_raw, 0.0, None)
depth_peak = float(depth_raw.max())
if depth_peak > 0:
    depth_map = (depth_raw * 255 / depth_peak).astype("uint8")
else:
    # Degenerate all-zero prediction: avoid dividing by zero.
    depth_map = np.zeros(depth_raw.shape, dtype="uint8")


In [5]:

# Pixel data of the source image as an array, addressable by [row, column].
image_np = np.array(image)

# Camera parameters derived from the depth map dimensions.
height, width = depth_map.shape
fx = fy = 525.0  # Focal length (same hard-coded value on both axes)
cx = width / 2   # Optical center, horizontal
cy = height / 2  # Optical center, vertical

# Accumulators for the 3D points and their per-point colours.
points, colors = [], []


In [6]:

# Back-project every pixel with non-zero depth into 3D:
#   X = (u - cx) * Z / fx,   Y = (v - cy) * Z / fy,   Z = depth
# The arrays are rebuilt from scratch on every run, so re-executing this cell
# neither duplicates points nor fails because `points` is already an ndarray.
valid_mask = depth_map != 0  # Skip zero depth values
v_idx, u_idx = np.nonzero(valid_mask)  # (row, column) indices, row-major order
Z = depth_map[valid_mask].astype(np.float64)
X = (u_idx - cx) * Z / fx
Y = (v_idx - cy) * Z / fy
# Shape (N, 3); stays (0, 3) when no pixel is valid, so the column slices
# below remain well-defined.
points = np.column_stack((X, Y, Z))

# One 'rgb(r,g,b)' string per retained pixel, in the same order as `points`.
pixel_rgb = image_np[v_idx, u_idx]
if pixel_rgb.ndim == 1:
    # Single-channel image: replicate the intensity across R, G and B.
    pixel_rgb = np.repeat(pixel_rgb[:, None], 3, axis=1)
# Only the first three channels are used (any alpha channel is ignored).
colors = ['rgb({},{},{})'.format(r, g, b) for r, g, b in pixel_rgb[:, :3].tolist()]


trace = go.Scatter3d(
    x=points[:, 0],
    y=points[:, 1],
    z=points[:, 2],
    mode='markers',
    marker=dict(
        size=1,
        color=colors,  # Set color to the RGB values
    )
)


data = [trace]

# Layout for 3D plot: no margins so the scene fills the page.
layout = go.Layout(
    margin=dict(l=0, r=0, b=0, t=0)
)

# Write the interactive figure to an HTML file in the working directory.
fig = go.Figure(data=data, layout=layout)
fig.write_html("3d_plot.html")
