In [None]:
pip install transformers

In [None]:
import numpy as np
# Numpy is fundamental for scientific computing in Python. 
# It provides support for arrays, mathematical functions, random number generation, and more.

import matplotlib.pyplot as plt
# Matplotlib is a plotting library. Here, we import the pyplot interface to provide 
# easy-to-use functions for creating plots and figures.

# from mpl_toolkits.mplot3d import Axes3D
# This is Matplotlib's 3D plotting toolkit. It ships with Matplotlib, and since Matplotlib 3.2
# the '3d' projection is registered automatically, so this import is only needed if you
# reference Axes3D directly.

from transformers import DPTImageProcessor, DPTForDepthEstimation
# Importing from the Hugging Face Transformers library. DPTImageProcessor is used for 
# preparing images for the DPT model, and DPTForDepthEstimation is the model itself for 
# estimating depth from images.

import torch
# PyTorch is a machine learning library. It is used here because the DPT model 
# is implemented in PyTorch.

from PIL import Image
# PIL (Python Imaging Library) is used for opening, manipulating, and saving 
# many different image file formats.

import plotly.graph_objs as go
# Plotly's graph objects are used for creating and manipulating complex interactive plots.
# This will be useful for creating interactive 3D visualizations of the depth maps.

# Possible next steps in your script:
# - Load and preprocess an image
# - Use the DPT model to estimate the depth map from the image


In [None]:
# Import necessary libraries
from PIL import Image
from transformers import DPTImageProcessor, DPTForDepthEstimation

# Load the image
# The path is relative to the notebook's working directory.
# Replace it with the path to your own image file.
url = "Images/170606123840-01-homo-sapiens-fossils.jpg"

# Open the file inside a context manager so the file handle is released as soon as the
# pixels are read, and convert to 3-channel RGB: the point-cloud cells further down
# format every pixel as an (r, g, b) triple, which fails for grayscale or palette images.
with Image.open(url) as source_image:
    image = source_image.convert("RGB")

# Initialize the Dense Prediction Transformer (DPT) model
# The 'Intel/dpt-large' checkpoint and its matching image processor are loaded through
# Hugging Face's transformers library. The processor and the model must come from the
# same checkpoint so that preprocessing matches what the model expects.
processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

# Note: from_pretrained() downloads the checkpoint on first use and caches it locally,
# so the first run needs network access (or a pre-populated cache).


In [None]:
# Prepare the input image for the DPT model
# The processor converts the PIL image into the tensor format the model expects.
inputs = processor(images=image, return_tensors="pt")

# Model inference
# no_grad() disables gradient tracking, which is not needed for inference and would
# only cost memory and time.
with torch.no_grad():
    outputs = model(**inputs)
    predicted_depth = outputs.predicted_depth

# Resize the prediction back to the original image resolution.
# PIL reports size as (width, height); interpolate() wants (height, width), hence [::-1].
prediction = torch.nn.functional.interpolate(
    predicted_depth.unsqueeze(1),  # add a channel axis: (batch, 1, H, W)
    size=image.size[::-1],
    mode="bicubic",
    align_corners=False,
)

# Take the single (batch 0, channel 0) plane explicitly instead of squeeze(), so an image
# that is one pixel wide or tall does not lose a spatial dimension.
depth_raw = prediction[0, 0].cpu().numpy()

# Scale to 0-255 for visualization.
# Bicubic interpolation can overshoot and produce slightly negative values; casting those
# straight to uint8 would wrap around to large values, so clip before the cast. Also guard
# against an all-zero (or non-positive) prediction to avoid dividing by zero.
# NOTE(review): DPT-style models are usually described as predicting relative (inverse)
# depth rather than metric depth — confirm before interpreting these values as distances.
depth_peak = float(np.max(depth_raw))
if depth_peak > 0:
    depth_scaled = np.clip(depth_raw * 255 / depth_peak, 0, 255)
else:
    depth_scaled = np.zeros_like(depth_raw)
depth_map = depth_scaled.astype("uint8")


In [None]:
# Pixel colours: turn the PIL image into a NumPy array so each pixel can be indexed as [row, col].
image_np = np.array(image)

# The depth map is indexed as [row, col], so its shape is (height, width).
height, width = depth_map.shape

# Pinhole camera intrinsics used to back-project pixels into 3D.
# Focal lengths in pixels, horizontal and vertical.
# 525.0 is only an example value; set it from your camera's specifications.
fx = fy = 525.0

# Optical centre, taken to be the middle of the image.
cx = width / 2
cy = height / 2

# Start from empty containers for the 3D points and their colours;
# they are built in the cell that follows.
points, colors = [], []


In [5]:
# Back-project every pixel with a non-zero depth into 3D using the pinhole camera model:
#     X = (u - cx) * Z / fx        Y = (v - cy) * Z / fy
# where (u, v) are the column/row pixel coordinates, (cx, cy) is the optical centre and
# fx, fy are the focal lengths.
#
# This is done with whole-array NumPy operations instead of a Python loop over every pixel.
# The cell also rebuilds `points` and `colors` from scratch, so it can be re-run safely.

# Row and column indices of all pixels whose depth is non-zero (zero = no data / background).
# np.nonzero returns them in row-major order, i.e. the same order as a nested v/u loop.
v_idx, u_idx = np.nonzero(depth_map)

# Promote the uint8 depth to float before the arithmetic.
Z = depth_map[v_idx, u_idx].astype(np.float64)
X = (u_idx - cx) * Z / fx
Y = (v_idx - cy) * Z / fy

# One row per point: [X, Y, Z]. The shape is (N, 3) even when N == 0, so the column
# slicing below keeps working for an empty point cloud.
points = np.column_stack((X, Y, Z))

# Colour of each kept pixel, formatted as the 'rgb(r,g,b)' strings Plotly expects.
# Only the first three channels are used, so an alpha channel (if any) is ignored.
pixel_rgb = image_np[v_idx, u_idx, :3].tolist()
colors = ['rgb({},{},{})'.format(r, g, b) for r, g, b in pixel_rgb]

# Create a Plotly 3D scatter trace: one marker per 3D point, coloured with the
# corresponding pixel colour from the original image.
trace = go.Scatter3d(
    x=points[:, 0],
    y=points[:, 1],
    z=points[:, 2],
    mode='markers',
    marker=dict(
        size=1,         # Size of the markers
        color=colors,   # Per-point colours
    )
)

# Assemble the plot data
data = [trace]

# Use minimal margins so the 3D scene fills the figure
layout = go.Layout(
    margin=dict(l=0, r=0, b=0, t=0)
)

# Create a figure with the data and layout
fig = go.Figure(data=data, layout=layout)

# Save the figure as a standalone HTML file for interactive viewing
fig.write_html("3d_plot2.html")


In [None]:
# Display the Plotly figure built in the previous cell (the same figure that was
# written to 3d_plot2.html).
fig.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Static Matplotlib rendering of the same coloured point cloud.

# Convert the PIL image to a NumPy array for colour extraction
image_np = np.array(image)

# The depth map is indexed as [row, col], so its shape is (height, width)
height, width = depth_map.shape

# Camera parameters
fx, fy = 525.0, 525.0  # Focal lengths (assumed values; replace with actual values if known)
cx, cy = width / 2, height / 2  # Optical centre (assumed to be at the centre of the image)

# Back-project every pixel with a non-zero depth into 3D with whole-array NumPy operations
# instead of a Python loop over every pixel:
#     X = (u - cx) * Z / fx        Y = (v - cy) * Z / fy
# np.nonzero returns the indices in row-major order, the same order as a nested v/u loop.
v_idx, u_idx = np.nonzero(depth_map)
Z = depth_map[v_idx, u_idx].astype(np.float64)  # promote uint8 depth to float
X = (u_idx - cx) * Z / fx
Y = (v_idx - cy) * Z / fy

# One row per point: [X, Y, Z]. The shape is (N, 3) even when N == 0, so the column
# slicing below keeps working for an empty point cloud.
points = np.column_stack((X, Y, Z))

# Per-point colours scaled from 0-255 to the 0-1 range Matplotlib expects
colors = image_np[v_idx, u_idx] / 255

# Create a 3D scatter plot using Matplotlib
# Note: this rebinds `fig`, which previously referred to the Plotly figure.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Plot all points, each coloured with the corresponding pixel colour from the image
ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=colors, marker='.')

# Label the axes
ax.set_xlabel('X Axis')
ax.set_ylabel('Y Axis')
ax.set_zlabel('Z Axis')

# Display the plot
plt.show()
