In [None]:
!pip install open3d --user

In [1]:
import matplotlib
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
from PIL import Image
import torch
#from transformers import GLPNImageProcessor, GLPNForDepthEstimation
from transformers import AutoImageProcessor, AutoModelForDepthEstimation

import numpy as np
from torchvision import transforms
import open3d as o3d

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


## Get Model

In [2]:
feature_extractor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf")
model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf")

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


## Image Pre-processing

In [5]:
filename = "test (15).jpg"
path = f"./data/{filename}"

orig_image = Image.open(path)
new_height = 720 if image.height > 720 else image.height
new_height -= (new_height % 32)
new_width = int(new_height* image.width/image.height)
diff = new_width % 32

new_width = new_width - diff if diff < 16 else new_width + 32 - diff
new_size=(new_width, new_height)
image = orig_image.resize(new_size)

In [6]:
# Preparing image for model
inputs = feature_extractor(images=image, return_tensors='pt')

NameError: name 'image' is not defined

## Getting prediction from model

In [None]:
with torch.no_grad():
    outputs = model(**inputs)

## Post-processing Image

In [None]:
post_processed_output = feature_extractor.post_process_depth_estimation(
    outputs,
    target_sizes=[(image.height, image.width)],
)
predicted_depth = post_processed_output[0]["predicted_depth"]
output = predicted_depth.detach().cpu().numpy() * 100


## Display output

In [None]:
#fig ,ax = plt.subplots(1,2)
#ax[0].imshow(image)
#ax[1].imshow(output, cmap='plasma')
#plt.tight_layout()
#plt.pause(1)

## Prepare depth image for open3d

In [None]:
width, height = image.size
depth_image = ((output - output.min()) * 255 / (output.max() - output.min())) # keep values in 0,255
depth_image = depth_image.max() - depth_image  # Invert depth values, because depthanything model gives higher value to close objects and lower to far. Point cloud is inverted if this step not done.

threshold = 125 # points below the threshold will be pushed back
lower_bound = 75 # minimum depth to avoid convergence of nearby pixels towards principle center
#Essentially, we want the values below 125 to gradually increase towards 125, starting from 75. closer it is to 125, lesser it will be pushed back. (threshold - 75) is basically a scale of how much to push back.
depth_image[depth_image < threshold] = (depth_image[depth_image < threshold] / threshold) * (threshold - 75) + 75
 
# Ensure that no value goes above 255 (in case of overflow)
depth_image = np.clip(depth_image, 0, 255)

image_np = np.array(image)

# Create rgbd image
depth_o3d = o3d.geometry.Image(depth_image)
#depth_o3d = o3d.geometry.Image(scaled_depth_map)
image_o3d = o3d.geometry.Image(image_np)
rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)

## Setup camera setting

In [None]:
camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
camera_intrinsic.set_intrinsics(width, height, 1000,1000, width/2, height/2)

## Create o3d point cloud

In [None]:
raw_pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
#o3d.visualization.draw_geometries([raw_pcd])

## Post-process point cloud

In [None]:
# Remove outliers
cl, ind = raw_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=10.0)
pcd = raw_pcd.select_by_index(ind)

# Estimate normals
pcd.estimate_normals()
pcd.orient_normals_to_align_with_direction()
#o3d.visualization.draw_geometries([pcd])


## Surface reconstruction

In [250]:
mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=12, n_threads=4)

# rotate mesh
rotation = mesh.get_rotation_matrix_from_xyz((np.pi,0,0))
mesh.rotate(rotation, center=(0,0,0))

# remove vertices with low densities
vertices_to_remove = densities < np.quantile(densities, 0.1)
mesh.remove_vertices_by_mask(vertices_to_remove)

# visualize the mesh
o3d.visualization.draw_geometries([mesh], mesh_show_back_face=True)



In [251]:
# Compute nearest neighbor distances to determine pivot radii
#distances = pcd.compute_nearest_neighbor_distance()
#avg_dist = np.mean(distances)
#radii = [avg_dist * factor for factor in [1, 4, 9]]  # Adjust factors as needed

# Run BPA
#bpa_mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_ball_pivoting( pcd, o3d.utility.DoubleVector(radii))

In [252]:
o3d.io.write_triangle_mesh(f"./meshes/{filename}.ply", mesh)

True

In [None]:
o3d.visualization.draw_geometries([mesh], mesh_show_back_face=True)