In [3]:
import cv2
import torch
import open3d as o3d
import numpy as np
import segmentation_models_pytorch as smp
from monodepth2 import networks
import torch.nn.functional as F

# Load the input image from disk.
image_path = 'saxena_monocular_depth_2/combined1-p-170t0.jpg'
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None.
# Fail here with a clear message instead of a cryptic error in cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_path!r}")

# OpenCV decodes to BGR channel order; convert to RGB and resize to the
# 256x256 working resolution used by the rest of the notebook.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (256, 256))

# Scale pixel values to [0, 1] and rearrange HWC -> 1xCxHxW for the networks.
input_image = image.astype(np.float32) / 255.0
input_image = torch.from_numpy(input_image).permute(2, 0, 1).unsqueeze(0)


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [4]:
# Build a single-class U-Net with a ResNet-34 encoder and switch it to
# inference mode.
# NOTE(review): only `encoder_weights='imagenet'` is requested and no
# segmentation checkpoint is loaded in this cell, so the predicted mask may
# not be meaningful -- confirm whether trained weights should be loaded.
seg_model = smp.Unet(
    encoder_name='resnet34',
    encoder_weights='imagenet',
    in_channels=3,
    classes=1,
)
seg_model.eval()

# Forward pass without gradient tracking; the raw output is one logit per pixel.
with torch.no_grad():
    seg_logits = seg_model(input_image)

# Logits -> probabilities -> binary {0, 1} mask (threshold 0.5) as a 2-D array.
seg_prob = torch.sigmoid(seg_logits).squeeze().cpu().numpy()
seg_mask = (seg_prob > 0.5).astype(np.uint8)

# Zero out every pixel outside the mask; the trailing axis lets the 2-D mask
# broadcast across the three colour channels.
segmented_image = image * seg_mask[..., np.newaxis]


In [5]:

# Build the Monodepth2 encoder/decoder pair (ResNet-18 encoder, four output scales).
depth_model = networks.ResnetEncoder(18, False)
depth_decoder = networks.DepthDecoder(num_ch_enc=depth_model.num_ch_enc, scales=range(4))

# Load the pre-trained weights on the CPU.
# The encoder checkpoint may carry extra non-weight entries; instead of
# `strict=False` (which would also silently ignore genuinely missing weights),
# keep only the keys the encoder actually owns and load them strictly.
cpu_device = torch.device('cpu')
encoder_checkpoint = torch.load("models/encoder.pth", map_location=cpu_device)
encoder_keys = depth_model.state_dict().keys()
depth_model.load_state_dict(
    {key: value for key, value in encoder_checkpoint.items() if key in encoder_keys}
)
depth_decoder.load_state_dict(torch.load("models/depth.pth", map_location=cpu_device))
depth_model.eval()
depth_decoder.eval()

# Predict the full-resolution disparity map (key ("disp", 0)) and make sure it
# matches the spatial size of the network input.
with torch.no_grad():
    features = depth_model(input_image)
    outputs = depth_decoder(features)
    disparity = outputs[("disp", 0)]
    disparity = F.interpolate(
        disparity, tuple(input_image.shape[-2:]), mode="bilinear", align_corners=False
    ).squeeze().cpu().numpy()

# The network predicts disparity (inverse depth), not depth. Using it directly
# as z would place near surfaces far away, so convert it first with the
# Monodepth2 disparity-to-depth mapping (depth range 0.1 .. 100).
min_depth, max_depth = 0.1, 100.0
min_disp, max_disp = 1.0 / max_depth, 1.0 / min_depth
depth_map = 1.0 / (min_disp + (max_disp - min_disp) * disparity)

# Normalize the depth map to [0, 1]; guard against a constant map, which would
# otherwise divide by zero and fill the array with NaNs.
depth_span = depth_map.max() - depth_map.min()
if depth_span > 0:
    depth_map = (depth_map - depth_map.min()) / depth_span
else:
    depth_map = np.zeros_like(depth_map)




In [6]:
# Back-project a depth map into a 3D point array (pinhole camera model)
def depth_to_point_cloud(depth, intrinsic):
    """Back-project a 2-D depth map into 3-D points with a pinhole model.

    Args:
        depth: 2-D array of per-pixel depth values, indexed ``[row, col]``.
        intrinsic: 3x3 camera matrix; ``[0, 0]`` / ``[1, 1]`` are read as the
            focal lengths and ``[0, 2]`` / ``[1, 2]`` as the principal point.

    Returns:
        Array of shape ``(rows * cols, 3)`` holding ``(x, y, z)`` per pixel,
        ordered row by row.
    """
    n_rows, n_cols = depth.shape
    focal_x, focal_y = intrinsic[0, 0], intrinsic[1, 1]
    center_x, center_y = intrinsic[0, 2], intrinsic[1, 2]

    # Column indices as a row vector and row indices as a column vector;
    # broadcasting against `depth` yields the full pixel grid.
    cols = np.arange(n_cols)[np.newaxis, :]
    rows = np.arange(n_rows)[:, np.newaxis]

    # Pinhole back-projection: offset from the principal point, scaled by depth.
    x = (cols - center_x) * depth / focal_x
    y = (rows - center_y) * depth / focal_y

    # Interleave to (rows, cols, 3), then flatten to one point per row.
    return np.stack((x, y, depth), axis=-1).reshape(-1, 3)

# Camera matrix used for back-projection. These values are hard-coded here
# (focal length 500 on both axes, principal point at (128, 128)).
# NOTE(review): presumably placeholders rather than calibrated values -- confirm.
intrinsic = np.array(
    [
        [500.0, 0, 128.0],
        [0, 500.0, 128.0],
        [0, 0, 1],
    ]
)

# Back-project every pixel of the depth map into an (N, 3) array of points.
points = depth_to_point_cloud(depth_map, intrinsic)

# Wrap the NumPy points in an Open3D point cloud.
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)

# Show the point cloud in the Open3D viewer.
o3d.visualization.draw_geometries([pcd])


: 

In [None]:
# Perform Poisson surface reconstruction from the point cloud.
# Downsample first so neighbouring points are at least one voxel apart.
pcd = pcd.voxel_down_sample(voxel_size=0.05)

# Poisson reconstruction requires per-point normals, and a cloud built only
# from raw coordinates has none. Estimate them from local neighbourhoods
# (search radius = 2x the voxel size) and orient them towards the origin,
# which is where the pinhole back-projection places the camera.
if not pcd.has_normals():
    pcd.estimate_normals(
        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)
    )
    pcd.orient_normals_towards_camera_location(camera_location=np.zeros(3))

mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=7)

# Vertex normals are needed for shaded rendering of the mesh.
mesh.compute_vertex_normals()

# Visualize the mesh
o3d.visualization.draw_geometries([mesh])
