In [None]:
# environment: thesisPlayground_pointClouds_env

In [3]:
import h5py
import numpy as np
import open3d as o3d
import pandas as pd
import glob
import os

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


# Using positions - Point cloud from one image using the camera-space (use M_cam_from_world)

Note that the code in this section was written as if it had to handle more than one image. However, combining several images in camera space would require further changes to the code (each frame is expressed in its own camera coordinate system), so in practice this section only renders the point cloud of a single image. **TODO:** this section needs a revision in any case; the code is kept because it may turn out to be useful.

In [4]:
def load_hdf5_file(file_paths, dataset_key):
    """Read one dataset from each HDF5 file and return the results as a list.

    Parameters
    ----------
    file_paths : iterable of str
        Paths of the HDF5 files to open (read-only).
    dataset_key : str
        Name of the dataset to read from every file.

    Returns
    -------
    list of numpy.ndarray
        One array per input file, in the same order as ``file_paths``.
    """
    arrays = []
    for path in file_paths:
        # Copy the dataset into memory while the file handle is still open.
        with h5py.File(path, 'r') as hdf5_file:
            dataset = hdf5_file[dataset_key]
            arrays.append(np.array(dataset))
    return arrays


def build_M_cam_from_world(frame_id, camera_dir):
    """Build the 4x4 homogeneous world-to-camera matrix for one keyframe.

    Reads ``camera_keyframe_positions.hdf5`` and
    ``camera_keyframe_orientations.hdf5`` from ``camera_dir`` and uses the
    entry at index ``frame_id`` of each file.

    Parameters
    ----------
    frame_id : int
        Index of the keyframe inside the two camera files.
    camera_dir : str
        Directory containing the two camera HDF5 files.

    Returns
    -------
    numpy.ndarray
        4x4 matrix ``[[R, t], [0, 0, 0, 1]]`` where ``R`` is the transpose of
        the stored orientation and ``t = -R @ camera_position``.
    """
    positions_path = os.path.join(camera_dir, "camera_keyframe_positions.hdf5")
    orientations_path = os.path.join(camera_dir, "camera_keyframe_orientations.hdf5")

    with h5py.File(positions_path, "r") as f:
        all_positions = f["dataset"][:]
    with h5py.File(orientations_path, "r") as f:
        all_orientations = f["dataset"][:]

    # Camera centre in world coordinates, as a column vector.
    cam_centre = np.array(all_positions[frame_id]).reshape((3, 1))

    # The stored orientation is treated as the camera-to-world rotation, so
    # its transpose is used as the world-to-camera rotation.
    rotation = np.array(all_orientations[frame_id]).T
    translation = -rotation @ cam_centre

    # Assemble [R | t] on top of the homogeneous row [0 0 0 1].
    upper_rows = np.hstack((rotation, translation))
    bottom_row = np.hstack((np.zeros((1, 3)), np.array([[1.0]])))
    return np.vstack((upper_rows, bottom_row))
    

def extract_frame_id(filename):
    """Return the integer frame index encoded in a file name.

    The second dot-separated field of the base name is parsed as an integer,
    e.g. ``frame.0012.color.hdf5`` -> ``12``.

    Raises
    ------
    IndexError
        If the base name contains no dot.
    ValueError
        If the second field is not an integer literal.
    """
    _, base_name = os.path.split(filename)
    # Only the first two fields matter, so stop splitting after them.
    fields = base_name.split('.', 2)
    return int(fields[1])


def generate_point_cloud(image_files, positions, camera_dir):
    """Create one camera-space Open3D point cloud per (image file, position map) pair.

    Parameters
    ----------
    image_files : sequence of str
        Image file names; only the frame index encoded in each name is used.
    positions : sequence of numpy.ndarray
        Position maps whose last axis holds the x, y, z coordinates of each
        pixel, paired element-wise with ``image_files``.
    camera_dir : str
        Directory with the camera keyframe files, forwarded to
        ``build_M_cam_from_world``.

    Returns
    -------
    list of o3d.geometry.PointCloud
        One point cloud per pair, with points expressed in camera space.
    """
    clouds = []

    for image_path, position_map in zip(image_files, positions):
        # The frame index in the file name selects the camera pose for this view.
        world_to_cam = build_M_cam_from_world(extract_frame_id(image_path), camera_dir)

        # Collect the x, y, z channels and flatten them to an (N, 3) array.
        xyz = np.stack([position_map[:, :, channel] for channel in range(3)], axis=-1)
        world_points = xyz.reshape(-1, 3)

        # Append a column of ones (homogeneous coordinates), apply the 4x4
        # transform, then drop the homogeneous coordinate again.
        ones_column = np.ones((world_points.shape[0], 1))
        homogeneous = np.hstack((world_points, ones_column))
        cam_points = np.dot(world_to_cam, homogeneous.T).T[:, :3]

        # Wrap the transformed points in an Open3D point cloud.
        cloud = o3d.geometry.PointCloud()
        cloud.points = o3d.utility.Vector3dVector(cam_points)
        clouds.append(cloud)

    return clouds


def render_point_cloud(point_cloud, point_size=2):
    """Show a point cloud in a blocking Open3D visualizer window.

    Parameters
    ----------
    point_cloud : o3d.geometry.PointCloud
        Geometry to display.
    point_size : float, optional
        Rendered size of each point. Defaults to 2, the value that used to be
        hard-coded.

    Raises
    ------
    RuntimeError
        If Open3D reports that the window could not be created.
    """
    vis = o3d.visualization.Visualizer()

    # create_window() reports failure through its return value; the calls
    # below need a window, so stop here with a clear error instead.
    if not vis.create_window():
        raise RuntimeError("Open3D could not create a visualization window.")

    try:
        vis.add_geometry(point_cloud)

        # Set the render options
        vis.get_render_option().point_size = point_size

        # Blocks until the window is closed
        vis.run()
    finally:
        # Always release the window, even if adding geometry or rendering raised
        vis.destroy_window()


In [16]:
path_metadata = '/local/home/gmarsich/Desktop/Thesis/0Code_playground/pointClouds/pointClouds_ChatGPT/metadata_camera_parameters.csv'
base_path = '/local/home/gmarsich/data2TB/Hypersim/evermotion_dataset/scenes'
scene = 'ai_007_008'  # name of the scene, with format ai_VVV_NNN
cam_xx = 'cam_00'

# Directory holding the camera keyframe position/orientation files
camera_dir = os.path.join(base_path, scene, "_detail", cam_xx)

# Get sorted lists of image and position HDF5 files
# (assumes matching frames sort to the same index in both lists)
image_files = sorted(glob.glob(os.path.join(base_path, scene, 'images', 'scene_' + cam_xx + '_final_hdf5', '*.color.hdf5')))
position_files = sorted(glob.glob(os.path.join(base_path, scene, 'images', 'scene_' + cam_xx + '_geometry_hdf5', '*.position.hdf5')))

# Validate the full listings BEFORE picking a frame: once both lists are cut
# down to a single element their lengths always match and the check is moot.
if len(image_files) != len(position_files):
    raise ValueError("The number of image files and position files do not match.")

# Fail with a clear message when the paths above match nothing
if not image_files:
    raise FileNotFoundError("No image/position HDF5 files found for scene '" + scene + "' and camera '" + cam_xx + "'.")

# HERE WE SELECT JUST ONE IMAGE
frame = 0  # index into the sorted file lists
image_files = [image_files[frame]]
position_files = [position_files[frame]]

In [17]:
# Read the selected colour image and the matching per-pixel position map
images = load_hdf5_file(image_files, 'dataset')
positions = load_hdf5_file(position_files, 'dataset')

# Build one camera-space point cloud per selected view
point_clouds = generate_point_cloud(image_files, positions, camera_dir)

# Report the size of the first cloud, then show it in the viewer
cloud = point_clouds[0]
print(cloud)
render_point_cloud(cloud)

PointCloud with 786432 points.


# Using positions - Point cloud from multiple images using the world-space

In [18]:
def load_hdf5_file(file_paths, dataset_key):
    """Load the dataset ``dataset_key`` from every HDF5 file in ``file_paths``.

    Same behaviour as the definition in the camera-space section above; it is
    repeated here so that this section can be run on its own.

    Returns a list with one ``numpy.ndarray`` per file, in input order.
    """
    def _read_dataset(path):
        # Materialise the dataset as an array before the file is closed.
        with h5py.File(path, 'r') as handle:
            return np.array(handle[dataset_key])

    return [_read_dataset(path) for path in file_paths]


def generate_point_cloud(images, positions):
    """Create one world-space Open3D point cloud per position map.

    Parameters
    ----------
    images : sequence
        Accepted for interface compatibility; not used by this function.
    positions : sequence of numpy.ndarray
        Position maps whose last axis holds the x, y, z coordinates of each
        pixel.

    Returns
    -------
    list of o3d.geometry.PointCloud
        One point cloud per entry of ``positions``; the coordinates are used
        as stored, without any transformation.
    """
    clouds = []

    for position_map in positions:
        # Collect the x, y, z channels of every pixel along a trailing axis.
        xyz = np.stack([position_map[:, :, channel] for channel in range(3)], axis=-1)

        # Flatten to (N, 3) and hand the points over to Open3D.
        cloud = o3d.geometry.PointCloud()
        cloud.points = o3d.utility.Vector3dVector(xyz.reshape(-1, 3))
        clouds.append(cloud)

    return clouds


def combine_point_clouds(point_clouds):
    """Merge several Open3D point clouds into a single new point cloud.

    Starts from an empty ``o3d.geometry.PointCloud`` and adds every cloud of
    ``point_clouds`` to it in order with ``+=``. An empty input yields an
    empty point cloud.
    """
    merged = o3d.geometry.PointCloud()
    for cloud in point_clouds:
        merged += cloud
    return merged


def render_point_cloud(point_cloud, point_size=2):
    """Show a point cloud in a blocking Open3D visualizer window.

    Parameters
    ----------
    point_cloud : o3d.geometry.PointCloud
        Geometry to display.
    point_size : float, optional
        Rendered size of each point. Defaults to 2, the value that used to be
        hard-coded.

    Raises
    ------
    RuntimeError
        If Open3D reports that the window could not be created.
    """
    vis = o3d.visualization.Visualizer()

    # create_window() reports failure through its return value; the calls
    # below need a window, so stop here with a clear error instead.
    if not vis.create_window():
        raise RuntimeError("Open3D could not create a visualization window.")

    try:
        vis.add_geometry(point_cloud)

        # Set the render options
        vis.get_render_option().point_size = point_size

        # Blocks until the window is closed
        vis.run()
    finally:
        # Always release the window, even if adding geometry or rendering raised
        vis.destroy_window()


In [21]:
path_metadata = '/local/home/gmarsich/Desktop/Thesis/0Code_playground/pointClouds/pointClouds_ChatGPT/metadata_camera_parameters.csv'
base_path = '/local/home/gmarsich/data2TB/Hypersim/evermotion_dataset/scenes'
scene = 'ai_007_008'  # name of the scene, with format ai_VVV_NNN
cam_xx = 'cam_00'

# Get sorted lists of image and position HDF5 files
# (assumes matching frames sort to the same index in both lists)
image_files = sorted(glob.glob(os.path.join(base_path, scene, 'images', 'scene_' + cam_xx + '_final_hdf5', '*.color.hdf5')))
position_files = sorted(glob.glob(os.path.join(base_path, scene, 'images', 'scene_' + cam_xx + '_geometry_hdf5', '*.position.hdf5')))

# Ensure the number of image and position files match
if len(image_files) != len(position_files):
    raise ValueError("The number of image files and position files do not match.")

# An empty listing would otherwise pass the check above and silently produce
# an empty point cloud; fail early with a clear message instead.
if not image_files:
    raise FileNotFoundError("No image/position HDF5 files found for scene '" + scene + "' and camera '" + cam_xx + "'.")

In [22]:
# Read every colour image and every per-pixel position map of the trajectory
images = load_hdf5_file(image_files, 'dataset')
positions = load_hdf5_file(position_files, 'dataset')

# One world-space point cloud per view
point_clouds = generate_point_cloud(images, positions)

# Merge the per-view clouds into a single cloud
combined_point_cloud = combine_point_clouds(point_clouds)

# Show the merged cloud in the viewer
render_point_cloud(combined_point_cloud)
