In [None]:
# environment: thesisPlayground_pointClouds_env

In [None]:
import h5py
import numpy as np
import open3d as o3d
import pandas as pd
import glob
import os
import math

# Using depths - Point cloud from multiple images, in world space
### Colour given by default by the height from the floor

Hints on how to deal with this problem from:
- https://github.com/apple/ml-hypersim/issues/9
- https://github.com/apple/ml-hypersim/issues/10

In [None]:
def get_depths_oneImage(path_file_depths, intWidth, intHeight, factor_assets_to_meters, fltFocal):
    """
    Read one depth HDF5 file and convert its values to planar depths in asset units.

    The stored values are treated as distances from the optical center of the
    camera; they are converted to depths measured from the plane passing through
    the optical center, then divided by `factor_assets_to_meters`.

    Parameters:
        path_file_depths (str): Path to the HDF5 file; its 'dataset' entry is read.
        intWidth (int): Image width in pixels.
        intHeight (int): Image height in pixels.
        factor_assets_to_meters (float): Scale the metric depths are divided by to obtain asset units.
        fltFocal (float): Focal length in pixels.

    Returns:
        np.ndarray: Depth map in asset units, one value per pixel.
    """

    with h5py.File(path_file_depths, 'r') as hdf5_file:
        # presumably one row per image line and one column per pixel (height x width) - verify against the data
        ray_lengths = hdf5_file['dataset'][:]

    # Coordinates of the pixel centres on the image plane, with the origin at the image centre
    centres_x = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth)
    centres_y = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, intHeight)
    grid_x, grid_y = np.meshgrid(centres_x, centres_y)  # both of shape (intHeight, intWidth)

    # One vector (x, y, focal) per pixel, pointing from the optical center to the pixel
    image_plane = np.stack(
        [
            grid_x.astype(np.float32),
            grid_y.astype(np.float32),
            np.full([intHeight, intWidth], fltFocal, np.float32),
        ],
        axis=2,
    )

    # Scale each distance by focal / |pixel vector| to obtain the depth from the camera plane
    depth_meters = ray_lengths / np.linalg.norm(image_plane, 2, 2) * fltFocal

    # Convert from meters to asset units
    return depth_meters / factor_assets_to_meters
    

def get_focalLength(path_metadata_camera_parameters, scene):
    """
    Compute the focal length in pixels for a scene.

    The image width is read from the camera-parameters CSV (row `scene`, column
    "settings_output_img_width") and combined with a fixed field of view of pi/3.

    Parameters:
        path_metadata_camera_parameters (str): Path to the CSV file, indexed by "scene_name".
        scene (str): Name of the scene whose row is used.

    Returns:
        float: Focal length in pixels, width / (2 * tan(fov / 2)).
    """

    camera_table = pd.read_csv(path_metadata_camera_parameters, index_col="scene_name")
    width_px = int(camera_table.loc[scene]["settings_output_img_width"])

    # Field of view assumed for every scene
    field_of_view = math.pi/3.0

    return width_px/(2 * math.tan(field_of_view/2))


def get_depths(path_list_depths, path_metadata_camera_parameters, scene):
    """
    Load and convert the depth maps of several images of one scene.

    Parameters:
        path_list_depths (list): Paths to the depth HDF5 files, one per image.
        path_metadata_camera_parameters (str): Path to the camera-parameters CSV, indexed by "scene_name".
        scene (str): Name of the scene whose parameters are used.

    Returns:
        list: One converted depth map per input path, in the same order
        (see get_depths_oneImage).
    """

    # Parameters of the requested scene
    scene_row = pd.read_csv(path_metadata_camera_parameters, index_col="scene_name").loc[scene]

    width_px = int(scene_row["settings_output_img_width"])
    height_px = int(scene_row["settings_output_img_height"])
    meters_scale = scene_row["settings_units_info_meters_scale"]

    focal_px = get_focalLength(path_metadata_camera_parameters, scene)

    # Convert every depth file with the same scene-wide parameters
    return [
        get_depths_oneImage(depth_path, width_px, height_px, meters_scale, focal_px)
        for depth_path in path_list_depths
    ]


In [None]:
def extract_frames_ids(paths_filenames):
    """
    Extracts frame IDs from a list of filenames.

    The frame ID is the second dot-separated field of the filename, e.g.
    'frame.0012.depth_meters.hdf5' gives 12.

    Parameters:
        paths_filenames (list): List of file paths. Last component of a path is the filename.

    Returns:
        list: List of extracted frame IDs (int), in the same order as the input.

    Raises:
        IndexError: If a filename contains no '.'.
        ValueError: If the second dot-separated field is not an integer.
    """

    frame_ids = []
    for path_filename in paths_filenames:
        # os.path.basename honours the path separator of the running platform,
        # so directory names (which may contain dots) never leak into the filename
        filename = os.path.basename(path_filename)
        frame_ids.append(int(filename.split('.')[1]))

    return frame_ids


def get_extrinsics_oneImage(path_positions, path_orientations, frame_id):
    """
    Build the extrinsic matrix of one frame from the camera position and orientation files.

    Parameters:
        path_positions (str): Path to the camera positions HDF5 file.
        path_orientations (str): Path to the camera orientations HDF5 file.
        frame_id (int): Index of the frame in both files.

    Returns:
        np.ndarray: The extrinsic matrix [R|t], i.e. the orientation of the frame
        with its position appended as a fourth column.
    """

    # Both files store their data under the 'dataset' key, indexed by frame
    with h5py.File(path_positions, "r") as positions_file:
        all_positions = positions_file["dataset"][:]

    with h5py.File(path_orientations, "r") as orientations_file:
        all_orientations = orientations_file["dataset"][:]

    rotation = all_orientations[frame_id]
    translation_column = all_positions[frame_id].reshape(3, 1)

    # [R|t]: rotation on the left, camera position as the last column
    return np.concatenate((rotation, translation_column), axis=1)


def get_extrinsics(path_positions, path_orientations, frames_ids):
    """
    Build the extrinsic matrix [R|t] of every requested frame.

    Parameters:
        path_positions (str): Path to the camera positions HDF5 file.
        path_orientations (str): Path to the camera orientations HDF5 file.
        frames_ids (list): Frame IDs to extract the extrinsics for.

    Returns:
        list: One extrinsic matrix (np.ndarray, orientation with the position
        appended as last column) per frame ID, in the same order.
    """

    # Nothing to do: do not touch the files at all
    if len(frames_ids) == 0:
        return []

    # Read each file once, instead of once per frame
    with h5py.File(path_positions, "r") as f:
        camera_positions = f["dataset"][:]

    with h5py.File(path_orientations, "r") as f:
        camera_orientations = f["dataset"][:]

    return [
        np.hstack((camera_orientations[frame_id], camera_positions[frame_id].reshape(3, 1)))
        for frame_id in frames_ids
    ]

In [None]:
def generate_point_cloud(files_depths, extrinsics_matrices, path_metadata_camera_parameters, scene):
    """
    Back-project depth maps into world space, producing one Open3D point cloud per image.

    Parameters:
        files_depths (list of np.ndarray): Depth maps of shape (height, width), one per image.
        extrinsics_matrices (list of np.ndarray): One 3x4 matrix [R|t] per depth map; it is
            applied to homogeneous camera coordinates to obtain world coordinates.
        path_metadata_camera_parameters (str): Path to the camera-parameters CSV, indexed by "scene_name".
        scene (str): Name of the scene whose image size is used.

    Returns:
        list of o3d.geometry.PointCloud: One point cloud per depth map, with one point
        per pixel in row-major order.

    Raises:
        ValueError: If a depth map does not have shape (height, width).
    """

    df_camera_parameters = pd.read_csv(path_metadata_camera_parameters, index_col="scene_name")
    df_ = df_camera_parameters.loc[scene]

    intWidth = int(df_["settings_output_img_width"])
    intHeight = int(df_["settings_output_img_height"])

    fx = get_focalLength(path_metadata_camera_parameters, scene)
    fy = fx

    # Per-pixel ray slopes, shared by all images. Pixel centres are taken at
    # (index + 0.5), the same grid get_depths_oneImage uses to convert the depths.
    # The image row index grows in the opposite direction of the camera y axis.
    slopes_x = (np.arange(intWidth, dtype=np.float64) + 0.5 - intWidth / 2) / fx
    slopes_y = -(np.arange(intHeight, dtype=np.float64) + 0.5 - intHeight / 2) / fy
    ray_x, ray_y = np.meshgrid(slopes_x, slopes_y)  # both of shape (intHeight, intWidth)

    point_clouds = []

    for i, depths in enumerate(files_depths):
        depth = np.asarray(depths, dtype=np.float64)
        if depth.shape != (intHeight, intWidth):
            raise ValueError(
                "Depth map %d has shape %s, expected %s" % (i, depth.shape, (intHeight, intWidth))
            )

        # Camera coordinates; z is negated because points in front of the camera have negative z
        xyz_cam = np.stack((ray_x * depth, ray_y * depth, -depth), axis=-1).reshape(-1, 3)

        # world = R @ xyz_cam + t, applied to all pixels at once
        extrinsic = np.asarray(extrinsics_matrices[i], dtype=np.float64)
        world_coo = xyz_cam @ extrinsic[:, :3].T + extrinsic[:, 3]

        # NOTE(review): non-finite depths, if any, end up as non-finite points - confirm whether they should be removed
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(world_coo)
        point_clouds.append(pcd)

    return point_clouds


def merge_point_clouds(point_clouds):
    """
    Combine several Open3D point clouds into one.

    Parameters:
        point_clouds (list of o3d.geometry.PointCloud): The point clouds to combine.

    Returns:
        o3d.geometry.PointCloud: A new point cloud to which every input cloud has been
        added with `+=`, in list order. An empty list gives an empty point cloud.
    """
    # Start from an empty cloud so that none of the inputs is modified
    combined = o3d.geometry.PointCloud()

    for cloud in point_clouds:
        combined += cloud

    return combined


In [None]:
# Settings to adapt to your machine and to the scene of interest
path_metadata_camera_parameters = '/local/home/gmarsich/Desktop/Thesis/0Code_playground/pointClouds_Hypersim/withDepths/metadata_camera_parameters.csv' # TODO TOSET: change with what you need
base_path = '/local/home/gmarsich/data2TB/Hypersim/evermotion_dataset/scenes' # TODO TOSET: change with what you need
scene = 'ai_001_001'  # name of the scene, with format ai_VVV_NNN # TODO TOSET: change with what you need
cam_xx = 'cam_00' # TODO TOSET: change with what you need

# Folders of the chosen scene / camera
scene_dir = os.path.join(base_path, scene)
camera_detail_dir = os.path.join(scene_dir, '_detail', cam_xx)

# Depth HDF5 files, sorted by filename
depth_pattern = os.path.join(scene_dir, 'images', 'scene_' + cam_xx + '_geometry_hdf5', '*.depth_meters.hdf5')
paths_depthEuclidean = sorted(glob.glob(depth_pattern))

# Camera orientations and positions (one file each)
paths_orientations = os.path.join(camera_detail_dir, 'camera_keyframe_orientations.hdf5')
paths_positions = os.path.join(camera_detail_dir, 'camera_keyframe_positions.hdf5')


In [None]:
# Load the depth maps of all the images found for the chosen scene / camera
depth_files = get_depths(paths_depthEuclidean, path_metadata_camera_parameters, scene)

# The frame IDs come from the depth filenames, so the extrinsics follow the same order as depth_files
frames_ids = extract_frames_ids(paths_depthEuclidean)
extrinsics_matrices = get_extrinsics(paths_positions, paths_orientations, frames_ids)
# Quick sanity check: show the [R|t] matrix of the first frame
print(extrinsics_matrices[0])

In [None]:
# One point cloud per image, all expressed in world coordinates
point_clouds = generate_point_cloud(depth_files, extrinsics_matrices, path_metadata_camera_parameters, scene)
# Combine the per-image point clouds into a single one
merged_point_clouds = merge_point_clouds(point_clouds)


In [None]:
# o3d.visualization.draw_geometries([merged_point_clouds]) # to visualise the point cloud
# o3d.io.write_point_cloud("point_cloud_DEPTHS.ply", merged_point_clouds) # to save the point cloud as .ply file

In [None]:
# # How to open a point cloud
# pcd = o3d.io.read_point_cloud("/local/home/gmarsich/Desktop/Thesis/0Code_playground/pointClouds_Hypersim/point_cloud.ply")
# o3d.visualization.draw_geometries([pcd])

# Using depths - Point cloud from multiple images, in world space
### Colour given by a tonemapping

Here one first needs to preprocess the `*.color.hdf5` files by applying a tonemap. I used the same tonemap as the one used by Hypersim (described in this code: https://github.com/apple/ml-hypersim/blob/main/code/python/tools/scene_generate_images_tonemap.py). I created a file `apply_tonemap.py` that converts the `.color.hdf5` files into HDF5 files with the tonemap applied, and saves them.

In [None]:
def get_depths_oneImage(path_file_depths, intWidth, intHeight, factor_assets_to_meters, fltFocal):
    """
    Read one depth HDF5 file and convert its values to planar depths in asset units.

    The stored values are treated as distances from the optical center of the
    camera; they are converted to depths measured from the plane passing through
    the optical center, then divided by `factor_assets_to_meters`.

    Parameters:
        path_file_depths (str): Path to the HDF5 file; its 'dataset' entry is read.
        intWidth (int): Image width in pixels.
        intHeight (int): Image height in pixels.
        factor_assets_to_meters (float): Scale the metric depths are divided by to obtain asset units.
        fltFocal (float): Focal length in pixels.

    Returns:
        np.ndarray: Depth map in asset units, one value per pixel.
    """

    with h5py.File(path_file_depths, 'r') as hdf5_file:
        # presumably one row per image line and one column per pixel (height x width) - verify against the data
        ray_lengths = hdf5_file['dataset'][:]

    # Coordinates of the pixel centres on the image plane, with the origin at the image centre
    centres_x = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth)
    centres_y = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, intHeight)
    grid_x, grid_y = np.meshgrid(centres_x, centres_y)  # both of shape (intHeight, intWidth)

    # One vector (x, y, focal) per pixel, pointing from the optical center to the pixel
    image_plane = np.stack(
        [
            grid_x.astype(np.float32),
            grid_y.astype(np.float32),
            np.full([intHeight, intWidth], fltFocal, np.float32),
        ],
        axis=2,
    )

    # Scale each distance by focal / |pixel vector| to obtain the depth from the camera plane
    depth_meters = ray_lengths / np.linalg.norm(image_plane, 2, 2) * fltFocal

    # Convert from meters to asset units
    return depth_meters / factor_assets_to_meters
    

def get_focalLength(path_metadata_camera_parameters, scene):
    """
    Compute the focal length in pixels for a scene.

    The image width is read from the camera-parameters CSV (row `scene`, column
    "settings_output_img_width") and combined with a fixed field of view of pi/3.

    Parameters:
        path_metadata_camera_parameters (str): Path to the CSV file, indexed by "scene_name".
        scene (str): Name of the scene whose row is used.

    Returns:
        float: Focal length in pixels, width / (2 * tan(fov / 2)).
    """

    camera_table = pd.read_csv(path_metadata_camera_parameters, index_col="scene_name")
    width_px = int(camera_table.loc[scene]["settings_output_img_width"])

    # Field of view assumed for every scene
    field_of_view = math.pi/3.0

    return width_px/(2 * math.tan(field_of_view/2))


def get_depths(path_list_depths, path_metadata_camera_parameters, scene):
    """
    Load and convert the depth maps of several images of one scene.

    Parameters:
        path_list_depths (list): Paths to the depth HDF5 files, one per image.
        path_metadata_camera_parameters (str): Path to the camera-parameters CSV, indexed by "scene_name".
        scene (str): Name of the scene whose parameters are used.

    Returns:
        list: One converted depth map per input path, in the same order
        (see get_depths_oneImage).
    """

    # Parameters of the requested scene
    scene_row = pd.read_csv(path_metadata_camera_parameters, index_col="scene_name").loc[scene]

    width_px = int(scene_row["settings_output_img_width"])
    height_px = int(scene_row["settings_output_img_height"])
    meters_scale = scene_row["settings_units_info_meters_scale"]

    focal_px = get_focalLength(path_metadata_camera_parameters, scene)

    # Convert every depth file with the same scene-wide parameters
    return [
        get_depths_oneImage(depth_path, width_px, height_px, meters_scale, focal_px)
        for depth_path in path_list_depths
    ]


In [None]:
def extract_frames_ids(paths_filenames):
    """
    Extracts frame IDs from a list of filenames.

    The frame ID is the second dot-separated field of the filename, e.g.
    'frame.0012.depth_meters.hdf5' gives 12.

    Parameters:
        paths_filenames (list): List of file paths. Last component of a path is the filename.

    Returns:
        list: List of extracted frame IDs (int), in the same order as the input.

    Raises:
        IndexError: If a filename contains no '.'.
        ValueError: If the second dot-separated field is not an integer.
    """

    frame_ids = []
    for path_filename in paths_filenames:
        # os.path.basename honours the path separator of the running platform,
        # so directory names (which may contain dots) never leak into the filename
        filename = os.path.basename(path_filename)
        frame_ids.append(int(filename.split('.')[1]))

    return frame_ids


def get_extrinsics_oneImage(path_positions, path_orientations, frame_id):
    """
    Build the extrinsic matrix of one frame from the camera position and orientation files.

    Parameters:
        path_positions (str): Path to the camera positions HDF5 file.
        path_orientations (str): Path to the camera orientations HDF5 file.
        frame_id (int): Index of the frame in both files.

    Returns:
        np.ndarray: The extrinsic matrix [R|t], i.e. the orientation of the frame
        with its position appended as a fourth column.
    """

    # Both files store their data under the 'dataset' key, indexed by frame
    with h5py.File(path_positions, "r") as positions_file:
        all_positions = positions_file["dataset"][:]

    with h5py.File(path_orientations, "r") as orientations_file:
        all_orientations = orientations_file["dataset"][:]

    rotation = all_orientations[frame_id]
    translation_column = all_positions[frame_id].reshape(3, 1)

    # [R|t]: rotation on the left, camera position as the last column
    return np.concatenate((rotation, translation_column), axis=1)


def get_extrinsics(path_positions, path_orientations, frames_ids):
    """
    Build the extrinsic matrix [R|t] of every requested frame.

    Parameters:
        path_positions (str): Path to the camera positions HDF5 file.
        path_orientations (str): Path to the camera orientations HDF5 file.
        frames_ids (list): Frame IDs to extract the extrinsics for.

    Returns:
        list: One extrinsic matrix (np.ndarray, orientation with the position
        appended as last column) per frame ID, in the same order.
    """

    # Nothing to do: do not touch the files at all
    if len(frames_ids) == 0:
        return []

    # Read each file once, instead of once per frame
    with h5py.File(path_positions, "r") as f:
        camera_positions = f["dataset"][:]

    with h5py.File(path_orientations, "r") as f:
        camera_orientations = f["dataset"][:]

    return [
        np.hstack((camera_orientations[frame_id], camera_positions[frame_id].reshape(3, 1)))
        for frame_id in frames_ids
    ]


In [None]:
def generate_point_cloud(paths_images, files_depths, extrinsics_matrices, path_metadata_camera_parameters, scene):
    """
    Back-project depth maps into world space, producing one coloured Open3D point cloud per image.

    Parameters:
        paths_images (list): Paths to the tonemapped HDF5 images (dataset 'tonemapped_rgb'),
            one per depth map and in the same order.
        files_depths (list of np.ndarray): Depth maps of shape (height, width), one per image.
        extrinsics_matrices (list of np.ndarray): One 3x4 matrix [R|t] per depth map; it is
            applied to homogeneous camera coordinates to obtain world coordinates.
        path_metadata_camera_parameters (str): Path to the camera-parameters CSV, indexed by "scene_name".
        scene (str): Name of the scene whose image size is used.

    Returns:
        list of o3d.geometry.PointCloud: One point cloud per depth map, with one point
        and one colour per pixel in row-major order.

    Raises:
        ValueError: If a depth map does not have shape (height, width), or if an image
            does not provide exactly one colour per pixel of its depth map.
    """

    df_camera_parameters = pd.read_csv(path_metadata_camera_parameters, index_col="scene_name")
    df_ = df_camera_parameters.loc[scene]

    intWidth = int(df_["settings_output_img_width"])
    intHeight = int(df_["settings_output_img_height"])

    fx = get_focalLength(path_metadata_camera_parameters, scene)
    fy = fx

    # Per-pixel ray slopes, shared by all images. Pixel centres are taken at
    # (index + 0.5), the same grid get_depths_oneImage uses to convert the depths.
    # The image row index grows in the opposite direction of the camera y axis.
    slopes_x = (np.arange(intWidth, dtype=np.float64) + 0.5 - intWidth / 2) / fx
    slopes_y = -(np.arange(intHeight, dtype=np.float64) + 0.5 - intHeight / 2) / fy
    ray_x, ray_y = np.meshgrid(slopes_x, slopes_y)  # both of shape (intHeight, intWidth)

    point_clouds = []

    for i, depths in enumerate(files_depths):
        depth = np.asarray(depths, dtype=np.float64)
        if depth.shape != (intHeight, intWidth):
            raise ValueError(
                "Depth map %d has shape %s, expected %s" % (i, depth.shape, (intHeight, intWidth))
            )

        # Load only the image of the current frame, to keep memory usage low
        with h5py.File(paths_images[i], 'r') as f:
            image = np.array(f['tonemapped_rgb'])

        # First three channels (R, G, B), one row per pixel in row-major order
        colors = np.asarray(image[:, :, :3], dtype=np.float64).reshape(-1, 3)
        if colors.shape[0] != intHeight * intWidth:
            raise ValueError(
                "Image %d provides %d colours, expected %d" % (i, colors.shape[0], intHeight * intWidth)
            )

        # Camera coordinates; z is negated because points in front of the camera have negative z
        xyz_cam = np.stack((ray_x * depth, ray_y * depth, -depth), axis=-1).reshape(-1, 3)

        # world = R @ xyz_cam + t, applied to all pixels at once
        extrinsic = np.asarray(extrinsics_matrices[i], dtype=np.float64)
        world_coo = xyz_cam @ extrinsic[:, :3].T + extrinsic[:, 3]

        # NOTE(review): non-finite depths, if any, end up as non-finite points - confirm whether they should be removed
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(world_coo)
        pcd.colors = o3d.utility.Vector3dVector(colors)
        point_clouds.append(pcd)

    return point_clouds


def merge_point_clouds(point_clouds):
    """
    Combine several Open3D point clouds into one.

    Parameters:
        point_clouds (list of o3d.geometry.PointCloud): The point clouds to combine.

    Returns:
        o3d.geometry.PointCloud: A new point cloud to which every input cloud has been
        added with `+=`, in list order. An empty list gives an empty point cloud.
    """
    # Start from an empty cloud so that none of the inputs is modified
    combined = o3d.geometry.PointCloud()

    for cloud in point_clouds:
        combined += cloud

    return combined


In [None]:
import apply_tonemap # TODO TOSET comment out (together with the tonemapping call below) if the hdf5 files with tonemap already exist

# Settings to adapt to your machine and to the scene of interest
path_metadata_camera_parameters = '/local/home/gmarsich/Desktop/Thesis/0Code_playground/pointClouds_Hypersim/withDepths/metadata_camera_parameters.csv' # TODO TOSET: change with what you need
base_path = '/local/home/gmarsich/data2TB/Hypersim/evermotion_dataset/scenes' # TODO TOSET: change with what you need
scene = 'ai_007_008'  # name of the scene, with format ai_VVV_NNN # TODO TOSET: change with what you need
cam_xx = 'cam_00' # TODO TOSET: change with what you need

# Folders of the chosen scene / camera
images_dir = os.path.join(base_path, scene, 'images')
camera_detail_dir = os.path.join(base_path, scene, '_detail', cam_xx)
final_hdf5_name = 'scene_' + cam_xx + '_final_hdf5'

# Create the tonemapped images; they are written to a subfolder of the input folder
# TODO TOSET comment out the call below if the hdf5 files with tonemap already exist
in_dir = os.path.join(images_dir, final_hdf5_name)
out_dir = os.path.join(in_dir, final_hdf5_name + '_TONEMAP')
apply_tonemap.apply_tonemapping_to_directory(in_dir, out_dir)

# Tonemapped image files and depth HDF5 files, both sorted by filename
paths_images = sorted(glob.glob(os.path.join(out_dir, '*.color.hdf5')))
paths_depthEuclidean = sorted(glob.glob(os.path.join(images_dir, 'scene_' + cam_xx + '_geometry_hdf5', '*.depth_meters.hdf5')))

# Images and depth maps are paired by position, so their counts must match
if len(paths_depthEuclidean) != len(paths_images):
    raise ValueError("The number of image files and depth files do not match.")

# Camera orientations and positions (one file each)
paths_orientations = os.path.join(camera_detail_dir, 'camera_keyframe_orientations.hdf5')
paths_positions = os.path.join(camera_detail_dir, 'camera_keyframe_positions.hdf5')

In [None]:
# Load the depth maps of all the images found for the chosen scene / camera
depth_files = get_depths(paths_depthEuclidean, path_metadata_camera_parameters, scene)

# The frame IDs come from the depth filenames, so the extrinsics follow the same order as depth_files
frames_ids = extract_frames_ids(paths_depthEuclidean)
extrinsics_matrices = get_extrinsics(paths_positions, paths_orientations, frames_ids)
# Quick sanity check: show the [R|t] matrix of the first frame
print(extrinsics_matrices[0])

In [None]:
# One coloured point cloud per image, all expressed in world coordinates
point_clouds = generate_point_cloud(paths_images, depth_files, extrinsics_matrices, path_metadata_camera_parameters, scene)
# Combine the per-image point clouds into a single one
merged_point_clouds = merge_point_clouds(point_clouds)

In [None]:
# o3d.visualization.draw_geometries([merged_point_clouds]) # to visualise the point cloud
# o3d.io.write_point_cloud("point_cloud_DEPTHS_ai_007_008_cam_00.ply", merged_point_clouds) # to save the point cloud as .ply file

In [None]:
# # How to open a point cloud
# pcd = o3d.io.read_point_cloud("/local/home/gmarsich/Desktop/Thesis/0Code_playground/pointClouds_Hypersim/withDepths/point_cloud_DEPTHS.ply")
# o3d.visualization.draw_geometries([pcd])