In [1]:
# environment: thesisPlayground_pointClouds_env

In [2]:
import h5py
import numpy as np
import open3d as o3d
import pandas as pd
import glob
import os

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


# Using depths

Hints on how to convert Hypersim's `depth_meters` values (Euclidean distance from the camera centre) into planar depth come from:
- https://github.com/apple/ml-hypersim/issues/9

In [3]:
# Inputs: metadata CSV, dataset root, and the scene / camera to process
path_metadata_camera_parameters = '/local/home/gmarsich/Desktop/Thesis/0Code_playground/pointClouds_Hypersim/withDepths/metadata_camera_parameters.csv'
base_path = '/local/home/gmarsich/data2TB/Hypersim/evermotion_dataset/scenes'
scene = 'ai_001_001'  # scene name, formatted as ai_VVV_NNN
cam_xx = 'cam_00'

# Directories shared by the paths built below
_images_dir = os.path.join(base_path, scene, 'images')
_camera_detail_dir = os.path.join(base_path, scene, '_detail', cam_xx)

# Sorted lists of the per-frame colour and depth HDF5 files of this camera
paths_images = sorted(
    glob.glob(os.path.join(_images_dir, 'scene_' + cam_xx + '_final_hdf5', '*.color.hdf5'))
)
paths_depthEuclidean = sorted(
    glob.glob(os.path.join(_images_dir, 'scene_' + cam_xx + '_geometry_hdf5', '*.depth_meters.hdf5'))
)

# HDF5 files holding the camera keyframe orientations and positions
path_orientations = os.path.join(_camera_detail_dir, 'camera_keyframe_orientations.hdf5')
path_positions = os.path.join(_camera_detail_dir, 'camera_keyframe_positions.hdf5')


### Computations

In [4]:
from decimal import Decimal, getcontext

def get_metadata(path_metadata_camera_parameters, scene):
    """
    Return the rows of the camera-parameters CSV that belong to one scene.

    Parameters:
        path_metadata_camera_parameters (str): Path to the metadata CSV file; it must contain a 'scene_name' column.
        scene (str): Scene name to look up.

    Returns:
        pd.DataFrame: The rows whose 'scene_name' equals `scene` (empty when nothing matches).
    """
    metadata = pd.read_csv(path_metadata_camera_parameters)

    # Boolean mask selecting the requested scene
    is_requested_scene = metadata['scene_name'] == scene

    return metadata[is_requested_scene]

# Set the decimal context precision to 50 significant digits.
# NOTE: the context precision governs Decimal arithmetic; constructing a Decimal
# from a float (as done below) is an exact conversion and is not rounded to it.
getcontext().prec = 50

scene_row = get_metadata(path_metadata_camera_parameters, scene)

# Scale factor of the first matching row, read from the
# 'settings_units_info_meters_scale' column (raises IndexError if no row matched)
column_value = scene_row['settings_units_info_meters_scale'].values[0]

# Convert to Decimal so that every digit of the stored binary float is visible
decimal_value = Decimal(column_value)

# Print the exact value; it can be copied as `factor_assets_to_meters`
print(decimal_value)


0.0253999996930359996094583863168736570514738559722900390625


### Real code

In [5]:
def get_depths_oneImage(path_file_depths, intWidth = 1024, intHeight = 768, fltFocal = 886.81, factor_assets_to_meters = 0.025399999693035999609458386316873657):
    """
    Load one depth HDF5 file and convert it to depth along the optical axis.

    The stored values are treated as distances measured along each pixel's
    viewing ray. They are rescaled to distances from the image plane (planar
    depth) and then divided by `factor_assets_to_meters`.

    Parameters:
        path_file_depths (str): Path to an HDF5 file whose 'dataset' entry is expected to have shape (intHeight, intWidth).
        intWidth (int): Image width in pixels.
        intHeight (int): Image height in pixels.
        fltFocal (float): Focal length in pixels.
        factor_assets_to_meters (float): Divisor applied to the planar depth.

    Returns:
        np.ndarray: Planar depth divided by `factor_assets_to_meters`, one value per pixel.
    """
    with h5py.File(path_file_depths, 'r') as hdf5_file:
        ray_distances = hdf5_file['dataset'][:]

    # Pixel-centre coordinates on the image plane, with the origin at the image centre
    columns = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth)
    rows = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, intHeight)
    grid_x, grid_y = np.meshgrid(columns, rows)
    grid_z = np.full([intHeight, intWidth], fltFocal, np.float32)
    image_plane = np.stack([grid_x.astype(np.float32), grid_y.astype(np.float32), grid_z], axis=2)

    # fltFocal / |ray| is the cosine between the viewing ray and the optical axis
    ray_lengths = np.linalg.norm(image_plane, 2, 2)
    planar_depth_meters = ray_distances / ray_lengths * fltFocal

    return planar_depth_meters / factor_assets_to_meters
    

def get_depths(path_list):
    '''Load every depth file in path_list; the i-th result represents the i-th image.'''

    return [get_depths_oneImage(depth_path) for depth_path in path_list]



In [6]:
def generate_point_cloud_SIMPLE(files_depths, width_pixels=1024, height_pixels=768, fx=886.81, fy=None):
    """
    Back-project planar depth maps into camera-space point clouds.

    Each pixel is turned into a 3D point in the camera frame used by this
    notebook: the camera looks along -z, x grows with the column index and y
    decreases with the row index. No extrinsics are applied.

    Pixel coordinates are taken at pixel centres (column + 0.5, row + 0.5),
    measured from the image centre, which is the same grid that
    get_depths_oneImage uses to build its viewing rays.

    Parameters:
        files_depths (iterable of array-like): Depth maps of shape (height_pixels, width_pixels).
        width_pixels (int): Image width in pixels.
        height_pixels (int): Image height in pixels.
        fx (float): Horizontal focal length in pixels.
        fy (float or None): Vertical focal length in pixels; defaults to `fx`.

    Returns:
        list of o3d.geometry.PointCloud: One cloud per depth map, with
        height_pixels * width_pixels points in row-major pixel order.

    Raises:
        ValueError: If a depth map does not have shape (height_pixels, width_pixels).
    """
    if fy is None:
        fy = fx

    # Per-column / per-row ray slopes; computed once because they do not depend on the depth map
    ray_x = ((np.arange(width_pixels) + 0.5 - width_pixels / 2) / fx)[np.newaxis, :]
    ray_y = (-(np.arange(height_pixels) + 0.5 - height_pixels / 2) / fy)[:, np.newaxis]

    point_clouds = []

    for depths in files_depths:
        depths = np.asarray(depths, dtype=np.float64)
        if depths.shape != (height_pixels, width_pixels):
            raise ValueError(
                f"Depth map has shape {depths.shape}, expected {(height_pixels, width_pixels)}."
            )

        # z is negated because the camera looks along -z; x and y scale with the (positive) depth
        cam_coo = np.stack([ray_x * depths, ray_y * depths, -depths], axis=-1).reshape(-1, 3)

        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(cam_coo)
        point_clouds.append(pcd)

    return point_clouds


In [7]:
def extract_frames_ids(paths_filenames):
    """
    Extracts frame IDs from a list of filenames.

    The frame ID is the second dot-separated field of the filename, e.g.
    'frame.0042.depth_meters.hdf5' -> 42.

    Parameters:
        paths_filenames (list): List of file paths. Last component of a path is the filename.

    Returns:
        list: List of extracted frame IDs (int), in the same order as the input.

    Raises:
        IndexError: If a filename contains no '.' separator.
        ValueError: If the second field of a filename is not an integer.
    """

    frame_ids = []
    for path_filename in paths_filenames:
        # os.path.basename honours the platform's path separators, so paths
        # produced by glob/os.path.join are split correctly on every OS
        filename = os.path.basename(path_filename)
        frame_ids.append(int(filename.split('.')[1]))

    return frame_ids


def get_extrinsics_oneImage(path_position, path_orientation, frame_id):
    """
    Build the camera-to-world matrix [R|t] of a single frame.

    Parameters:
        path_position (str): Path to the camera positions HDF5 file.
        path_orientation (str): Path to the camera orientations HDF5 file.
        frame_id (int): Index of the frame inside both 'dataset' arrays.

    Returns:
        np.ndarray: The orientation entry of the frame with its position appended as a last column.
    """

    # Read both keyframe arrays completely
    with h5py.File(path_position, "r") as positions_file:
        all_positions = positions_file["dataset"][:]

    with h5py.File(path_orientation, "r") as orientations_file:
        all_orientations = orientations_file["dataset"][:]

    # Pick the entries of the requested frame
    position = all_positions[frame_id]
    rotation = all_orientations[frame_id]

    # Append the position as a column to the right of the rotation
    translation_column = position.reshape(3, 1)
    return np.hstack((rotation, translation_column))


def get_extrinsics(path_positions, path_orientations, frames_ids):
    """
    Build the camera-to-world matrix [R|t] of every requested frame.

    Both HDF5 files are opened and read once, and all frames are then sliced
    from the in-memory arrays, instead of reloading the files for each frame.

    Parameters:
        path_positions (str): Path to the camera positions HDF5 file.
        path_orientations (str): Path to the camera orientations HDF5 file.
        frames_ids (list of int): Frame IDs indexing both 'dataset' arrays.

    Returns:
        list of np.ndarray: One [R|t] matrix per entry of `frames_ids`, in the same order.
    """
    # Nothing requested: return early without touching the files
    if len(frames_ids) == 0:
        return []

    with h5py.File(path_positions, "r") as f:
        camera_positions = f["dataset"][:]

    with h5py.File(path_orientations, "r") as f:
        camera_orientations = f["dataset"][:]

    # [R|t]: rotation of the frame with its position appended as a last column
    return [
        np.hstack((camera_orientations[frame_id], camera_positions[frame_id].reshape(3, 1)))
        for frame_id in frames_ids
    ]

In [8]:
def generate_point_cloud(files_depths, extrinsics_matrices, width_pixels=1024, height_pixels=768, fx=886.81, fy=None):
    """
    Back-project planar depth maps into world-space point clouds.

    Each pixel is first turned into a 3D point in the camera frame used by
    this notebook (camera looks along -z, x grows with the column index, y
    decreases with the row index) and then mapped to world coordinates with
    the frame's [R|t] matrix: world = R @ cam + t.

    Pixel coordinates are taken at pixel centres (column + 0.5, row + 0.5),
    measured from the image centre, which is the same grid that
    get_depths_oneImage uses to build its viewing rays.

    Parameters:
        files_depths (sequence of array-like): Depth maps of shape (height_pixels, width_pixels).
        extrinsics_matrices (sequence of array-like): One camera-to-world matrix per depth map;
            the top-left 3x3 block is used as R and the first three rows of the fourth column as t.
        width_pixels (int): Image width in pixels.
        height_pixels (int): Image height in pixels.
        fx (float): Horizontal focal length in pixels.
        fy (float or None): Vertical focal length in pixels; defaults to `fx`.

    Returns:
        list of o3d.geometry.PointCloud: One cloud per depth map, with
        height_pixels * width_pixels points in row-major pixel order.

    Raises:
        ValueError: If there are fewer extrinsic matrices than depth maps, or a
            depth map does not have shape (height_pixels, width_pixels).
    """
    if len(extrinsics_matrices) < len(files_depths):
        raise ValueError(
            f"Got {len(files_depths)} depth maps but only {len(extrinsics_matrices)} extrinsic matrices."
        )

    if fy is None:
        fy = fx

    # Per-column / per-row ray slopes; computed once because they do not depend on the frame
    ray_x = ((np.arange(width_pixels) + 0.5 - width_pixels / 2) / fx)[np.newaxis, :]
    ray_y = (-(np.arange(height_pixels) + 0.5 - height_pixels / 2) / fy)[:, np.newaxis]

    point_clouds = []

    for i in range(len(files_depths)):
        depths = np.asarray(files_depths[i], dtype=np.float64)
        if depths.shape != (height_pixels, width_pixels):
            raise ValueError(
                f"Depth map {i} has shape {depths.shape}, expected {(height_pixels, width_pixels)}."
            )

        # z is negated because the camera looks along -z; x and y scale with the (positive) depth
        cam_coo = np.stack([ray_x * depths, ray_y * depths, -depths], axis=-1).reshape(-1, 3)

        # Row-vector form of R @ p + t, applied to all points at once
        extrinsic = np.asarray(extrinsics_matrices[i], dtype=np.float64)
        rotation = extrinsic[:3, :3]
        translation = extrinsic[:3, 3]
        world_coo = cam_coo @ rotation.T + translation

        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(world_coo)
        point_clouds.append(pcd)

    return point_clouds


In [9]:
# Converted depth map of every depth file found for the selected camera
depth_files = get_depths(paths_depthEuclidean)
# Frame IDs parsed from the same filenames, so they line up with depth_files
frames_ids = extract_frames_ids(paths_depthEuclidean)

# One [R|t] matrix per frame ID, in the same order as depth_files
extrinsics_matrices = get_extrinsics(path_positions, path_orientations, frames_ids)
# Sanity check: show the matrix of the first frame
print(extrinsics_matrices[0])

[[ 7.41193831e-01 -3.51180434e-01  5.72105408e-01  1.03228874e+02]
 [ 5.79402149e-01 -9.57124457e-02 -8.09399724e-01 -1.17227142e+02]
 [ 3.39002311e-01  9.31402802e-01  1.32532686e-01  6.23017311e+01]]


In [10]:
# Back-project every depth map with its extrinsic matrix: one point cloud per frame
point_clouds = generate_point_cloud(depth_files, extrinsics_matrices)

In [12]:
def merge_point_clouds(point_clouds):
    """
    Combine several Open3D point clouds into one.

    Parameters:
        point_clouds (list of o3d.geometry.PointCloud): The clouds to combine, in order.

    Returns:
        o3d.geometry.PointCloud: A new cloud to which every input cloud has been added with `+=`.
    """
    # Start from an empty cloud so that none of the inputs is modified
    combined = o3d.geometry.PointCloud()

    for cloud in point_clouds:
        combined += cloud

    return combined


In [13]:
# Single cloud containing the points of all per-frame clouds
merged = merge_point_clouds(point_clouds)

In [14]:
# Display the merged cloud with Open3D's viewer
o3d.visualization.draw_geometries([merged])

In [15]:




# # Ensure the number of image and depth files match
# if len(paths_images) != len(paths_depthEuclidean):
#     raise ValueError("The number of image files and depth files do not match.")