In [1]:
# environment: thesisPlayground_pointClouds_env

In [2]:
import h5py
import numpy as np
import open3d as o3d
import pandas as pd
import glob
import os

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


# Point clouds from the per-pixel position files

In [16]:
def load_hdf5_file(file_path, dataset_key):
    """Read a single dataset out of an HDF5 file.

    Parameters:
        file_path (str): Path of the HDF5 file; it is opened read-only.
        dataset_key (str): Key of the dataset to read from the file.

    Returns:
        np.ndarray: The dataset contents, copied into memory.
    """
    with h5py.File(file_path, 'r') as hdf5_file:
        # Materialise the dataset while the file is still open.
        return np.array(hdf5_file[dataset_key])

def load_transformation_matrix_by_name(csv_file, row_name):
    """Load the 3x3 ``M_cam_from_uv`` matrix stored in one row of a metadata CSV.

    Parameters:
        csv_file (str): Path to the CSV file. The first column is searched for
            ``row_name``; the nine ``M_cam_from_uv_RC`` columns hold the entries.
        row_name (str): Value to look up in the first column. If several rows
            match, the first one is used.

    Returns:
        np.ndarray: ``(3, 3)`` float64 matrix whose entry ``[r, c]`` comes from
            column ``M_cam_from_uv_rc``.

    Raises:
        ValueError: If no row has ``row_name`` in its first column.
    """
    df = pd.read_csv(csv_file)
    matches = df[df.iloc[:, 0] == row_name]
    if matches.empty:
        raise ValueError(f"Row '{row_name}' not found in '{csv_file}'.")

    columns = ['M_cam_from_uv_00', 'M_cam_from_uv_01', 'M_cam_from_uv_02',
               'M_cam_from_uv_10', 'M_cam_from_uv_11', 'M_cam_from_uv_12',
               'M_cam_from_uv_20', 'M_cam_from_uv_21', 'M_cam_from_uv_22']

    # A single row of a mixed-type frame comes back with dtype=object; cast it
    # explicitly so the matrix behaves as a numeric array in later matmuls.
    matrix = matches.iloc[0][columns].values.astype(np.float64)
    return matrix.reshape((3, 3))

def create_point_cloud_from_image_and_position(image_data, position_data, transformation_matrix=None, scale_factor=1.0):
    """Build a coloured Open3D point cloud from an image and a per-pixel position map.

    Parameters:
        image_data (np.ndarray): ``(H, W)`` or ``(H, W, C)`` array of pixel
            values. Values are divided by 255 when their maximum exceeds 1.0.
            A single channel is replicated to RGB; channels beyond the third
            are dropped.
        position_data (np.ndarray): ``(H, W, 3)`` array with one 3D point per
            pixel. It is never modified.
        transformation_matrix (np.ndarray, optional): Matrix ``M`` applied to
            every scaled point ``p`` as ``p @ M.T``. Skipped when ``None``.
        scale_factor (float): Multiplier applied to the positions before the
            transformation.

    Returns:
        o3d.geometry.PointCloud: Cloud with ``H * W`` points, in row-major
            pixel order, and one colour per point.
    """
    # Ensure both data arrays have the same shape
    assert image_data.shape[:2] == position_data.shape[:2], "Image and position data must have the same shape"

    # Scale a float64 *copy* of the positions: reshaping yields a view, so an
    # in-place multiply on it would silently rescale the caller's array (and
    # would fail outright for integer-typed positions).
    positions = np.array(position_data, dtype=np.float64).reshape(-1, position_data.shape[2])
    positions *= scale_factor

    # Apply transformation matrix if provided
    if transformation_matrix is not None:
        positions = positions @ np.asarray(transformation_matrix, dtype=np.float64).T

    # Flatten the image to one colour row per pixel
    colors = np.array(image_data, dtype=np.float64)
    if colors.ndim == 3:
        colors = colors.reshape(-1, colors.shape[2])
    else:
        colors = colors.reshape(-1, 1)

    # Normalize the colors to [0, 1] if they are not already in this range
    # NOTE(review): float images whose values legitimately exceed 1.0 are also
    # divided by 255 by this heuristic -- confirm that suits the input data.
    if colors.size and colors.max() > 1.0:
        colors = colors / 255.0

    # Point-cloud colours are RGB triples: expand grey values, drop extra channels
    if colors.shape[1] == 1:
        colors = np.repeat(colors, 3, axis=1)
    elif colors.shape[1] > 3:
        colors = np.ascontiguousarray(colors[:, :3])

    # Create an Open3D PointCloud object and attach points and colours
    point_cloud = o3d.geometry.PointCloud()
    point_cloud.points = o3d.utility.Vector3dVector(positions)
    point_cloud.colors = o3d.utility.Vector3dVector(colors)

    return point_cloud

def generate_point_cloud(images, positions, csv_file, row_name, image_dataset_key='dataset', position_dataset_key='dataset', scale_factor=1.0):
    """Build one point cloud per (image file, position file) pair.

    Parameters:
        images (list): Paths of the HDF5 files holding the images.
        positions (list): Paths of the HDF5 files holding the per-pixel
            positions, in the same order as ``images``.
        csv_file (str): Metadata CSV handed to
            ``load_transformation_matrix_by_name``.
        row_name (str): Row of ``csv_file`` whose matrix is applied to the points.
        image_dataset_key (str): HDF5 dataset key read from each image file.
        position_dataset_key (str): HDF5 dataset key read from each position file.
        scale_factor (float): Multiplier applied to the positions.

    Returns:
        list: One Open3D PointCloud per pair; pairing stops at the shorter list.
    """
    # Load transformation matrix from the specified row in the CSV
    transformation_matrix = load_transformation_matrix_by_name(csv_file, row_name)

    point_clouds = []

    for image_file, position_file in zip(images, positions):
        # The key arguments used to be ignored in favour of a hard-coded
        # 'dataset'. They are honoured now, and default to 'dataset' so calls
        # that never passed a key keep reading the same dataset as before.
        image_data = load_hdf5_file(image_file, image_dataset_key)
        position_data = load_hdf5_file(position_file, position_dataset_key)

        # Create point cloud from image and position data
        point_cloud = create_point_cloud_from_image_and_position(image_data, position_data, transformation_matrix, scale_factor)

        # Add the point cloud to the list
        point_clouds.append(point_cloud)

    return point_clouds


def render_point_cloud(point_cloud, point_size=2):
    """Show a point cloud in a blocking Open3D visualizer window.

    Parameters:
        point_cloud (o3d.geometry.PointCloud): Geometry to display.
        point_size (float): Size of the rendered points. Defaults to 2, the
            value that was previously hard-coded.

    Raises:
        RuntimeError: If the visualizer window cannot be created.
    """
    # Create a visualization window
    vis = o3d.visualization.Visualizer()

    # create_window() reports failure through its boolean result; stop here
    # with a clear message instead of failing later on a missing render option.
    if not vis.create_window():
        raise RuntimeError("Open3D could not create a visualization window.")

    try:
        # Add the point cloud to the visualization window
        vis.add_geometry(point_cloud)

        # Set the render options
        vis.get_render_option().point_size = point_size

        # Render the visualization (blocks until the window is closed)
        vis.run()
    finally:
        # Release the window even if adding the geometry or rendering failed
        vis.destroy_window()


In [17]:
path_metadata = '/local/home/gmarsich/Desktop/Thesis/0Code_playground/pointClouds/pointClouds_ChatGPT/metadata_camera_parameters.csv'
base_path = '/local/home/gmarsich/data2TB/Hypersim/evermotion_dataset/scenes'
scene = 'ai_007_008'  # name of the scene, with format ai_VVV_NNN
cam_xx = 'cam_00'

# Get list of image and position HDF5 files
images_dir = os.path.join(base_path, scene, 'images')
image_files = sorted(glob.glob(os.path.join(images_dir, 'scene_' + cam_xx + '_final_hdf5', '*.color.hdf5')))
position_files = sorted(glob.glob(os.path.join(images_dir, 'scene_' + cam_xx + '_geometry_hdf5', '*.position.hdf5')))

# Two empty lists have equal length, so a wrong path would pass the count
# check below and only fail later when the first point cloud is indexed.
if not image_files:
    raise ValueError(f"No image files found for scene '{scene}' / camera '{cam_xx}' under '{images_dir}'.")

# Ensure the number of image and position files match
if len(image_files) != len(position_files):
    raise ValueError("The number of image files and position files do not match.")

In [18]:
# Build one point cloud per (colour, position) frame pair
point_clouds = generate_point_cloud(
    image_files,
    position_files,
    csv_file=path_metadata,
    row_name=scene,
    scale_factor=1,
)

# Show only the first cloud as a quick visual check
render_point_cloud(point_clouds[0])

# Using images, depths and intrinsics

In [19]:
def get_intrinsics_from_metadata_COMPLETE(scene_row, verbose=True):
    """
    Derive the intrinsic matrix from metadata, offsetting the principal point
    by the horizontal-shift and lens-shift columns.

    Parameters:
        scene_row (pd.Series): Metadata row for the scene. The columns read are
            settings_output_img_width, settings_output_img_height,
            camera_physical_focal_length, camera_physical_film_width,
            camera_physical_horizontal_shift and camera_physical_lens_shift.
        verbose (bool): When True (default) print the image size and the shift
            values that went into the principal point.

    Returns:
        np.ndarray: The 3x3 intrinsic matrix K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].
    """

    # Extract the necessary parameters from the metadata
    img_width = scene_row["settings_output_img_width"]
    img_height = scene_row["settings_output_img_height"]
    focal_length = scene_row["camera_physical_focal_length"]
    sensor_width = scene_row["camera_physical_film_width"]
    horizontal_shift = scene_row["camera_physical_horizontal_shift"]
    lens_shift = scene_row["camera_physical_lens_shift"]

    # Focal length in pixels: physical focal length scaled by pixels per unit of sensor width
    fx = focal_length / sensor_width * img_width
    fy = fx  # Assuming square pixels

    # Principal point: image centre plus the shift values.
    # NOTE(review): the shifts are added directly as pixel offsets; a value such
    # as -0.08 then moves cy by under a tenth of a pixel. Confirm the units of
    # these columns (they may be fractions of the frame rather than pixels).
    cx = img_width / 2 + horizontal_shift
    cy = img_height / 2 + lens_shift

    if verbose:
        # Labels match the ones printed by get_intrinsics_from_metadata_NoHorNoLens
        print('img_width: ', img_width)
        print('camera_physical_horizontal_shift:', horizontal_shift)
        print('img_height: ', img_height)
        print('camera_physical_lens_shift:', lens_shift)

    # Construct the intrinsic matrix
    intrinsic_matrix = np.array([
        [fx, 0, cx],
        [0, fy, cy],
        [0, 0, 1]
    ], dtype=float)

    return intrinsic_matrix



def get_intrinsics_with_tilt_shift_COMPLETE(path_metadata, scene_name):
    """
    Look up a scene in the metadata CSV and return its intrinsic matrix as
    computed by get_intrinsics_from_metadata_COMPLETE.

    Parameters:
        path_metadata (str): Path to the metadata CSV file.
        scene_name (str): Value to match in the 'scene_name' column.

    Returns:
        np.ndarray: The intrinsic matrix K of the first matching row.

    Raises:
        ValueError: If no row has the requested scene name.
    """
    metadata = pd.read_csv(path_metadata)

    # Boolean mask of the rows that belong to the requested scene
    is_requested_scene = metadata['scene_name'] == scene_name
    if not is_requested_scene.any():
        raise ValueError(f"Scene '{scene_name}' not found in the metadata.")

    # Only the first matching row is used
    first_match = metadata[is_requested_scene].iloc[0]
    return get_intrinsics_from_metadata_COMPLETE(first_match)



def load_hdf5_data(image_files, depth_files, dataset_key='dataset'):
    """
    Load images and depth data from multiple HDF5 files.

    Parameters:
        image_files (list): List of paths to the HDF5 files containing images.
        depth_files (list): List of paths to the HDF5 files containing depth maps,
            in the same order as image_files.
        dataset_key (str): Key of the dataset read from every file
            (defaults to 'dataset', the key that was previously hard-coded).

    Returns:
        images (list): List of images as NumPy arrays.
        depths (list): List of depth maps as NumPy arrays.

    Raises:
        ValueError: If the two lists do not have the same length.
    """
    image_files = list(image_files)
    depth_files = list(depth_files)

    # zip() would silently drop the unpaired tail of the longer list
    if len(image_files) != len(depth_files):
        raise ValueError(
            f"Got {len(image_files)} image files but {len(depth_files)} depth files; "
            "the two lists must pair up one-to-one."
        )

    images = []
    depths = []

    for image_file, depth_file in zip(image_files, depth_files):
        with h5py.File(image_file, 'r') as f:
            images.append(np.array(f[dataset_key]))

        with h5py.File(depth_file, 'r') as f:
            depths.append(np.array(f[dataset_key]))

    return images, depths



def generate_point_clouds(images, depths, intrinsic_matrix, extrinsic_matrices=None): # extrinsic_matrices is not used yet; it is reserved for merging several views
    """
    Back-project depth maps into 3D point clouds in the camera coordinate system.

    Parameters:
        images (list): List of images, paired with depths. They only determine
            how many pairs are processed; the pixel values are not used yet, so
            the returned clouds carry no colours.
        depths (list): List of (H, W) or (H, W, 1) depth maps.
        intrinsic_matrix (np.ndarray): 3x3 intrinsic matrix of the camera.
        extrinsic_matrices (list, optional): Currently ignored.

    Returns:
        point_clouds (list): List of Open3D PointCloud objects, one per pair,
            each holding the pixels whose depth is finite and positive.
    """
    # The intrinsics are the same for every view, so read them once
    fx = intrinsic_matrix[0, 0]
    fy = intrinsic_matrix[1, 1]
    cx = intrinsic_matrix[0, 2]
    cy = intrinsic_matrix[1, 2]

    point_clouds = []

    for _image, depth in zip(images, depths):

        # Accept depth maps stored with a trailing singleton channel
        depth = np.asarray(depth, dtype=np.float64)
        if depth.ndim == 3 and depth.shape[2] == 1:
            depth = depth[:, :, 0]

        # Get the image dimensions
        h, w = depth.shape

        # Create a mesh grid of pixel coordinates
        u, v = np.meshgrid(np.arange(w), np.arange(h))

        # Keep only usable depths: `> 0` already rejects NaN and non-positive
        # values, but +inf passes it and would yield points at infinity.
        valid = np.isfinite(depth) & (depth > 0)
        u = u[valid]
        v = v[valid]
        z = depth[valid]

        # Convert pixel coordinates to normalized image coordinates; create 3D points in the camera coordinate system
        # NOTE(review): this treats `depth` as planar depth along the optical
        # axis. If the files store distance along the viewing ray instead,
        # convert it first -- confirm against the dataset documentation.
        x = (u - cx) / fx * z
        y = (v - cy) / fy * z

        # Stack the coordinates into a single (N, 3) array
        points = np.column_stack((x, y, z))

        # When several views are merged, `points` should be transformed with
        # the view's extrinsic matrix (in homogeneous coordinates) at this point.

        # Create Open3D PointCloud object and add it to the list
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(points)
        point_clouds.append(pcd)

    return point_clouds



def render_point_cloud(point_cloud, point_size=2):
    """Show a point cloud in a blocking Open3D visualizer window.

    NOTE(review): a function with this same name is also defined earlier in
    the notebook; running this cell re-binds the name.

    Parameters:
        point_cloud (o3d.geometry.PointCloud): Geometry to display.
        point_size (float): Size of the rendered points. Defaults to 2, the
            value that was previously hard-coded.

    Raises:
        RuntimeError: If the visualizer window cannot be created.
    """
    # Create a visualization window
    vis = o3d.visualization.Visualizer()

    # create_window() reports failure through its boolean result; stop here
    # with a clear message instead of failing later on a missing render option.
    if not vis.create_window():
        raise RuntimeError("Open3D could not create a visualization window.")

    try:
        # Add the point cloud to the visualization window
        vis.add_geometry(point_cloud)

        # Set the render options
        vis.get_render_option().point_size = point_size

        # Render the visualization (blocks until the window is closed)
        vis.run()
    finally:
        # Release the window even if adding the geometry or rendering failed
        vis.destroy_window()


In [20]:
path_metadata = '/local/home/gmarsich/Desktop/Thesis/0Code_playground/pointClouds/pointClouds_ChatGPT/metadata_camera_parameters.csv'
base_path = '/local/home/gmarsich/data2TB/Hypersim/evermotion_dataset/scenes'
scene = 'ai_007_008'  # name of the scene, with format ai_VVV_NNN
cam_xx = 'cam_00'

# Get list of image and depth HDF5 files
# (in this section `position_files` holds the depth-map paths, not position maps)
images_dir = os.path.join(base_path, scene, 'images')
image_files = sorted(glob.glob(os.path.join(images_dir, 'scene_' + cam_xx + '_final_hdf5', '*.color.hdf5')))
position_files = sorted(glob.glob(os.path.join(images_dir, 'scene_' + cam_xx + '_geometry_hdf5', '*.depth_meters.hdf5')))

# Two empty lists have equal length, so a wrong path would pass the count
# check below and only fail later when the first point cloud is indexed.
if not image_files:
    raise ValueError(f"No image files found for scene '{scene}' / camera '{cam_xx}' under '{images_dir}'.")

# Ensure the number of image and depth files match
if len(image_files) != len(position_files):
    raise ValueError("The number of image files and depth files do not match.")

## With everything (horizontal shift and lens shift included)

In [21]:
# Get the intrinsic matrix for the scene
intrinsics = get_intrinsics_with_tilt_shift_COMPLETE(path_metadata, scene)
print("Intrinsic Matrix for the scene:")
print(intrinsics)

# Load images and depth data under their own names. Rebinding image_files /
# position_files would replace the path lists with arrays, so re-running this
# cell (or any later cell that loads from those lists) would hand arrays to
# h5py.File instead of paths.
images, depths = load_hdf5_data(image_files, position_files)

# Generate point clouds for each view
point_clouds = generate_point_clouds(images, depths, intrinsics, 1) # here extrinsics_matrices = 1 because it is not used, the value 1 is random

cloud = point_clouds[0]
print(cloud)

render_point_cloud(cloud)

img_width:  1024.0
camera_physical_horizontal_shift": 0.0
img_height :  768.0
camera_physical_lens_shift": -0.0799999982118606
Intrinsic Matrix for the scene:
[[1.43810735e+03 0.00000000e+00 5.12000000e+02]
 [0.00000000e+00 1.43810735e+03 3.83920000e+02]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00]]
PointCloud with 786431 points.


## Variant: principal point without `camera_physical_horizontal_shift` and `camera_physical_lens_shift`

In [None]:
def get_intrinsics_from_metadata_NoHorNoLens(scene_row, verbose=True):
    """
    Derive the intrinsic matrix from metadata with the principal point at the
    image centre; the horizontal-shift and lens-shift columns are not applied.

    Parameters:
        scene_row (pd.Series): Metadata row for the scene. The columns read are
            settings_output_img_width, settings_output_img_height,
            camera_physical_focal_length and camera_physical_film_width; the two
            shift columns are read only to be printed when verbose is True.
        verbose (bool): When True (default) print the image size and the
            (unused) shift values.

    Returns:
        np.ndarray: The 3x3 intrinsic matrix K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].
    """

    # Extract the necessary parameters from the metadata
    img_width = scene_row["settings_output_img_width"]
    img_height = scene_row["settings_output_img_height"]
    focal_length = scene_row["camera_physical_focal_length"]
    sensor_width = scene_row["camera_physical_film_width"]

    # Focal length in pixels: physical focal length scaled by pixels per unit of sensor width
    fx = focal_length / sensor_width * img_width
    fy = fx  # Assuming square pixels

    # Principal point (cx, cy) at the image centre -- no shift is added here
    cx = img_width / 2
    cy = img_height / 2

    if verbose:
        # The shift values are shown for reference only; they do not enter K
        print('img_width: ', img_width)
        print('camera_physical_horizontal_shift:', scene_row["camera_physical_horizontal_shift"])
        print('img_height: ', img_height)
        print('camera_physical_lens_shift:', scene_row["camera_physical_lens_shift"])

    # Construct the intrinsic matrix
    intrinsic_matrix = np.array([
        [fx, 0, cx],
        [0, fy, cy],
        [0, 0, 1]
    ], dtype=float)

    return intrinsic_matrix



def get_intrinsics_with_tilt_shift_NoHorNoLens(path_metadata, scene_name):
    """
    Look up a scene in the metadata CSV and return its intrinsic matrix as
    computed by get_intrinsics_from_metadata_NoHorNoLens.

    Parameters:
        path_metadata (str): Path to the metadata CSV file.
        scene_name (str): Value to match in the 'scene_name' column.

    Returns:
        np.ndarray: The intrinsic matrix K of the first matching row.

    Raises:
        ValueError: If no row has the requested scene name.
    """
    metadata = pd.read_csv(path_metadata)

    # Rows whose 'scene_name' equals the requested scene
    candidates = metadata.loc[metadata['scene_name'] == scene_name]
    if len(candidates) == 0:
        raise ValueError(f"Scene '{scene_name}' not found in the metadata.")

    # Only the first matching row is used
    return get_intrinsics_from_metadata_NoHorNoLens(candidates.iloc[0])


In [None]:
# Get the intrinsic matrix for the scene
intrinsics = get_intrinsics_with_tilt_shift_NoHorNoLens(path_metadata, scene)
print("Intrinsic Matrix for the scene:")
print(intrinsics)

# Re-list the HDF5 paths here so this cell works no matter what image_files /
# position_files are currently bound to: load_hdf5_data needs paths, and
# assigning its result back to those names would replace the paths with arrays
# and make the cell fail when run again.
frames_dir = os.path.join(base_path, scene, 'images')
color_paths = sorted(glob.glob(os.path.join(frames_dir, 'scene_' + cam_xx + '_final_hdf5', '*.color.hdf5')))
depth_paths = sorted(glob.glob(os.path.join(frames_dir, 'scene_' + cam_xx + '_geometry_hdf5', '*.depth_meters.hdf5')))

# Load images and depth data
images, depths = load_hdf5_data(color_paths, depth_paths)

# Generate point clouds for each view
point_clouds = generate_point_clouds(images, depths, intrinsics, 1) # here extrinsics_matrices = 1 because it is not used, the value 1 is random

cloud = point_clouds[0]
print(cloud)

render_point_cloud(cloud)