# Ground Truth generation

# Table of contents
1) [Load stats](#load-stats)
2) [Show histograms and barplots](#show-histograms-and-barplots)
3) [Pie on heights](#pie-on-heights)

### Dependencies and general utils

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import open3d as o3d
import laspy
import pdal
import json
from tqdm import tqdm

### Generation

#### Utils

In [2]:
def convert_las_to_laz(in_las, out_laz, verbose=True):
    """
    Re-encode a LAS file as a LAZ file through a two-stage PDAL pipeline.

    The file is read with `readers.las` and written back with `writers.las`
    using `laszip` compression. No `extra_dims` option is passed to the
    writer, so PDAL's default for that option applies.

    Parameters:
    - in_las: str, path to the input .las file
    - out_laz: str, path to the output .laz file
    - verbose: bool, whether to print a success message

    Returns:
    - None
    """
    reader_stage = {
        "type": "readers.las",
        "filename": in_las,
    }
    writer_stage = {
        "type": "writers.las",
        "filename": out_laz,
        "compression": "laszip",  # produce a compressed .laz output
    }

    # PDAL takes the pipeline description as a JSON string
    pipeline_spec = json.dumps({"pipeline": [reader_stage, writer_stage]})
    pdal.Pipeline(pipeline_spec).execute()

    if verbose:
        print(f"LAZ file saved at {out_laz}")

def convert_pcd_to_laz(in_pcd, out_laz, verbose=True):
    """
    Convert a PCD file to a compressed LAZ file through a PDAL pipeline.

    Coordinates are written exactly as they are stored in the PCD file: no
    reprojection is applied. The previous version listed a
    `filters.reprojection` stage (EPSG:4326 -> EPSG:2056) *after* the writer,
    where it could not influence the file that had already been written.
    It is removed rather than moved in front of the writer because the
    notebook later matches these points against the source tile by
    coordinate, which requires the coordinates to stay untouched.

    Parameters:
    - in_pcd: str, path to the input .pcd file
    - out_laz: str, path to the output .laz file
    - verbose: bool, whether to print a success message

    Returns:
    - None
    """
    pipeline_json = {
        "pipeline": [
            in_pcd,  # bare filename: PDAL picks the reader from the extension
            {
                "type": "writers.las",
                "filename": out_laz,
                "compression": "laszip"  # Ensures .laz compression
            }
        ]
    }

    # Run the PDAL pipeline
    pipeline = pdal.Pipeline(json.dumps(pipeline_json))
    pipeline.execute()

    if verbose:
        print(f"LAZ file saved in {out_laz}")

def convert_laz_to_pcd(in_laz, out_pcd, verbose=True):
    """
    Dump a LAS/LAZ file to an ASCII PCD (version 0.7) file.

    The scaled x/y/z coordinates become the first three fields; every other
    dimension of the point format (except the raw X/Y/Z ones) is appended
    as an additional field. All fields are declared as 4-byte floats and
    written with six decimals.

    Parameters:
    - in_laz: str, path to the input .las/.laz file
    - out_pcd: str, path to the output .pcd file
    - verbose: bool, whether to print a success message

    Returns:
    - None
    """
    cloud = laspy.read(in_laz)

    # Scaled coordinates, one row per point
    xyz = np.vstack((cloud.x, cloud.y, cloud.z)).T

    # Remaining dimensions, in point-format order (raw X/Y/Z are skipped)
    extra = {
        dim.name: getattr(cloud, dim.name)
        for dim in cloud.point_format.dimensions
        if dim.name not in ['X', 'Y', 'Z']
    }

    field_names = ["x", "y", "z"] + list(extra)
    n_fields = len(field_names)
    n_points = xyz.shape[0]

    # One table holding coordinates followed by the extra columns
    table = np.column_stack([xyz] + list(extra.values()))

    # NOTE(review): every field is declared as a 4-byte float; readers that
    # honour this may lose precision on large coordinates - TODO confirm.
    header_lines = [
        "VERSION 0.7",
        "FIELDS " + " ".join(field_names),
        "SIZE " + " ".join(["4"] * n_fields),
        "TYPE " + " ".join(["F"] * n_fields),
        "COUNT " + " ".join(["1"] * n_fields),
        f"WIDTH {n_points}",
        "HEIGHT 1",
        "VIEWPOINT 0 0 0 1 0 0 0",
        f"POINTS {n_points}",
        "DATA ascii",
    ]

    with open(out_pcd, "w") as pcd_file:
        pcd_file.write("\n".join(header_lines) + "\n")
        # Point rows: space-separated, six decimals per value
        np.savetxt(pcd_file, table, fmt=" ".join(["%.6f"] * n_fields))

    if verbose:
        print(f"PCD file saved in {out_pcd}")


# convert_pcd_to_laz(r"C:\temp_stockage_pdm\PDM_repos\Data_samples_cat\Single\color_grp_000020.pcd",r"C:\temp_stockage_pdm\PDM_repos\Data_samples_cat\Single\color_grp_000020.laz")

#### Generate

In [8]:
# Loading sources
# NOTE(review): the first two paths are absolute and machine-specific, the third
# is relative to the notebook - adjust all three before running elsewhere.
# Folder holding the per-instance ground-truth .laz files (one file per instance)
src_folder_instances = r"D:\PDM_repo\Github\PDM\data\full_dataset\selection\clusters_4\cluster_4\gt"
# Full tile that receives the gt_semantic / gt_instance columns
src_original_prediction = r"D:\PDM_repo\Github\PDM\data\full_dataset\selection\clusters_4\cluster_4\color_grp_full_tile_331.laz"
# Folder where the labelled tiles are written
src_folder_result = r"..\data\full_dataset\selection\clusters_4\gt"

In [9]:
# Export every .laz instance as a .pcd file (in a 'pcd' sub-folder) for manual cleaning
files = [entry for entry in os.listdir(src_folder_instances) if entry.endswith('.laz')]
src_pcd_loc = os.path.join(src_folder_instances, 'pcd')
os.makedirs(src_pcd_loc, exist_ok=True)
for file in tqdm(files, total=len(files)):
    file_out = file.split('.laz')[0] + '.pcd'
    laz_path = os.path.join(src_folder_instances, file)
    pcd_path = os.path.join(src_pcd_loc, file_out)
    convert_laz_to_pcd(laz_path, pcd_path, verbose=False)

100%|██████████| 5/5 [00:00<00:00, 68.04it/s]


In [10]:
# Once cleaned, generate from pcd to laz in new folder
# The cleaned .pcd files are read from <instances>/pcd/modified_samples and the
# resulting .laz files are written next to them.
# The guard keeps this cell idempotent: re-running it no longer appends the
# sub-folder a second time to src_folder_instances.
modified_subdir = os.path.join('pcd', 'modified_samples')
if not os.path.normpath(src_folder_instances).endswith(modified_subdir):
    src_folder_instances = os.path.join(src_folder_instances, modified_subdir)
files = [x for x in os.listdir(src_folder_instances) if x.endswith('.pcd')]
for file in tqdm(files, total=len(files)):
    src_in = os.path.join(src_folder_instances, file)
    src_out = os.path.join(src_folder_instances, file.split('.pcd')[0] + '.laz')
    convert_pcd_to_laz(src_in, src_out, verbose=False)

100%|██████████| 5/5 [00:00<00:00, 32.82it/s]


In [None]:
# Read the full tile and declare the two ground-truth columns as uint16 extra dimensions
full_tile = laspy.read(src_original_prediction)
for gt_column in ('gt_semantic', 'gt_instance'):
    full_tile.add_extra_dim(laspy.ExtraBytesParams(gt_column, type="uint16"))

In [None]:
# Loop on gt instances and set the correct values in the full tile
# A tile point belongs to an instance when its coordinates, rounded to
# `rounding` decimals, match those of a point in the instance file.
list_instances_src = [x for x in os.listdir(src_folder_instances) if x.endswith('.laz')]
rounding = 2

# The rounded tile coordinates do not depend on the instance: compute them once
tile_x = np.round(full_tile.x, rounding)
tile_y = np.round(full_tile.y, rounding)
tile_z = np.round(full_tile.z, rounding)

semantic_layer = np.zeros(len(full_tile))
instance_layer = np.zeros(len(full_tile))
for id_instance, instance_src in tqdm(enumerate(list_instances_src), total=len(list_instances_src)):
    instance = laspy.read(os.path.join(src_folder_instances, instance_src))
    # A set gives constant-time lookups; with a list every tile point
    # triggered a scan over all the points of the instance.
    coords = set(zip(np.round(instance.x, rounding), np.round(instance.y, rounding), np.round(instance.z, rounding)))
    mask = np.fromiter(
        (point in coords for point in zip(tile_x, tile_y, tile_z)),
        dtype=bool,
        count=len(full_tile),
    )
    semantic_layer[mask] = 1
    # Instance ids start at 1; 0 is left for points that match no instance
    instance_layer[mask] = id_instance + 1

setattr(full_tile, 'gt_semantic', semantic_layer)
setattr(full_tile, 'gt_instance', instance_layer)

# save file
os.makedirs(src_folder_result, exist_ok=True)  # the result folder may not exist yet
new_file = os.path.join(src_folder_result, os.path.basename(src_original_prediction).split('.laz')[0] + '_gt.laz')
full_tile.write(new_file)

100%|██████████| 5/5 [00:09<00:00,  1.97s/it]


### Addition

In [None]:
# Sources for adding a new round of instances to an already labelled tile
src_folder_instances = r"D:\PDM_repo\Github\PDM\data\full_dataset\selection\clusters_4\cluster_2\gt\round2"
src_target = r"D:\PDM_repo\Github\PDM\data\full_dataset\selection\clusters_4\gt\color_grp_full_tile_331_gt.laz"
tile_target = laspy.read(src_target)

# The target tile must already carry both ground-truth columns
for required_dim in ("gt_semantic", "gt_instance"):
    assert required_dim in tile_target.point_format.dimension_names

In [46]:
# Export every .laz instance as a .pcd file (in a 'pcd' sub-folder) for manual cleaning
files = [entry for entry in os.listdir(src_folder_instances) if entry.endswith('.laz')]
src_pcd_loc = os.path.join(src_folder_instances, 'pcd')
os.makedirs(src_pcd_loc, exist_ok=True)
for file in tqdm(files, total=len(files)):
    file_out = file.split('.laz')[0] + '.pcd'
    laz_path = os.path.join(src_folder_instances, file)
    pcd_path = os.path.join(src_pcd_loc, file_out)
    convert_laz_to_pcd(laz_path, pcd_path, verbose=False)

100%|██████████| 10/10 [00:00<00:00, 40.47it/s]


In [55]:
# Once cleaned, generate from pcd to laz in new folder
# The cleaned .pcd files are read from <instances>/pcd/modified_samples and the
# resulting .laz files are written next to them.
# The guard keeps this cell idempotent: re-running it no longer appends the
# sub-folder a second time to src_folder_instances.
modified_subdir = os.path.join('pcd', 'modified_samples')
if not os.path.normpath(src_folder_instances).endswith(modified_subdir):
    src_folder_instances = os.path.join(src_folder_instances, modified_subdir)
files = [x for x in os.listdir(src_folder_instances) if x.endswith('.pcd')]
for file in tqdm(files, total=len(files)):
    src_in = os.path.join(src_folder_instances, file)
    src_out = os.path.join(src_folder_instances, file.split('.pcd')[0] + '.laz')
    convert_pcd_to_laz(src_in, src_out, verbose=False)

100%|██████████| 9/9 [00:00<00:00, 30.12it/s]


In [None]:
# Loop on gt instances and set the correct values in the full tile
# Existing labels are kept; each new instance file receives the next free
# instance id. Points are matched on coordinates rounded to `rounding` decimals.
list_instances_src = [x for x in os.listdir(src_folder_instances) if x.endswith('.laz')]
rounding = 2
semantic_layer = np.array(tile_target.gt_semantic)
instance_layer = np.array(tile_target.gt_instance)
instance_val = np.max(tile_target.gt_instance) + 1  # first id not used yet

# The rounded tile coordinates do not depend on the instance: compute them once
tile_x = np.round(tile_target.x, rounding)
tile_y = np.round(tile_target.y, rounding)
tile_z = np.round(tile_target.z, rounding)

for instance_src in tqdm(list_instances_src, total=len(list_instances_src)):
    instance = laspy.read(os.path.join(src_folder_instances, instance_src))
    # A set gives constant-time lookups; with a list every tile point
    # triggered a scan over all the points of the instance.
    coords = set(zip(np.round(instance.x, rounding), np.round(instance.y, rounding), np.round(instance.z, rounding)))
    mask = np.fromiter(
        (point in coords for point in zip(tile_x, tile_y, tile_z)),
        dtype=bool,
        count=len(tile_target),
    )
    semantic_layer[mask] = 1
    instance_layer[mask] = instance_val
    instance_val += 1

setattr(tile_target, 'gt_semantic', semantic_layer)
setattr(tile_target, 'gt_instance', instance_layer)


100%|██████████| 9/9 [01:39<00:00, 11.05s/it]


In [59]:

# Write the updated tile into the result folder, adding a "_2" suffix to the source name
tile_stem = os.path.basename(src_target).split('.laz')[0]
new_file = os.path.join(src_folder_result, tile_stem + '_2.laz')
tile_target.write(new_file)

In [53]:
# Show the output path currently stored in new_file
print(new_file)

..\data\full_dataset\selection\clusters_4\gt\color_grp_full_tile_331_gt_2.laz


### Erase clusters

In [27]:
# Instance ids whose points are reset to 0 in gt_instance_segmentation
tree_ids_to_erase = [100]
src_tile = r"D:\PDM_repo\Github\PDM\data\full_dataset\selection\clusters_4\gt\color_grp_full_tile_317_gt.laz"
tile = laspy.read(src_tile)
assert "gt_instance_segmentation" in list(tile.point_format.dimension_names)

for tree_id in tree_ids_to_erase:
    mask = tile.gt_instance_segmentation == tree_id
    erased_size = np.sum(mask)  # number of points carrying this id
    tile.gt_instance_segmentation[mask] = 0.0
    print(f"Tree with id {tree_id} of size {erased_size} deleted")

# The source tile is overwritten in place
tile.write(src_tile)

Tree with id 100 of size 0 deleted


### Clean ids of clusters

In [43]:
# Renumber the instance ids so that they form a contiguous range: every id
# found with no point shifts all the following ids down by one.
src_tile = r"D:\PDM_repo\Github\PDM\data\full_dataset\selection\clusters_4\gt\color_grp_full_tile_317_gt.laz"
tile = laspy.read(src_tile)
assert "gt_instance_segmentation" in list(tile.point_format.dimension_names)
# Cast to a plain int so that range() also works when the column is stored as float
max_id = int(np.max(tile.gt_instance_segmentation))
down_jump = 0  # number of empty ids met so far
# `instance_id` instead of `id`: the loop variable must not shadow the builtin
for instance_id in tqdm(range(max_id + 1), total=max_id + 1):
    mask = tile.gt_instance_segmentation == instance_id
    if np.sum(mask) == 0:
        print(f"Empty id: {instance_id}")
        down_jump += 1
        continue

    if down_jump > 0:
        # Ids are visited in increasing order, so the target id is already free
        print(instance_id, " -> ", instance_id - down_jump)
        tile.gt_instance_segmentation[mask] = instance_id - down_jump

# The source tile is overwritten in place
tile.write(src_tile)


100%|██████████| 144/144 [00:00<00:00, 615.63it/s]


### Other stuff

#### Change name of semantic and segmentation columns

In [None]:
# Rename the ground-truth columns of a tile:
#   gt_semantic_segmentation -> gt_semantic
#   gt_instance_segmentation -> gt_instance
# The new columns are created as float32 extra dimensions.
src_tile = r"D:\PDM_repo\Github\PDM\data\gt\color_grp_full_tile_331_gt.laz"
las = laspy.read(src_tile)
for old_val, new_val in zip(['gt_semantic_segmentation', 'gt_instance_segmentation'], ['gt_semantic', 'gt_instance']):
    print("Old val: ", old_val)
    print("New val: ", new_val)

    # Check first: reading a missing column fails, so the guard has to come
    # before the access. This also lets the cell be re-run on a tile that
    # has already been renamed.
    if old_val not in las.point_format.dimension_names:
        print(f"Column {old_val} not found, nothing to rename")
        continue

    # Copy the values so they do not depend on the record that is about to be rebuilt
    values = np.array(las[old_val])

    # Remove the old dimension from extra dimensions (only works for ExtraBytes dimensions)
    las.remove_extra_dim(old_val)

    # Add new dimension
    las.add_extra_dim(laspy.ExtraBytesParams(name=new_val, type=np.float32))  # Change type if needed
    las[new_val] = values

# The source tile is overwritten in place
las.write(src_tile)

Old val:  gt_semantic_segmentation
New val:  gt_semantic
Old val:  gt_instance_segmentation
New val:  gt_instance
