# Classification training

### Dependencies

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import laspy
import pdal
import shutil
import json
from tqdm import tqdm

### Extract all instance samples from the input files

In [2]:
# Utils
def instance_extraction(src_tile, src_dest, inst_column='PredInstance', verbose=False):
    """Split a LAS/LAZ tile into one LAZ file per instance.

    Every distinct non-zero value found in the ``inst_column`` dimension is
    written to ``<src_dest>/<tile stem>_<instance id>.laz``. Points whose
    instance id equals 0 are skipped (presumably the "no instance" label --
    confirm against the segmentation output).

    Args:
        src_tile: Path of the tile, read with ``laspy.read``.
        src_dest: Directory receiving the per-instance files; created if missing.
        inst_column: Name of the point dimension holding the instance ids.
        verbose: When True, show a progress bar and report every saved file.

    Returns:
        None. The function only writes files.
    """
    las = laspy.read(src_tile)
    instances = getattr(las, inst_column)

    # Strip only the final extension so dots inside the tile name are preserved
    # (e.g. "tile.v2.laz" -> "tile.v2").
    tile_stem = os.path.splitext(os.path.basename(src_tile))[0]

    # Create the destination once instead of on every iteration.
    os.makedirs(src_dest, exist_ok=True)

    for inst_id in tqdm(np.unique(instances), disable=not verbose):
        if inst_id == 0:
            continue

        sub_points = las.points[instances == inst_id]
        if len(sub_points) == 0:
            continue

        # Build the instance file on top of the header of the source tile.
        sub_las = laspy.LasData(las.header)
        sub_las.points = sub_points

        out_path = os.path.join(src_dest, f"{tile_stem}_{inst_id}.laz")
        sub_las.write(out_path)

        if verbose:
            # Report inside the loop (one line per instance); tqdm.write keeps
            # the progress bar intact while printing.
            tqdm.write(f"Saved instance {inst_id} with {len(sub_points)} points → {out_path}")

def convert_laz_to_pcd(in_laz, out_pcd, verbose=False):
    """Convert a LAS/LAZ file into an ASCII PCD (version 0.7) file.

    The x/y/z coordinates come first, followed by every other dimension of
    the point format. All fields are declared as 4-byte ``F`` fields in the
    header and every value is written with six decimals.

    Args:
        in_laz: Path of the file read with ``laspy.read``.
        out_pcd: Path of the PCD file to write (overwritten if present).
        verbose: When True, print the output path once the file is written.
    """
    cloud = laspy.read(in_laz)

    # Coordinates, one row per point
    xyz = np.vstack((cloud.x, cloud.y, cloud.z)).T

    # Every remaining dimension of the point format, keyed by its name
    extra = {
        dim.name: getattr(cloud, dim.name)
        for dim in cloud.point_format.dimensions
        if dim.name not in ('X', 'Y', 'Z')
    }

    n_points = xyz.shape[0]
    field_names = ["x", "y", "z", *extra]
    n_fields = len(field_names)

    # One column per field, in the same order as the FIELDS header line
    table = np.column_stack([xyz, *extra.values()])

    header_lines = [
        "VERSION 0.7",
        f"FIELDS {' '.join(field_names)}",
        f"SIZE {' '.join(['4'] * n_fields)}",
        f"TYPE {' '.join(['F'] * n_fields)}",
        f"COUNT {' '.join(['1'] * n_fields)}",
        f"WIDTH {n_points}",
        "HEIGHT 1",
        "VIEWPOINT 0 0 0 1 0 0 0",
        f"POINTS {n_points}",
        "DATA ascii",
    ]

    with open(out_pcd, "w") as pcd_file:
        pcd_file.write("\n".join(header_lines) + "\n")
        # Point records follow the header, one space-separated line per point
        np.savetxt(pcd_file, table, fmt=" ".join(["%.6f"] * n_fields))

    if verbose:
        print(f"PCD file saved in {out_pcd}")
        

In [6]:
# Input folder holding the segmented tiles; one file per instance is written
# to its "instances" sub-folder.
# NOTE(review): this is a machine-specific absolute path -- adjust before running elsewhere.
src_in = r"D:\GitHubProjects\Terranum_repo\TreeSegmentation\data\lausanne\train\temp_instances_treelearn"
src_out = os.path.join(src_in, "instances")
instance_column = "treeID"

# Extract all samples:
# Match on the full ".laz" extension, consistently with the clean-up step below.
list_files = [x for x in os.listdir(src_in) if x.endswith('.laz')]
for f in tqdm(list_files, desc='Extracting samples'):
    instance_extraction(os.path.join(src_in, f), src_out, instance_column)

# The output folder may not exist if no instance was extracted; create it so
# the listing below does not raise FileNotFoundError.
os.makedirs(src_out, exist_ok=True)

# Convert samples to pcd
list_samples = [x for x in os.listdir(src_out) if x.endswith('.laz')]
for file in tqdm(list_samples, desc='Converting samples to PCD'):
    file_in = os.path.join(src_out, file)
    # Swap only the final extension; splitting on '.laz' would cut the path at
    # the first occurrence, even inside a directory name.
    file_out = os.path.splitext(file_in)[0] + '.pcd'
    convert_laz_to_pcd(file_in, file_out, False)

# Remove all laz files (list_samples already contains only ".laz" names)
for file in tqdm(list_samples, desc='Removing LAZ files'):
    os.remove(os.path.join(src_out, file))

Extracting samples: 100%|██████████| 13/13 [00:01<00:00, 10.81it/s]
Converting samples to PCD: 100%|██████████| 60/60 [00:08<00:00,  7.42it/s]
Removing LAZ files: 100%|██████████| 60/60 [00:00<00:00, 5127.72it/s]
