In [6]:
import os

import numpy as np
import pandas as pd

from src import Config
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import open3d as o3d

from src.common.logger import Logger
from src.feature import LasFileHandler, FeatureExtractor, Voxelizer
from src.feature.point_cloud_file import FileType, Filename, IFileHandler, DirectoryType

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [3]:
# Shared logger instance; it is passed to every file handler, voxelizer and
# feature extractor constructed in the cells below.
logger: Logger = Logger()
# LAS file handler wired to the shared logger.
# NOTE(review): the visible cells never reference this instance -- the
# extraction loop builds its own LasFileHandler per input file. Confirm whether
# it is still needed elsewhere before removing it.
point_cloud_file_handler: IFileHandler = LasFileHandler(logger=logger)

## Extracting features and creating a labeled dataset for each class

In [4]:
# Column layout of the per-voxel feature table, one row per extracted voxel.
# NOTE(review): each feature array is spread across all twelve columns,
# including "label", so the array presumably carries a placeholder in the last
# slot -- confirm against FeatureExtractor. The "label" column is overwritten
# with the class name after the frame is built.
FEATURE_COLUMNS: list[str] = [
    "l1", "l2", "l3", "planarity", "linearity", "scattering",
    "omnivariance", "sum_of_eigenvalues", "anisotropy", "change_of_curvature",
    "z_range", "label"
]

# One cropped LAS file per class; the Filename member's value doubles as the label.
filenames: list[Filename] = [Filename.BUILDINGS, Filename.TERRAIN, Filename.RIVERS, Filename.TREES]
# Collects one labeled feature frame per class, in the order of `filenames`.
datasets: list[pd.DataFrame] = []

for filename in filenames:
    # Load the cropped point cloud for this class with a fresh handler.
    file_handler: IFileHandler = LasFileHandler(logger=logger)
    file_handler.open(
        directory_type=DirectoryType.CROPPED,
        filename=filename,
        file_type=FileType.LAS
    )
    point_cloud: o3d.geometry.PointCloud = file_handler.render_point_cloud_object()

    # Split the cloud into voxels; each entry is (grid index, voxel point cloud).
    voxelizer: Voxelizer = Voxelizer(point_cloud=point_cloud, logger=logger)
    voxels: list[tuple[np.ndarray, o3d.geometry.PointCloud]] = voxelizer.extract_voxels()

    # Gather every voxel's feature vector first and build the frame once.
    # Appending with `frame.loc[len(frame)] = row` re-allocates the frame on
    # each insert, which is very slow for tens of thousands of voxels.
    # The grid index is not needed for the feature table and is discarded.
    feature_rows: list[np.ndarray] = [
        FeatureExtractor(point_cloud=voxel, logger=logger).features.array
        for _grid_index, voxel in voxels
    ]
    feature_frame: pd.DataFrame = pd.DataFrame(feature_rows, columns=FEATURE_COLUMNS)

    # Tag every row of this frame with the class it was extracted from.
    feature_frame["label"] = filename.value
    datasets.append(feature_frame)

INFO:src.common.logger.logger:Loaded 1516824 points from buildings.las
INFO:src.common.logger.logger:Rendering point cloud object
INFO:src.common.logger.logger:Rendered point cloud object
INFO:src.common.logger.logger:Voxel grid created with 78005 voxels.


Extracting voxels...:   0%|          | 0/78005 [00:00<?, ?it/s]

INFO:src.common.logger.logger:Extracted 49611 voxels, 28394 voxels were insufficient.
INFO:src.common.logger.logger:Loaded 2133081 points from terrain.las
INFO:src.common.logger.logger:Rendering point cloud object
INFO:src.common.logger.logger:Rendered point cloud object
INFO:src.common.logger.logger:Voxel grid created with 67706 voxels.


Extracting voxels...:   0%|          | 0/67706 [00:00<?, ?it/s]

INFO:src.common.logger.logger:Extracted 57436 voxels, 10270 voxels were insufficient.
INFO:src.common.logger.logger:Loaded 213299 points from river.las
INFO:src.common.logger.logger:Rendering point cloud object
INFO:src.common.logger.logger:Rendered point cloud object
INFO:src.common.logger.logger:Voxel grid created with 19082 voxels.


Extracting voxels...:   0%|          | 0/19082 [00:00<?, ?it/s]

INFO:src.common.logger.logger:Extracted 10197 voxels, 8885 voxels were insufficient.
INFO:src.common.logger.logger:Loaded 718861 points from trees.las
INFO:src.common.logger.logger:Rendering point cloud object
INFO:src.common.logger.logger:Rendered point cloud object
INFO:src.common.logger.logger:Voxel grid created with 55426 voxels.


Extracting voxels...:   0%|          | 0/55426 [00:00<?, ?it/s]

INFO:src.common.logger.logger:Extracted 30506 voxels, 24920 voxels were insufficient.


## Merging the training data and shuffling it

In [10]:
# Fixed seed so the shuffle, and therefore the written CSV, is identical on
# every Restart & Run All.
SHUFFLE_SEED: int = 42

# Stack the per-class frames, shuffle all rows, and renumber the index 0..n-1.
# Built as one chain so re-running the cell never mutates an earlier result.
labeled_data: pd.DataFrame = (
    pd.concat(datasets, ignore_index=True)
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)

# NOTE(review): the misspelled file name ("labled") and the index column in the
# CSV are kept unchanged, since other code may read the file by this name and
# layout -- confirm before changing either.
filepath: str = os.path.join(Config.DATASETS_DIR.value, "labled_dataset.csv")
labeled_data.to_csv(path_or_buf=filepath)