# Process Data

## LiDAR Data

In [1]:
import os
import re
import json
import pdal
import rasterio
import numpy as np

In [3]:
lidar_dir = "../data/lidar"
dsm_dir = "../data/dsm"
dtm_dir = "../data/dtm"
ndsm_dir = "../data/ndsm"

os.makedirs(dsm_dir, exist_ok=True)
os.makedirs(dtm_dir, exist_ok=True)
os.makedirs(ndsm_dir, exist_ok=True)

In [11]:
# Loop through each file in the lidar directory.
for file in os.listdir(lidar_dir):
    # Use a regex to match files ending with _<number>.las
    match = re.search(r'_(\d+)\.las$', file)
    if match:
        num = int(match.group(1))
        # Process only if the number is between 100 and 200.
        if 231 <= num <= 430:
            file_path = os.path.join(lidar_dir, file)

            output_dsm = os.path.join(dsm_dir, f"{file.split('.')[0]}_dsm.tif")
            output_dtm = os.path.join(dtm_dir, f"{file.split('.')[0]}_dtm.tif")
            output_ndsm = os.path.join(ndsm_dir, f"{file.split('.')[0]}_ndsm.tif")

            # Create a pipeline to read the LAS file and get metadata.
            pipeline_json = {
                "pipeline": [
                    file_path,
                    {
                        "type": "filters.info"
                    }
                ]
            }

            # Run PDAL pipeline.
            pipeline = pdal.Pipeline(json.dumps(pipeline_json))
            pipeline.execute()

            # Retrieve metadata.
            metadata = pipeline.metadata
            nav = metadata['metadata']['filters.info']['bbox']

            max_x = nav['maxx']
            max_y = nav['maxy']
            min_x = nav['minx']
            min_y = nav['miny']

            dtm_pipeline = {
                "pipeline": [
                    file_path,
                    {
                        "type": "filters.smrf",  # Simple Morphological Filter to classify ground points.
                        "ignore": "Classification[7:7]",  # Ignore noise.
                        "slope": 0.2,
                        "window": 16,
                        "threshold": 0.5,
                        "cell": 1.0
                    },
                    {
                        "type": "filters.range",
                        "limits": "Classification[2:2]"  # Select only ground points.
                    },
                    {
                        "type": "writers.gdal",
                        "filename": output_dtm,
                        "output_type": "idw",  # Inverse Distance Weighting interpolation.
                        "resolution": 1.0,
                        "bounds": f"([{min_x}, {max_x}], [{min_y}, {max_y}])"
                    }
                ]
            }

            # Initialize the PDAL pipeline for DTM.
            p_dtm = pdal.Pipeline(json.dumps(dtm_pipeline))
            p_dtm.execute()

            dsm_pipeline = {
                "pipeline": [
                    file_path,
                    {
                        "type": "writers.gdal",
                        "filename": output_dsm,
                        "output_type": "idw",
                        "resolution": 1.0,
                        "bounds": f"([{min_x}, {max_x}], [{min_y}, {max_y}])"
                    }
                ]
            }

            # Execute the DSM pipeline.
            p_dsm = pdal.Pipeline(json.dumps(dsm_pipeline))
            p_dsm.execute()

            # Load the DSM and DTM data.
            with rasterio.open(output_dsm) as dsm_src, rasterio.open(output_dtm) as dtm_src:
                dsm_data = dsm_src.read(1)
                dtm_data = dtm_src.read(1)

                # Compute nDSM.
                ndsm_data = dsm_data - dtm_data

                # Save the result as a new raster.
                ndsm_meta = dsm_src.meta.copy()
                ndsm_meta.update({"dtype": "float32"})

                with rasterio.open(output_ndsm, "w", **ndsm_meta) as dst:
                    dst.write(ndsm_data.astype(np.float32), 1)


# Building Footprints

In [2]:
import geopandas as gpd

In [3]:
# Load building footprints
building = gpd.read_file("../data/LI_BUILDING_FOOTPRINTS.geojson")

# Load one of the ndsm files
ndsm_100 = rasterio.open("../data/ndsm/Philadelphia_100_ndsm.tif")

print(ndsm_100.crs)

NameError: name 'rasterio' is not defined