# Process Data

## LiDAR Data

In [1]:
import os
import re
import json
import pdal
import rasterio
import numpy as np

In [2]:
# Input folder holding the LAS tiles, plus one output folder per raster product.
lidar_dir = "../data/lidar"
dsm_dir, dtm_dir, ndsm_dir = "../data/dsm", "../data/dtm", "../data/ndsm"

# Create every output folder up front; exist_ok makes re-running this cell harmless.
for out_dir in (dsm_dir, dtm_dir, ndsm_dir):
    os.makedirs(out_dir, exist_ok=True)

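Check the CRS of the LiDAR data. The tiles are expected to be in EPSG:6565 (NAD83(2011) / Pennsylvania South, ftUS): https://epsg.io/6565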

In [3]:
# Minimal PDAL pipeline: read a single LAS tile and attach filters.info so the
# pipeline metadata can be inspected afterwards.
info_stage = {"type": "filters.info"}
pipeline_json = {"pipeline": ["../data/lidar/Philadelphia_100.las", info_stage]}

# Build and run the pipeline.
pipeline = pdal.Pipeline(json.dumps(pipeline_json))
pipeline.execute()

# Pull the spatial reference (as WKT) out of the filters.info metadata and show it.
metadata = pipeline.metadata
crs_wkt = metadata['metadata']['filters.info']['srs']['wkt']
print(crs_wkt)


PROJCS["NAD83(2011) / Pennsylvania South (ftUS)",GEOGCS["NAD83(2011)",DATUM["NAD83_National_Spatial_Reference_System_2011",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","1116"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["Degree",0.0174532925199433,AUTHORITY["EPSG","9102"]]],PROJECTION["Lambert_Conformal_Conic_2SP"],PARAMETER["false_easting",1968500],PARAMETER["false_northing",0],PARAMETER["central_meridian",-77.75],PARAMETER["standard_parallel_1",40.966666666667],PARAMETER["standard_parallel_2",39.933333333333],PARAMETER["latitude_of_origin",39.333333333333],UNIT["US Survey Foot",0.304800609601219,AUTHORITY["EPSG","9003"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","6565"]]


In [4]:
# Build a DTM, a DSM and an nDSM (DSM minus DTM) GeoTIFF for every selected
# LAS tile in lidar_dir.

# Inclusive range of tile numbers to process. It is currently limited to tile
# 100 only; raise TILE_MAX (e.g. to 200) to process more tiles.
TILE_MIN, TILE_MAX = 100, 100

# Matches file names ending with _<number>.las and captures the tile number.
tile_pattern = re.compile(r'_(\d+)\.las$')

# os.listdir returns names in arbitrary order; sort for a reproducible run.
for file in sorted(os.listdir(lidar_dir)):
    match = tile_pattern.search(file)
    if not match:
        continue

    num = int(match.group(1))
    if not TILE_MIN <= num <= TILE_MAX:
        continue

    file_path = os.path.join(lidar_dir, file)

    # splitext removes only the trailing ".las", so a file name containing
    # additional dots is not truncated at the first one.
    tile_name = os.path.splitext(file)[0]
    output_dsm = os.path.join(dsm_dir, f"{tile_name}_dsm.tif")
    output_dtm = os.path.join(dtm_dir, f"{tile_name}_dtm.tif")
    output_ndsm = os.path.join(ndsm_dir, f"{tile_name}_ndsm.tif")

    # Read the tile once with filters.info to obtain its bounding box.
    pipeline_json = {
        "pipeline": [
            file_path,
            {
                "type": "filters.info"
            }
        ]
    }
    pipeline = pdal.Pipeline(json.dumps(pipeline_json))
    pipeline.execute()

    metadata = pipeline.metadata
    nav = metadata['metadata']['filters.info']['bbox']

    max_x = nav['maxx']
    max_y = nav['maxy']
    min_x = nav['minx']
    min_y = nav['miny']

    # Both rasters are written on the same bounds and resolution so that they
    # can be subtracted cell by cell below.
    bounds = f"([{min_x}, {max_x}], [{min_y}, {max_y}])"

    # NOTE(review): EPSG:6565 uses US survey feet, so the distance-like
    # parameters below (window, threshold, cell, resolution) are presumably
    # interpreted in feet -- confirm these values are intended.
    dtm_pipeline = {
        "pipeline": [
            file_path,
            {
                "type": "filters.smrf",  # Simple Morphological Filter to classify ground points.
                "ignore": "Classification[7:7]",  # Ignore noise.
                "slope": 0.2,
                "window": 16,
                "threshold": 0.5,
                "cell": 1.0
            },
            {
                "type": "filters.range",
                "limits": "Classification[2:2]"  # Select only ground points.
            },
            {
                "type": "writers.gdal",
                "filename": output_dtm,
                "output_type": "idw",  # Inverse Distance Weighting interpolation.
                "resolution": 1.0,
                "bounds": bounds,
                "override_srs": "EPSG:6565"  # Correct CRS
            }
        ]
    }

    # Run the DTM pipeline (ground points only).
    p_dtm = pdal.Pipeline(json.dumps(dtm_pipeline))
    p_dtm.execute()

    dsm_pipeline = {
        "pipeline": [
            file_path,
            {
                "type": "writers.gdal",
                "filename": output_dsm,
                "output_type": "idw",
                "resolution": 1.0,
                "bounds": bounds,
                "override_srs": "EPSG:6565"  # Correct CRS
            }
        ]
    }

    # Run the DSM pipeline (all points).
    p_dsm = pdal.Pipeline(json.dumps(dsm_pipeline))
    p_dsm.execute()

    # Load both rasters as masked arrays so that cells flagged as nodata in
    # either input stay nodata in the difference instead of turning into
    # meaningless height values.
    with rasterio.open(output_dsm) as dsm_src, rasterio.open(output_dtm) as dtm_src:
        dsm_data = dsm_src.read(1, masked=True)
        dtm_data = dtm_src.read(1, masked=True)
        ndsm_meta = dsm_src.meta.copy()
        # Reuse the DSM's nodata value; fall back to -9999 if none is recorded.
        ndsm_nodata = dsm_src.nodata if dsm_src.nodata is not None else -9999.0

    # Compute nDSM; the mask of the result is the union of both input masks.
    ndsm_data = (dsm_data - dtm_data).astype(np.float32).filled(ndsm_nodata)

    # Save the result as a new raster with an explicit nodata value.
    ndsm_meta.update({
        "dtype": "float32",
        "crs": "EPSG:6565",  # Set correct CRS for nDSM
        "nodata": ndsm_nodata
    })

    with rasterio.open(output_ndsm, "w", **ndsm_meta) as dst:
        dst.write(ndsm_data, 1)


In [5]:
# Re-open the three rasters produced for tile 100 and print their CRS.
# The context manager closes the file handles as soon as the check is done,
# so no open datasets are left behind in the kernel.
with rasterio.open(os.path.join(dsm_dir, "Philadelphia_100_dsm.tif")) as dsm, \
        rasterio.open(os.path.join(dtm_dir, "Philadelphia_100_dtm.tif")) as dtm, \
        rasterio.open(os.path.join(ndsm_dir, "Philadelphia_100_ndsm.tif")) as ndsm:
    print(dsm.crs, dtm.crs, ndsm.crs)

EPSG:6565 EPSG:6565 EPSG:6565


## Building Footprints

In [6]:
import geopandas as gpd
import rasterio
from rasterio.features import rasterize
import rasterio.transform
import matplotlib.pyplot as plt

In [9]:
# Read the building footprint polygons and reproject them to EPSG:6565.
footprints_path = "../data/LI_BUILDING_FOOTPRINTS.geojson"
buildings = gpd.read_file(footprints_path).to_crs("EPSG:6565")

In [10]:
# Burn the building footprints into a single-band uint8 GeoTIFF
# (1 = building, 0 = background) covering the footprints' total bounds.

# Define the raster grid using the total bounds of the reprojected GeoDataFrame
minx, miny, maxx, maxy = buildings.total_bounds
pixel_size = 1  # adjust resolution as needed

# Round up so the grid covers the full extent: truncating with int() would
# drop the partial last column/row and clip footprints on the east/south edge.
width = int(np.ceil((maxx - minx) / pixel_size))
height = int(np.ceil((maxy - miny) / pixel_size))

# The grid is anchored at the upper-left corner (minx, maxy).
transform = rasterio.transform.from_origin(minx, maxy, pixel_size, pixel_size)

# NOTE(review): the whole grid is held in memory as one array; with a small
# pixel size over a large extent this may be very big -- confirm it fits.
# Rasterize: burn a value (e.g., 1) for each building footprint
rasterized = rasterize(
    [(geom, 1) for geom in buildings.geometry],
    out_shape=(height, width),
    transform=transform,
    fill=0,
    dtype=rasterio.uint8
)

# Save the rasterized image as a TIF file
with rasterio.open(
    "../data/building_footprints.tif",
    "w",
    driver="GTiff",
    height=height,
    width=width,
    count=1,
    dtype=rasterio.uint8,
    crs="EPSG:6565",
    transform=transform,
) as dst:
    dst.write(rasterized, 1)

Next steps

1. Mosaic the nDSM tiles into one large image.
2. Cut the nDSM, building raster, and tree raster data into tiles.
3. Get the tree and building DSM for each tile.