# Processing Plot Lidar

In this notebook we will create a lidar point cloud for each plot. This includes
- Cropping to the plot geometry
- Loading Z field into Altitude
- Loading Height Above Ground into Z
- Filtering out labeled noise
- Saving the cloud as a Cloud Optimized Point Cloud (COPC)

To process the data we will use PDAL pipelines.
We will also use dask to run the processing in parallel.

NOTE: Processing of plot lidar is really only done for visualisation purposes.

In [5]:
from pathlib import Path
import json

from jinja2 import Template
import geopandas as gpd
import pdal
import pandas as pd

## Pipeline Template

In [6]:
# The pipeline is written as Python objects rather than as one raw JSON string so
# that every stage can carry its own comment. Despite the variable name, the
# top-level container is a list of stage dicts.

# Reader: open the input COPC file, restricted to the plot polygon (WKT).
reader_stage = {
    "type": "readers.copc",
    "filename": "{{ input_path }}",
    "polygon": "{{ polygon_wkt }}",
}

# Range filter: keep Classification values 0 through 5
# (unclassified, ground and vegetation points).
classification_stage = {
    "type": "filters.range",
    "limits": "Classification[0:5]",
}

# Ferry: copy Z into Altitude, then copy HeightAboveGround into Z.
height_stage = {
    "type": "filters.ferry",
    "dimensions": "Z => Altitude, HeightAboveGround => Z",
}

# Assign: points with Z < 0 get Classification 2 and are then clamped to Z = 0.
# NOTE(review): class 2 is "ground" in the ASPRS LAS classification; the earlier
# comment on this stage described it as marking noise - confirm the intended class.
below_ground_stage = {
    "type": "filters.assign",
    "value": ["Classification = 2 WHERE Z < 0", "Z = 0 WHERE Z < 0"],
}

# Assign: add the Weight field (1 / NumberOfReturns).
weight_stage = {
    "type": "filters.assign",
    "value": "Weight = 1 / NumberOfReturns",
}

# Writer: save as a COPC file, forwarding scale/offset and keeping all extra dims.
writer_stage = {
    "type": "writers.copc",
    "filename": "{{ output_path }}",
    "forward": "scale,offset",
    "extra_dims": "all",
}

pipeline_template_dict = [
    reader_stage,
    classification_stage,
    height_stage,
    below_ground_stage,
    weight_stage,
    writer_stage,
]

# Serialise once; the {{ ... }} placeholders are filled in per plot later on.
pipeline_template = json.dumps(pipeline_template_dict, indent=2)

# Function to replace variables
def replace_pipeline_variables(pipeline_template: str, context: dict):
    """
    Fill the ``{{ name }}`` placeholders of a JSON pipeline template.

    The placeholders sit inside JSON string literals, so string values are
    JSON-escaped before rendering. Without this, a value containing a backslash
    (e.g. a Windows path) or a double quote would yield invalid pipeline JSON.
    Values that need no escaping are rendered exactly as before.

    Args:
        pipeline_template (str): The pipeline JSON containing Jinja2 placeholders.
        context (dict): Mapping of placeholder name to the value to substitute.

    Returns:
        str: The rendered pipeline JSON string.
    """
    def _json_escape(value):
        # Only strings are escaped; other values are passed through untouched.
        if isinstance(value, str):
            # json.dumps wraps the text in quotes; the template already supplies
            # the surrounding quotes, so strip them off again.
            return json.dumps(value, ensure_ascii=False)[1:-1]
        return value

    safe_context = {name: _json_escape(value) for name, value in context.items()}
    return Template(pipeline_template).render(safe_context)

### Plot pipelines

In [7]:
import geopandas as gpd

# Plot polygons, one row per plot (site, plot number, ids and geometry).
plots_geojson_path = "../data/outputs/plots/plots.geojson"
plots_gdf = gpd.read_file(plots_geojson_path)
plots_gdf.head()

Unnamed: 0,site,plot_number,site_plot_id,id,geometry
0,AGG_O_01,1,AGG_O_01_P1,AGG_O_01_P1,"POLYGON ((463042.83 5259846.736, 463025.797 52..."
1,AGG_O_01,2,AGG_O_01_P2,AGG_O_01_P2,"POLYGON ((463124.556 5259819.234, 463116.068 5..."
2,AGG_O_01,3,AGG_O_01_P3,AGG_O_01_P3,"POLYGON ((463201.174 5259815.806, 463200.551 5..."
3,AGG_O_01,4,AGG_O_01_P4,AGG_O_01_P4,"POLYGON ((463257.777 5259801.962, 463245.303 5..."
4,AGG_O_01,5,AGG_O_01_P5,AGG_O_01_P5,"POLYGON ((463303.022 5259789.552, 463289.794 5..."


In [8]:
# Site-level clouds are the inputs; per-plot clouds are written next to them.
outputs_dir = Path("../data/outputs")
sites_lidar_dir = outputs_dir.joinpath("sites", "lidar")
plots_lidar_dir = outputs_dir.joinpath("plots", "lidar")

# Make sure the destination folder exists before any pipeline writes to it.
plots_lidar_dir.mkdir(parents=True, exist_ok=True)

def create_pipeline_from_plot(plot_row):
    """
    Render the pipeline template for a single plot.

    Args:
        plot_row: One row of the plots table. Its 'site' and 'site_plot_id'
            entries and its ``geometry`` attribute are read.

    Returns:
        The result of ``replace_pipeline_variables`` for the shared template
        and this plot's input path, output path and polygon WKT.
    """
    # Input is the site-level cloud; output is named after the plot itself.
    source_cloud = sites_lidar_dir / f"{plot_row['site']}.copc.laz"
    target_cloud = plots_lidar_dir / f"{plot_row['site_plot_id']}.copc.laz"

    context = dict(
        input_path=str(source_cloud),
        output_path=str(target_cloud),
        polygon_wkt=plot_row.geometry.wkt,
    )
    return replace_pipeline_variables(pipeline_template, context)

# One rendered pipeline JSON string per plot row, in table order.
pipeline_series = plots_gdf.apply(create_pipeline_from_plot, axis=1)
pipelines = pipeline_series.to_list()

# Show the first rendered pipeline as a sanity check.
print(pipelines[0])

[
  {
    "type": "readers.copc",
    "filename": "../data/outputs/sites/lidar/AGG_O_01.copc.laz",
    "polygon": "POLYGON ((463042.83002541395 5259846.735807601, 463025.79692534194 5259799.726515798, 462975.6639067796 5259817.891450633, 462992.69700685085 5259864.900742435, 463042.83002541395 5259846.735807601))"
  },
  {
    "type": "filters.range",
    "limits": "Classification[0:5]"
  },
  {
    "type": "filters.ferry",
    "dimensions": "Z => Altitude, HeightAboveGround => Z"
  },
  {
    "type": "filters.assign",
    "value": [
      "Classification = 2 WHERE Z < 0",
      "Z = 0 WHERE Z < 0"
    ]
  },
  {
    "type": "filters.assign",
    "value": "Weight = 1 / NumberOfReturns"
  },
  {
    "type": "writers.copc",
    "filename": "../data/outputs/plots/lidar/AGG_O_01_P1.copc.laz",
    "forward": "scale,offset",
    "extra_dims": "all"
  }
]


## Processing

In [9]:
def process_pdal_pipeline(pipeline: str, return_data: bool = False):
    """
    Build and execute a PDAL pipeline from its JSON string.

    Args:
        pipeline (str): The PDAL pipeline JSON string.
        return_data (bool): When True, the executed PDAL Pipeline object is
            returned as well. That object holds the metadata and every point
            the pipeline processed, so it can be large; hence the default of False.

    Returns:
        tuple: ``(count, pipeline_obj)`` where ``count`` is the value returned by
        ``Pipeline.execute()`` and ``pipeline_obj`` is the executed Pipeline
        when ``return_data`` is True, otherwise None.
    """
    executed = pdal.Pipeline(pipeline)
    point_count = executed.execute()

    if return_data:
        return (point_count, executed)
    return (point_count, None)

In [10]:
%%time

(count, pl) = process_pdal_pipeline(pipelines[0], return_data=True)
print(f"Processed {count} points.")

points = pl.arrays[0]
points_df = pd.DataFrame(pl.arrays[0])
points_df.head()

Processed 252716 points.
CPU times: user 939 ms, sys: 51.7 ms, total: 990 ms
Wall time: 745 ms


Unnamed: 0,X,Y,Z,Intensity,ReturnNumber,NumberOfReturns,ScanDirectionFlag,EdgeOfFlightLine,Classification,Synthetic,...,PointSourceId,GpsTime,ScanChannel,Red,Green,Blue,Infrared,HeightAboveGround,Altitude,Weight
0,463021.212,5259854.447,0.0,28212,2,2,0,0,2,0,...,1,411879500.0,0,33924,31354,30326,40725,0.0,505.426,0.5
1,463020.976,5259853.864,9.702999,30729,1,2,0,0,0,0,...,1,411879500.0,0,38036,36237,34695,46277,9.702999,515.146,0.5
2,463021.434,5259854.039,0.0,28635,3,3,0,0,2,0,...,1,411879500.0,0,26471,24929,24672,41676,0.0,505.447,0.333333
3,463020.953,5259852.859,9.075999,29359,1,3,0,0,0,0,...,1,411879500.0,0,28013,25700,25443,40341,9.075999,514.615,0.333333
4,463021.229,5259853.532,9.157,29605,2,3,0,0,0,0,...,1,411879500.0,0,27499,26214,25957,43753,9.157,514.723,0.333333


In [16]:
from dask.distributed import Client

# Client() is created with no arguments; the cell output reports the cluster it
# is connected to. The client is closed in a later cell once results are gathered.
client = Client()  # Start a Dask client
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 8,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:55750,Workers: 0
Dashboard: http://127.0.0.1:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:55762,Total threads: 2
Dashboard: http://127.0.0.1:55765/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:55753,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-e6vzuiqk,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-e6vzuiqk

0,1
Comm: tcp://127.0.0.1:55761,Total threads: 2
Dashboard: http://127.0.0.1:55768/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:55755,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-qihfoilh,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-qihfoilh

0,1
Comm: tcp://127.0.0.1:55764,Total threads: 2
Dashboard: http://127.0.0.1:55767/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:55757,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-cmm7ss1k,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-cmm7ss1k

0,1
Comm: tcp://127.0.0.1:55763,Total threads: 2
Dashboard: http://127.0.0.1:55766/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:55759,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-_1x94u4a,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-_1x94u4a


In [17]:
%%time

futures = client.map(process_pdal_pipeline, pipelines, key=plots_gdf["site_plot_id"].tolist())
results = client.gather(futures)

CPU times: user 2.06 s, sys: 629 ms, total: 2.69 s
Wall time: 51.8 s


In [18]:
# The results were gathered in the previous cell, so the Dask client can be closed.
client.close()

In [20]:
# Each result is a (count, pipeline_or_None) tuple; add up the counts.
total_points = sum(result[0] for result in results)

f"Total points: {total_points:,}"

'Total points: 52,940,584'