In [10]:
from pathlib import Path

import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
import pdal
from dask.distributed import Client

np.set_printoptions(precision=4, suppress=True)

from forest_structure_tools.metrics import forest_structure_metrics

In [11]:
# Project data root, given relative to the notebook's working directory.
data_dir = Path("../data")
# Plot-level outputs; per-plot LiDAR files are read from the "lidar" subfolder
# (see read_plot_lidar, which opens "<plot_id>.copc.laz" from there).
plots_dir = data_dir / "outputs" / "plots"
plots_lidar_dir = plots_dir / "lidar"

# Plot table; its 'id' column supplies the plot identifiers processed below.
plots_gdf = gpd.read_file(plots_dir / "plots.geojson")
plot_ids = plots_gdf['id'].to_list()

def read_plot_lidar(plot_id: str):
    """Read the point cloud belonging to one plot.

    Opens ``<plot_id>.copc.laz`` inside ``plots_lidar_dir`` with a PDAL reader
    pipeline, executes it, and returns the first array the pipeline produced.
    """
    lidar_path = plots_lidar_dir / f"{plot_id}.copc.laz"

    pipeline = pdal.Reader(str(lidar_path)).pipeline()
    pipeline.execute()

    point_arrays = pipeline.arrays
    return point_arrays[0]

In [12]:
# Output folder for the metric files written by calculate_and_save_metrics;
# created (including parents) if it does not exist yet.
metrics_dir = plots_dir / "metrics"
metrics_dir.mkdir(parents=True, exist_ok=True)

In [13]:
def add_plot_dimension_to_ds(ds: xr.Dataset, plot_id: str) -> xr.Dataset:
    """Give a single-plot dataset a length-1 ``plot`` dimension.

    Besides the ``plot`` coordinate itself, two extra coordinates indexed by
    ``plot`` are attached:

    * ``site``      - ``plot_id`` without its last three characters
      (e.g. ``"AGG_O_01_P1"`` -> ``"AGG_O_01"``)
    * ``site_type`` - the first three characters of ``site`` (e.g. ``"AGG"``)

    Returns the expanded dataset; the input is not modified in place.
    """
    site_name = plot_id[:-3]

    plot_coords = {
        "plot": ("plot", [plot_id]),
        "site": ("plot", [site_name]),
        "site_type": ("plot", [site_name[:3]]),
    }

    expanded = ds.expand_dims(plot=[plot_id])
    return expanded.assign_coords(plot_coords)


def calculate_metrics(plot_id: str, xy_bin_size: float | None = None, z_bin_size: float | None = 1, weighted: bool = False):
    """Compute forest structure metrics for one plot.

    Parameters
    ----------
    plot_id
        Identifier of the plot; used to locate its LiDAR file via
        ``read_plot_lidar`` and to label the resulting ``plot`` dimension.
    xy_bin_size
        Horizontal bin size passed to ``forest_structure_metrics``. When it is
        not ``None`` the result's ``x``/``y`` coordinates are shifted so their
        minimum is 0, and the removed minima are stored in the ``x_offset`` and
        ``y_offset`` variables.
    z_bin_size
        Vertical bin size passed to ``forest_structure_metrics``.
    weighted
        If true, every point is weighted by ``1 / NumberOfReturns``; otherwise
        no weights are passed.

    Returns
    -------
    The metrics dataset with a length-1 ``plot`` dimension (see
    ``add_plot_dimension_to_ds``) and the three settings recorded as string
    attributes ``xy_bin_size``, ``z_bin_size`` and ``weighted``.
    """
    points = read_plot_lidar(plot_id)
    x = points["X"]
    y = points["Y"]
    z = points["Z"]

    if weighted:
        # NOTE(review): a point with NumberOfReturns == 0 would yield an
        # infinite weight here - confirm the input files never contain that.
        weights = 1 / points["NumberOfReturns"]
    else:
        weights = None

    metrics = forest_structure_metrics(x=x, y=y, z=z, xy_bin_size=xy_bin_size, z_bin_size=z_bin_size, weights=weights)

    if xy_bin_size is not None:
        # Re-express the grid relative to its own lower-left corner and keep
        # the removed offsets so absolute positions can be recovered.
        # Presumably this lets plots share x/y coordinates when concatenated -
        # TODO confirm.
        x_offset = metrics.x.min().item()
        y_offset = metrics.y.min().item()

        metrics['x_offset'] = x_offset
        metrics['y_offset'] = y_offset

        metrics['x'] = metrics['x'] - x_offset
        metrics['y'] = metrics['y'] - y_offset

    metrics = add_plot_dimension_to_ds(metrics, plot_id)

    # Stored as strings, so a None bin size is recorded as the text "None".
    metrics.attrs["xy_bin_size"] = str(xy_bin_size)
    metrics.attrs["z_bin_size"] = str(z_bin_size)
    metrics.attrs["weighted"] = str(weighted)

    return metrics


In [14]:
# Run one plot directly in the notebook process (xy bins of 5, z bins of 1,
# unweighted) and display the resulting dataset.
calculate_metrics("AGG_O_01_P1", xy_bin_size=5, z_bin_size=1, weighted=False)

In [15]:
def calculate_and_save_metrics(plot_ids : list[str], client : Client, xy_bin_size: float | None = None, z_bin_size: float | None = 1, weighted = False):
    """Compute metrics for many plots on a Dask client and write one NetCDF file.

    ``calculate_metrics`` is mapped over ``plot_ids`` (each task may be retried
    up to 5 times), the per-plot datasets are gathered and concatenated along
    ``plot``, and the result is written to ``metrics_dir`` as
    ``metrics_grid_<xy>_z_<z>[_w].nc``. ``<xy>`` and ``<z>`` are the bin size
    followed by ``m``, or ``none`` when the bin size is ``None``; ``_w`` is
    appended for weighted runs. Nothing is returned.
    """
    plot_futures = client.map(
        calculate_metrics,
        plot_ids,
        xy_bin_size=xy_bin_size,
        z_bin_size=z_bin_size,
        weighted=weighted,
        retries=5,
    )
    per_plot_datasets = client.gather(plot_futures)
    combined = xr.concat(per_plot_datasets, dim='plot')

    # Filename fragments describing the run configuration.
    xy = 'none' if xy_bin_size is None else f'{xy_bin_size}m'
    z = 'none' if z_bin_size is None else f'{z_bin_size}m'
    if weighted:
        w = '_w'
    else:
        w = ''

    output_path = metrics_dir / f"metrics_grid_{xy}_z_{z}{w}.nc"
    combined.to_netcdf(output_path)

In [16]:
# Called without arguments, Client() starts a local cluster here (the output
# below reports a distributed.LocalCluster).
client = Client()  # Start a Dask client
# Bare expression so the notebook displays the client/cluster summary.
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 51284 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:51284/status,

0,1
Dashboard: http://127.0.0.1:51284/status,Workers: 4
Total threads: 8,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:51285,Workers: 0
Dashboard: http://127.0.0.1:51284/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:51297,Total threads: 2
Dashboard: http://127.0.0.1:51303/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:51288,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-8x4odcp8,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-8x4odcp8

0,1
Comm: tcp://127.0.0.1:51298,Total threads: 2
Dashboard: http://127.0.0.1:51302/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:51290,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-bju_4yo_,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-bju_4yo_

0,1
Comm: tcp://127.0.0.1:51296,Total threads: 2
Dashboard: http://127.0.0.1:51300/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:51292,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-sfxdql5q,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-sfxdql5q

0,1
Comm: tcp://127.0.0.1:51299,Total threads: 2
Dashboard: http://127.0.0.1:51301/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:51294,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-0n0vsu24,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-0n0vsu24


In [None]:
# No XY binning (xy_bin_size=None), z bins of 1, unweighted.
# Writes metrics_grid_none_z_1m.nc to metrics_dir.
calculate_and_save_metrics(plot_ids, client, xy_bin_size=None, z_bin_size=1, weighted=False)

In [12]:
# No XY binning (xy_bin_size=None), z bins of 1, points weighted by 1 / NumberOfReturns.
# Writes metrics_grid_none_z_1m_w.nc to metrics_dir.
calculate_and_save_metrics(plot_ids, client, xy_bin_size=None, z_bin_size=1, weighted=True)

In [10]:
# XY bins of 5, z bins of 1, unweighted.
# Writes metrics_grid_5m_z_1m.nc to metrics_dir.
calculate_and_save_metrics(plot_ids, client, xy_bin_size=5, z_bin_size=1, weighted=False)

In [13]:
# XY bins of 5, z bins of 1, points weighted by 1 / NumberOfReturns.
# Writes metrics_grid_5m_z_1m_w.nc to metrics_dir.
calculate_and_save_metrics(plot_ids, client, xy_bin_size=5, z_bin_size=1, weighted=True)

In [8]:
# XY bins of 10, z bins of 1: first unweighted, then weighted by 1 / NumberOfReturns.
# Writes metrics_grid_10m_z_1m.nc and metrics_grid_10m_z_1m_w.nc to metrics_dir.
calculate_and_save_metrics(plot_ids, client, xy_bin_size=10, z_bin_size=1, weighted=False)
calculate_and_save_metrics(plot_ids, client, xy_bin_size=10, z_bin_size=1, weighted=True)

In [17]:
# XY bins of 20, z bins of 1: first unweighted, then weighted by 1 / NumberOfReturns.
# Writes metrics_grid_20m_z_1m.nc and metrics_grid_20m_z_1m_w.nc to metrics_dir.
calculate_and_save_metrics(plot_ids, client, xy_bin_size=20, z_bin_size=1, weighted=False)
calculate_and_save_metrics(plot_ids, client, xy_bin_size=20, z_bin_size=1, weighted=True)

In [None]:
# Left disabled; uncomment to shut down the Dask client once the runs above are done.
# client.close()