In [1]:
from pathlib import Path

import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
import pdal
from dask.distributed import Client

np.set_printoptions(precision=4, suppress=True)

from forest_structure_tools.metrics import forest_structure_metrics

In [2]:
# Input/output locations, all relative to the notebook's working directory.
data_dir = Path("../data")
plots_dir = data_dir / "outputs" / "plots"
# read_plot_lidar looks for "<plot_id>.copc.laz" files in this directory.
plots_lidar_dir = plots_dir / "lidar"

# The "id" column of plots.geojson supplies the plot identifiers that the
# later cells map over.
plots_gdf = gpd.read_file(plots_dir / "plots.geojson")
plot_ids = plots_gdf['id'].to_list()

def read_plot_lidar(plot_id: str):
    """Load the point records stored for one plot.

    The file ``<plot_id>.copc.laz`` inside ``plots_lidar_dir`` is opened with
    a PDAL reader, the resulting pipeline is executed, and the first entry of
    the pipeline's ``arrays`` is returned.
    """
    copc_path = plots_lidar_dir / f"{plot_id}.copc.laz"
    pipeline = pdal.Reader(str(copc_path)).pipeline()
    pipeline.execute()

    first_array = pipeline.arrays[0]
    return first_array

In [3]:
# Root directory for the metric files; created (with parents) if it is missing.
metrics_dir = plots_dir / "metrics"
metrics_dir.mkdir(parents=True, exist_ok=True)

In [4]:
def add_suffix(ds: xr.Dataset, suffix: str):
    """Return ``ds`` with ``suffix`` appended to every data-variable name.

    Only the names listed in ``ds.data_vars`` are renamed. The renaming itself
    is delegated to ``ds.rename`` and its result is returned.
    """
    renames = {}
    for name in ds.data_vars:
        renames[name] = name + suffix
    return ds.rename(renames)


def calculate_metrics(
    plot_id: str,
    xy_bin_size: float | None = None,
    z_bin_size: float | None = 1,
):
    """Compute unweighted and return-weighted structure metrics for one plot.

    The plot's points are loaded with ``read_plot_lidar`` and passed to
    ``forest_structure_metrics`` twice: once without weights (basic metrics,
    a cover threshold of 0 and the 10th..90th percentiles in steps of 10),
    and once with per-point weights, where only the cover and vegetation
    profile metrics are requested. The variables of the weighted result get
    a ``"[w]"`` suffix so both results can be merged into one dataset.

    Args:
        plot_id: Identifier of the plot; selects the lidar file to read.
        xy_bin_size: Forwarded unchanged to ``forest_structure_metrics``.
        z_bin_size: Forwarded unchanged to ``forest_structure_metrics``.

    Returns:
        The merged result of both runs, with ``xy_bin_size`` and
        ``z_bin_size`` recorded as string attributes.
    """
    points = read_plot_lidar(plot_id)
    x = points["X"]
    y = points["Y"]
    z = points["Z"]

    # Each point is weighted by the reciprocal of its NumberOfReturns value.
    # NOTE(review): a NumberOfReturns of 0 would give an infinite weight;
    # confirm the input files never contain it.
    weights = 1 / points["NumberOfReturns"]

    unweighted_metrics = forest_structure_metrics(
        x=x,
        y=y,
        z=z,
        xy_bin_size=xy_bin_size,
        z_bin_size=z_bin_size,
        include_basic=True,
        cover_threshold=0,
        percentiles=np.arange(10, 100, 10),
        weights=None,
    )
    weighted_metrics = forest_structure_metrics(
        x=x,
        y=y,
        z=z,
        xy_bin_size=xy_bin_size,
        z_bin_size=z_bin_size,
        # Only need cover and veg profile metrics for weighted
        include_basic=False,
        percentiles=None,
        cover_threshold=None,
        weights=weights,
    ).pipe(add_suffix, "[w]")

    metrics = xr.merge([unweighted_metrics, weighted_metrics])

    # Stored as strings, so a bin size of None is recorded as "None".
    metrics.attrs["xy_bin_size"] = str(xy_bin_size)
    metrics.attrs["z_bin_size"] = str(z_bin_size)

    return metrics

def calculate_and_save_metrics(
    plot_id: str,
    xy_bin_size: float | None = None,
    z_bin_size: float | None = 1,
) -> None:
    """Compute the metrics for one plot and write them to a netCDF file.

    The file is written to ``metrics_dir / "xy_<xy>_z_<z>" / "<plot_id>.nc"``,
    where ``<xy>`` and ``<z>`` are the bin sizes formatted as strings, or
    ``"none"`` when the corresponding bin size is ``None``.

    Args:
        plot_id: Identifier of the plot to process; also the output file stem.
        xy_bin_size: Forwarded to ``calculate_metrics``.
        z_bin_size: Forwarded to ``calculate_metrics``.

    Returns:
        None. The only result is the file written to disk.
    """
    xy = "none" if xy_bin_size is None else f"{xy_bin_size}"
    z = "none" if z_bin_size is None else f"{z_bin_size}"

    sub_dir = metrics_dir / f"xy_{xy}_z_{z}"

    # exist_ok=True already makes this a no-op for an existing directory, so
    # no separate exists() check is needed before the call.
    sub_dir.mkdir(parents=True, exist_ok=True)

    metrics = calculate_metrics(plot_id, xy_bin_size=xy_bin_size, z_bin_size=z_bin_size)
    metrics.to_netcdf(sub_dir / f"{plot_id}.nc")




In [5]:
# Single-plot check: compute the metrics for one plot and display the result
# as the cell output (nothing is written to disk).
calculate_metrics("AGG_O_01_P1", xy_bin_size=10, z_bin_size=1)

In [6]:
# Single-plot check of the save path: writes
# metrics_dir / "xy_0.5_z_none" / "AGG_O_01_P1.nc".
calculate_and_save_metrics("AGG_O_01_P1", xy_bin_size=0.5, z_bin_size=None)

In [7]:
# Called without arguments, Client() started the local cluster shown in the
# cell output; the bare `client` expression renders that summary.
client = Client()  # Start a Dask client
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 8,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:55594,Workers: 0
Dashboard: http://127.0.0.1:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:55605,Total threads: 2
Dashboard: http://127.0.0.1:55607/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:55597,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-647_af_s,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-647_af_s

0,1
Comm: tcp://127.0.0.1:55606,Total threads: 2
Dashboard: http://127.0.0.1:55608/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:55599,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-ulvjncpj,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-ulvjncpj

0,1
Comm: tcp://127.0.0.1:55609,Total threads: 2
Dashboard: http://127.0.0.1:55613/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:55601,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-j9tv4ree,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-j9tv4ree

0,1
Comm: tcp://127.0.0.1:55612,Total threads: 2
Dashboard: http://127.0.0.1:55615/status,Memory: 4.00 GiB
Nanny: tcp://127.0.0.1:55603,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-90zk43h_,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-90zk43h_


In [8]:
# xy_bin_size=None with the default z_bin_size=1: one task per plot id,
# written under metrics_dir / "xy_none_z_1".
# calculate_and_save_metrics returns nothing, so `results` holds only None
# values; gather() is used here to wait until every file has been written.
futures = client.map(calculate_and_save_metrics, plot_ids, xy_bin_size=None)
results = client.gather(futures)

In [9]:
# XY 10 m with the default z_bin_size=1: one task per plot id, written under
# metrics_dir / "xy_10_z_1". gather() waits for all tasks to finish.
futures = client.map(calculate_and_save_metrics, plot_ids, xy_bin_size=10)
results = client.gather(futures)

In [10]:
# XY 5 m with the default z_bin_size=1: one task per plot id, written under
# metrics_dir / "xy_5_z_1". gather() waits for all tasks to finish.
futures = client.map(calculate_and_save_metrics, plot_ids, xy_bin_size=5)
results = client.gather(futures)

In [11]:
# XY 1 m with the default z_bin_size=1: one task per plot id, written under
# metrics_dir / "xy_1_z_1". gather() waits for all tasks to finish.
futures = client.map(calculate_and_save_metrics, plot_ids, xy_bin_size=1)
results = client.gather(futures)

In [12]:
# XY 0.5 m with z_bin_size=None (no z): one task per plot id, written under
# metrics_dir / "xy_0.5_z_none". gather() waits for all tasks to finish.
futures = client.map(calculate_and_save_metrics, plot_ids, xy_bin_size=0.5, z_bin_size=None)
results = client.gather(futures)

In [13]:
# client.close()

In [14]:
# These snippets are useful for merging datasets.
# def add_plot_dimension_to_ds(ds: xr.Dataset, plot_id: str) -> xr.Dataset:
#     site = plot_id[:-3]
#     site_type = site[:3]

#     ds = ds.expand_dims(plot=[plot_id]).assign_coords(
#         plot=("plot", [plot_id]), site=("plot", [site]), site_type=("plot", [site_type])
#     )

#     return ds

# if xy_bin_size is not None:
#     x_offset = metrics.x.min().item()
#     y_offset = metrics.y.min().item()

#     metrics["x_offset"] = x_offset
#     metrics["y_offset"] = y_offset

#     metrics["x"] = metrics["x"] - x_offset
#     metrics["y"] = metrics["y"] - y_offset

# metrics = add_plot_dimension_to_ds(metrics, plot_id)