In [1]:
from pathlib import Path

import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
import pdal
from dask.distributed import Client
from jinja2 import Template
import json

from dask.distributed import Client

from forest_structure_tools.metrics_v2 import forest_structure_metrics
import rioxarray

In [3]:
# Directory layout used throughout the notebook. Paths are relative, so they
# resolve against the kernel's current working directory.
data_dir = Path('../data/outputs')
sites_dir = data_dir / "sites"
# Per-site point clouds read by read_site (one "<site_id>.copc.laz" each).
sites_lidar_dir = sites_dir / "lidar"
sites_metrics_dir = sites_dir / "metrics"
# Destination for the NetCDF files written by calculate_and_save_metrics.
# NOTE(review): the "x1-y1-z1" folder name presumably mirrors the default
# xy_bin_size=1 / z_bin_size=1 of calculate_metrics — keep them in sync.
output_dir = sites_metrics_dir / "x1-y1-z1"

# Create the output folder (and any missing parents); no error if it already exists.
output_dir.mkdir(parents=True, exist_ok=True)

def read_site(site_id):
    """Load one site's COPC point cloud through a PDAL pipeline.

    The file ``<site_id>.copc.laz`` is read from ``sites_lidar_dir`` and passed
    through three filters, in this order:

    1. ``filters.range`` with limits ``Classification[0:5]``.
    2. ``filters.ferry`` copying ``Z`` into ``Altitude`` and
       ``HeightAboveGround`` into ``Z``.
    3. ``filters.assign`` setting ``Classification = 2`` and ``Z = 0``
       wherever ``Z < 0``.

    The number of points produced by the pipeline is printed.

    Parameters
    ----------
    site_id
        Site identifier used to build the input file name.

    Returns
    -------
    The first array of the executed pipeline (``pipeline.arrays[0]``).
    """
    source_file = sites_lidar_dir / f"{site_id}.copc.laz"

    reader = pdal.Reader(str(source_file), type="readers.copc")
    class_range = pdal.Filter(type="filters.range", limits="Classification[0:5]")
    height_to_z = pdal.Filter(
        type='filters.ferry',
        dimensions="Z => Altitude, HeightAboveGround => Z",
    )
    # The Classification assignment is listed before the Z assignment.
    # NOTE(review): presumably so the "Z < 0" condition is still true when the
    # class is rewritten — confirm against PDAL's evaluation order.
    fix_negative_heights = pdal.Filter(
        type="filters.assign",
        value=["Classification = 2 WHERE Z < 0", "Z = 0 WHERE Z < 0"],
    )

    pipeline = reader | class_range | height_to_z | fix_negative_heights
    n_points = pipeline.execute()
    print(f"Read in {n_points:,d} points")

    return pipeline.arrays[0]

def calculate_metrics(
    site_id: str,
    xy_bin_size = 1,
    z_bin_size = 1,
    crs = 7855,
):
    """Compute forest structure metrics for one site and attach spatial metadata.

    Parameters
    ----------
    site_id : str
        Site identifier. Passed to ``read_site`` to load the points and stored
        on the result as ``attrs["site_id"]``.
    xy_bin_size
        Forwarded unchanged to ``forest_structure_metrics`` (default 1).
    z_bin_size
        Forwarded unchanged to ``forest_structure_metrics`` (default 1).
    crs
        Value handed to ``rio.write_crs``. Defaults to 7855, the value that was
        previously hard-coded here, so existing callers are unaffected.
        NOTE(review): presumably an EPSG code matching the input LiDAR — confirm.

    Returns
    -------
    The object returned by ``forest_structure_metrics``, with the ``site_id``
    attribute set, the CRS written, and ``x``/``y`` registered as the spatial
    dimensions.
    """
    points = read_site(site_id)
    metrics = forest_structure_metrics(
        points,
        xy_bin_size=xy_bin_size,
        z_bin_size=z_bin_size,
    )
    metrics.attrs["site_id"] = site_id

    # Add coordinate reference system and spatial dimensions. All three calls
    # use inplace=True, so `metrics` itself is modified rather than copied.
    metrics.rio.write_crs(crs, inplace=True)
    metrics.rio.set_spatial_dims(x_dim="x", y_dim="y", inplace=True)
    metrics.rio.write_coordinate_system(inplace=True)

    return metrics

def calculate_and_save_metrics(site_id: str):
    """Compute the metrics for ``site_id`` and write them to ``output_dir``.

    The result of ``calculate_metrics(site_id)`` (default bin sizes) is saved
    as ``<site_id>.nc`` via ``to_netcdf``. Nothing is returned.
    """
    destination = output_dir / f"{site_id}.nc"
    calculate_metrics(site_id).to_netcdf(destination)


In [4]:
# Single-site run with the default bin sizes; `metrics` is reused by the
# ad-hoc export cells below.
metrics = calculate_metrics("EPO_Y_10")

Read in 3,037,385 points


In [18]:
# Keep only the variables with exactly two dimensions, dropping anything that
# has an extra (e.g. z) axis.
grid_metrics = metrics[[v for v in metrics.data_vars if metrics[v].ndim == 2]]
# Reorder to (y, x) before writing the GeoTIFF into the current working directory.
# NOTE(review): presumably needed because the metrics come out x-first while
# rioxarray expects y before x — confirm.
grid_metrics.transpose('y', 'x').rio.to_raster("EPO_Y_10_grid_metrics.tif")

In [12]:
# Export the max_height variable alone; `.T` reverses its dimension order
# before the GeoTIFF is written to the current working directory.
metrics['max_height'].T.rio.to_raster("EPO_Y_10_max_height.tif")

In [3]:
# Load the site table and collect the values of its 'id' column; these ids
# drive the batch run below.
sites_gdf = gpd.read_file(sites_dir / "sites.geojson")
site_ids = sites_gdf['id'].to_list()

In [22]:
# Ad-hoc export of the single-site result, reordered to (y, x, z), into the
# current working directory.
# NOTE(review): calculate_and_save_metrics writes under output_dir and does NOT
# transpose, so this file differs from the batch outputs in both location and
# dimension order — confirm which layout downstream code expects.
metrics.transpose('y', 'x', 'z').to_netcdf("EPO_Y_10.nc")

In [4]:
# Start a local Dask cluster with three workers for the per-site batch run.
client = Client(n_workers=3)
# Bare expression so the notebook renders the cluster summary / dashboard link.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 3
Total threads: 9,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:62069,Workers: 0
Dashboard: http://127.0.0.1:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:62081,Total threads: 3
Dashboard: http://127.0.0.1:62083/status,Memory: 5.33 GiB
Nanny: tcp://127.0.0.1:62072,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-54tfqk_0,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-54tfqk_0

0,1
Comm: tcp://127.0.0.1:62080,Total threads: 3
Dashboard: http://127.0.0.1:62084/status,Memory: 5.33 GiB
Nanny: tcp://127.0.0.1:62074,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-274eqs06,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-274eqs06

0,1
Comm: tcp://127.0.0.1:62082,Total threads: 3
Dashboard: http://127.0.0.1:62087/status,Memory: 5.33 GiB
Nanny: tcp://127.0.0.1:62076,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-xuuv7jxk,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-xuuv7jxk


In [5]:
# Submit one calculate_and_save_metrics task per site. The site ids are also
# passed as `key`.
# NOTE(review): presumably so each task is identifiable by site id in the Dask
# dashboard — confirm.
futures = client.map(calculate_and_save_metrics, site_ids, key=site_ids)

In [44]:
# Manual abort switch for the batch run. This cell sits between submitting the
# futures and gathering them, so cancelling unconditionally would abort the
# batch on a top-to-bottom run ("Restart Kernel -> Run All") before the results
# are collected. Set the flag to True and re-run this cell only when the
# in-flight tasks should be dropped.
CANCEL_PENDING_FUTURES = False

if CANCEL_PENDING_FUTURES:
    client.cancel(futures)

In [6]:
# Block until the submitted tasks finish and collect their return values.
# calculate_and_save_metrics has no return statement, so each entry is None;
# the useful output is the set of .nc files written under output_dir.
results = client.gather(futures)

In [7]:
# Close the Dask client created above now that the batch run is finished.
client.close()