In [1]:
from pathlib import Path

import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
import pdal
from dask.distributed import Client

from dask.distributed import Client

from forest_structure_tools.metrics_v2 import forest_structure_metrics

In [None]:
# Directory layout: every path below is derived from the pipeline's output root.
data_dir = Path('../data/outputs')
sites_dir = data_dir.joinpath("sites")
sites_lidar_dir = sites_dir.joinpath("lidar")      # per-site <site_id>.copc.laz inputs
sites_metrics_dir = sites_dir.joinpath("metrics")
# Destination for the NetCDF files written by this notebook.
output_dir = sites_metrics_dir.joinpath("x1-y1-z1", "net_cdf")

# Create the destination (and any missing parents) up front; a no-op when it already exists.
output_dir.mkdir(parents=True, exist_ok=True)

def read_site(site_id):
    """Read one site's COPC point cloud through a PDAL pipeline.

    The file ``<sites_lidar_dir>/<site_id>.copc.laz`` is read and passed through
    three filters, in this order:

    1. ``filters.range`` with the limit ``Classification[0:5]``;
    2. ``filters.ferry`` copying ``Z`` into ``Altitude`` and then
       ``HeightAboveGround`` into ``Z``;
    3. ``filters.assign`` which, for points whose (new) ``Z`` is negative,
       sets ``Classification`` to 2 and then sets ``Z`` to 0.

    Prints the number of points returned by the pipeline execution.

    Args:
        site_id: Site identifier; used to build the input file name.

    Returns:
        The first array exposed by the executed pipeline (``pipeline.arrays[0]``).
    """
    copc_file = sites_lidar_dir / f"{site_id}.copc.laz"

    # Stages are named individually and chained below in the same order.
    reader = pdal.Reader(str(copc_file), type="readers.copc")
    class_filter = pdal.Filter(type="filters.range", limits="Classification[0:5]")
    height_swap = pdal.Filter(type='filters.ferry', dimensions="Z => Altitude, HeightAboveGround => Z")
    # The classification rule is listed before the Z reset so it is evaluated first,
    # while Z is still negative.
    negative_fix = pdal.Filter(
        type="filters.assign",
        value=["Classification = 2 WHERE Z < 0", "Z = 0 WHERE Z < 0"],
    )

    pipeline = reader | class_filter | height_swap | negative_fix
    n_points = pipeline.execute()
    print(f"Read in {n_points:,d} points")
    return pipeline.arrays[0]

def calculate_metrics(site_id: str, xy_bin_size = 1, z_bin_size = 1):
    """Compute forest structure metrics for a single site.

    Loads the site's points with ``read_site`` and hands them to
    ``forest_structure_metrics`` together with the requested bin sizes.

    Args:
        site_id: Site identifier, forwarded to ``read_site``.
        xy_bin_size: Forwarded as ``xy_bin_size`` to ``forest_structure_metrics``.
        z_bin_size: Forwarded as ``z_bin_size`` to ``forest_structure_metrics``.

    Returns:
        The object returned by ``forest_structure_metrics``, with
        ``attrs["site_id"]`` set to ``site_id``.
        (Presumably an xarray object, since callers use ``.to_netcdf`` on it — confirm.)
    """
    bin_sizes = {"xy_bin_size": xy_bin_size, "z_bin_size": z_bin_size}
    site_metrics = forest_structure_metrics(read_site(site_id), **bin_sizes)

    # Tag the result so it can be traced back to its site.
    site_metrics.attrs["site_id"] = site_id
    return site_metrics

def calculate_and_save_metrics(site_id: str):
    """Compute a site's metrics with the default bin sizes and write them to NetCDF.

    The file is written to ``<output_dir>/<site_id>.nc``. Nothing is returned.

    Args:
        site_id: Site identifier; also used as the output file stem.
    """
    destination = output_dir / f"{site_id}.nc"
    calculate_metrics(site_id).to_netcdf(destination)


In [3]:
# Smoke test: compute metrics for a single site with the default bin sizes
# and display the result as the cell output.
metrics = calculate_metrics("EPO_Y_10")
metrics

Read in 3,037,385 points


In [6]:
# Exercise the save path on the same site. This re-reads the points and
# recomputes the metrics; it does not reuse `metrics` from the previous cell.
calculate_and_save_metrics("EPO_Y_10")

Read in 3,037,385 points


In [5]:
# Load the site table and pull out the 'id' column as a plain list;
# these ids drive the batch run below.
sites_gdf = gpd.read_file(sites_dir / "sites.geojson")
site_ids = sites_gdf['id'].to_list()

In [6]:
# Start a Dask client with 2 workers x 2 threads each.
# The trailing expression renders the client summary (dashboard link, workers, memory).
client = Client(n_workers=2, threads_per_worker=2)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 2
Total threads: 4,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:56539,Workers: 0
Dashboard: http://127.0.0.1:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:56546,Total threads: 2
Dashboard: http://127.0.0.1:56549/status,Memory: 8.00 GiB
Nanny: tcp://127.0.0.1:56542,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-2px52rb0,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-2px52rb0

0,1
Comm: tcp://127.0.0.1:56547,Total threads: 2
Dashboard: http://127.0.0.1:56548/status,Memory: 8.00 GiB
Nanny: tcp://127.0.0.1:56544,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-hre_l7uf,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-hre_l7uf


In [7]:
# Submit one task per site. Passing the id list as `key` names each task after
# its site; `retries=4` lets a failing task be retried up to four times.
futures = client.map(calculate_and_save_metrics, site_ids, key=site_ids, retries=4)

In [None]:
# Escape hatch: uncomment and run to cancel the submitted site tasks.
# client.cancel(futures)

In [8]:
# Block until all site tasks have finished. calculate_and_save_metrics has no
# return statement, so the gathered values are None; the useful output is the
# .nc files written to output_dir.
results = client.gather(futures)

Read in 2,472,162 points
Read in 2,825,310 points
Read in 1,757,776 points
Read in 1,356,276 points
Read in 1,783,411 points
Read in 1,339,062 points
Read in 3,037,385 points
Read in 1,230,908 points
Read in 3,091,743 points
Read in 1,897,784 points
Read in 2,037,109 points
Read in 2,101,354 points
Read in 1,624,396 points
Read in 3,178,109 points
Read in 4,431,451 points
Read in 2,144,975 points
Read in 1,719,966 points
Read in 2,251,938 points
Read in 999,737 points
Read in 999,895 points
Read in 1,847,662 points
Read in 1,038,449 points
Read in 1,762,503 points
Read in 1,979,075 points
Read in 1,086,956 points
Read in 1,822,132 points
Read in 2,755,508 points
Read in 897,568 points
Read in 2,878,163 points
Read in 2,134,222 points
Read in 1,756,331 points
Read in 1,411,970 points
Read in 2,728,484 points
Read in 1,731,502 points
Read in 1,646,614 points
Read in 1,675,394 points
Read in 1,850,166 points
Read in 1,053,959 points
Read in 1,309,716 points
Read in 1,619,967 points
Read i

In [9]:
# Close the Dask client once the batch run has been gathered.
client.close()