In [1]:
from pathlib import Path

import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
import pdal
from dask.distributed import Client

from dask.distributed import Client

from forest_structure_tools.metrics_v2 import forest_structure_metrics

In [2]:
# Root of all generated outputs, relative to the notebook's working directory.
data_dir = Path("../data/outputs")

# Per-site inputs (lidar) and derived products (metrics) live under `sites/`.
sites_dir = data_dir.joinpath("sites")
sites_lidar_dir = sites_dir.joinpath("lidar")
sites_metrics_dir = sites_dir.joinpath("metrics")

# NetCDF results written by this notebook.
# NOTE(review): "x1-y1-z1" presumably mirrors the default bin sizes of
# `calculate_metrics` (xy_bin_size=1, z_bin_size=1) -- keep them in sync.
output_dir = sites_metrics_dir.joinpath("x1-y1-z1", "net_cdf")

# Create the full output directory tree up front; does nothing if it already exists.
output_dir.mkdir(exist_ok=True, parents=True)

def read_site(site_id):
    """Load one site's point cloud through a PDAL pipeline.

    Reads ``<sites_lidar_dir>/<site_id>.copc.laz`` and applies, in order:

    1. a range filter with the limit ``Classification[0:5]``;
    2. a ferry that copies ``Z`` into ``Altitude`` and ``HeightAboveGround``
       into ``Z``;
    3. two assignments for points whose ``Z`` is negative: ``Classification``
       is set to 2, then ``Z`` is set to 0.

    Prints the number of points the pipeline processed and returns the first
    array held by the executed pipeline.
    """
    source_path = sites_lidar_dir / f"{site_id}.copc.laz"

    # Build each stage separately so the pipeline reads as a list of steps.
    reader = pdal.Reader(str(source_path), type="readers.copc")
    class_filter = pdal.Filter(type="filters.range", limits="Classification[0:5]")
    height_ferry = pdal.Filter(
        type="filters.ferry",
        dimensions="Z => Altitude, HeightAboveGround => Z",
    )
    negative_fix = pdal.Filter(
        type="filters.assign",
        value=[
            "Classification = 2 WHERE Z < 0",
            "Z = 0 WHERE Z < 0",
        ],
    )

    pipeline = reader | class_filter | height_ferry | negative_fix
    n_points = pipeline.execute()
    print(f"Read in {n_points:,d} points")
    return pipeline.arrays[0]

def calculate_metrics(
    site_id: str,
    xy_bin_size = 1,
    z_bin_size = 1,
):
    """Compute forest structure metrics for a single site.

    Args:
        site_id: Identifier of the site; passed to ``read_site`` to load the
            site's points.
        xy_bin_size: Forwarded unchanged to ``forest_structure_metrics``.
        z_bin_size: Forwarded unchanged to ``forest_structure_metrics``.

    Returns:
        Whatever ``forest_structure_metrics`` returns, with ``site_id``
        recorded in its ``attrs`` mapping.
        NOTE(review): presumably an ``xarray.Dataset`` -- confirm.
    """
    site_metrics = forest_structure_metrics(
        read_site(site_id),
        xy_bin_size=xy_bin_size,
        z_bin_size=z_bin_size,
    )

    # Tag the result with its origin so it stays identifiable on its own.
    site_metrics.attrs["site_id"] = site_id
    return site_metrics

def calculate_and_save_metrics(site_id: str):
    """Compute one site's metrics and write them to ``<output_dir>/<site_id>.nc``.

    Uses the default bin sizes of ``calculate_metrics``. Returns nothing; the
    NetCDF file is the only product.
    """
    target_path = output_dir / f"{site_id}.nc"
    calculate_metrics(site_id).to_netcdf(target_path)


In [3]:
# Compute metrics for a single site and display the result as a quick check.
metrics = calculate_metrics(site_id="EPO_Y_10")
metrics

Read in 3,037,385 points


In [6]:
%%time

# Time one full compute-and-write round trip for a single site.
calculate_and_save_metrics(site_id="EPO_Y_10")

Read in 3,037,385 points
CPU times: user 1min 16s, sys: 2.4 s, total: 1min 18s
Wall time: 1min 16s


In [9]:
# Load the site index and keep only the first ten site ids for the batch run.
sites_gdf = gpd.read_file(sites_dir / "sites.geojson")
site_ids = sites_gdf["id"].head(10).to_list()

In [5]:
# Start a Dask client with 2 workers x 2 threads each and show its summary.
client = Client(
    n_workers=2,
    threads_per_worker=2,
)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 2
Total threads: 4,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:55917,Workers: 0
Dashboard: http://127.0.0.1:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:55924,Total threads: 2
Dashboard: http://127.0.0.1:55926/status,Memory: 8.00 GiB
Nanny: tcp://127.0.0.1:55920,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-axl3y73v,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-axl3y73v

0,1
Comm: tcp://127.0.0.1:55925,Total threads: 2
Dashboard: http://127.0.0.1:55928/status,Memory: 8.00 GiB
Nanny: tcp://127.0.0.1:55922,
Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-8e8i02zv,Local directory: /var/folders/37/j4yld2bd7pz4_0p7b249nvv40000gn/T/dask-scratch-space/worker-8e8i02zv


In [10]:
%%time

# Submit one task per site, using the site id as the task key and allowing
# up to 4 retries per task.
futures = client.map(
    calculate_and_save_metrics,
    site_ids,
    key=site_ids,
    retries=4,
)

# Wait for every task to finish. `calculate_and_save_metrics` returns nothing,
# so `results` serves only as a completion barrier.
results = client.gather(futures)

Read in 2,075,108 points
Read in 2,283,721 points
Read in 2,365,219 points
Read in 3,572,055 points
Read in 2,472,162 points
Read in 2,825,310 points
Read in 1,757,776 points
Read in 1,783,411 points
Read in 1,897,784 points
Read in 3,037,385 points
CPU times: user 14.2 s, sys: 3.3 s, total: 17.5 s
Wall time: 12min 14s


In [9]:
# Close the Dask client now that the batch run has finished.
client.close()