# Comparison using kerchunk / datatree

In [1]:
from datatree import DataTree
from datatree import map_over_subtree
import xarray as xr
import pandas as pd
import dask
import xclim
import thermofeel as tf
import numpy as np 
from distributed import Client
from fsspec.implementations.reference import ReferenceFileSystem

from utils import wbgt, load_elev, adjust_pressure

In [2]:
# Load the reference catalog from S3 and keep only its first ten rows.
cat_df = pd.read_csv(
    "s3://carbonplan-share/nasa-nex-reference/reference_catalog_nested.csv"
).head(10)

# Build a plain dict keyed by the "ID" column: index by ID, transpose so the
# IDs become column labels, then take the first row of the transposed frame
# as a {ID: value} record.
# NOTE(review): presumably the first non-ID column holds the reference URL
# (the values are passed to load_ref_ds as `url`) — confirm against the CSV.
catalog = (
    cat_df
    .set_index("ID")
    .T
    .to_dict("records")[0]
)

In [3]:
def load_ref_ds(url: str, gcm_scenario: str):
    """Open one reference file as a chunked xarray dataset.

    Parameters
    ----------
    url : str
        Location of the reference passed to ``ReferenceFileSystem``.
    gcm_scenario : str
        Label stored on the returned dataset as ``attrs['gcm_scenario']``.

    Returns
    -------
    The dataset opened with the zarr engine (unconsolidated metadata,
    chunks of 300 along ``time``), tagged with ``gcm_scenario``.
    """
    # NOTE(review): anonymous access is only configured for the target
    # (reference) side; no remote_options are given — confirm the remote
    # data is readable with the default credentials.
    reference_fs = ReferenceFileSystem(
        url,
        remote_protocol="s3",
        target_protocol="s3",
        target_options={"anon": True},
        lazy=True,
    )
    store = reference_fs.get_mapper()

    dataset = xr.open_dataset(
        store,
        engine="zarr",
        backend_kwargs={"consolidated": False},
        chunks={"time": 300},
    )

    # Tag the dataset so later steps can tell which catalog entry it is.
    dataset.attrs['gcm_scenario'] = gcm_scenario
    return dataset


# One delayed load per catalog entry, keyed by the catalog ID; nothing is
# opened until the tasks are computed.
tasks = {
    scenario_id: dask.delayed(load_ref_ds)(ref_url, scenario_id)
    for scenario_id, ref_url in catalog.items()
}

In [4]:
# Start a Dask distributed client with eight workers.
client = Client(n_workers=8)
# Show the client summary as the cell output.
client


Perhaps you already have a cluster running?
Hosting the HTTP server on port 52614 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:52614/status,

0,1
Dashboard: http://127.0.0.1:52614/status,Workers: 8
Total threads: 8,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:52615,Workers: 8
Dashboard: http://127.0.0.1:52614/status,Total threads: 8
Started: Just now,Total memory: 16.00 GiB

0,1
Comm: tcp://127.0.0.1:52637,Total threads: 1
Dashboard: http://127.0.0.1:52638/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52618,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-4fx8d3yk,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-4fx8d3yk

0,1
Comm: tcp://127.0.0.1:52646,Total threads: 1
Dashboard: http://127.0.0.1:52648/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52619,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-ld09vwid,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-ld09vwid

0,1
Comm: tcp://127.0.0.1:52640,Total threads: 1
Dashboard: http://127.0.0.1:52641/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52620,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-iggijuhw,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-iggijuhw

0,1
Comm: tcp://127.0.0.1:52643,Total threads: 1
Dashboard: http://127.0.0.1:52644/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52621,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-xvuf8_y7,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-xvuf8_y7

0,1
Comm: tcp://127.0.0.1:52647,Total threads: 1
Dashboard: http://127.0.0.1:52651/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52622,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-5h4kii13,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-5h4kii13

0,1
Comm: tcp://127.0.0.1:52650,Total threads: 1
Dashboard: http://127.0.0.1:52654/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52623,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-5o3lzf9o,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-5o3lzf9o

0,1
Comm: tcp://127.0.0.1:52656,Total threads: 1
Dashboard: http://127.0.0.1:52659/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52624,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-4un81l06,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-4un81l06

0,1
Comm: tcp://127.0.0.1:52653,Total threads: 1
Dashboard: http://127.0.0.1:52657/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:52625,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-3bcvkjyc,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-3bcvkjyc


In [5]:
# Execute the delayed load tasks. dask.compute returns a tuple, so the
# computed {ID: dataset} mapping is its first element.
catalog_computed = dask.compute(tasks)

In [6]:
# Build a DataTree from the computed mapping (element 0 of the tuple
# returned by dask.compute).
dt = DataTree.from_dict(catalog_computed[0])

In [7]:
# Pull a single node out of the tree as a plain dataset.
# NOTE(review): presumably for inspection only — `ds` is not used by any
# later visible cell (calc_wbgt works on its own local `ds`).
ds = dt['ACCESS-CM2/historical'].to_dataset()

In [18]:

# Load elevation data with the project helper; calc_wbgt reads this
# module-level `elev` when adjusting pressure.
elev = load_elev()

@map_over_subtree
def calc_wbgt(ds):
    """Derive a WBGT field from one dataset of the tree.

    Steps, in order: keep the first ten time steps; compute an
    elevation-adjusted pressure from ``tas`` and the module-level ``elev``
    via ``adjust_pressure``; derive relative humidity with xclim from
    ``tasmax``, ``huss`` and that pressure; compute WBT and BGT with
    thermofeel; combine them with ``wbgt``.

    Parameters
    ----------
    ds
        Dataset supplied by ``map_over_subtree``. The code reads the
        variables ``tas``, ``tasmax`` and ``huss``, indexes the ``time``
        dimension, and reads ``attrs['gcm_scenario']``.

    Returns
    -------
    A dataset holding only ``WBGT`` (``units`` attribute set to ``degC``),
    after being passed through ``dask.optimize``.
    """
    # Work on the first ten time steps only.
    work = ds.isel(time=slice(0, 10))

    # calculate elevation-adjusted pressure
    adjusted = xr.apply_ufunc(
        adjust_pressure, work["tas"], elev, dask="allowed"
    )
    work["ps"] = adjusted.rename({"elevation": "ps"})["ps"]
    work["ps"].attrs["units"] = "Pa"

    work["hurs"] = xclim.indices.relative_humidity(
        tas=work["tasmax"], huss=work["huss"], ps=work["ps"]
    )
    # NOTE(review): tasmax metadata is cleared only after the xclim call,
    # presumably because xclim needs the units while the later arithmetic
    # should not carry them — confirm.
    work["tasmax"].attrs = {}

    # windspeed assumption of 0.5 m/s (approximating shaded/indoor
    # conditions); written as tas - tas + 0.5, presumably so the constant
    # takes the shape of `tas`.
    work["sfcWind"] = (work["tas"] - work["tas"]) + 0.5

    tasmax_celsius = work["tasmax"] - 273.15
    work["WBT"] = tf.thermofeel.calculate_wbt(tasmax_celsius, work["hurs"])
    # tasmax is passed for both temperature arguments of calculate_bgt.
    work["BGT"] = tf.thermofeel.calculate_bgt(
        work["tasmax"], work["tasmax"], work["sfcWind"]
    )

    work["WBGT"] = wbgt(work["WBT"], work["BGT"], tasmax_celsius)
    work["WBGT"].attrs["units"] = "degC"

    # Keep only the result variable and let dask optimize its graph.
    result = dask.optimize(work[["WBGT"]])[0]

    # Destination for the write below, which is currently disabled.
    output = (
        f"s3://carbonplan-scratch/TEMP_NASA_NEX/wbgt-shade-"
        f"gridded/years/{result.attrs['gcm_scenario']}.zarr"
    )
    # result.to_zarr(output, consolidated=True, mode="w")
    return result

# Apply calc_wbgt to the tree; the map_over_subtree decorator handles the
# traversal. NOTE(review): the name suggests the result is still lazy
# (dask-backed) at this point — confirm.
delayed_obs = calc_wbgt(dt)


In [19]:
# Trigger computation of the WBGT tree. The saved output of this cell shows
# the run was stopped with a KeyboardInterrupt.
delayed_obs.compute()



KeyboardInterrupt: 

In [None]:
# Timing note: about 38 seconds for a single time slice across 10 GCMs.