# Comparison using open_mfdataset

In [1]:
import os

import dask
import pandas as pd
import numpy as np
import thermofeel as tf
import xarray as xr
import xclim
import s3fs
from dask.distributed import Client

from utils import wbgt, load_elev, adjust_pressure

# Names of the GCMs that the projection loop further down iterates over; each
# name is also used as a path component when NEX-GDDP-CMIP6 file paths are built.
gcm_list = [
    "ACCESS-CM2",
    "ACCESS-ESM1-5",
    "BCC-CSM2-MR",
    "CanESM5",
    "CMCC-CM2-SR5",
    "CMCC-ESM2",
    "CNRM-CM6-1",
    "CNRM-ESM2-1",
    "EC-Earth3-Veg-LR",
    "EC-Earth3",
    "FGOALS-g3",
    "GFDL-CM4",
    "GFDL-ESM4",
    "GISS-E2-1-G",
    "HadGEM3-GC31-LL",
    "INM-CM4-8",
    "INM-CM5-0",
    "KACE-1-0-G",
    "KIOST-ESM",
    "MIROC-ES2L",
    "MPI-ESM1-2-HR",
    "MPI-ESM1-2-LR",
    "MRI-ESM2-0",
    "NorESM2-LM",
    "NorESM2-MM",
    "UKESM1-0-LL",
]

# Subset of gcm_list flagged (by its name) as using non-standard calendars.
# NOTE(review): no cell visible in this notebook references this list — confirm
# whether it is still needed here.
gcms_with_nonstandard_calendars_list = [
    "BCC-CSM2-MR",
    "CanESM5",
    "CMCC-CM2-SR5",
    "CMCC-ESM2",
    "FGOALS-g3",
    "GFDL-CM4",
    "GFDL-ESM4",
    "GISS-E2-1-G",
    "HadGEM3-GC31-LL",
    "INM-CM4-8",
    "INM-CM5-0",
    "KACE-1-0-G",
    "KIOST-ESM",
    "NorESM2-LM",
    "NorESM2-MM",
    "UKESM1-0-LL",
]

# Set the USE_PYGEOS environment variable to "0" for this process.
# NOTE(review): this runs after the imports at the top of the cell, so a library
# that reads the variable at import time would not see it — confirm the ordering.
os.environ["USE_PYGEOS"] = "0"

In [2]:
## loading
# Read the catalog of NEX-GDDP-CMIP6 file URLs, split every URL on "/", discard
# the first four path components and label the five that remain.
df = pd.read_csv(
    "s3://carbonplan-climate-impacts/extreme-heat/v1.0/inputs/nex-gddp-cmip6-files.csv"
)
nasa_nex_runs_df = (
    pd.DataFrame([url.split("/") for url in df[" fileURL"].values])
    .drop(columns=[0, 1, 2, 3])
    .set_axis(
        ["GCM", "scenario", "ensemble_member", "variable", "file_name"],
        axis=1,
    )
)

In [3]:
def find_nasanex_filename(gcm, scenario):
    """
    Look up the ensemble member and grid code for a GCM/scenario pair.

    The first ``tasmax`` entry for the pair in the module-level
    ``nasa_nex_runs_df`` catalog is taken as a template, and the two fields are
    parsed out of its underscore-delimited file name. A pair with no ``tasmax``
    entry raises, which is how GCMs lacking tasmax get filtered out.

    Parameters
    ----------
    gcm : str
        Model name as it appears in the catalog's ``GCM`` column.
    scenario : str
        Scenario name as it appears in the catalog's ``scenario`` column.

    Returns
    -------
    tuple of (str, str)
        ``(ensemble_member, grid_code)`` taken from the template file name.

    Raises
    ------
    IndexError
        If the catalog holds no ``tasmax`` file for this GCM/scenario pair.
    ValueError
        If the template file name does not split into exactly seven
        underscore-separated fields.
    """
    matches = nasa_nex_runs_df[
        (nasa_nex_runs_df["GCM"] == gcm)
        & (nasa_nex_runs_df["scenario"] == scenario)
        & (nasa_nex_runs_df["variable"] == "tasmax")
    ]["file_name"]
    if matches.empty:
        # Same exception type that ``.iloc[0]`` would raise on an empty
        # selection, but with a message naming the missing combination.
        raise IndexError(
            f"no tasmax file listed for GCM {gcm!r} and scenario {scenario!r}"
        )
    template_filename = matches.iloc[0]
    # Field order: variable, timestep, gcm, scenario, ensemble member, grid
    # code, "<year>.nc"; only the ensemble member and grid code are needed.
    (
        _variable,
        _timestep,
        _gcm,
        _scenario,
        ensemble_member,
        grid_code,
        _yearnc,
    ) = template_filename.split("_")
    return ensemble_member, grid_code


##
def load_nasanex(scenario, gcm, variables, years, chunk_dict=None):
    """
    Open NEX-GDDP-CMIP6 variables from S3 and gather them into one dataset.

    For every requested variable, one netCDF file per year is opened
    anonymously from the ``nex-gddp-cmip6`` bucket and the files are combined
    with ``xr.open_mfdataset``. Each variable after the first has its ``time``
    coordinate overwritten with the first variable's time values before it is
    added. When ``chunk_dict`` is given, the result is rechunked with it.
    """
    s3 = s3fs.S3FileSystem(anon=True, default_fill_cache=False)
    ensemble_member, grid_code = find_nasanex_filename(gcm, scenario)

    combined = xr.Dataset()
    for position, var in enumerate(variables):
        yearly_files = [
            s3.open(
                f"nex-gddp-cmip6/NEX-GDDP-CMIP6/{gcm}/{scenario}/"
                f"{ensemble_member}/{var}/{var}_day_{gcm}_{scenario}"
                f"_{ensemble_member}_{grid_code}_{year}.nc"
            )
            for year in years
        ]
        opened = xr.open_mfdataset(yearly_files, engine="h5netcdf")
        if position > 0:
            # Force later variables onto the first variable's time axis.
            opened["time"] = combined[variables[0]]["time"].values
        combined[var] = opened[var]

    return combined if chunk_dict is None else combined.chunk(chunk_dict)

In [4]:
# Start a dask.distributed client with 8 workers; the summary displayed by the
# next cell reports a LocalCluster with one thread per worker.
client = Client(n_workers=8)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 56698 instead


In [5]:
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:56698/status,

0,1
Dashboard: http://127.0.0.1:56698/status,Workers: 8
Total threads: 8,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:56699,Workers: 8
Dashboard: http://127.0.0.1:56698/status,Total threads: 8
Started: Just now,Total memory: 16.00 GiB

0,1
Comm: tcp://127.0.0.1:56724,Total threads: 1
Dashboard: http://127.0.0.1:56725/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:56702,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-fxjq7noa,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-fxjq7noa

0,1
Comm: tcp://127.0.0.1:56719,Total threads: 1
Dashboard: http://127.0.0.1:56722/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:56703,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-2x3ajqaf,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-2x3ajqaf

0,1
Comm: tcp://127.0.0.1:56730,Total threads: 1
Dashboard: http://127.0.0.1:56737/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:56704,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-cqoiy48c,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-cqoiy48c

0,1
Comm: tcp://127.0.0.1:56727,Total threads: 1
Dashboard: http://127.0.0.1:56728/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:56705,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-1nim7fjf,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-1nim7fjf

0,1
Comm: tcp://127.0.0.1:56718,Total threads: 1
Dashboard: http://127.0.0.1:56720/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:56706,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-vz96e2tt,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-vz96e2tt

0,1
Comm: tcp://127.0.0.1:56731,Total threads: 1
Dashboard: http://127.0.0.1:56733/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:56707,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-ggb7uc2s,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-ggb7uc2s

0,1
Comm: tcp://127.0.0.1:56739,Total threads: 1
Dashboard: http://127.0.0.1:56740/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:56708,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-otycef_2,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-otycef_2

0,1
Comm: tcp://127.0.0.1:56732,Total threads: 1
Dashboard: http://127.0.0.1:56735/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:56709,
Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-hro4ekzm,Local directory: /var/folders/mb/7d7yq_4j2qgdfm_j3j4tsyl40000gn/T/dask-scratch-space/worker-hro4ekzm


In [18]:
# Years to process for each scenario. np.arange(1985, 1986) yields only 1985 (and
# likewise only 2015 for ssp245), so each scenario is limited to a single year.
scenario_years = {"historical": np.arange(1985, 1986), "ssp245": np.arange(2015, 2016)}
# NOTE(review): this override is commented out, yet the recorded output of the
# projection loop lists only ACCESS-CM2 — presumably it was active when the
# cells were run. Confirm before a fresh Restart & Run All, which would iterate
# over every model in gcm_list.
# gcm_list = ['ACCESS-CM2']

In [16]:
# NOTE(review): the output recorded for this cell does not match the list
# defined in the first cell, and its execution count is out of sequence — the
# value shown comes from earlier kernel state, not a top-to-bottom run.
gcm_list

['ACCESS-CM2-historical', 'ACCESS-CM2-ssp245']

In [7]:
# Load elevation data through the project helper; it is passed to
# adjust_pressure in the projection loop.
# NOTE(review): presumably a Dataset holding an `elevation` variable, since the
# loop renames `elevation` to `ps` on the result — confirm against utils.load_elev.
elev = load_elev()

In [19]:
# Compute WBGT for every GCM / scenario / year combination and write each
# year's result to its own Zarr store. Set the flag to False to skip the run.
generate_wbgt_projections = True
# Input variables read from NEX-GDDP-CMIP6 for each year.
variables = ["tasmax", "huss", "tas"]
if generate_wbgt_projections:
    for gcm in gcm_list:
        for scenario, years in scenario_years.items():
            id_string = f"{gcm}-{scenario}"
            print(id_string)
            for year in years:
                print(year)
                # One store per year; mode="w" below overwrites an existing one.
                output = (
                    f"s3://carbonplan-scratch/TEMP_NASA_NEX/wbgt-shade-"
                    f"gridded/years/{gcm}/{id_string}-{year}.zarr"
                )
                ds = load_nasanex(
                    gcm=gcm, scenario=scenario, variables=variables, years=[year]
                )
                # Keep only the first time step of the year.
                # NOTE(review): presumably a shortcut to keep this comparison
                # run quick — confirm before using this for full projections.
                ds = ds.isel(time=0)
                # calculate elevation-adjusted pressure
                # NOTE(review): the rename implies the result is a Dataset
                # carrying an `elevation` variable — confirm.
                ds["ps"] = xr.apply_ufunc(
                    adjust_pressure, ds["tas"], elev, dask="allowed"
                ).rename({"elevation": "ps"})["ps"]
                ds["ps"].attrs["units"] = "Pa"
                # Relative humidity is evaluated at tasmax (not tas), using the
                # adjusted pressure computed above.
                ds["hurs"] = xclim.indices.relative_humidity(
                    tas=ds["tasmax"], huss=ds["huss"], ps=ds["ps"]
                )
                # Drop tasmax's attributes only after the relative-humidity
                # call above has consumed the variable.
                # NOTE(review): presumably so stale unit metadata does not
                # follow the arithmetic below — confirm.
                ds["tasmax"].attrs = {}

                # windspeed assumption of 0.5 m/s (approximating shaded/indoor
                # conditions)
                # tas - tas gives a zero field on the data's own grid, so the
                # constant takes that shape (missing values stay missing).
                ds["sfcWind"] = (ds["tas"] - ds["tas"]) + 0.5
                # NOTE(review): the 273.15 offsets assume tasmax is in Kelvin
                # and that these calls expect degrees Celsius — confirm.
                ds["WBT"] = tf.thermofeel.calculate_wbt(
                    ds["tasmax"] - 273.15, ds["hurs"]
                )

                # tasmax is passed for both of the first two arguments.
                # NOTE(review): presumably air temperature and mean radiant
                # temperature, i.e. no extra radiant load in shade — confirm.
                ds["BGT"] = tf.thermofeel.calculate_bgt(
                    ds["tasmax"], ds["tasmax"], ds["sfcWind"]
                )
                ds["WBGT"] = wbgt(ds["WBT"], ds["BGT"], ds["tasmax"] - 273.15)
                ds["WBGT"].attrs["units"] = "degC"
                # Only WBGT is written out; the intermediates are dropped.
                ds = ds[["WBGT"]]
                ds = dask.optimize(ds)[0]
                # Build the write with compute=False, then run it explicitly.
                t = ds.to_zarr(output, consolidated=True, mode="w", compute=False)
                t.compute()

ACCESS-CM2-historical
1985


Task exception was never retrieved
future: <Task finished name='Task-643793' coro=<Client._gather.<locals>.wait() done, defined at /Users/nrhagen/micromamba/envs/nasa-nex/lib/python3.9/site-packages/distributed/client.py:2218> exception=AllExit()>
Traceback (most recent call last):
  File "/Users/nrhagen/micromamba/envs/nasa-nex/lib/python3.9/site-packages/distributed/client.py", line 2227, in wait
    raise AllExit()
distributed.client.AllExit


ACCESS-CM2-ssp245
2015


In [20]:
# Shut down the dask client started earlier now that the writes have finished.
client.shutdown()



In [None]:
# 15 min 37 secs

In [26]:
scenario_years["historical"]

array([1985])

In [24]:
# NOTE(review): the recorded output is a single-model list, so gcm_list was
# reassigned somewhere not shown in these cells (the only visible override is
# commented out) — a fresh Restart & Run All would show the full list instead.
gcm_list

['ACCESS-CM2']