In [None]:
from datetime import datetime

from pyrte_rrtmgp.external_data_helpers import download_dyamond2_data

# Fetch the GEOS-DYAMOND2 files for 2020-02-01 09:00 into a local directory.
# Gas optics are not requested here (compute_gas_optics=False).
downloaded_files = download_dyamond2_data(
    datetime(year=2020, month=2, day=1, hour=9),
    data_dir="GEOS-DYAMOND2-data",
    compute_gas_optics=False,
)

### Reorganize the data to improve the processing time

In [None]:
import xarray as xr

from pyrte_rrtmgp.constants import HELMERT1

nlev = 181
min_lev_ice = 78

# Variables in the input files that are dropped at load time.
unused_grid_vars = [
    "anchor",
    "cubed_sphere",
    "orientation",
    "contacts",
    "corner_lats",
    "corner_lons",
]

# Open every hourly 3D file lazily as a single global dataset.
global_fields = xr.open_mfdataset(
    "GEOS-DYAMOND2-data/*inst_01hr_3d_*.nc4",
    drop_variables=unused_grid_vars,
)

# Keep levels min_lev_ice .. nlev - 1 (the slice end is exclusive), expose the
# vertical axis as "layer", and keep all layers together in a single chunk.
atmosphere = (
    global_fields.isel(lev=slice(min_lev_ice, nlev))
    .rename({"lev": "layer"})
    .chunk({"Xdim": 2880, "Ydim": 72, "nf": 1, "layer": -1})
)

# Derived cloud inputs: LWP/IWP converted to g/m2, rel/rei converted to microns.
atmosphere = atmosphere.assign(
    lwp=(atmosphere["DELP"] * atmosphere["QL"]) * 1000 / HELMERT1,
    iwp=(atmosphere["DELP"] * atmosphere["QI"]) * 1000 / HELMERT1,
    rel=atmosphere["RL"] * 1e6,
    rei=atmosphere["RI"] * 1e6,
)

needed_vars = ["lwp", "iwp", "rel", "rei"]

# Write only the derived variables, each zlib-compressed at level 5.
compression = {name: {"zlib": True, "complevel": 5} for name in needed_vars}
atmosphere[needed_vars].to_netcdf("atmosphere.nc", encoding=compression)

### Compute the cloud optics

To avoid memory issues, please use dask version 2025.3.0 or higher. That release includes a [fix](https://docs.dask.org/en/stable/changelog.html#v2025-3-0) for `apply_ufunc` that solves the memory issues.

In [None]:
import numpy as np
import xarray as xr
from dask.diagnostics import ProgressBar

from pyrte_rrtmgp.data_types import CloudOpticsFiles

# Chunk layout used for the block-wise computation. Core dimensions such as
# "layer" are deliberately left out so they are not split across chunks.
# Tune these sizes to the memory available on your machine.
chunk_sizes = {"Ydim": 27, "nf": 1, "Xdim": -1, "time": 1}

# Reopen the reorganized file and apply the chunk layout.
atmosphere = xr.open_dataset("atmosphere.nc").chunk(chunk_sizes)

# Per-chunk worker handed to xr.map_blocks
def process_chunk(atm_chunk):
    """Compute cloud optics for one chunk and collapse the result.

    The longwave band cloud-optics file (``CloudOpticsFiles.LW_BND``) is loaded
    on every call, cloud optics are computed for ``atm_chunk`` with
    ``problem_type="absorption"`` and ``add_to_input=False``, and the result is
    summed over the ``bnd`` and ``layer`` dimensions, skipping NaNs.

    Parameters
    ----------
    atm_chunk
        One block of the chunked atmosphere dataset, as passed in by
        ``xr.map_blocks``.

    Returns
    -------
    The cloud-optics result summed over ``bnd`` and ``layer``.
    NOTE(review): expected to match the ``tau`` template given to
    ``map_blocks`` -- confirm against ``compute_cloud_optics``.
    """
    # Imported inside the function so it is available under multiprocessing
    from pyrte_rrtmgp import rrtmgp_cloud_optics

    lw_optics = rrtmgp_cloud_optics.load_cloud_optics(
        cloud_optics_file=CloudOpticsFiles.LW_BND
    )

    chunk_optics = lw_optics.compute_cloud_optics(
        atm_chunk,
        problem_type="absorption",
        add_to_input=False,
    )

    # Reduce over both the band and the vertical dimension
    return chunk_optics.sum(dim=["bnd", "layer"], skipna=True)

# NaN-filled array shaped like lwp with the layer dimension removed
lwp_single_layer = atmosphere["lwp"].isel(layer=0, drop=True)
dask_data = xr.full_like(lwp_single_layer, np.nan)

# Template DataArray that reuses the dims and coords of the NaN-filled array
template_da = xr.DataArray(
    dask_data,
    coords=dask_data.coords,
    dims=dask_data.dims,
)

# Template dataset describing the output of the aggregation.
# Only "tau" is declared; the "ssa" and "g" entries are left disabled:
#     "ssa": template_da.copy(),
#     "g": template_da.copy(),
template_agg = xr.Dataset(data_vars={"tau": template_da.copy()})

# --- Apply process_chunk to every block of the chunked input dataset ---
tau_agg = xr.map_blocks(
    func=process_chunk,
    obj=atmosphere,
    template=template_agg,
)

# Run the computation with the multiprocessing scheduler, showing progress
with ProgressBar():
    result = tau_agg.compute(scheduler="multiprocessing")