In [None]:
from datetime import datetime

from pyrte_rrtmgp.external_data_helpers import download_dyamond2_data

# Download the DYAMOND2 files for 2020-02-01 09:00 into the local
# "GEOS-DYAMOND2-data" directory, with compute_gas_optics switched off.
downloaded_files = download_dyamond2_data(
    datetime(year=2020, month=2, day=1, hour=9),
    data_dir="GEOS-DYAMOND2-data",
    compute_gas_optics=False,
)

In [None]:
import xarray as xr

from pyrte_rrtmgp.constants import HELMERT1

# Vertical window kept from the source files: level indices
# min_lev_ice .. nlev - 1 (the slice end is exclusive).
# NOTE(review): the name suggests levels below index 78 are skipped for
# cloud-related reasons -- confirm the rationale for these two values.
nlev = 181
min_lev_ice = 78

# Open all hourly 3-D files lazily as a single dataset, leaving out the
# grid-description variables that are not used below.
atmosphere = xr.open_mfdataset(
    "GEOS-DYAMOND2-data/*inst_01hr_3d_*.nc4",
    drop_variables=[
        "anchor",
        "cubed_sphere",
        "orientation",
        "contacts",
        "corner_lats",
        "corner_lons",
    ],
)
# Restrict to the vertical window, use "layer" as the vertical dimension name,
# and tile horizontally while keeping each column's layers in one chunk.
atmosphere = atmosphere.isel(lev=slice(min_lev_ice, nlev))
atmosphere = atmosphere.rename({"lev": "layer"})
atmosphere = atmosphere.chunk({"Xdim": 200, "Ydim": 200, "nf": 1, "layer": -1})

# Derived cloud inputs: LWP/IWP converted to g/m2 and rel/rei to microns.
# NOTE(review): HELMERT1 is presumably the gravitational acceleration used to
# turn DELP * mixing ratio into a column mass -- confirm in pyrte_rrtmgp.constants.
atmosphere = atmosphere.assign(
    lwp=atmosphere["DELP"] * atmosphere["QL"] * 1000 / HELMERT1,
    iwp=atmosphere["DELP"] * atmosphere["QI"] * 1000 / HELMERT1,
    rel=atmosphere["RL"] * 1e6,
    rei=atmosphere["RI"] * 1e6,
)

needed_vars = ["lwp", "iwp", "rel", "rei"]

# Write only the derived variables, each zlib-compressed at level 5.
atmosphere[needed_vars].to_netcdf(
    "atmosphere.nc",
    encoding={name: {"zlib": True, "complevel": 5} for name in needed_vars},
)

In [None]:
import xarray as xr
from dask.distributed import Client, LocalCluster
import dask


from pyrte_rrtmgp import rrtmgp_cloud_optics
from pyrte_rrtmgp.data_types import CloudOpticsFiles


# Re-open the preprocessed cloud properties and split them into Dask chunks of
# 72 x 72 columns per face, keeping the full "layer" axis in a single chunk.
atmosphere = xr.open_dataset("atmosphere.nc").chunk(
    {"Xdim": 72, "Ydim": 72, "nf": 1, "layer": -1}
)

# Run the computation on a local Dask cluster: 10 single-threaded workers with
# a 5GB memory limit each. Both the cluster and the client are context-managed
# so the worker processes are shut down when the cell finishes or fails;
# without this, every re-run of the cell would leave another cluster running.
with LocalCluster(
    n_workers=10,
    threads_per_worker=1,
    memory_limit="5GB",
) as cluster, Client(cluster) as client:
    print(f"Dask dashboard available at: {client.dashboard_link}")

    # Load the longwave, band-resolved cloud optics data.
    # NOTE(review): if Dask reports rechunking or large-graph warnings, the
    # chunking of cloud_optics_lw along "nband" may be involved; rechunking it
    # with {"nband": -1} or loading it into memory up front are untested
    # suggestions -- confirm against the pyrte_rrtmgp API before relying on them.
    cloud_optics_lw = rrtmgp_cloud_optics.load_cloud_optics(
        cloud_optics_file=CloudOpticsFiles.LW_BND
    )

    # Build the cloud-optics computation for the "absorption" problem type
    # without attaching the result to the input dataset.
    # NOTE(review): the original notes describe this call as deferred until the
    # write below -- confirm that compute_cloud_optics stays lazy on Dask input.
    tau_delayed = cloud_optics_lw.compute_cloud_optics(
        atmosphere,
        problem_type="absorption",
        add_to_input=False,
    )

    # Collapse the band and layer dimensions into one float32 value per column.
    tau_result_unoptimized = tau_delayed.sum(
        dim=["bnd", "layer"], skipna=True
    ).astype("float32")

    # Optimize the task graph of the reduced result before executing it.
    tau_result_optimized = dask.optimize(tau_result_unoptimized)[0]

    # Trigger the computation and write the result, overwriting any existing
    # store and consolidating the Zarr metadata.
    tau_result_optimized.to_zarr(
        "clouds_data.zarr", compute=True, mode="w", consolidated=True
    )



Dask dashboard available at: http://127.0.0.1:8787/status


This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
