In [None]:
from pathlib import Path
import xarray as xr
import fsspec

import zarr
from zarr.codecs import BloscCodec

from swed_17.nb_helpers import start_cluster

# Opt in to xarray's new default keyword arguments for combine operations.
# NOTE(review): presumably this changes how open_mfdataset merges the daily
# files further down — confirm against the xarray release notes.
# The trailing semicolon keeps the options object out of the cell output.
xr.set_options(use_new_combine_kwarg_defaults=True);

In [None]:
# Root directory under which this notebook writes one Zarr store per water year.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
ZARR_ARCHIVE = Path("/nvm9/data/swann/zarr_archive/")

In [None]:
# Start the compute cluster through the project helper and keep the handle so
# it can be shut down once the archive has been written.
# NOTE(review): presumably a Dask cluster with 16 workers at 12 GB each;
# the meaning of `local=False` lives in swed_17.nb_helpers — confirm there.
cluster = start_cluster(n_workers=16, memory_limit="12GB", local=False)

In [None]:
# Water year to convert; it selects the remote `WY<year>` folder to read from
# and is embedded in the name of the Zarr store that gets written.
water_year = 2024

In [None]:
# HTTPS filesystem handle used below to glob and open the remote NetCDF files.
fs = fsspec.filesystem("https")

# Keyword arguments forwarded to every `fs.open` call: read through a block
# cache whose blocks are 10 MiB each (multiples of a MB are recommended).
fsspec_caching = dict(
    cache_type="blockcache",
    block_size=10 * 1024**2,  # bytes per block
)

In [None]:
# Glob pattern matching every NetCDF file published for the selected water year.
remote_pattern = f"https://climate.arizona.edu/data/UA_SWE/DailyData_800m/WY{water_year}/UA_SWE_Depth_800m_v1_*.nc"

# Open each match as a file-like object that reads through the block cache.
# The handles are left open on purpose: they are consumed when the dataset is
# assembled and written later in the notebook.
nc_files = [
    fs.open(remote_path, **fsspec_caching)
    for remote_path in fs.glob(remote_pattern)
]

In [None]:
def _keep_swe_and_crs(single_file_ds):
    """Reduce one file's dataset to its ``SWE`` and ``crs`` variables."""
    return single_file_ds[["SWE", "crs"]]


# Combine all opened files into one dataset, trimming each file to the
# variables of interest before they are merged.
ds = xr.open_mfdataset(
    nc_files,
    engine="h5netcdf",
    preprocess=_keep_swe_and_crs,
)

In [None]:
# One Blosc codec (zlib, level 4, bit-shuffle) shared by every array in the store.
compressor = BloscCodec(
    cname="zlib",
    clevel=4,
    shuffle=zarr.codecs.BloscShuffle.bitshuffle,
)

# Per-variable encoding passed to `to_zarr`: data variables first, then
# coordinates, each compressed with the codec above.
encoding = {
    name: {"compressors": compressor}
    for name in (*ds.data_vars, *ds.coords)
}

In [None]:
# Destination store for this water year inside the archive directory.
archive_path = ZARR_ARCHIVE / f"wy{water_year}_ua_swe.zarr"

# Write the combined dataset; mode "w" replaces any store already at that path.
ds.to_zarr(archive_path.as_posix(), mode="w", encoding=encoding)

In [None]:
# Release the cluster started earlier now that the Zarr write has finished.
cluster.shutdown()

## Delete a variable 

In [None]:
# Drop the "grid_mapping" array from the Zarr store and refresh the
# consolidated metadata so readers no longer see the removed variable.
#
# The store path is built explicitly here because no notebook-level name holds
# it: the `file` variable used when opening the remote NetCDF files is local
# to that list comprehension and does not exist in this cell.
# NOTE(review): assumes the target is the archive written above for
# `water_year` — confirm before running against another store.
zarr_store = (ZARR_ARCHIVE / f"wy{water_year}_ua_swe.zarr").as_posix()

# `ds` is rebound from the xarray dataset to the opened Zarr group.
ds = zarr.open(zarr_store, mode="a")

# Skip the delete when the variable is already gone so the cell can be re-run.
if "grid_mapping" in ds:
    del ds["grid_mapping"]

zarr.consolidate_metadata(zarr_store)