In [1]:
import xarray
import numpy
from datetime import datetime

In [25]:
# 2D fixture: a (y, x) grid of float values saved as a compressed NetCDF file.
# Values climb linearly from 1 to 1000 across the 1000 x 2000 cells (row-major).
arr = numpy.linspace(start=1, stop=1000, num=1000 * 2000).reshape(1000, 2000)

# Axis coordinates use float steps; each arange has to produce exactly as many
# samples as the matching array axis (2000 for x, 1000 for y).
x_coords = numpy.arange(-170, 170, 0.17)
y_coords = numpy.arange(-80, 80, 0.16)

data = xarray.DataArray(
    arr,
    coords={"x": x_coords, "y": y_coords},
    dims=("y", "x"),
    attrs={"valid_min": arr.min(), "valid_max": arr.max(), "fill_value": 0},
)
ds = data.to_dataset(name="dataset")

# zlib compression at complevel 9 for the single data variable.
netcdf_encoding = {"dataset": {"zlib": True, "complevel": 9}}
ds.to_netcdf("dataset_2d.nc", encoding=netcdf_encoding)

In [26]:
# 3D fixture: a (time, y, x) cube of float values saved as a compressed NetCDF file.
# Values climb linearly from 1 to 1000 across both 1000 x 2000 time slices.
arr = numpy.linspace(start=1, stop=1000, num=1000 * 2000 * 2).reshape(2, 1000, 2000)

# Spatial axes use float steps; each arange has to produce exactly as many
# samples as the matching array axis (2000 for x, 1000 for y).
x_coords = numpy.arange(-170, 170, 0.17)
y_coords = numpy.arange(-80, 80, 0.16)
# One timestamp per slice along the leading axis.
time_coords = [datetime(2022, 1, 1), datetime(2023, 1, 1)]

data = xarray.DataArray(
    arr,
    coords={"x": x_coords, "y": y_coords, "time": time_coords},
    dims=("time", "y", "x"),
    attrs={"valid_min": arr.min(), "valid_max": arr.max(), "fill_value": 0},
)
ds = data.to_dataset(name="dataset")

# zlib compression at complevel 9 for the single data variable.
netcdf_encoding = {"dataset": {"zlib": True, "complevel": 9}}
ds.to_netcdf("dataset_3d.nc", encoding=netcdf_encoding)

In [27]:
# 4D fixture: a (time, z, y, x) array of float values saved as a compressed NetCDF file.
# Same values as a two-slice cube, with an extra length-1 "z" axis after "time".
arr = numpy.linspace(start=1, stop=1000, num=1000 * 2000 * 2).reshape(2, 1, 1000, 2000)

# Spatial axes use float steps; each arange has to produce exactly as many
# samples as the matching array axis (2000 for x, 1000 for y).
x_coords = numpy.arange(-170, 170, 0.17)
y_coords = numpy.arange(-80, 80, 0.16)
z_coords = [0]  # single level
# One timestamp per slice along the leading axis.
time_coords = [datetime(2022, 1, 1), datetime(2023, 1, 1)]

data = xarray.DataArray(
    arr,
    coords={"x": x_coords, "y": y_coords, "z": z_coords, "time": time_coords},
    dims=("time", "z", "y", "x"),
    attrs={"valid_min": arr.min(), "valid_max": arr.max(), "fill_value": 0},
)
ds = data.to_dataset(name="dataset")

# zlib compression at complevel 9 for the single data variable.
netcdf_encoding = {"dataset": {"zlib": True, "complevel": 9}}
ds.to_netcdf("dataset_4d.nc", encoding=netcdf_encoding)

In [None]:
# 3D Zarr fixture: a (time, y, x) cube of float values written to a Zarr store.
# Here the values start at 0 (not 1) and no "fill_value" attribute is attached.
arr = numpy.linspace(start=0, stop=1000, num=1000 * 2000 * 2).reshape(2, 1000, 2000)

# Spatial axes use float steps; each arange has to produce exactly as many
# samples as the matching array axis (2000 for x, 1000 for y).
x_coords = numpy.arange(-170, 170, 0.17)
y_coords = numpy.arange(-80, 80, 0.16)
# One timestamp per slice along the leading axis.
time_coords = [datetime(2022, 1, 1), datetime(2023, 1, 1)]

data = xarray.DataArray(
    arr,
    coords={"x": x_coords, "y": y_coords, "time": time_coords},
    dims=("time", "y", "x"),
    attrs={"valid_min": arr.min(), "valid_max": arr.max()},
)
# Sanity check on the dimension order before writing.
assert data.dims == ("time", "y", "x")

ds = data.to_dataset(name="dataset")

# mode="w" is passed so re-running the cell rewrites the store.
ds.to_zarr("dataset_3d.zarr", mode="w")

In [2]:
# Zarr Pyramid
def create_dataset(decimation: int = 0):
    """Build one pyramid level as a single-variable Dataset.

    The full-resolution grid is 1000 rows by 2000 columns; both sizes are
    floor-divided by ``decimation`` (0 is treated as 1, i.e. no decimation).
    Every cell holds the constant ``1 + decimation``, so levels can be told
    apart by their values.

    Args:
        decimation: Integer divisor applied to the grid size.

    Returns:
        Dataset with one variable named ``"dataset"`` on dims ``("y", "x")``,
        carrying ``valid_min``, ``valid_max`` and ``fill_value`` attributes.
    """
    x_min, x_max = -170, 170
    y_min, y_max = -80, 80

    divisor = decimation if decimation else 1  # 0 would divide by zero below
    height, width = 1000 // divisor, 2000 // divisor

    # Cell size along each axis: the same extent split into fewer cells.
    x_res = abs(x_min - x_max) / width
    y_res = abs(y_min - y_max) / height

    # Constant-valued grid built from uint8 zeros; the value encodes the level.
    arr = numpy.zeros((height, width), dtype="uint8") + 1 + decimation

    # NOTE(review): the float-step aranges must yield exactly `width` / `height`
    # samples to match `arr`; this holds for the levels written by this notebook.
    data = xarray.DataArray(
        arr,
        coords={
            "x": numpy.arange(x_min, x_max, x_res),
            "y": numpy.arange(y_min, y_max, y_res),
        },
        dims=("y", "x"),
        attrs={"valid_min": arr.min(), "valid_max": arr.max(), "fill_value": 0},
    )
    return data.to_dataset(name="dataset")


# Write each pyramid level (decimation 0, 2, 4) into its own group of a single
# Zarr store, using the level index as the group name ("0", "1", "2").
# The index is converted with str() because `to_zarr` documents `group` as a
# string path; an int only works when the Zarr backend happens to coerce it.
# mode="w" is xarray's create-or-overwrite mode, so the cell can be re-run.
for ix, dec in enumerate([0, 2, 4]):
    ds = create_dataset(dec)
    ds.to_zarr(store="pyramid.zarr", mode="w", group=str(ix))

In [9]:
import json
import fsspec
from kerchunk.hdf import SingleHdf5ToZarr

# Build a kerchunk reference file describing the chunks of dataset_3d.nc.
# The input is a local file, so no remote-storage options (such as anonymous
# access) are passed to fsspec.
with fsspec.open("dataset_3d.nc", mode="rb") as infile:
    h5chunks = SingleHdf5ToZarr(infile, "dataset_3d.nc", inline_threshold=100)
    # Translate while the input is still open, and before the output file is
    # created, so a failed translation does not leave an empty reference.json.
    references = h5chunks.translate()

with open("reference.json", "w") as f:
    json.dump(references, f)