In [1]:
from pathlib import Path

import numpy as np
import xarray as xr
from datatree import DataTree, open_datatree
from xarray import open_zarr

## Data Creation


In [2]:
# Build a 3 x 18 integer array with labelled dimensions and split it into dask
# chunks of at most 2 along "label" and 4 along "z". Neither chunk size divides
# its dimension evenly, so the trailing chunk on each axis is smaller.
xda = xr.DataArray(
    data=np.arange(3 * 18).reshape((3, 18)),
    coords=dict(label=["a", "b", "c"], z=list(range(18))),
).chunk({"label": 2, "z": 4})
xda

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 432 B 64 B Shape (3, 18) (2, 4) Dask graph 10 chunks in 1 graph layer Data type int64 numpy.ndarray",18  3,

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,int64 numpy.ndarray,int64 numpy.ndarray


In [3]:
# Keep the per-dimension chunk sizes of the in-memory array as the reference
# that the chunk sizes of re-opened data are compared against further down.
expected_chunksizes = xda.chunksizes
expected_chunksizes

Frozen({'label': (2, 1), 'z': (4, 4, 4, 4, 2)})

In [4]:
# Wrap the chunked array in a dataset under the variable name "my_xda" and use
# that dataset as the data of a tree; show the array as seen through the tree.
xdt = DataTree(xr.Dataset(data_vars={"my_xda": xda}))
xdt["my_xda"]

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 432 B 64 B Shape (3, 18) (2, 4) Dask graph 10 chunks in 1 graph layer Data type int64 numpy.ndarray",18  3,

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,int64 numpy.ndarray,int64 numpy.ndarray


## Data Writing


In [5]:
# Target store, relative to the current working directory.
zarr_path = Path("../generated/my_array.zarr")

# mode="w" overwrites a store left over from a previous run. With the default
# mode the write refuses an already existing target, so the notebook would
# fail on every "Restart Kernel -> Run All" after the first one.
xdt.to_zarr(zarr_path, mode="w")

## Data Reading


### Using xarray's `open_zarr` 

See https://docs.xarray.dev/en/stable/generated/xarray.open_zarr.html

Documentation version: `stable`, as of the time of writing (11 Nov 2023).

The resulting behaviours serve as the reference for the `open_datatree` comparisons below.

#### No `chunks` kwarg

Stored chunks are used.

In [6]:
# Reference case: open the store with plain xarray, without passing `chunks`.
open_zarr(zarr_path)["my_xda"]

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 432 B 64 B Shape (3, 18) (2, 4) Dask graph 10 chunks in 2 graph layers Data type int64 numpy.ndarray",18  3,

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


#### With `chunks='auto'`

Stored chunks are used.

Same behaviour as with no `chunks` kwarg.

In [7]:
# Reference case: same store, opened with plain xarray and chunks="auto".
# Docs: https://docs.xarray.dev/en/stable/generated/xarray.open_zarr.html
open_zarr(zarr_path, chunks="auto")["my_xda"]

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 432 B 64 B Shape (3, 18) (2, 4) Dask graph 10 chunks in 2 graph layers Data type int64 numpy.ndarray",18  3,

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


### Using datatree's `open_datatree` with `engine='zarr'`

See https://xarray-datatree.readthedocs.io/en/latest/generated/datatree.open_datatree.html#datatree.open_datatree

Documentation version:

```
Datatree 0.0.14.dev5+g433f78d.d20231110 documentation
```

#### No `chunks` kwarg

No chunking performed.

(NOK)

(!) Differs from xarray's reference behaviour, where the stored chunks are used.

In [8]:
# Open the same store through datatree's zarr engine, without passing `chunks`.
open_datatree(zarr_path, engine="zarr")["my_xda"]

#### With `chunks='auto'`

A single chunk with the same shape as the data is used.
This makes the chunking useless, as that one chunk spans the whole array.

(NOK)

(!) Differs from xarray's reference behaviour, where the stored chunks are used.


In [9]:
# Bind the result to its own name so the source array `xda` built in the data
# creation section is not overwritten; re-running earlier cells then still
# wraps the original array.
xda_auto = open_datatree(zarr_path, engine="zarr", chunks="auto").my_xda

# The chunk sizes recorded before writing are not restored ...
assert xda_auto.chunksizes != expected_chunksizes, "stored chunks were restored"
# ... instead every dimension is covered by one chunk of its full length.
assert xda_auto.chunksizes == {"label": (3,), "z": (18,)}, xda_auto.chunksizes
xda_auto

Unnamed: 0,Array,Chunk
Bytes,432 B,432 B
Shape,"(3, 18)","(3, 18)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 432 B 432 B Shape (3, 18) (3, 18) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",18  3,

Unnamed: 0,Array,Chunk
Bytes,432 B,432 B
Shape,"(3, 18)","(3, 18)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


#### With `chunks` kwarg (same as stored chunks)

(OK)

No warning is shown because the given chunks match the stored chunks.

In [10]:
# Bind the result to its own name so the source array `xda` built in the data
# creation section is not overwritten.
xda_same_chunks = open_datatree(
    zarr_path,
    engine="zarr",
    chunks={"label": 2, "z": 4},  # same chunk sizes the array was written with
).my_xda

# Check the "(OK)" claim: the chunk sizes equal those recorded before writing.
assert xda_same_chunks.chunksizes == expected_chunksizes, xda_same_chunks.chunksizes
xda_same_chunks

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 432 B 64 B Shape (3, 18) (2, 4) Dask graph 10 chunks in 2 graph layers Data type int64 numpy.ndarray",18  3,

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


#### With `chunks` kwarg (differing from the stored chunks)

(OK)

A warning is shown:

> UserWarning: The specified chunks separate the stored chunks along dimension "z" starting at index 5. This could degrade performance. Instead, consider rechunking after loading.

In [11]:
# Request chunks that do not line up with the ones the array was written with:
# 999 exceeds the length of "label", and 5 along "z" cuts across chunks of 4.
open_datatree(
    zarr_path,
    engine="zarr",
    chunks={"label": 999, "z": 5},
)["my_xda"]



Unnamed: 0,Array,Chunk
Bytes,432 B,120 B
Shape,"(3, 18)","(3, 5)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 432 B 120 B Shape (3, 18) (3, 5) Dask graph 4 chunks in 2 graph layers Data type int64 numpy.ndarray",18  3,

Unnamed: 0,Array,Chunk
Bytes,432 B,120 B
Shape,"(3, 18)","(3, 5)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


#### With `chunks={}` kwarg

(OK)

According to [this comment on issue 276](https://github.com/xarray-contrib/datatree/issues/276#issuecomment-1852153509), `chunks={}` should give the expected behaviour:
the on-disk chunking is used.

In [12]:
# Open with an empty `chunks` mapping. The result is kept in `xda` because the
# assertion in the following cell reads it from that name.
xda = open_datatree(zarr_path, engine="zarr", chunks={})["my_xda"]
xda

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 432 B 64 B Shape (3, 18) (2, 4) Dask graph 10 chunks in 2 graph layers Data type int64 numpy.ndarray",18  3,

Unnamed: 0,Array,Chunk
Bytes,432 B,64 B
Shape,"(3, 18)","(2, 4)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


In [13]:
# The array opened with chunks={} must carry exactly the chunk sizes that were
# recorded from the in-memory array before it was written to the store.
assert xda.chunksizes == expected_chunksizes