In [None]:
from dask.array import from_zarr

# Location of the Zarr store in an S3 bucket.
zar_file = "s3://coiled-datasets/synthetic-data/array-random-390KB.zarr"

# Request anonymous access so the cell also runs on a machine that has no AWS
# credentials configured; `storage_options` is forwarded to the storage backend.
# NOTE(review): assumes the coiled-datasets bucket permits anonymous reads - confirm.
x = from_zarr(zar_file, storage_options={"anon": True})

print(x)
# dask.array<from-zarr, shape=(100, 100, 5), dtype=float64, chunksize=(10, 10, 1), chunktype=numpy.ndarray>

In [None]:
from dask.array import from_zarr

# Open a single array that lives inside a Zarr group; `component` is the
# path of that array within the store.
store_path = "group.zarr"
array_path = "group_foo/sample_array"
x = from_zarr(store_path, component=array_path)
print(x)
# dask.array<from-zarr, shape=(10, 10), dtype=int32, chunksize=(2, 2), chunktype=numpy.ndarray>

# Build the matrix product of the array with its transpose and report how
# many tasks its graph contains.
product = x @ x.T
n_tasks = len(product.dask)
print(f"Number of tasks: {n_tasks}")
# Number of tasks: 376

In [None]:
from dask.array import from_zarr

# Load the same array, this time asking Dask for (2, 10) chunks via `chunks`.
x = from_zarr("group.zarr", component="group_foo/sample_array", chunks=(2, 10))
print(x)
# dask.array<from-zarr, shape=(10, 10), dtype=int32, chunksize=(2, 10), chunktype=numpy.ndarray>

# Same matrix product as with the (2, 2) chunking; report the task count
# of the resulting graph.
product = x @ x.T
print(f"Number of tasks: {len(product.dask)}")
# Number of tasks: 61

In [None]:
from dask.array import from_zarr

# Select one array out of a Zarr group by giving its path inside the store.
store_path = "group.zarr"
array_path = "group_foo/sample_array"
x = from_zarr(store_path, component=array_path)
print(x)
# dask.array<from-zarr, shape=(10, 10), dtype=int32, chunksize=(2, 2), chunktype=numpy.ndarray>

In [None]:
from dask.array import from_zarr

# Load with default settings and inspect the high-level task graph;
# the printed graph lists two layers.
x = from_zarr("sample.zarr")
graph = x.dask
print(graph)
# HighLevelGraph with 2 layers.
# <dask.highlevelgraph.HighLevelGraph object at 0x1877b8f40>
#  0. original-from-zarr-aa89d568fa5d6a465403e4a4457083a9
#  1. from-zarr-aa89d568fa5d6a465403e4a4457083a9

In [None]:
from dask.array import from_zarr

# Load with `inline_array=True` and inspect the high-level task graph;
# the printed graph lists a single layer.
x = from_zarr("sample.zarr", inline_array=True)
graph = x.dask
print(graph)
# HighLevelGraph with 1 layers.
# <dask.highlevelgraph.HighLevelGraph object at 0x1877ecac0>
#  0. from-zarr-dd5aae068dabbf1fea826d7abea7b6f8

In [None]:
import zarr
from dask.array import from_zarr, stack
from matplotlib.pyplot import imshow

# Collect the names of the members stored under the "temperature" group.
zarr_file = "weather_group.zarr"
temperature_group = zarr.open(zarr_file, mode="r")["temperature"]
list_components = list(temperature_group)

# Open every member as its own Dask array.
arrays = [from_zarr(zarr_file, component=f"temperature/{c}") for c in list_components]
print(len(arrays))
# 31

# Combine the per-member arrays along a new leading axis (axis 0 is the default).
temperature = stack(arrays)

print(temperature)
# dask.array<stack, shape=(31, 5760, 11520), dtype=float64, chunksize=(1, 500, 500), chunktype=numpy.ndarray>

# Average over the stacked axis, leaving one value per grid location.
result = temperature.mean(axis=0)

print(result)
# dask.array<mean_agg-aggregate, shape=(5760, 11520), dtype=float64, chunksize=(500, 500), chunktype=numpy.ndarray>

# Render the averaged field as an image with a diverging colormap.
imshow(result, cmap="RdBu_r")