# Can we save two different `xr.Datasets` to seperate zarr groups?

In [2]:
import iris
import os
import xarray as xr
import numpy as np

import crd_utils as crd

In [3]:
# Variables for loading data
filepath = '/data/cssp-china/mini-dataset-24-01-19/20CR/daily'
files = sorted(os.listdir(filepath))

In [3]:
STASH_TEMP = 'm01s16i203'
STASH_PRECIP = 'm01s05i216'
STASH_UWIND = 'm01s03i225'
STASH_VWIND = 'm01s03i226'

In [18]:
cubes = iris.load(os.path.join(filepath, files[1]))

In [20]:
grid_0_cubes = [cubes.extract(iris.AttributeConstraint(STASH=STASH_TEMP))[0], 
                cubes.extract(iris.AttributeConstraint(STASH=STASH_PRECIP))[0]]

grid_0_cubes

[<iris 'Cube' of air_temperature / (K) (time: 10; pressure: 14; grid_latitude: 219; grid_longitude: 286)>,
 <iris 'Cube' of precipitation_flux / (kg m-2 s-1) (time: 10; grid_latitude: 219; grid_longitude: 286)>]

In [21]:
grid_1_cubes = [cubes.extract(iris.AttributeConstraint(STASH=STASH_UWIND))[0],
               cubes.extract(iris.AttributeConstraint(STASH=STASH_VWIND))[0]]

grid_1_cubes

[<iris 'Cube' of x_wind / (m s-1) (time: 10; grid_latitude: 218; grid_longitude: 286)>,
 <iris 'Cube' of y_wind / (m s-1) (time: 10; grid_latitude: 218; grid_longitude: 286)>]

In [22]:
grid0_ds = xr.merge([xr.DataArray.from_iris(cube) for cube in grid_0_cubes])
grid1_ds = xr.merge([xr.DataArray.from_iris(cube) for cube in grid_1_cubes])
display(grid0_ds)
display(grid1_ds)

<xarray.Dataset>
Dimensions:                  (grid_latitude: 219, grid_longitude: 286, pressure: 14, time: 10)
Coordinates:
  * time                     (time) datetime64[ns] 1851-01-05T12:00:00 ... 1851-01-14T12:00:00
  * pressure                 (pressure) float32 10.0 50.0 100.0 ... 925.0 1000.0
  * grid_latitude            (grid_latitude) float32 22.88 22.66 ... -25.08
  * grid_longitude           (grid_longitude) float32 323.48 323.7 ... 386.18002
    forecast_reference_time  datetime64[ns] 1849-12-01
    forecast_period          (time) timedelta64[ns] 400 days 12:00:00 ... 409 days 12:00:00
Data variables:
    air_temperature          (time, pressure, grid_latitude, grid_longitude) float32 dask.array<chunksize=(1, 1, 219, 286), meta=np.ndarray>
    precipitation_flux       (time, grid_latitude, grid_longitude) float32 dask.array<chunksize=(1, 219, 286), meta=np.ndarray>

<xarray.Dataset>
Dimensions:                  (grid_latitude: 218, grid_longitude: 286, time: 10)
Coordinates:
  * time                     (time) datetime64[ns] 1851-01-05T12:00:00 ... 1851-01-14T12:00:00
  * grid_latitude            (grid_latitude) float32 22.77 ... -24.969997
  * grid_longitude           (grid_longitude) float32 323.59003 ... 386.29004
    forecast_reference_time  datetime64[ns] 1849-12-01
    height                   float64 10.0
    forecast_period          (time) timedelta64[ns] 400 days 12:00:00 ... 409 days 12:00:00
Data variables:
    x_wind                   (time, grid_latitude, grid_longitude) float32 dask.array<chunksize=(1, 218, 286), meta=np.ndarray>
    y_wind                   (time, grid_latitude, grid_longitude) float32 dask.array<chunksize=(1, 218, 286), meta=np.ndarray>

In [25]:
def ds_to_zarr(dataset, zarr_store, chunks={'time':10, 'grid_latitude':219, 'grid_longitude':286}, append_dim='time', **kwargs):
    # Write dataset to new zarr store
    # OR append dataset to an existing zarr store
    dataset = dataset.chunk(chunks=chunks)
    if os.path.isdir(zarr_store):
        dataset.to_zarr(zarr_store, consolidated=True, append_dim=append_dim, **kwargs)
        print(f'Appended cube to {zarr_store}')
    else:
        dataset.to_zarr(zarr_store, mode='w', consolidated=True, **kwargs)
        print(f'Written cube to {zarr_store}')

Having updated `ds_to_zarr` to handle `**kwargs`, let's test that it works

In [26]:
zarr_store = '../zarr_groups_test'

ds_to_zarr(grid0_ds, zarr_store, chunks={'time': 10}, group='grid0')

Written cube to ../zarr_groups_test


In [28]:
xr.open_zarr(zarr_store, 'grid0')

<xarray.Dataset>
Dimensions:                  (grid_latitude: 219, grid_longitude: 286, pressure: 14, time: 10)
Coordinates:
    forecast_period          (time) timedelta64[ns] dask.array<chunksize=(10,), meta=np.ndarray>
    forecast_reference_time  datetime64[ns] ...
  * grid_latitude            (grid_latitude) float32 22.88 22.66 ... -25.08
  * grid_longitude           (grid_longitude) float32 323.48 323.7 ... 386.18002
  * pressure                 (pressure) float32 10.0 50.0 100.0 ... 925.0 1000.0
  * time                     (time) datetime64[ns] 1851-01-05T12:00:00 ... 1851-01-14T12:00:00
Data variables:
    air_temperature          (time, pressure, grid_latitude, grid_longitude) float32 dask.array<chunksize=(10, 1, 219, 286), meta=np.ndarray>
    precipitation_flux       (time, grid_latitude, grid_longitude) float32 dask.array<chunksize=(10, 219, 286), meta=np.ndarray>

Hooray! Then let's convert these datasets

In [29]:
zarr_store = '../zarr_groups'

ds_to_zarr(grid0_ds, zarr_store, chunks={'time': 10}, group='grid0')
ds_to_zarr(grid1_ds, zarr_store, chunks={'time': 10}, group='grid1')

Written cube to ../zarr_groups
Appended cube to ../zarr_groups


In [35]:
grid0_z = xr.open_zarr(zarr_store, group='grid0')
grid1_z = xr.open_zarr(zarr_store, group='grid1')

display(grid0_z)
display('\n \n')
display(grid1_z)

<xarray.Dataset>
Dimensions:                  (grid_latitude: 219, grid_longitude: 286, pressure: 14, time: 10)
Coordinates:
    forecast_period          (time) timedelta64[ns] dask.array<chunksize=(10,), meta=np.ndarray>
    forecast_reference_time  datetime64[ns] ...
  * grid_latitude            (grid_latitude) float32 22.88 22.66 ... -25.08
  * grid_longitude           (grid_longitude) float32 323.48 323.7 ... 386.18002
  * pressure                 (pressure) float32 10.0 50.0 100.0 ... 925.0 1000.0
  * time                     (time) datetime64[ns] 1851-01-05T12:00:00 ... 1851-01-14T12:00:00
Data variables:
    air_temperature          (time, pressure, grid_latitude, grid_longitude) float32 dask.array<chunksize=(10, 1, 219, 286), meta=np.ndarray>
    precipitation_flux       (time, grid_latitude, grid_longitude) float32 dask.array<chunksize=(10, 219, 286), meta=np.ndarray>

'\n \n'

<xarray.Dataset>
Dimensions:                  (grid_latitude: 218, grid_longitude: 286, time: 10)
Coordinates:
    forecast_period          (time) timedelta64[ns] dask.array<chunksize=(10,), meta=np.ndarray>
    forecast_reference_time  datetime64[ns] ...
  * grid_latitude            (grid_latitude) float32 22.77 ... -24.969997
  * grid_longitude           (grid_longitude) float32 323.59003 ... 386.29004
    height                   float64 ...
  * time                     (time) datetime64[ns] 1851-01-05T12:00:00 ... 1851-01-14T12:00:00
Data variables:
    x_wind                   (time, grid_latitude, grid_longitude) float32 dask.array<chunksize=(10, 218, 286), meta=np.ndarray>
    y_wind                   (time, grid_latitude, grid_longitude) float32 dask.array<chunksize=(10, 218, 286), meta=np.ndarray>

In [51]:
np.array_equal(grid0_z.time.data, grid1_z.time.data)

True

In [50]:
np.array_equal(grid0_z.grid_longitude.data, grid1_z.grid_longitude.data)

False

In [53]:
np.array_equal(grid0_z.forecast_reference_time.data, grid1_z.forecast_reference_time.data)

True

# CONCLUSION: Saving data to Zarr groups with Xarray is easy