# Can we append a different phenomenon onto a Zarr store with the same dims?

In [1]:
import iris
import os
import xarray as xr
import numpy as np

In [2]:
def pp_to_cube(filename, filepath, constraints=None):
    """Load exactly one Iris cube from a .pp file.

    Parameters
    ----------
    filename : str
        Name of the .pp file, relative to ``filepath``.
    filepath : str
        Directory that contains ``filename``.
    constraints : dict, optional
        Attribute name/value pairs forwarded as keyword arguments to
        ``iris.AttributeConstraint`` (e.g. ``{'STASH': 'm01s16i203'}``).
        ``None`` (the default) or an empty dict forwards no attributes.

    Returns
    -------
    The single cube that ``iris.load`` produced for the file and constraint.

    Raises
    ------
    ValueError
        If the load does not yield exactly one cube.
    """
    # ``None`` replaces the shared mutable ``{}`` default; calls that omit
    # ``constraints`` behave exactly as before.
    attribute_constraint = iris.AttributeConstraint(**(constraints or {}))
    full_path = os.path.join(filepath, filename)
    cubes = list(iris.load(full_path, attribute_constraint))

    # Tuple-unpacking the result used to fail with an opaque
    # "too many/not enough values to unpack"; keep the ValueError type but
    # say which file and constraints were involved.
    if len(cubes) != 1:
        raise ValueError(
            f'Expected exactly one cube from {full_path} with constraints '
            f'{constraints!r}, but found {len(cubes)}'
        )

    cube = cubes[0]
    print(f'Cube loaded from {filename}')
    return cube

In [3]:
def cube_to_xr(cube):
    """Convert an Iris cube into an Xarray Dataset.

    The cube is first turned into a DataArray via ``xr.DataArray.from_iris``
    and that DataArray is then promoted to a Dataset with ``to_dataset()``.
    """
    data_array = xr.DataArray.from_iris(cube)
    dataset = data_array.to_dataset()
    return dataset

In [4]:
def xr_to_zarr(dataset, zarr_store, chunks={'time':10, 'grid_latitude':219, 'grid_longitude':286}, append_dim='time'):
    """Write a dataset to a zarr store, appending if the store already exists.

    The dataset is rechunked with ``chunks`` first. When ``zarr_store`` is
    not an existing directory the dataset is written with ``mode='w'``;
    otherwise it is appended to the store along ``append_dim``. Metadata is
    consolidated in both cases, and a message reports which path was taken.
    """
    rechunked = dataset.chunk(chunks=chunks)
    store_exists = os.path.isdir(zarr_store)

    if not store_exists:
        # No directory at this path yet: create a fresh store.
        rechunked.to_zarr(zarr_store, mode='w', consolidated=True)
        print(f'Written cube to {zarr_store}')
        return

    # The store directory already exists: extend it along append_dim.
    rechunked.to_zarr(zarr_store, consolidated=True, append_dim=append_dim)
    print(f'Appended cube to {zarr_store}')

In [5]:
def datetimes_from_cube(cube):
    """Return the raw data of the ``time`` coordinate of an Iris cube.

    The cube is converted to an Xarray DataArray and the ``.data`` of its
    ``time`` attribute is returned.
    """
    data_array = xr.DataArray.from_iris(cube)
    time_coord = data_array.time
    return time_coord.data

def datetimes_from_zarr(zarr_store):
    """Return the raw data of the ``time`` coordinate stored in a zarr store.

    The store is opened with ``xr.open_zarr`` and the ``.data`` of the
    resulting dataset's ``time`` attribute is returned.
    """
    stored_dataset = xr.open_zarr(zarr_store)
    time_coord = stored_dataset.time
    return time_coord.data

## Can we merge together two phenomena with different domains into a single Dataset?

In [16]:
# Load air temperature on pressure levels, selected by its STASH attribute
STASH_ATEMP = 'm01s16i203'
filepath = '/data/cssp-china/mini-dataset-24-01-19/20CR/daily'
# Sort the listing so that indexing into it does not depend on the
# arbitrary order returned by os.listdir
files = sorted(os.listdir(filepath))

# Take one file from the listing and keep only the cube whose STASH matches
cube_atmp = pp_to_cube(files[3], filepath, constraints={'STASH': STASH_ATEMP})
# Show the summary of the loaded cube
cube_atmp

Cube loaded from apepda.pa51240.pp


Air Temperature (K),time,pressure,grid_latitude,grid_longitude
Shape,10,14,219,286
Dimension coordinates,,,,
time,x,-,-,-
pressure,-,x,-,-
grid_latitude,-,-,x,-
grid_longitude,-,-,-,x
Auxiliary coordinates,,,,
forecast_period,x,-,-,-
Scalar coordinates,,,,
forecast_reference_time,1849-12-01 00:00:00,1849-12-01 00:00:00,1849-12-01 00:00:00,1849-12-01 00:00:00


In [14]:
# Load precipitation flux data, selected by its STASH attribute
STASH_PRECIP = 'm01s05i216'
filepath = '/data/cssp-china/mini-dataset-24-01-19/20CR/daily'
# Sort the listing so that indexing into it does not depend on the
# arbitrary order returned by os.listdir
files = sorted(os.listdir(filepath))

# Same file index as the air temperature cell, so both cubes come from one file
cube_precip = pp_to_cube(files[3], filepath, constraints={'STASH': STASH_PRECIP})
# Show the summary of the loaded cube
cube_precip

Cube loaded from apepda.pa51240.pp


Precipitation Flux (kg m-2 s-1),time,grid_latitude,grid_longitude
Shape,10,219,286
Dimension coordinates,,,
time,x,-,-
grid_latitude,-,x,-
grid_longitude,-,-,x
Auxiliary coordinates,,,
forecast_period,x,-,-
Scalar coordinates,,,
forecast_reference_time,1849-12-01 00:00:00,1849-12-01 00:00:00,1849-12-01 00:00:00
Attributes,,,


In [17]:
# Convert each cube into its own Dataset
ds_a = cube_to_xr(cube_atmp)
ds_p = cube_to_xr(cube_precip)

# Print both Datasets with a divider line between them
print(ds_a, '---'*20, ds_p, sep='\n')

<xarray.Dataset>
Dimensions:                  (grid_latitude: 219, grid_longitude: 286, pressure: 14, time: 10)
Coordinates:
  * time                     (time) datetime64[ns] 1851-01-25T12:00:00 ... 1851-02-03T12:00:00
  * pressure                 (pressure) float32 10.0 50.0 100.0 ... 925.0 1000.0
  * grid_latitude            (grid_latitude) float32 22.88 22.66 ... -25.08
  * grid_longitude           (grid_longitude) float32 323.48 323.7 ... 386.18002
    forecast_reference_time  datetime64[ns] ...
    forecast_period          (time) timedelta64[ns] ...
Data variables:
    air_temperature          (time, pressure, grid_latitude, grid_longitude) float32 dask.array<chunksize=(1, 1, 219, 286), meta=np.ndarray>
------------------------------------------------------------
<xarray.Dataset>
Dimensions:                  (grid_latitude: 219, grid_longitude: 286, time: 10)
Coordinates:
  * time                     (time) datetime64[ns] 1851-01-25T12:00:00 ... 1851-02-03T12:00:00
  * grid_latit

In [18]:
# Merge the two single-variable Datasets into one Dataset holding both data variables
ds_new = xr.merge([ds_a, ds_p])

In [19]:
# Display the merged Dataset to inspect its dimensions and data variables
ds_new

<xarray.Dataset>
Dimensions:                  (grid_latitude: 219, grid_longitude: 286, pressure: 14, time: 10)
Coordinates:
  * time                     (time) datetime64[ns] 1851-01-25T12:00:00 ... 1851-02-03T12:00:00
  * pressure                 (pressure) float32 10.0 50.0 100.0 ... 925.0 1000.0
  * grid_latitude            (grid_latitude) float32 22.88 22.66 ... -25.08
  * grid_longitude           (grid_longitude) float32 323.48 323.7 ... 386.18002
    forecast_reference_time  datetime64[ns] 1849-12-01
    forecast_period          (time) timedelta64[ns] 420 days 12:00:00 ... 429 days 12:00:00
Data variables:
    air_temperature          (time, pressure, grid_latitude, grid_longitude) float32 dask.array<chunksize=(1, 1, 219, 286), meta=np.ndarray>
    precipitation_flux       (time, grid_latitude, grid_longitude) float32 dask.array<chunksize=(1, 219, 286), meta=np.ndarray>

### Yes, we can! Xarray accepts all the dims and makes a combined Dataset.

# CONCLUSION
You can combine different phenomena into a single Dataset, so long as you make sure there is only one conflict between them (e.g. just one dim may be shorter for one data_var, but not when one of them also has a different additional dimension).