### Initialize an empty zarr store for INCA data

#### Necessary imports

In [1]:
import numpy as np
import zarr
from datetime import datetime

import sys

# Hide UserWarning messages for the rest of the session, but only when no
# explicit warning options were passed to the interpreter (sys.warnoptions is
# empty), so that a user-supplied warning configuration is left untouched.
if len(sys.warnoptions) == 0:
    import warnings

    warnings.filterwarnings(action="ignore", category=UserWarning)

#### Initialize zarr store

Define the path to your store

In [2]:
# Location of the zarr store on disk (relative to the working directory).
# Every later cell opens the store through this variable.
store_path = "INCA.zarr"

To initialize a zarr store whose time dimension extends up to the present, the number of hourly timesteps between the origin date and now is computed.

In [None]:
# Number of whole hours between the origin of the time axis and "now".
#
# The time coordinate of this store is encoded as
# "hours since 2011-03-15 00:00:00" without a time zone, which CF interprets
# as UTC. The current time therefore has to be taken in UTC as well:
# np.datetime64("now") yields the current UTC time, whereas datetime.now()
# returns naive *local* time and would shift the axis length by the UTC
# offset of the machine running the notebook.
now = np.datetime64("now")
now_np = now.astype("datetime64[h]")  # truncate to the full hour
origin = np.datetime64("2011-03-15T00:00:00").astype("datetime64[h]")

# Dividing by a one-hour timedelta gives a plain number of hours.
timesteps = int((now_np - origin) / np.timedelta64(1, "h"))

The extent of each coordinate, as given in the original dataset, is defined.

In [None]:
# Projected x / y coordinates on a regular grid with a step of 1000. The stop
# value of np.arange is exclusive, so the last points are x = 720000 and
# y = 620000.
GRID_STEP = 1000
x_extent = np.arange(20000, 720000 + GRID_STEP, GRID_STEP)
y_extent = np.arange(220000, 620000 + GRID_STEP, GRID_STEP)

# Time coordinate as consecutive integer offsets 0 .. timesteps - 1.
time_extent = np.arange(timesteps)

The shape of the individual data arrays, as well as the shape of each coordinate dimension, is defined. The shard and chunk shapes are also defined so that each shard is ~50 MiB. The chunk shape is chosen to fit the input data and to allow fast reads.

In [None]:
# One-element shape tuples of the coordinate arrays.
x_shape, y_shape, time_shape = x_extent.shape, y_extent.shape, time_extent.shape

# Full shape of every data array, ordered (time, y, x).
shape = time_shape + y_shape + x_shape

# An inner chunk covers 720 time steps of a 100 x 100 tile.
chunk_shape = (720, 100, 100)
# A shard bundles several chunks along the time axis: 5 by default ...
shard_shape = (3600, 100, 100)
# ... and 10 for the array that is created with shard_shape_RR.
shard_shape_RR = (7200, 100, 100)

The store is created and its metadata is set. If you set `overwrite` to *True*, an already existing store is overwritten.

In [None]:
# Set to True to replace an already existing group/arrays at `store_path`.
# The same flag is passed to every create_array call further down.
overwrite = False

store = zarr.storage.LocalStore(store_path)

# Global attributes stored on the root group of the store.
root_attributes = {
    "Conventions": "CF-1.7",
    "name": "incal-hourly",
    "author": "Alexander Kann (alexander.kann@zamg.ac.at), Benedikt Bica (benedikt.bica@zamg.ac.at)",
    "freq": "1H",
    "spatial_resolution": 1000,
    "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
    "institution": "GeoSphere Austria",
    "history": "Original data produced by GeoSphere Austria",
    "source": "modeled data",
    "crs": "EPSG: 31287",
    "grid_mapping": "lambert_conformal_conic",
    "spatial_domain": "INCAL",
    "title": "INCA analysis - large domain",
    "Version": 1,
}

root = zarr.create_group(
    store=store,
    overwrite=overwrite,
    attributes=root_attributes,
)

The individual data arrays, as well as the coordinate arrays, are created

In [None]:
# Description of the projected coordinate reference system as an ESRI WKT
# string. It is identical for every data array, so it is defined once here
# instead of being repeated in each create_array call.
ESRI_PE_STRING = "PROJCS[\"MGI / Austria Lambert\",GEOGCS[\"MGI\",DATUM[\"Militar_Geographische_Institute\",SPHEROID[\"Bessel 1841\",6377397.155,299.1528128,AUTHORITY[\"EPSG\",\"7004\"]],TOWGS84[577.326,90.129,463.919,5.137,1.474,5.297,2.4232],AUTHORITY[\"EPSG\",\"6312\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AUTHORITY[\"EPSG\",\"4312\"]],PROJECTION[\"Lambert_Conformal_Conic_2SP\"],PARAMETER[\"standard_parallel_1\",49],PARAMETER[\"standard_parallel_2\",46],PARAMETER[\"latitude_of_origin\",47.5],PARAMETER[\"central_meridian\",13.33333333333333],PARAMETER[\"false_easting\",400000],PARAMETER[\"false_northing\",400000],UNIT[\"metre\",1,AUTHORITY[\"EPSG\",\"9001\"]],AUTHORITY[\"EPSG\",\"31287\"]]"


def _create_data_array(name, *, dtype, fill_value, scale_factor, description,
                       long_name, standard_name, units,
                       cell_method="", shards=None):
    """Create one empty (time, y, x) data array in the root group.

    All data arrays share the same shape, chunk shape, compressor,
    dimension names and projection attributes; only the values passed as
    arguments differ between variables.

    Parameters
    ----------
    name : str
        Name of the array inside the root group.
    dtype : str
        Integer storage dtype of the array.
    fill_value : int
        Used both as the zarr fill value and as the "_FillValue" attribute.
    scale_factor : float
        Value of the "scale_factor" attribute.
    description, long_name, standard_name, units : str
        Values of the attributes with the same names.
    cell_method : str, optional
        Value of the "cell_method" attribute (empty by default).
    shards : tuple of int, optional
        Shard shape; defaults to the notebook-level ``shard_shape``.

    Returns
    -------
    The array object returned by ``root.create_array``.
    """
    return root.create_array(
        name=name,
        shape=shape,
        shards=shard_shape if shards is None else shards,
        chunks=chunk_shape,
        compressors=zarr.codecs.BloscCodec(),
        dtype=dtype,
        fill_value=fill_value,
        dimension_names=["time", "y", "x"],
        # Chunks that contain only the fill value are not written to disk.
        config={"write_empty_chunks": False},
        attributes={
            "_FillValue": fill_value,
            "scale_factor": scale_factor,
            # NOTE(review): CF spells this attribute "cell_methods"; the key is
            # kept unchanged here - confirm against the original dataset.
            "cell_method": cell_method,
            "description": description,
            "esri_pe_string": ESRI_PE_STRING,
            "grid_mapping": "lambert_conformal_conic",
            "long_name": long_name,
            "standard_name": standard_name,
            "units": units,
        },
        overwrite=overwrite,
    )


# One entry per data variable, in the order in which the arrays are created.
DATA_ARRAY_SPECS = [
    dict(name="TD2M", dtype="int16", fill_value=-9999, scale_factor=0.01,
         description="dew point temperature 2m above ground",
         long_name="dew point temperature",
         standard_name="dew_point_temperature",
         units="degree_Celsius"),
    dict(name="VV", dtype="int16", fill_value=-9999, scale_factor=0.01,
         description="wind speed in northward direction",
         long_name="wind speed in northward direction",
         standard_name="y_wind",
         units="m s-1"),
    dict(name="UU", dtype="int16", fill_value=-9999, scale_factor=0.01,
         description="wind speed in eastward direction",
         long_name="wind speed in eastward direction",
         standard_name="x_wind",
         units="m s-1"),
    # NOTE(review): "surface_temperature" is kept as is; for a 2 m air
    # temperature the CF standard name would be "air_temperature" - confirm
    # against the original dataset before changing it.
    dict(name="T2M", dtype="int16", fill_value=-9999, scale_factor=0.01,
         description="air temperature 2m above ground",
         long_name="air temperature",
         standard_name="surface_temperature",
         units="degree_Celsius"),
    dict(name="RH2M", dtype="int16", fill_value=-99, scale_factor=0.01,
         description="relative humidity",
         long_name="relative humidity",
         standard_name="relative_humidity",
         units="percent"),
    dict(name="P0", dtype="int32", fill_value=-999, scale_factor=0.01,
         description="mean sea level pressure",
         long_name="mean sea level pressure",
         standard_name="air_pressure_at_mean_sea_level",
         units="Pa"),
    dict(name="GL", dtype="int32", fill_value=-999, scale_factor=0.01,
         description="global radiation",
         long_name="global radiation",
         standard_name="surface_downwelling_shortwave_flux_in_air",
         units="W m-2"),
    # RR is the only variable with a non-empty cell method, a finer scale
    # factor and the larger shard shape.
    dict(name="RR", dtype="int32", fill_value=-999, scale_factor=0.001,
         cell_method="time: sum",
         description="1-hour precipitation sum",
         long_name="1-hour precipitation sum",
         standard_name="precipitation_amount",
         units="kg m-2",
         shards=shard_shape_RR),
]

for data_array_spec in DATA_ARRAY_SPECS:
    _create_data_array(**data_array_spec)

# The two projected coordinate axes differ only in their name, shape and
# descriptive attributes, so both are created from one small table. Each
# coordinate array is stored as a single chunk.
x_array, y_array = (
    root.create_array(
        name=axis_name,
        shape=axis_shape,
        chunks=axis_shape,
        dtype="float64",
        dimension_names=[axis_name],
        attributes={
            "_FillValue": "AAAAAAAA+H8=",  # base64-encoded float64 NaN
            "axis": axis_letter,
            "long_name": axis_long_name,
            "standard_name": axis_standard_name,
            "units": "m",
        },
        overwrite=overwrite,
    )
    for axis_name, axis_shape, axis_letter, axis_long_name, axis_standard_name in (
        ("x", x_shape, "X", "x coordinate of projection", "projection_x_coordinate"),
        ("y", y_shape, "Y", "y coordinate of projection", "projection_y_coordinate"),
    )
)

# Time is stored as integer hour offsets relative to the reference date given
# in its "units" attribute.
time_attributes = {
    "units": "hours since 2011-03-15 00:00:00",
    "calendar": "proleptic_gregorian",
    "long_name": "time_of_measurement",
    "standard_name": "time",
}
time_array = root.create_array(
    name="time",
    shape=time_shape,
    chunks=time_shape,
    dtype="int64",
    dimension_names=["time"],
    attributes=time_attributes,
    overwrite=overwrite,
)

The coordinate arrays are filled with their respective values, and the metadata is consolidated to finish initializing the empty zarr store. Now data can be written to the data arrays.

In [None]:
# Write the coordinate values into their (so far empty) arrays.
for coord_array, coord_values in (
    (x_array, x_extent),
    (y_array, y_extent),
    (time_array, time_extent),
):
    coord_array[:] = coord_values

# Collect the metadata of the group and all of its arrays in one place.
zarr.consolidate_metadata(store)

#### Extend time dimension

If the time dimension needs to be extended, this code can be used to extend it up to the present.

In [3]:
# The time axis counts hours since `origin`. Its "units" attribute carries no
# time zone, which CF interprets as UTC, so the current time is taken in UTC
# as well: np.datetime64("now") is UTC, whereas datetime.now() would be naive
# local time and shift the result by the machine's UTC offset.
now = np.datetime64("now")
now_np = now.astype("datetime64[h]")  # truncate to the full hour
origin = np.datetime64("2011-03-15T00:00:00").astype("datetime64[h]")

# Target length of the time dimension and the matching coordinate values.
new_shape = int((now_np - origin) / np.timedelta64(1, "h"))
new_extent = np.arange(0, new_shape, 1)

# Open the existing store for reading and writing. open_group(mode="r+")
# raises if there is no group at store_path, whereas zarr.group() would
# silently create a new, empty one.
store = zarr.storage.LocalStore(store_path)
group = zarr.open_group(store=store, mode="r+")

# Every array that is not a coordinate is treated as a data array whose first
# axis is time.
array_names = set(group.array_keys())
coords = {"time", "x", "y"}
data_arrays = array_names - coords

current_length = group["time"].shape[0]
if new_shape > current_length:
    group["time"].resize(new_shape)
    for array in data_arrays:
        group_shape = group[array].shape
        # Only the leading (time) axis grows; all other axes keep their size.
        group[array].resize((new_shape, *group_shape[1:]))

    # Refresh the consolidated metadata so that it reflects the new shapes.
    zarr.consolidate_metadata(store)

    # Reopen the group before writing - presumably so that the arrays are
    # read with the refreshed (resized) metadata instead of the stale copy.
    store = zarr.storage.LocalStore(store_path)
    group = zarr.open_group(store=store, mode="r+")

    group["time"][:] = new_extent
else:
    # Resizing to a shorter length would drop existing data, so a store that
    # already reaches the target length is left untouched.
    print(f"Time dimension already has {current_length} steps (target: {new_shape}); nothing to extend.")

#### Write additional metadata

To write additional metadata, use the *attrs* attribute of the group (the whole store) or of an individual array in the store.

In [None]:
# Open the existing store for reading and writing. open_group(mode="r+")
# raises if there is no group at store_path; zarr.group() would instead
# create a new, empty group and the first attribute below would be written
# to it.
store = zarr.storage.LocalStore(store_path)
group = zarr.open_group(store=store, mode="r+")

# Attribute on the root group, i.e. metadata of the whole store ...
group.attrs["additional attribute key"] = "attribute value"
# ... and attribute on a single array inside the store.
group["time"].attrs["additional attribute key"] = "attribute value"

# Re-consolidate so that the consolidated metadata includes the new attributes.
zarr.consolidate_metadata(store)