In [None]:
import xarray as xr
import numpy as np
import zarr
from datetime import datetime, timezone
import BuildZarrStore as bzs
import pandas as pd

import sys

# Hide UserWarning noise for the rest of the session, but only when no explicit
# warning options were supplied to the interpreter (sys.warnoptions is empty).
if not sys.warnoptions:
    import warnings
    warnings.filterwarnings(action="ignore", category=UserWarning)

In [None]:
# Coordinate axes of the target grid. np.arange excludes the stop value, so the
# last coordinates are x = 720000 and y = 620000 (spacing 1000, matching the
# "spatial_resolution" attribute written to the root group below).
x_extent = np.arange(20_000, 721_000, 1_000)
y_extent = np.arange(220_000, 621_000, 1_000)
# 125328 hourly steps, stored as integer offsets 0..125327. The value matches
# the number of hours between the `origin` and `end` timestamps computed in a
# later cell of this notebook.
time_extent = np.arange(125_328)

In [None]:
# 1-D shapes of the coordinate axes; also reused as their chunk shapes later on.
x_shape = x_extent.shape
y_shape = y_extent.shape
time_shape = time_extent.shape

# Full shape of every data variable, ordered (time, y, x).
shape = time_shape + y_shape + x_shape

# Inner chunk shape shared by all data variables.
chunk_shape = (720, 100, 100)
# Shard shapes (5 and 10 chunks along time respectively).
# NOTE(review): despite its name, shard_shape_int32 is passed for the int16
# variables as well as P0/GL; shard_shape is only used for RR.
shard_shape_int32 = (3600, 100, 100)
shard_shape = (7200, 100, 100)

In [None]:
# Create (or replace, since overwrite is True) the root group of the test store
# and attach the dataset-level attributes to it.
overwrite = True
store = zarr.storage.LocalStore("/eodc/private/openeo_platform/zarr_nacho/INCA_test.zarr")

root_attributes = {
    "name": "incal-hourly",
    "Conventions": "CF-1.7",
    "freq": "1H",
    "spatial_resolution": 1000,
    "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
    "Institution": "GeoSphere Austria",
    "history": "Original data produced by GeoSphere Austria",
    "source": "modeled data",
    "crs": "EPSG: 31287",
    "grid_mapping": "lambert_conformal_conic",
    "Version": 1,
}
root = zarr.create_group(store=store, overwrite=overwrite, attributes=root_attributes)

In [None]:
# Packed (integer + scale_factor) data variables, all laid out as (time, y, x).
# One row per variable, created in this order:
#   name, shard shape, dtype, fill value, scale factor, description, unit
# The fill value is written both as the zarr fill_value and as the "_FillValue"
# attribute.
data_variable_specs = [
    ("TD2M", shard_shape_int32, "int16", -9999, 0.01, "dew point temperature", "degree_Celsius"),
    # VV was previously labelled "eastward" (copy-paste of UU); it is the
    # northward wind component.
    ("VV", shard_shape_int32, "int16", -9999, 0.01, "wind speed in northward direction", "m s-1"),
    ("UU", shard_shape_int32, "int16", -9999, 0.01, "wind speed in eastward direction", "m s-1"),
    ("T2M", shard_shape_int32, "int16", -9999, 0.01, "air temperature", "degree_Celsius"),
    ("RH2M", shard_shape_int32, "int16", -99, 0.01, "relative humidity", "percent"),
    ("P0", shard_shape_int32, "int32", -999, 0.01, "mean sea level pressure", "Pa"),
    ("GL", shard_shape_int32, "int32", -999, 0.01, "global radiation", "W m-2"),
    ("RR", shard_shape, "int32", -999, 0.001, "1-hour precipitation sum", "kg m-2"),
]

for (var_name, var_shards, var_dtype, var_fill,
     var_scale, var_description, var_unit) in data_variable_specs:
    root.create_array(
        name=var_name,
        shape=shape,
        shards=var_shards,
        chunks=chunk_shape,
        compressors=zarr.codecs.BloscCodec(),
        dtype=var_dtype,
        fill_value=var_fill,
        dimension_names=["time", "y", "x"],
        # Chunks consisting only of the fill value are not written to disk.
        config={"write_empty_chunks": False},
        attributes={
            "_FillValue": var_fill,
            "scale_factor": var_scale,
            "Description": var_description,
            "Unit": var_unit,
        },
        overwrite=overwrite,
    )

# Spatial coordinate arrays: float64, stored as a single chunk each. They are
# created in the order x, y; each call gets its own attributes dict.
x_array, y_array = (
    root.create_array(
        name=axis_name,
        shape=axis_shape,
        chunks=axis_shape,
        dtype="float64",
        dimension_names=[axis_name],
        attributes={"_FillValue": "AAAAAAAA+H8="},  # fill value is NaN
        overwrite=overwrite,
    )
    for axis_name, axis_shape in (("x", x_shape), ("y", y_shape))
)

# Time coordinate: integer hour offsets from the reference date in "units",
# stored as a single chunk.
time_array = root.create_array(
    name="time",
    shape=time_shape,
    chunks=time_shape,
    dtype="int64",
    dimension_names=["time"],
    attributes={
        "units": "hours since 2011-03-15 00:00:00",
        "calendar": "proleptic_gregorian",
    },
    overwrite=overwrite,
)

In [None]:
# Write the coordinate values into their arrays (x, then y, then time) and
# consolidate the store's metadata afterwards.
for coord_array, coord_values in (
    (x_array, x_extent),
    (y_array, y_extent),
    (time_array, time_extent),
):
    coord_array[:] = coord_values
zarr.consolidate_metadata(store)

In [None]:
# overwrite=True
# store = zarr.storage.LocalStore("INCA_test.zarr")
# root = zarr.create_group(store=store, overwrite=overwrite)

# td2m = root.create_array("TD2M", 
#                          attributes={"Description": "dew point temperature",
#                                      "Unit": "degree_Celsius",
#                                      "name": "incal-hourly",
#                                      "Conventions": "CF-1.7",
#                                      "freq": "1H",
#                                      "spatial_resolution": 1000,
#                                      "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
#                                      "Institution": "GeoSphere Austria",
#                                      "history": "Original data produced by GeoSphere Austria",
#                                      "source": "modeled data",
#                                      "crs": "EPSG: 31287",
#                                      "grid_mapping": "lambert_conformal_conic",
#                                      "Version": 1})
# gl = root.create_group("GL", 
#                          attributes={"Description": "global radiation",
#                                      "Unit": "W m-2",
#                                      "name": "incal-hourly",
#                                      "Conventions": "CF-1.7",
#                                      "freq": "1H",
#                                      "spatial_resolution": 1000,
#                                      "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
#                                      "Institution": "GeoSphere Austria",
#                                      "history": "Original data produced by GeoSphere Austria",
#                                      "source": "modeled data",
#                                      "crs": "EPSG: 31287",
#                                      "grid_mapping": "lambert_conformal_conic",
#                                      "Version": 1})

# p0 = root.create_group("P0", 
#                          attributes={"Description": "mean sea level pressure",
#                                      "Unit": "Pa",
#                                      "name": "incal-hourly",
#                                      "Conventions": "CF-1.7",
#                                      "freq": "1H",
#                                      "spatial_resolution": 1000,
#                                      "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
#                                      "Institution": "GeoSphere Austria",
#                                      "history": "Original data produced by GeoSphere Austria",
#                                      "source": "modeled data",
#                                      "crs": "EPSG: 31287",
#                                      "grid_mapping": "lambert_conformal_conic",
#                                      "Version": 1})

# rh2m = root.create_group("RH2M", 
#                          attributes={"Description": "relative humidity",
#                                      "Unit": "percent",
#                                      "name": "incal-hourly",
#                                      "Conventions": "CF-1.7",
#                                      "freq": "1H",
#                                      "spatial_resolution": 1000,
#                                      "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
#                                      "Institution": "GeoSphere Austria",
#                                      "history": "Original data produced by GeoSphere Austria",
#                                      "source": "modeled data",
#                                      "crs": "EPSG: 31287",
#                                      "grid_mapping": "lambert_conformal_conic",
#                                      "Version": 1})

# rr = root.create_group("RR", 
#                          attributes={"Description": "1-hour precipitation sum",
#                                      "Unit": "kg m-2",
#                                      "name": "incal-hourly",
#                                      "Conventions": "CF-1.7",
#                                      "freq": "1H",
#                                      "spatial_resolution": 1000,
#                                      "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
#                                      "Institution": "GeoSphere Austria",
#                                      "history": "Original data produced by GeoSphere Austria",
#                                      "source": "modeled data",
#                                      "crs": "EPSG: 31287",
#                                      "grid_mapping": "lambert_conformal_conic",
#                                      "Version": 1})

# t2m = root.create_group("T2M",
#                          attributes={"Description": "air temperature",
#                                      "Unit": "degree_Celsius",
#                                      "name": "incal-hourly",
#                                      "Conventions": "CF-1.7",
#                                      "freq": "1H",
#                                      "spatial_resolution": 1000,
#                                      "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
#                                      "Institution": "GeoSphere Austria",
#                                      "history": "Original data produced by GeoSphere Austria",
#                                      "source": "modeled data",
#                                      "crs": "EPSG: 31287",
#                                      "grid_mapping": "lambert_conformal_conic",
#                                      "Version": 1})

# uu = root.create_group("UU", 
#                          attributes={"Description": "wind speed in eastward direction",
#                                      "Unit": "m s-1",
#                                      "name": "incal-hourly",
#                                      "Conventions": "CF-1.7",
#                                      "freq": "1H",
#                                      "spatial_resolution": 1000,
#                                      "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
#                                      "Institution": "GeoSphere Austria",
#                                      "history": "Original data produced by GeoSphere Austria",
#                                      "source": "modeled data",
#                                      "crs": "EPSG: 31287",
#                                      "grid_mapping": "lambert_conformal_conic",
#                                      "Version": 1})

# vv = root.create_group("VV", 
#                          attributes={"Description": "wind speed in northward direction",
#                                      "Unit": "m s-1",
#                                      "name": "incal-hourly",
#                                      "Conventions": "CF-1.7",
#                                      "freq": "1H",
#                                      "spatial_resolution": 1000,
#                                      "references": "https://data.hub.geosphere.at/dataset/inca-v1-1h-1km",
#                                      "Institution": "GeoSphere Austria",
#                                      "history": "Original data produced by GeoSphere Austria",
#                                      "source": "modeled data",
#                                      "crs": "EPSG: 31287",
#                                      "grid_mapping": "lambert_conformal_conic",
#                                      "Version": 1})

In [None]:
# bzs.make_group_INCA(td2m, "TD2M", shape, shard_shape_int32, chunk_shape, x_shape, y_shape, time_shape, x_extent, y_extent, time_extent, 0.01, fill_value=-9999)
# bzs.make_group_INCA(vv, "VV", shape, shard_shape_int32, chunk_shape, x_shape, y_shape, time_shape, x_extent, y_extent, time_extent, 0.01, fill_value=-9999)
# bzs.make_group_INCA(uu, "UU", shape, shard_shape_int32, chunk_shape, x_shape, y_shape, time_shape, x_extent, y_extent, time_extent, 0.01, fill_value=-9999)
# bzs.make_group_INCA(t2m, "T2M", shape, shard_shape_int32, chunk_shape, x_shape, y_shape, time_shape, x_extent, y_extent, time_extent, 0.01, fill_value=-9999)
# bzs.make_group_INCA(rh2m, "RH2M", shape, shard_shape_int32, chunk_shape, x_shape, y_shape, time_shape, x_extent, y_extent, time_extent, 0.01, fill_value=-99)
# bzs.make_group_INCA(p0, "P0", shape, shard_shape_int32, chunk_shape, x_shape, y_shape, time_shape, x_extent, y_extent, time_extent, 0.01, fill_value=-999, dtype="int32")
# bzs.make_group_INCA(gl, "GL", shape, shard_shape_int32, chunk_shape, x_shape, y_shape, time_shape, x_extent, y_extent, time_extent, 0.01, fill_value=-999, dtype="int32")
# bzs.make_group_INCA(rr, "RR", shape, shard_shape, chunk_shape, x_shape, y_shape, time_shape, x_extent, y_extent, time_extent, 0.001, fill_value=-999, dtype="int32")


In [None]:
# zarr.consolidate_metadata(store)
# zarr.consolidate_metadata(store, path="TD2M")
# zarr.consolidate_metadata(store, path="VV")
# zarr.consolidate_metadata(store, path="UU")
# zarr.consolidate_metadata(store, path="T2M")
# zarr.consolidate_metadata(store, path="RH2M")
# zarr.consolidate_metadata(store, path="P0")
# zarr.consolidate_metadata(store, path="GL")
# zarr.consolidate_metadata(store, path="RR")

In [None]:
# Open one monthly RR NetCDF file; chunks={} is passed so the variables are
# opened as chunked (lazy) arrays.
data = xr.open_dataset(
    "INCA_data/INCAL_HOURLY_RR_202507.nc",
    chunks={},
)
# Optional subset left over from earlier experiments:
# .sel(time=slice("2019-06-04T08:00:00.000000000","2019-06-04T22:00:00.000000000")).sel(x=slice(420000,430000), y=slice(415000,420000))

In [None]:
# Display the dataset opened in the previous cell.
data

In [None]:
# Open the production store through its consolidated metadata and load a small
# T2M window (label-based selection on time, x and y) as a read check.
ds = xr.open_zarr(
    "/eodc/products/eodc/geosphere_inca/INCA.zarr",
    consolidated=True,
    chunks={},
)  # , decode_times=False)
(
    ds["T2M"]
    .sel(time=slice("2019-06-04T08:00:00.000000000", "2019-06-04T22:00:00.000000000"))
    .sel(x=slice(420000, 430000), y=slice(415000, 420000))
    .load()
)

In [None]:
# Same kind of read check against a store at the relative path "INCA.zarr".
# Note that `ds` is rebound here to the T2M variable only; time is selected by
# label, x and y by integer position.
ds = xr.open_zarr(
    "INCA.zarr",
    consolidated=True,
    chunks={},
)["T2M"]
(
    ds
    .sel(time=slice("2019-06-15T00:00:00.000000000", "2019-06-30T00:00:00.000000000"))
    .isel(x=slice(300, 400), y=slice(100, 200))
    .load()
)

In [None]:
# Open the production store via the generic xr.open_dataset entry point and
# display it. No engine is passed, so xarray has to infer the backend from the
# path -- NOTE(review): confirm the zarr backend is picked up in this environment.
xr.open_dataset("/eodc/products/eodc/geosphere_inca/INCA.zarr")#.sel(time=slice("2019-06-15T00:00:00.000000000", "2019-06-30T00:00:00.000000000")).isel(x=slice(300,400), y=slice(100,200)).load()

In [None]:
# Start and (exclusive) end of the record at hour resolution; their difference
# gives the number of hourly steps on the time axis.
origin, end = (
    np.datetime64(timestamp).astype("datetime64[h]")
    for timestamp in ("2011-03-15T00:00:00", "2025-07-01T00:00:00")
)

In [None]:
# Number of hours between origin and end (125328), i.e. the length used for
# the time axis elsewhere in this notebook.
end-origin

In [None]:
# CAUTION: from here on `store` refers to the production store, no longer the
# test store created at the top of the notebook. Every later cell that uses
# `store` or `group` touches production data.
store = zarr.storage.LocalStore(
    "/eodc/products/eodc/geosphere_inca/INCA.zarr"
)
group = zarr.group(store=store)

In [None]:
# Extend the "time" coordinate to 125328 entries (the same length as
# time_extent defined near the top of the notebook).
group["time"].resize(125328)

In [None]:
# Fill the resized "time" coordinate with consecutive hour offsets 0..125327.
group["time"][:] = np.arange(125328)

In [None]:
# Inspect the current shape of T2M in the opened store, to compare its first
# (time) dimension against the resized "time" coordinate.
group["T2M"].shape

In [None]:
# Grow every (time, y, x) data variable to the new time length, not just VV.
# With only VV resized, the other variables keep a "time" length that no longer
# matches the resized "time" coordinate after a fresh top-to-bottom run.
# Resizing an array that already has this shape is a no-op.
# NOTE(review): assumes the production store holds the same eight variables as
# the test store defined earlier in this notebook -- confirm before running.
for var_name in ("TD2M", "VV", "UU", "T2M", "RH2M", "P0", "GL", "RR"):
    group[var_name].resize((125328, 401, 701))

In [None]:
# Re-consolidate the metadata of the store currently bound to `store`, so that
# readers opening it with consolidated=True see the resized shapes.
zarr.consolidate_metadata(store)

In [None]:
# Same write as the earlier "time" cell: hour offsets 0..125327. Only needed if
# that cell was not run after the resize.
group["time"][:] = np.arange(125328)