In [None]:
import zarr
import numpy as np
import pystac_client as pc
import xarray as xr
from dask.distributed import LocalCluster
from numcodecs import Blosc
import rioxarray
import pandas as pd
#os.environ["ZARR_V3_EXPERIMENTAL_API"] = "1"

In [2]:
def lookup(arr1, arr2):
    """
    Find where each value of arr2 sits in arr1.

    Parameters
    ----------
    arr1 : iterable of hashable values to search in. When a value occurs
        more than once, its last position is the one reported.
    arr2 : iterable of values to look up.

    Returns
    -------
    numpy.ndarray with one entry per element of arr2: the position of that
    value in arr1, or NaN when the value does not occur in arr1.
    """
    position_of = {}
    for position, value in enumerate(arr1):
        position_of[value] = position

    matches = [position_of.get(value, np.nan) for value in arr2]
    return np.array(matches)

def get_idx(array, value):
    """
    Return the position of the first element of ``array`` equal to ``value``.

    The position is read from the first index array of the matches, so for
    a multi-dimensional input it is an index along the first axis.

    Raises
    ------
    IndexError
        If no element of ``array`` equals ``value``.
    """
    hits = np.nonzero(array == value)[0]
    return hits[0]

def load_data(item, pol):
    """
    Read one asset of a search item into memory and give it a time axis.

    Parameters
    ----------
    item : object exposing ``assets[pol].href`` and ``properties["datetime"]``
        (the notebook passes items returned by the STAC search).
    pol : key of the asset to open (the notebook passes "VH").

    Returns
    -------
    The loaded raster with an added ``time`` dimension holding the item's
    timestamp with its timezone removed.
    """
    # Read the raster eagerly so later cells work on in-memory data.
    raster = rioxarray.open_rasterio(item.assets[pol].href).load()

    # tz_convert(None) strips the timezone from the parsed timestamp;
    # it requires the parsed value to be timezone-aware.
    timestamp = pd.to_datetime([item.properties["datetime"]]).tz_convert(None)

    return raster.expand_dims(time=timestamp)

In [3]:
# Start a local Dask cluster and obtain a client connected to it.
client = LocalCluster().get_client()
# Last expression of the cell: displays the client's dashboard URL.
client.dashboard_link

'http://127.0.0.1:8787/status'

In [4]:
# Date interval handed to the search as its `datetime` filter.
time_range = "2024-01-01/2024-01-02"

# Catalogue that is queried for the scenes.
pc_client = pc.Client.open("https://stac.eodc.eu/api/v1")

# Restrict the search to one collection and one Equi7 tile.
search = pc_client.search(
    datetime=time_range,
    collections=["SENTINEL1_SIG0_20M"],
    query={"Equi7_TileID": {"eq": "EU020M_E051N015T3"}},
)

# Fetch every matching item and display the collection.
items_eodc = search.item_collection()
items_eodc

In [5]:
# Later cells assume the scenes are in ascending time order: the merge loop
# compares each scene with the previous one, and the Zarr write derives its
# time slice from the first and last timestamps. Reversing alone only gives
# that order if the API returns newest-first, so sort explicitly. The sort is
# stable, so when the reversed list is already ascending it is left unchanged.
item_list = sorted(
    list(items_eodc)[::-1],
    key=lambda item: pd.to_datetime(item.properties["datetime"]),
)

In [6]:
# Load the "VH" raster of every item. A scene acquired within 50 seconds of
# the previously kept one is merged into it instead of being kept separately.
data = []
for item in item_list:
    d = load_data(item, "VH")

    if not data:
        data.append(d)
        continue

    # Use the absolute gap: with a signed difference, any scene that is
    # *earlier* than the previous one yields a negative value and would be
    # merged no matter how far apart the two acquisitions are.
    time_gap = np.abs(d.time.values - data[-1].time.values)

    if time_gap <= pd.Timedelta(seconds=50):
        # Where the new scene holds -9999, keep the previous scene's values;
        # everywhere else take the new scene. The result carries the
        # coordinates and attributes of the previous scene.
        data[-1] = xr.where(d.values == -9999, data[-1], d.values, keep_attrs=True)
    else:
        data.append(d)



In [7]:
# Stack the per-scene rasters along the time axis.
# NOTE: this cell rebinds `data` from a list to a DataArray, so it can only
# be run once after the loading cell.
data = xr.concat(data, dim="time")

# Drop the length-1 dimensions except "time". A bare squeeze() would also
# remove the time axis when only a single scene was loaded, and the cells
# below index `data.time.values` and write `data.values` as a 3-D block.
data = data.squeeze([dim for dim, size in data.sizes.items() if size == 1 and dim != "time"])

In [8]:
# Display the stacked array to check its dimensions and coordinates.
data

In [9]:
# Coordinate vectors of the target store.
# x: 15000 values from 5100010 upwards in steps of 20 (stops before 5400000).
mapping_x = np.arange(5100010, 5400000, 20)
# y: 15000 values from 1799990 downwards in steps of 20 (stops before 1500000).
mapping_y = np.arange(1799990, 1500000, -20)
# t: integer time steps 0..9999; a later cell uses them as day offsets.
mapping_t = np.arange(0,10000,1)

In [10]:
# Per-axis shapes of the coordinate vectors (1-tuples).
time_shape = mapping_t.shape
x_shape = mapping_x.shape
y_shape = mapping_y.shape

# Shape of the data array in (time, x, y) order.
shape = (len(mapping_t), len(mapping_x), len(mapping_y))

# Inner chunk and outer shard sizes, in the same (time, x, y) order.
chunk_shape = (2, 100, 100)
shard_shape = (2, 7500, 7500)

# Blosc codec with its default settings, used for the data array.
compressors_array = zarr.codecs.BloscCodec()

In [None]:
# Create the (initially empty) Zarr store layout: a root group, an "s1sig0"
# sub-group, one data array ("VH") and three coordinate arrays.
# With overwrite=True an existing group/array of the same name is replaced.
overwrite=True

store = zarr.storage.LocalStore("empty.zarr")
root = zarr.create_group(store=store, overwrite=overwrite)
s1sig0 = root.create_group("s1sig0")

# Data array: int16 values with -9999 as fill value, laid out as
# (time, x, y), sharded and compressed with the codec defined above.
# write_empty_chunks=False is passed so chunks holding only the fill value
# are not stored.
vh_array = s1sig0.create_array(name="VH",
                shape=shape,
                shards=shard_shape,
                chunks=chunk_shape,
                compressors=compressors_array,
                dtype="int16",
                fill_value=-9999,
                dimension_names=["time", "x", "y"],
                config={"write_empty_chunks":False},
                #attributes={"_FillValue": -9999},
                overwrite=overwrite)

# x coordinate array (float64, a single chunk of 15000 values).
x_array = s1sig0.create_array(name="x",
                shape=x_shape,
                chunks=(15000,),
                dtype="float64",
                dimension_names=["x"],
                attributes={"_FillValue": "AAAAAAAA+H8="}, #fill value is NaN
                overwrite=overwrite)

# y coordinate array (float64, a single chunk of 15000 values).
y_array = s1sig0.create_array(name="y",
                shape=y_shape,
                chunks=(15000,),
                dtype="float64",
                dimension_names=["y"],
                attributes={"_FillValue": "AAAAAAAA+H8="}, #fill value is NaN
                overwrite=overwrite)

# time coordinate array: int64 offsets stored in one chunk; the attributes
# declare them as days since 2014-10-01 in the proleptic Gregorian calendar.
time_array = s1sig0.create_array(name="time",
                shape=time_shape,
                chunks=time_shape,
                dtype="int64",
                dimension_names=["time"],
                attributes={"units": "days since 2014-10-01",
                            "calendar": "proleptic_gregorian"},
                overwrite=overwrite)

In [38]:
# Fill the three coordinate arrays with the precomputed coordinate vectors.
x_array[:] = mapping_x
y_array[:] = mapping_y
time_array[:] = mapping_t

In [39]:
# Consolidate the metadata twice: once for the store root and once for the
# "s1sig0" group, which is the group opened with consolidated=True later on.
zarr.consolidate_metadata(store)
zarr.consolidate_metadata(store, path="s1sig0")



<Group file://empty.zarr/s1sig0>

In [49]:
# Re-open the store from disk and fetch the "s1sig0" group for writing data.
store = zarr.storage.LocalStore("empty.zarr")
group = zarr.group(store=store)["s1sig0"]

In [41]:
# Convert the scene timestamps to whole-day offsets from the origin date.
# The origin is the same date as the one in the "days since 2014-10-01" units
# attribute set on the store's time array.
origin = np.datetime64("2014-10-01")
# Truncate the timestamps to day resolution.
times = data.time.values.astype("datetime64[D]")
# Integer number of days between each scene and the origin.
time_delta = (times - origin).astype("timedelta64[D]").astype("int64")

In [42]:
time_delta

array([3379, 3380])

In [43]:
# Half-open index ranges [min, max) of the loaded data inside the store.

# Time: from the first scene's day offset up to and including the last one.
t_min, t_max = time_delta[0], time_delta[-1] + 1

# x: positions of the first and last x coordinate in the store's x vector.
x_min = get_idx(mapping_x, data["x"].values[0])
x_max = get_idx(mapping_x, data["x"].values[-1]) + 1

# y: positions of the first and last y coordinate in the store's y vector.
y_min = get_idx(mapping_y, data["y"].values[0])
y_max = get_idx(mapping_y, data["y"].values[-1]) + 1

In [50]:
# The "VH" array is declared with dimensions (time, x, y), while the rasters
# loaded through rioxarray are ordered (time, y, x). Because the tile is
# square, writing `data.values` directly raises no shape error but stores
# every scene transposed. Reorder the axes explicitly before writing.
group["VH"][t_min:t_max, x_min:x_max, y_min:y_max] = data.transpose("time", "x", "y").values

In [51]:
# Open the "s1sig0" group through its consolidated metadata. chunks=None
# makes xarray read without Dask.
ds = xr.open_zarr(
    "empty.zarr",
    group="s1sig0",
    chunks=None,
    consolidated=True,
)
ds

In [52]:
# Pick one time step and a small window by coordinate labels. The y slice
# runs from the larger to the smaller value because y is stored descending.
filtered = ds.sel(
    {
        "time": "2024-01-01T00:00:00.000000000",
        "x": slice(5100010, 5100070),
        "y": slice(1799990, 1799930),
    }
)

In [53]:
# Display the selection (structure only; values are read in the next cell).
filtered

In [55]:
# Read the selected values from the store into memory and display them.
filtered.load()