In [34]:
from dotenv import load_dotenv
import os
from hera.workflows import models, CronWorkflow, script, Artifact, Parameter, DAG, Steps, Step, NoneArchiveStrategy, Workflow
from hera.shared import global_config

# Load environment variables from a fixed, user-specific .env file.
# NOTE(review): presumably this file provides `argo_token`, which is read via
# os.getenv further down — confirm. The absolute path ties the notebook to one machine.
load_dotenv("/home/otto/s1_zarr/.env")

True

In [35]:
# Hera-wide defaults applied to every workflow/template created in this notebook.
global_config.host = "https://dev.services.eodc.eu/workflows/"
global_config.namespace = "inca"
# os.getenv returns None when `argo_token` is not set, so a missing token is not
# detected here; it would only surface later when talking to the server.
global_config.token = os.getenv("argo_token")
# Container image used for script templates that do not set their own image.
global_config.image = "ghcr.io/oscipal/image_zarr:latest"

In [36]:
# Volume definition handed to the Workflow; it is backed by the existing
# persistent volume claim "eodc-nfs-claim" and referenced by name ("eodc-mount")
# from the script's volume mount.
nfs_volume = [
    models.Volume(
        name="eodc-mount",
        persistent_volume_claim={"claimName": "eodc-nfs-claim"},
    ),
]

# Numeric user/group IDs the workflow pods run as.
# NOTE(review): presumably these match the owner of the target directory on the
# NFS share — confirm.
security_context = dict(runAsUser=74268, runAsGroup=71473)

In [37]:
@script(volume_mounts=[models.VolumeMount(name="eodc-mount", mount_path="/eodc")])
def write_data(tile: str, store_path: str = "/eodc/private/openeo_platform/zarr_nacho/s1sig0.zarr"):
    """Write Sentinel-1 sigma0 scenes of one Equi7 tile into an existing Zarr store.

    The function searches the EODC STAC API for SENTINEL1_SIG0_20M items of the
    given tile, loads the VH and VV assets of each item, clips them to the
    bounding box of the valid VH pixels and merges them into the Zarr group at
    ``store_path``. Only pixels that are still nodata (-9999) in the store are
    filled, so the first scene written for a pixel and time index wins.

    The layers ``absolute_orbit_number``, ``relative_orbit_number`` and
    ``sensing_date`` are updated only for pixels whose VH value is filled by the
    current scene. Pixels that already hold data from another scene keep their
    metadata, so data and metadata always describe the same acquisition.

    All imports are local because the function body is what runs inside the
    workflow container, not the notebook kernel.

    Args:
        tile: Equi7 tile name without grid prefix/suffix, e.g. "E045N015".
        store_path: Path of the Zarr store as seen from inside the container.
            The store must already contain the arrays ``x``, ``y``, ``VH``,
            ``VV``, ``absolute_orbit_number``, ``relative_orbit_number`` and
            ``sensing_date``.

    Raises:
        IndexError: If the first or last coordinate of a clipped scene is not
            present in the store's ``x`` / ``y`` arrays.
        KeyError: If an item lacks the expected assets or orbit attributes.
    """
    import pystac_client as pc
    import xarray as xr
    import zarr
    import numpy as np
    import rioxarray
    import pandas as pd

    nodata = -9999

    def get_idx(array1, array2):
        """Return (start, stop) so that array1[start:stop] spans array2's first to last value."""
        start = np.where(array1 == array2[0])[0][0]
        stop = np.where(array1 == array2[-1])[0][0] + 1
        return start, stop

    def load_data(item, pols):
        """Load one polarisation (str) or several (iterable) of a STAC item, time-stamped and squeezed."""

        def open_asset(pol):
            # The item's datetime is made timezone-naive and attached as a length-1 time dimension.
            timestamp = pd.to_datetime([item.properties["datetime"]]).tz_convert(None)
            raster = rioxarray.open_rasterio(item.assets[pol].href).load()
            return raster.expand_dims(time=timestamp).rename(pol)

        if isinstance(pols, str):
            data = open_asset(pols)
        else:
            data = xr.merge([open_asset(pol) for pol in pols])
        # squeeze() drops all length-1 dimensions (including time, which stays as a scalar coordinate).
        return data.squeeze()

    def clip_data(dataset, fillvalue=-9999, multiple_vars=False):
        """Clip to the bounding box of valid pixels of the first variable; None if there are none."""
        variables = list(dataset.data_vars)
        if len(variables) > 1 and not multiple_vars:
            raise ValueError("All variables are clipped to the extent of first variable! Set multiple_vars to TRUE if you want to proceed.")
        mask = dataset[variables[0]] != fillvalue
        valid_idx = np.where(mask)
        rows, cols = valid_idx[0], valid_idx[1]
        if rows.size == 0:
            # Nothing but fill values: min()/max() on an empty array would raise.
            return None
        return dataset.isel(
            x=slice(cols.min(), cols.max() + 1),
            y=slice(rows.min(), rows.max() + 1),
        )

    pc_client = pc.Client.open("https://stac.eodc.eu/api/v1")
    time_range = "2022-01-01/2022-01-30"
    print(tile)
    search = pc_client.search(
        collections=["SENTINEL1_SIG0_20M"],
        datetime=time_range,
        query={"Equi7_TileID": {"eq": f"EU020M_{tile}T3"}})

    items_eodc = search.item_collection()
    # Items are processed in reverse search order; the first one written wins.
    item_list = list(items_eodc)[::-1]

    store = zarr.storage.LocalStore(store_path)
    group = zarr.group(store=store)
    x_extent = group["x"][:]
    y_extent = group["y"][:]

    print(len(items_eodc))

    # Reference epochs of the store's time axis (days) and sensing_date layer (seconds).
    time_origin = np.datetime64("2014-10-01")
    sensing_origin = np.datetime64("2014-10-01T00:00:00")

    for item in item_list:
        print(f"processing {item.id}")

        dataset = load_data(item, ["VH", "VV"])

        # Shift coordinates by (-10, +10).
        # NOTE(review): presumably converts 20 m pixel centres to upper-left corners
        # to match the store's x/y arrays — confirm.
        dataset["x"] = dataset.x - 10
        dataset["y"] = dataset.y + 10

        dataset_clipped = clip_data(dataset, multiple_vars=True)
        del dataset
        if dataset_clipped is None:
            print(f"skipping {item.id}: no valid pixels")
            continue

        aon = dataset_clipped.attrs["abs_orbit_number"]
        ron = dataset_clipped.attrs["rel_orbit_number"]

        # Index along the time axis: whole days since time_origin.
        days = dataset_clipped.time.values.astype("datetime64[D]")
        time_idx = int((days - time_origin).astype("int64"))

        # Sensing time stored per pixel: seconds since sensing_origin.
        seconds = dataset_clipped.time.values.astype("datetime64[s]")
        sensing_value = int((seconds - sensing_origin).astype("int64"))

        x_min, x_max = get_idx(x_extent, dataset_clipped["x"].values)
        y_min, y_max = get_idx(y_extent, dataset_clipped["y"].values)
        window = (time_idx, slice(y_min, y_max), slice(x_min, x_max))

        # VH: fill only pixels that are still nodata in the store and remember
        # which of them actually receive a value from this scene.
        scene_vh = dataset_clipped["VH"].values
        merged_vh = group["VH"][window]
        newly_filled = (merged_vh == nodata) & (scene_vh != nodata)
        np.copyto(merged_vh, scene_vh, where=(merged_vh == nodata))
        group["VH"][window] = merged_vh
        del scene_vh

        # VV: same fill rule, evaluated on the VV layer itself.
        scene_vv = dataset_clipped["VV"].values
        merged_vv = group["VV"][window]
        np.copyto(merged_vv, scene_vv, where=(merged_vv == nodata))
        group["VV"][window] = merged_vv
        del scene_vv, merged_vv

        # Metadata follows the VH layer: pixels without data are nodata, pixels
        # filled by this scene get this scene's values, and pixels filled by an
        # earlier scene keep what is already stored.
        still_empty = merged_vh == nodata
        del merged_vh
        metadata_layers = (
            ("absolute_orbit_number", aon),
            ("relative_orbit_number", ron),
            ("sensing_date", sensing_value),
        )
        for layer_name, layer_value in metadata_layers:
            layer = group[layer_name][window]
            layer[still_empty] = nodata
            layer[newly_filled] = layer_value
            group[layer_name][window] = layer
            del layer

In [40]:
# Equi7 tiles processed by the two fan-out stages (step2 runs after step1).
tiles = ["E045N015"]  # , "E048N015", "E051N015"]
tiles2 = ["E048N012", "E051N012"]

# The loop items are plain strings, so the script inputs are bound explicitly:
# each item becomes `tile`, and `store_path` is pinned so nothing has to be
# inferred from the item. Keep this path in sync with write_data's default.
tile_arguments = {
    "tile": "{{item}}",
    "store_path": "/eodc/private/openeo_platform/zarr_nacho/s1sig0.zarr",
}

with Workflow(
    generate_name="s1sig0-zarr-",
    volumes=nfs_volume,
    security_context=security_context,
    entrypoint="workflow",
) as w:
    with DAG(name="workflow"):
        # One task instance per tile in each stage.
        a = write_data(name="step1", with_param=tiles, arguments=tile_arguments)
        b = write_data(name="step2", with_param=tiles2, arguments=tile_arguments)

        # step2 starts only after all step1 instances have finished.
        a >> b

In [41]:
# Create (submit) the workflow `w` built above, using the host, namespace and
# token configured on global_config.
w.create()

