In [157]:
from dotenv import load_dotenv
import os
# Load the local env file first so the token lookup further down can see its variables.
load_dotenv("s3.env")
from hera.workflows import models, Steps, Workflow, script, Artifact, Container, DAG, RunnerScriptConstructor, Script, NFSVolume, NoneArchiveStrategy
from hera.shared import global_config

# Hera-wide defaults applied to everything built in this notebook:
# target server, namespace, auth token and the default container image.
global_config.host = "https://dev.services.eodc.eu/workflows/"
global_config.namespace = "inca"
# Read from the process environment (presumably populated by s3.env above - verify);
# the value is None when the variable is not set.
global_config.token = os.environ.get("argo_token")
global_config.image = "ghcr.io/oscipal/image_zarr:latest"

# Numeric user / group IDs handed to the Workflow as its security context.
security_context = {
    "runAsUser": 74268,
    "runAsGroup": 71473,
}

# One PVC-backed volume; the name "eodc-mount" is what the script's volume mount refers to.
nfs_volume = [
    models.Volume(
        name="eodc-mount",
        persistent_volume_claim={"claimName": "eodc-nfs-claim"},
    )
]


In [158]:
# The artifact path declared here must stay identical to the path the body writes to.
@script(outputs=Artifact(name="inca-file", path="/tmp/INCA_{{inputs.parameters.variable}}.nc", archive=NoneArchiveStrategy()))
def inca_download(variable: str):
    """Download one monthly INCA hourly NetCDF file for `variable` to /tmp/INCA_<variable>.nc."""
    # Imports stay function-local: the body is what the @script decorator turns into
    # the container script (see the Hera script documentation), so it must be self-contained.
    from urllib.request import urlretrieve
    import datetime

    # Year-month (YYYYMM) of the date 20 days before today selects which monthly file is fetched.
    ym = (datetime.date.today() - datetime.timedelta(days=20)).strftime("%Y%m")
    print(ym)

    url = f"https://public.hub.geosphere.at/datahub/resources/inca-v1-1h-1km/filelisting/{variable}/INCAL_HOURLY_{variable}_{ym}.nc"
    target = f"/tmp/INCA_{variable}.nc"
    # urlretrieve raises on HTTP errors, which fails the step instead of exporting a bad artifact.
    urlretrieve(url, target)

# The input artifact path must match the path the body reads; "eodc-mount" must match
# the name of the volume attached to the workflow.
@script(inputs=Artifact(name="inca-file", path="/tmp/INCA_{{inputs.parameters.variable}}.nc"),
        volume_mounts=[models.VolumeMount(name="eodc-mount", mount_path="/eodc")])
def inca_write(variable: str):
    """Write the NetCDF artifact for `variable` into its region of the INCA_test.zarr store under /eodc."""
    import os
    import xarray as xr
    import numpy as np
    import zarr

    artifact_path = f"/tmp/INCA_{variable}.nc"
    nfs_path = "/eodc/private/openeo_platform/zarr_nacho"

    def get_idx(array1, array2):
        # Half-open [start, stop) positions in array1 of array2's first and last values.
        # Matching is by exact equality, so both arrays must share identical coordinate values.
        first_hits = np.where(array1 == array2[0])[0]
        last_hits = np.where(array1 == array2[-1])[0]
        if first_hits.size == 0 or last_hits.size == 0:
            # Same exception type as the bare index lookup would raise, but with a usable message.
            raise IndexError(
                f"coordinate range [{array2[0]}, {array2[-1]}] is not present on the target grid"
            )
        return first_hits[0], last_hits[0] + 1

    # load_dataset reads everything into memory and closes the file handle afterwards.
    # mask_and_scale=False keeps the values as stored in the file (no fill-value/scale decoding).
    data = xr.load_dataset(artifact_path, mask_and_scale=False)

    store = zarr.storage.LocalStore(os.path.join(nfs_path, "INCA_test.zarr"))
    group = zarr.group(store=store)
    x_extent = group["x"][:]
    y_extent = group["y"][:]

    x_min, x_max = get_idx(x_extent, data["x"].values)
    y_min, y_max = get_idx(y_extent, data["y"].values)

    # Time positions are whole hours elapsed since this origin; the stop index is exclusive (+1 hour).
    origin = np.datetime64("2011-03-15T00:00:00").astype("datetime64[h]")
    time_min, time_max = data.time.values[0].astype("datetime64[h]"), data.time.values[-1].astype("datetime64[h]") + 1
    time_delta_min, time_delta_max = (time_min - origin).astype("int64"), (time_max - origin).astype("int64")

    if time_delta_min < 0:
        # A negative start would be interpreted as "from the end" and overwrite unrelated data.
        raise ValueError(f"first timestamp {time_min} lies before the store origin {origin}")

    # Assumes the variable is laid out as (time, y, x) with one step per hour and no gaps,
    # so that its shape equals the target region - TODO confirm against the source files.
    group[variable][time_delta_min:time_delta_max, y_min:y_max, x_min:x_max] = data[variable].values

In [159]:
# Values fanned out over via with_param; each entry is passed to the scripts as `variable`.
items = ["RR"]

# Build the workflow: the shared volume and security context apply to the whole workflow,
# and the DAG named "run-workflow" is its entrypoint.
with Workflow(
    generate_name="inca-zarr-",
    volumes = nfs_volume,
    security_context=security_context,
    entrypoint="run-workflow"
) as w:
    with DAG(name="run-workflow"):
        # One download task per entry in `items`.
        download = inca_download(arguments={"variable":"{{item}}"},
                                 with_param=items,)

        # One write task per entry in `items`, fed the "inca-file" artifact of the download task.
        # NOTE(review): the artifact is taken from a task that is itself expanded with with_param;
        # confirm that Argo resolves output artifacts of fanned-out tasks, especially once
        # `items` holds more than one entry (a per-item nested template may be needed).
        process = inca_write(arguments=[{"variable": "{{item}}"},
                                        download.get_artifact("inca-file").with_name("inca-file")],
                                        with_param=items,)

        # The write task only starts after the download task.
        download >> process

In [160]:
# Render the manifest before opening the file so a failure in to_yaml() cannot leave a
# truncated hera_workflow.yaml behind; pin the encoding so the output is platform-independent.
workflow_yaml = w.to_yaml()
with open("hera_workflow.yaml", "w", encoding="utf-8") as f:
    f.write(workflow_yaml)

In [142]:
# Create the workflow on the configured server; the returned Workflow object is the cell output.
# NOTE(review): this cell's execution count (142) is lower than the cells above (157-160),
# so the stored output may come from an older workflow definition - re-run after Restart & Run All.
w.create()

Workflow(api_version=None, kind=None, metadata=ObjectMeta(annotations=None, cluster_name=None, creation_timestamp=Time(__root__=datetime.datetime(2025, 7, 16, 6, 6, 45, tzinfo=datetime.timezone.utc)), deletion_grace_period_seconds=None, deletion_timestamp=None, finalizers=None, generate_name='inca-zarr-', generation=1, labels={'workflows.argoproj.io/creator': 'system-serviceaccount-default-jenkins'}, managed_fields=[ManagedFieldsEntry(api_version='argoproj.io/v1alpha1', fields_type='FieldsV1', fields_v1=FieldsV1(), manager='argo', operation='Update', subresource=None, time=Time(__root__=datetime.datetime(2025, 7, 16, 6, 6, 45, tzinfo=datetime.timezone.utc)))], name='inca-zarr-ffz2v', namespace='inca', owner_references=None, resource_version='310369990', self_link=None, uid='ae49ae4c-7cbe-4db2-889c-57c3b604ac2a'), spec=WorkflowSpec(active_deadline_seconds=None, affinity=None, archive_logs=None, arguments=Arguments(artifacts=None, parameters=None), artifact_gc=None, artifact_repository_r