In [None]:
from dotenv import load_dotenv
import os
load_dotenv("s3.env")
from hera.workflows import Steps, Workflow, script, Task, Container, DAG, RunnerScriptConstructor, Script, Volume
from hera.shared import global_config

# ---------------------------------------------------------------------------
# Hera global configuration: every Workflow/Script created below picks up the
# Argo server, namespace, token and default container image from here.
# ---------------------------------------------------------------------------
global_config.host = "https://dev.services.eodc.eu/workflows/"
global_config.namespace = "default"
# The token comes from the environment (populated from "s3.env" by the
# load_dotenv call above); os.getenv yields None when the variable is unset.
global_config.token = os.getenv("argo_token")
global_config.image = "ghcr.io/katharinastarzer21/image_zarr:latest"
# Make every @script-decorated function use the runner script constructor.
global_config.set_class_defaults(Script, constructor=RunnerScriptConstructor())

# User/group ids kept as a plain mapping.
# NOTE(review): not attached to any template in the visible cells - presumably
# meant to be passed as a security context; confirm.
security_context = dict(runAsUser=74268, runAsGroup=71473)

# Volume backed by an existing persistent volume claim.
# NOTE(review): verify that hera's `Volume` accepts `persistent_volume_claim`;
# an existing-claim volume class may be the intended one - confirm.
nfs_volume = Volume(
    name="eodc-mount",
    persistent_volume_claim=dict(claimName="eodc-nfs-claim"),  # Change this to your actual PVC name
)

# Mount description for the volume above (same "eodc-mount" name, mounted at /eodc).
nfs_volume_mount = dict(name="eodc-mount", mountPath="/eodc")


In [None]:
@script()
def inca_download(variable: str):
    """Download the June 2025 hourly INCA NetCDF file for one variable.

    The file is fetched from the GeoSphere Austria data hub file listing and
    stored under ``/tmp`` using the remote file name
    (``INCAL_HOURLY_<variable>_202506.nc``).

    Args:
        variable: INCA variable name; it is interpolated into both the
            directory and the file name of the download URL.

    Raises:
        urllib.error.URLError / urllib.error.HTTPError: propagated from
            ``urlretrieve`` when the download fails.
    """
    from urllib.request import urlretrieve
    import os

    url = f"https://public.hub.geosphere.at/datahub/resources/inca-v1-1h-1km/filelisting/{variable}/INCAL_HOURLY_{variable}_202506.nc"
    filename = os.path.basename(url)
    base = "/tmp"
    # Save under the same file name that the write step later looks for in /tmp.
    urlretrieve(url, os.path.join(base, filename))

@script()
def inca_write(variable: str):
    """Write one month of hourly INCA data into an existing Zarr group.

    Reads ``/tmp/INCAL_HOURLY_<variable>_202506.nc``, locates the file's x/y
    extent inside the ``x``/``y`` coordinate arrays of ``INCA_test.zarr`` and
    writes the variable's values into the matching region of the target array.
    The time index is the number of hours since 2011-03-15T00:00.

    Args:
        variable: INCA variable name; used for the input file name, the
            dataset variable and the target array inside the Zarr group.

    Raises:
        IndexError: if the first or last x/y coordinate of the file is not
            found (by exact equality) in the target coordinate arrays.
    """
    import os
    import xarray as xr
    import numpy as np
    import zarr

    filename = f"INCAL_HOURLY_{variable}_202506.nc"
    artifact_path = os.path.join("/tmp", filename)
    nfs_path = "/eodc/private/openeo_platform/zarr_nacho"

    def get_idx(array1, array2):
        """Return the half-open index range of ``array1`` spanned by the first and last value of ``array2``."""
        # Exact equality match; [0][0] raises IndexError when no element matches.
        min_idx = np.where(array1 == array2[0])[0][0]
        max_idx = np.where(array1 == array2[-1])[0][0] + 1
        return min_idx, max_idx

    # Load everything into memory, then close the file handle right away.
    with xr.open_dataset(artifact_path, mask_and_scale=False) as dataset:
        data = dataset.load()

    store = zarr.storage.LocalStore(os.path.join(nfs_path, "INCA_test.zarr"))
    group = zarr.group(store=store)
    x_extent = group["x"][:]
    y_extent = group["y"][:]

    x_min, x_max = get_idx(x_extent, data["x"].values)
    y_min, y_max = get_idx(y_extent, data["y"].values)

    # Time offsets are whole hours relative to this origin.
    origin = np.datetime64("2011-03-15T00:00:00").astype("datetime64[h]")
    time_min = data.time.values[0].astype("datetime64[h]")
    # +1 hour so the slice below includes the last time step.
    time_max = data.time.values[-1].astype("datetime64[h]") + 1
    time_delta_min = int((time_min - origin).astype("int64"))
    time_delta_max = int((time_max - origin).astype("int64"))

    # NOTE(review): assumes the file's time steps are contiguous hourly values and
    # that both arrays are ordered (time, y, x); otherwise the shapes will not match - confirm.
    group[variable][time_delta_min:time_delta_max, y_min:y_max, x_min:x_max] = data[variable].values

In [None]:
# Workflow that downloads one INCA file and then writes it into the Zarr store.
# NOTE(review): inca_download writes to /tmp and inca_write reads from /tmp; the
# two tasks presumably run in separate pods, so the file likely has to be passed
# as an artifact or via the shared volume - confirm before submitting.
# NOTE(review): nfs_volume_mount and security_context are defined in the first
# cell but are not attached to any script template in the visible cells.
with Workflow(
    generate_name="inca-zarr-",  # lower-case: Kubernetes object names must not contain upper-case letters
    entrypoint="download",
    volumes=[nfs_volume],
) as w:
    with DAG(name="download"):
        variable = "T2M"  # TODO(review): placeholder INCA variable name - set to the variable to ingest
        download_task = inca_download(name="inca-download", arguments={"variable": variable})
        write_task = inca_write(name="inca-write", arguments={"variable": variable})
        # Write only after the download has finished.
        download_task >> write_task

Workflow(api_version=None, kind=None, metadata=ObjectMeta(annotations=None, cluster_name=None, creation_timestamp=Time(__root__=datetime.datetime(2025, 7, 11, 13, 34, 21, tzinfo=datetime.timezone.utc)), deletion_grace_period_seconds=None, deletion_timestamp=None, finalizers=None, generate_name='hello-world-steps-', generation=1, labels={'workflows.argoproj.io/creator': 'system-serviceaccount-default-jenkins'}, managed_fields=[ManagedFieldsEntry(api_version='argoproj.io/v1alpha1', fields_type='FieldsV1', fields_v1=FieldsV1(), manager='argo', operation='Update', subresource=None, time=Time(__root__=datetime.datetime(2025, 7, 11, 13, 34, 21, tzinfo=datetime.timezone.utc)))], name='hello-world-steps-mklx6', namespace='default', owner_references=None, resource_version='307545700', self_link=None, uid='d87fc5d0-4cb4-4498-b855-4131cb11e1c2'), spec=WorkflowSpec(active_deadline_seconds=None, affinity=None, archive_logs=None, arguments=Arguments(artifacts=None, parameters=None), artifact_gc=None

In [None]:
from hera.workflows import Workflow, Task, Container

# Define the workflow; templates created inside the context are registered on it,
# and the entrypoint names the Steps template that Argo should start with.
with Workflow(
    generate_name="python-script-workflow-",
    entrypoint="main-steps",
) as w:
    # Container template running the pushed image configured in global_config.image.
    container_step = Container(
        name="run-python-script",
        image=global_config.image,
    )

    # Calling a template inside a Steps context adds it as a step.
    with Steps(name="main-steps") as s:
        container_step(name="run-script-task")

# Create the workflow (submits to the Argo server configured in global_config)
w.create()
