In [31]:
import os
import json
import pystac
import xarray as xr
import xstac

from pystac_client import Client
from stac_validator import stac_validator

from pprint import pprint


# Collection template handed to xstac for each collection built in this notebook.
# NOTE(review): machine-specific absolute path; point it at your own checkout.
template_file = "/home/iferrario/dev/intertwin-stac/collection_template.json"
# Read through a context manager so the file handle is closed right away
# instead of being left to the garbage collector.
with open(template_file) as template_fh:
    template = json.load(template_fh)


# InterTwin Hydrological Application Workflow

## Overview

<img src="./overview.png" width="1000" style="margin:0px 400px"> </img>

## Common Workflow Language Application Components
<img src="./cwl_presentation_stac.svg" style="margin:0px 400px" width="1000" > </img>

---

# Model builder Application

## Executable (hydromt_build.sh)

```bash
#!/bin/bash

## processing arguments
region=$1
setupconfig=$2
catalog=$3

## run hydromt 
hydromt build wflow model -r "$region" -d "$catalog" -i "$setupconfig" 

```
--- 

## Container (Dockerfile)


```dockerfile

FROM ghcr.io/osgeo/gdal:ubuntu-small-3.8.1

RUN apt update
RUN apt -y install python3-pip

# python packages
COPY requirements.txt ./

RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

# directory holding the datasets referenced by catalog.yaml
RUN mkdir /data 

COPY ./src/hydromt_build.sh /usr/bin/hydromt_build
RUN chmod +x /usr/bin/hydromt_build

```


## CWL Application (hydromt-build.cwl)

```yaml

#!/usr/bin/env cwl-runner

cwlVersion: v1.2
class: CommandLineTool
id: hydromt-build

requirements:
    DockerRequirement:
        dockerPull: gitlab.inf.unibz.it:4567/remsen/cdr/climax/meteo-data-pipeline:hydromt
        dockerOutputDirectory: /output
    InitialWorkDirRequirement:
        listing:
            - entry: $(inputs.volume_data)
              entryname: /data

baseCommand: hydromt_build
arguments: []

inputs:
    region: # THIS COULD ALSO BE A VECTOR GIS FILE
        type: string
        inputBinding:
            position: 1
    setupconfig:  # WFLOW.ini
        type: File
        inputBinding:
            position: 2
    catalog: # THIS WILL BE A URL POINTING TO STAC OR A CATALOG.JSON
        type: File
        inputBinding:
            position: 3
    volume_data:
        type: Directory
        inputBinding:
            position: 4

outputs: # THIS SHOULD REFLECT THE OUTPUT FROM THE EXECUTABLE, AND IT CONTROLS THE JSON OUTPUT STRUCTURE
    output:
        outputBinding:
            glob: .
        type: Directory

```


## CWL Input Parameters (params.yaml)

```yaml
region: "{'subbasin':[ 11.4750, 46.8717 ], 'strord':3}"
setupconfig:
  class: File
  path: wflow.ini
catalog: 
  class: File
  path: hydromt_data.yaml # stac_catalog.json
volume_data: 
  class: Directory
  path: /mnt/CEPH_PROJECTS/InterTwin/Wflow/data
  
```



## RUN Model Builder application



In [15]:
# Run the hydromt-build CWL tool with the inputs from params.yaml.
# `-w output.json` writes the final CWL output object to that file.
!cwltool -w output.json hydromt-build.cwl params.yaml

Authenticating with existing credentials...
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
[1;30mINFO[0m /home/iferrario/.local/miniforge/envs/hydromt_wflow/bin/cwltool 3.1.20231114134824
[1;30mINFO[0m Resolved 'hydromt-build.cwl' to 'file:///home/iferrario/dev/InterTwin-wflow-app/workflows/hydromt-build.cwl'
[1;30mINFO[0m [job hydromt-build] /tmp/ylz3qrzu$ docker \
    run \
    -i \
    --mount=type=bind,source=/tmp/ylz3qrzu,target=/output \
    --mount=type=bind,source=/tmp/73rh6ldb,target=/tmp \
    --mount=type=bind,source=/home/iferrario/dev/InterTwin-wflow-app/workflows/hydromt_data.yaml,target=/var/lib/cwl/stg55c42202-0b79-48ee-8284-44f32d9ead0a/hydromt_data.yaml,readonly \
    --mount=type=bind,source=/home/iferrario/dev/InterTwin-wflow-app/workflows/wflow.ini,target=/var/lib/cwl/stgc396d00e-2d4a-4c9a-9e9c-ee4a8e422f34/wflow.ini,readonly \
    --mount=type=bind,source=/mnt/CEPH_PROJECTS/InterTwin/Wflow/data,target=/data,rea

In [17]:
# List the files of the generated model.
# NOTE(review): `ylz3qrzu` is the output directory name of the run captured in
# this notebook (see output.json); presumably it differs on a re-run - confirm.
!ls ./ylz3qrzu/model

forcings.nc  hydromt_data.yml  run_default  staticmaps.nc
hydromt.log  instate	       staticgeoms  wflow_sbm.toml


In [18]:
# Print the CWL output object that the cwltool run wrote to output.json.
!cat output.json

{
    "output": {
        "location": "file:///home/iferrario/dev/InterTwin-wflow-app/workflows/ylz3qrzu",
        "basename": "ylz3qrzu",
        "class": "Directory",
        "listing": [
            {
                "class": "Directory",
                "location": "file:///home/iferrario/dev/InterTwin-wflow-app/workflows/ylz3qrzu/model",
                "basename": "model",
                "listing": [
                    {
                        "class": "Directory",
                        "location": "file:///home/iferrario/dev/InterTwin-wflow-app/workflows/ylz3qrzu/model/run_default",
                        "basename": "run_default",
                        "listing": [],
                        "path": "/home/iferrario/dev/InterTwin-wflow-app/workflows/ylz3qrzu/model/run_default"
                    },
                    {
                        "class": "File",
                        "location": "file:///home/iferrario/dev/InterTwin-wflow-app/workflows/ylz3qrzu/model/wf

---

# STAC-HydroMT integration, <span style="color:orange"> reading </span> metadata from and  <span style="color:orange"> writing </span>  metadata to STAC

<img src="./workflow_cwl.svg" width="800" style="margin:0px 400px"> </img>

<img src="./hydromt_stac_integration3.drawio.svg" width="800" style="margin:0px 400px"> </img>

## Reading

## 1) HydroMT from STAC Catalog

<img src="./screeen.png" width="1000" style="margin:0px 20px"> </img>

## 2) STAC Collections are referenced by keyword in hydromt_catalog.yaml

### hydromt_catalog.yaml 

```yaml
.
.
.
wflow-forcings:
  crs: EPSG/WKT
  data_type: RasterDataset/GeoDataset/GeoDataFrame/DataFrame
  driver: raster/raster_tindex/netcdf/zarr/vector/vector_table/csv/xlsx/xls
  driver_kwargs:
    key: value
  filesystem: local/gcs/s3/http # /stac ?
  # ? filesystem_kwargs: ?
  path: /absolute_path/to/my_dataset.extension OR relative_path/to_my_dataset.extension
.
.
corine_land_cover:
.
.
soilgrid:
.
.
```

## Writing

In [83]:
# STATIC LOCAL STAC CATALOG
# Root catalog object; the metadata is collected first and then unpacked
# into the constructor as keyword arguments.

catalog_metadata = {
    "id": "wflow-catalog",
    "title": "WFLOW Catalog",
    "description": "a catalog of the datasets for running wflow hydrological model",
}
wflow_catalog = pystac.Catalog(**catalog_metadata)

In [79]:
# STATICMAPS
# Build the "wflow-par" STAC collection from the static-maps Zarr store and
# write it to ./wflow-par/collection.json.

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
file_path = "/mnt/CEPH_PROJECTS/InterTwin/Wflow/data/test_stac_data/staticmaps.zarr"
ds = xr.open_zarr(file_path, consolidated=True, use_cftime=True)

# Work on a per-collection copy so the shared `template` is not mutated and
# the cell gives the same result whatever cell ran before it.
static_map_template = {**template, "id": "wflow-par"}
static_map_collection = xstac.xarray_to_stac(
    ds,
    static_map_template,
    temporal_dimension="time",
    x_dimension="lon",
    y_dimension="lat",
    # Datacube extension: an EPSG code is given as a number; a string value
    # is interpreted as WKT2, so "4326" would not denote EPSG:4326.
    reference_system=4326,
    validate=True,
)

# Create the target directory first so open() does not fail on a fresh checkout.
os.makedirs("./wflow-par", exist_ok=True)
with open("./wflow-par/collection.json", "w") as f:
    json.dump(static_map_collection.to_dict(), f, indent=2)

static_map_collection

In [80]:
# FORCINGS
# Build the "wflow-forcings" STAC collection from the forcings Zarr store and
# write it to ./wflow-forcings/collection2.json.

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
f_file_path = "/mnt/CEPH_PROJECTS/InterTwin/Wflow/data/test_stac_data/forcings.zarr"
f_ds = xr.open_zarr(f_file_path, consolidated=True, use_cftime=True)

# Work on a per-collection copy so the shared `template` is not mutated and
# the cell gives the same result whatever cell ran before it.
forcings_template = {**template, "id": "wflow-forcings"}
forcings_collection = xstac.xarray_to_stac(
    f_ds,
    forcings_template,
    temporal_dimension="time",
    x_dimension="lon",
    y_dimension="lat",
    # Datacube extension: an EPSG code is given as a number; a string value
    # is interpreted as WKT2, so "4326" would not denote EPSG:4326.
    reference_system=4326,
    validate=True,
)

# Create the target directory first so open() does not fail on a fresh checkout.
os.makedirs("./wflow-forcings", exist_ok=True)
# NOTE(review): the static-maps cell writes `collection.json`; confirm that
# `collection2.json` is the intended file name here.
with open("./wflow-forcings/collection2.json", "w") as f:
    json.dump(forcings_collection.to_dict(), f, indent=2)

forcings_collection

In [81]:
# Attach both collections to the catalog. Guard on the collection id so that
# re-running this cell does not append duplicate child links.
linked_ids = {child.id for child in wflow_catalog.get_children()}
for collection in (static_map_collection, forcings_collection):
    if collection.id not in linked_ids:
        wflow_catalog.add_child(collection)

# Assign hrefs relative to the working directory and save the tree. Dumping
# `wflow_catalog.to_dict()` directly would serialise the child links with a
# null href, because neither the catalog nor its children have a self href
# until the catalog is normalized.
# This writes ./catalog.json and one <collection-id>/collection.json per child,
# using relative links (self-contained catalog).
wflow_catalog.normalize_and_save(
    root_href=os.getcwd(),
    catalog_type=pystac.CatalogType.SELF_CONTAINED,
)

wflow_catalog.describe()

* <Catalog id=wflow-catalog>
    * <Collection id=wflow-par>
    * <Collection id=wflow-forcings>


# Discussion points


- Deltares' plans?
 
- Best practices
  - metadata
  - stac/collection structure
- Cloud format preferred: Zarr/COGs

 Bonus:
 
- Filtering (temporal and spatial): STAC's or HydroMT's responsibility?

<img src="./stac hydromt.png" width="500" style="margin:0px 600px"> </img>