### **Creating a STAC Collection for INCA data**

**Necessary imports**

In [49]:
import os
from urllib.parse import urljoin
import pystac
import datetime

from pystac import Summaries
from pystac.utils import str_to_datetime

from pystac.extensions.datacube import DatacubeExtension, DimensionType, HorizontalSpatialDimensionAxis
from pystac.extensions.projection import ProjectionExtension
from pystac.extensions.scientific import ScientificExtension
from pystac.extensions.raster import RasterExtension

from shapely.geometry import mapping, box

import requests
from requests.auth import HTTPBasicAuth

from dotenv import load_dotenv
# Load the STAC credentials from a .env file.
# NOTE(review): this is an absolute, machine-specific path; on another machine
# the file will not be found and the credentials stay unset.
load_dotenv(dotenv_path="/home/otto/s1_zarr/.env")

True

In [50]:
# Credentials for the STAC ingestion API, read from the process environment.
# Either value is None when the corresponding variable is not set.
username = os.environ.get("username_stac_dev")
password = os.environ.get("password_stac_dev")

**Defining Metadata**

- shape/extent of the datacube:

In [51]:
# Dimensions of the datacube (datacube extension, "cube:dimensions").
# The x/y extents are projected coordinates (MGI / Austria Lambert, EPSG:31287),
# so the reference system is stated explicitly: the datacube extension assumes
# EPSG:4326 for horizontal dimensions that omit "reference_system".
datacube = {
    "x": {
        "axis": HorizontalSpatialDimensionAxis.X,
        "type": DimensionType.SPATIAL,
        "extent": [20000, 720000],
        "reference_system": 31287,
    },
    "y": {
        "axis": HorizontalSpatialDimensionAxis.Y,
        "type": DimensionType.SPATIAL,
        "extent": [220000, 620000],
        "reference_system": 31287,
    },
    "time": {
        "type": DimensionType.TEMPORAL,
        # Open-ended series: the end of the temporal extent is left as None.
        "extent": ["2011-03-15T00:00:00Z", None],
    },
}

- available parameters and description:

In [52]:
# Variables stored in the datacube ("cube:variables").
# Every variable is a "data" variable defined over the same three dimensions,
# so only the name, description and unit differ between entries.
parameters = {
    name: {
        "description": description,
        "unit": unit,
        "dimensions": ["x", "y", "time"],
        "type": "data",
    }
    for name, description, unit in (
        ("GL", "global radiation", "W m-2"),
        ("P0", "mean sea level pressure", "Pa"),
        ("RH2M", "relative humidity", "percent"),
        ("RR", "1-hour precipitation sum", "kg m-2"),
        ("T2M", "air temperature", "degree_Celsius"),
        ("TD2M", "dew point temperature", "degree_Celsius"),
        ("UU", "wind speed in eastward direction", "m s-1"),
        ("VV", "wind speed in northward direction", "m s-1"),
    )
}

- summaries:

In [53]:
    # Free-form collection summaries; every value is a list.
    # The projection label names the same CRS as the "Epsg" entry
    # (MGI / Austria Lambert, EPSG:31287).
    summaries_dict = {
        "Doi": ["https://doi.org/10.60669/m6w8-s545"],
        "Epsg": ["31287"],
        "Projection": ["MGI / Austria Lambert (EPSG: 31287)"],
        "Timezone": ["UTC"],
        "Grid": ["1x1km"],
        "Temporal Resolution": ["hourly"],
    }

- Properties according to [Extensions](https://github.com/stac-extensions):

In [54]:
# Geographic (lon/lat) bounding box; also used for the collection's spatial extent.
bbox = [7.1, 45.77, 17.74, 49.48]
# The same footprint in projected coordinates: [xmin, ymin, xmax, ymax].
proj_bbox = [20000, 220000, 720000, 620000]

# Extension fields that are written onto the collection's top level.
properties = {
    "chunks": {"x": 100, "y": 100, "time": 720},
    # Projection codes take the form "<authority>:<code>".
    "proj:code": "EPSG:31287",
    "proj:bbox": proj_bbox,
    # NOTE(review): this string uses WKT1 syntax (PROJCS[...]) although the
    # field is named proj:wkt2 — consider replacing it with a WKT2 string.
    "proj:wkt2": "PROJCS[\"MGI / Austria Lambert\",GEOGCS[\"MGI\",DATUM[\"Militar_Geographische_Institute\",SPHEROID[\"Bessel 1841\",6377397.155,299.1528128,AUTHORITY[\"EPSG\",\"7004\"]],TOWGS84[577.326,90.129,463.919,5.137,1.474,5.297,2.4232],AUTHORITY[\"EPSG\",\"6312\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AUTHORITY[\"EPSG\",\"4312\"]],PROJECTION[\"Lambert_Conformal_Conic_2SP\"],PARAMETER[\"standard_parallel_1\",49],PARAMETER[\"standard_parallel_2\",46],PARAMETER[\"latitude_of_origin\",47.5],PARAMETER[\"central_meridian\",13.33333333333333],PARAMETER[\"false_easting\",400000],PARAMETER[\"false_northing\",400000],UNIT[\"metre\",1,AUTHORITY[\"EPSG\",\"9001\"]],AUTHORITY[\"EPSG\",\"31287\"]]",
    # proj:shape is ordered [rows (y), columns (x)]: 401 rows by 701 columns.
    "proj:shape": [401, 701],
    # proj:geometry is expressed in the projected CRS, not in lon/lat.
    "proj:geometry": mapping(box(*proj_bbox)),
    "proj:transform": [1000.0, 0.0, 20000.0, 0.0, 1000.0, 220000.0, 0.0, 0.0, 1.0],
    "raster:spatial_resolution": 1000,
    "cube:dimensions": datacube,
    # NOTE(review): differs from the DOI listed in the summaries
    # (10.60669/m6w8-s545) — confirm which one is correct.
    "sci:doi": "10.60669/6akt-5p05",
}

**Creating the Collection**

Now we can create the collection. As before, metadata can be passed directly to the constructor.

In [55]:
# Build the collection. The datacube dimensions and variables are attached as
# extra top-level fields ("cube:dimensions" / "cube:variables").
collection = pystac.Collection(
    id="incal-hourly",
    title="INCA analysis hourly data (1km)",
    description="INCA combines all available datasources - weather stations, remote sensing data, numerical weather forecast models, and a high resolution terrain model - to provide a precise analysis of the condition of the near surface atmosphere.",
    extent=pystac.Extent(
        pystac.SpatialExtent([bbox]),
        # Open-ended temporal extent: the end is None.
        pystac.TemporalExtent([[str_to_datetime("2011-03-15T00:00:00Z"), None]]),
    ),
    # "humidity" was previously misspelled as "humiditiy".
    keywords=["GeoSphere", "INCA", "radiation", "pressure", "humidity", "precipitation", "temperature", "wind speed"],
    license="CC-BY-4.0",
    extra_fields={
        "cube:dimensions": datacube,
        "cube:variables": parameters,
    },
)

Define the data providers and their roles

In [56]:
# EODC is registered as host; GeoSphere Austria as producer, licensor and processor.
collection.providers = [
    pystac.Provider(
        name="EODC",
        url="https://eodc.eu/",
        roles=[pystac.ProviderRole.HOST],
    ),
    pystac.Provider(
        name="GeoSphere Austria",
        url="https://data.hub.geosphere.at/",
        roles=[
            pystac.ProviderRole.PRODUCER,
            pystac.ProviderRole.LICENSOR,
            pystac.ProviderRole.PROCESSOR,
        ],
    ),
]

Define which extensions to use

In [57]:
# Declare the schema URI of each extension whose fields the collection uses.
collection.stac_extensions = [
    extension.get_schema_uri()
    for extension in (
        ProjectionExtension,
        DatacubeExtension,
        ScientificExtension,
        RasterExtension,
    )
]

Attach the summaries and the extension properties to the collection

In [58]:
# Attach the summaries, then copy each extension property onto the
# collection's top-level fields (existing keys are overwritten).
collection.summaries = Summaries(summaries_dict)
for field_name, field_value in properties.items():
    collection.extra_fields[field_name] = field_value

Add a thumbnail image

In [59]:
# Register the preview image as the collection's thumbnail asset.
# The URL previously contained a doubled path separator ("s1_zarr//main").
collection.add_asset(
    key="thumbnail",
    asset=pystac.Asset(
        href="https://raw.githubusercontent.com/eodcgmbh/s1_zarr/main/inca/INCA_thumbnail.png",
        media_type=pystac.MediaType.PNG,
        roles=["thumbnail"],
        title="Thumbnail",
    ),
)

Because all of the data live in a single Zarr store, we add that store as a collection-level asset

In [60]:
# Register the Zarr store holding the data as a collection-level asset.
# The placeholder extra field ("hello": "test") that used to be set on this
# asset was removed so that it is not published with the collection.
collection.add_asset(
    key="zarr_store",
    asset=pystac.Asset(
        href="https://data.eodc.eu/collections/INCA/INCA.zarr",
        media_type=pystac.MediaType.ZARR,
        title="INCA",
        roles=["data"],
    ),
)

We can write the collection to a JSON file so that it can be validated afterwards

In [61]:
# Write the collection to "<collection id>.json" (a relative path) and record
# that path as the collection's self href.
collection_path = "{}.json".format(collection.id)
collection.set_self_href(collection_path)
collection.save_object()

**Validation**

In [62]:
# Reload the collection from the JSON file written earlier and validate it.
collection = pystac.Collection.from_file(f"{collection.id}.json")

try:
    collection.validate()
except Exception as e:
    print("Validation failed:", e)
    # Re-raise so that a "Run All" stops here instead of continuing on to
    # submit a collection that did not validate.
    raise
else:
    print("Collection is valid.")

Collection is valid.


**Submitting the Collection**

If validation passes, we can submit the collection to the ingestion API

In [63]:
# os.getenv yields None for unset variables; stop before sending a request
# with missing credentials.
if username is None or password is None:
    raise RuntimeError(
        "Missing credentials: set username_stac_dev and password_stac_dev before submitting."
    )

# Initial submit
# requests.post("https://dev.stac.eodc.eu/ingestion/v1/collections", json=collection.to_dict(), auth=HTTPBasicAuth(username, password), timeout = 30)

# Update: the URL is built from the collection id so the two cannot diverge.
response = requests.put(
    f"https://dev.stac.eodc.eu/ingestion/v1/collections/{collection.id}",
    json=collection.to_dict(),
    auth=HTTPBasicAuth(username, password),
    timeout=30,
)
# Raise on 4xx/5xx so that a rejected update does not go unnoticed.
response.raise_for_status()

# Delete Collection
# requests.delete("https://dev.stac.eodc.eu/ingestion/v1/collections/incal-hourly", auth=HTTPBasicAuth(username, password), timeout = 30)

# Last expression of the cell: display the response.
response

<Response [200]>