In [1]:
import pystac
import os
import glob
import fsspec
import json
from copy import deepcopy as copy
import pandas as pd

In [2]:
# Simulation identifiers of the form "<model>/<experiment>/<version>".
# addlinks() turns the slashes into dots and matches the result against the
# dataset names in `dslist`.
phase1 = [
    # ifs-fesom2-sr
    "ifs-fesom2-sr/eerie-control-1950/v20240304",
    "ifs-fesom2-sr/hist-1950/v20240304",
    "ifs-fesom2-sr/eerie-spinup-1950/v20240304",
    # ifs-amip-tco399
    "ifs-amip-tco399/hist-c-0-a-lr20/v20240901",
    "ifs-amip-tco399/hist-c-lr20-a-0/v20240901",
    "ifs-amip-tco399/hist-c-lr30-a-lr30/v20231106",
    "ifs-amip-tco399/hist-c-lr30-a-0/v20231106",
    "ifs-amip-tco399/hist/v20240901",
    # icon-esm-er
    "icon-esm-er/eerie-control-1950/v20240618",
    "icon-esm-er/eerie-spinup-1950/v20240618",
    # ifs-amip-tco1279
    "ifs-amip-tco1279/hist-c-0-a-lr20/v20240901",
    "ifs-amip-tco1279/hist/v20240901",
]

In [3]:
# Provider metadata attached to every generated collection.
PROVIDER_DEFAULT = {
    "name": "DKRZ",
    "description": "The data host of eerie.cloud",
    "roles": ["host"],
    "url": "https://dkrz.de",
}


In [4]:
# Asset pointing at the technical documentation page; define_collections()
# attaches a deep copy of it to each collection under the key "doc".
TECHDOC = pystac.Asset(
    title="Technical documentation",
    href="https://pad.gwdg.de/OZo5HMC4R6iljvZHlo-BzQ#",
    roles=["OVERVIEW"],
    media_type=pystac.MediaType.HTML,
)


In [5]:
# Fetch the JSON document served at the eerie.cloud /datasets endpoint.
# addlinks() iterates over it and does substring tests on each entry, so the
# entries are presumably dataset-name strings - TODO confirm.
# The `with` block makes sure the HTTP file handle is closed after parsing.
with fsspec.open("https://eerie.cloud.dkrz.de/datasets") as dsfile:
    dslist = json.load(dsfile)

In [6]:
# Base URL under which the generated STAC JSON files are addressed
# (used as href prefix for the catalog and for every collection).
intakeurl_root = (
    "https://raw.githubusercontent.com/eerie-project/intake_catalogues"
    "/refs/heads/main/dkrz/disk/stac-templates/"
)

In [7]:
# URL of the README that define_collections() reads for "ifs-amip" models.
ifsamipreadme = (
    "https://raw.githubusercontent.com/eerie-project/EERIE_hackathon_2023"
    "/refs/heads/main/IFS_AMIP/README.md"
)

In [8]:
# Location of the EERIE controlled-vocabulary JSON; only its top-level "CV"
# member is kept.
cvurl="https://raw.githubusercontent.com/eerie-project/dreq_tools/refs/heads/update_tables/Tables/EERIE_CV.json"
# Use a context manager so the remote file handle is closed once parsed.
with fsspec.open(cvurl) as cvfile:
    cv = json.load(cvfile)["CV"]

In [9]:
# Keys of the controlled vocabulary's "source_id" and "experiment_id" tables;
# define_collections() uses them for membership tests.
cvmodels = list(cv["source_id"])
cvexps = list(cv["experiment_id"])

In [10]:
# Every directory directly below the current working directory is treated as
# one model; plain files are ignored.
cwd = os.getcwd()
modelpaths = list(filter(os.path.isdir, glob.glob(cwd + "/*")))

In [11]:
def addlinks(cat,model,exp):
    """Attach the parent link and the item links to a collection.

    Parameters
    ----------
    cat :
        The pystac object to extend; it is modified in place via ``add_link``.
    model : str
        Model name, i.e. the first path component of the ``phase1`` entries.
    exp : str
        Experiment name, i.e. the second path component of the ``phase1``
        entries.

    Returns
    -------
    The same ``cat`` object, with one "parent" link to the experiments
    catalog and one item link per entry of ``dslist`` that contains the
    dotted name of a matching ``phase1`` simulation.
    """
    cat.add_link(
        pystac.Link(
            "parent",
            intakeurl_root+"catalog-experiments.json",
            media_type=pystac.MediaType.JSON
        )
    )
    # Match on the complete "<model>/<exp>/" path prefix. A plain substring
    # test would let exp "hist" also pick up "hist-c-0-a-lr20" and friends,
    # attaching items of other experiments to this collection.
    prefix=model+"/"+exp+"/"
    phase1sim=[
        sim.replace('/','.')
        for sim in phase1
        if (sim+"/").startswith(prefix)
    ]
    for entry in dslist:
        if any(sim in entry for sim in phase1sim):
            cat.add_link(
                pystac.Link(
                    pystac.RelType.ITEM,
                    "https://eerie.cloud.dkrz.de/datasets/"+entry+"/stac",
                    media_type=pystac.MediaType.GEOJSON
                )
            )
    return cat

In [12]:
def define_catalog(staccols):
    """Build the top-level catalog that lists all experiment collections.

    Parameters
    ----------
    staccols : list of dict
        Collection dictionaries (as produced by ``Collection.to_dict()``).
        For each one, its link with ``rel == "self"`` is re-used as a
        "child" link of the catalog. Collections without such a link are
        skipped.

    Returns
    -------
    pystac.Catalog
        Catalog with one child link per collection and a parent link to the
        eerie.cloud STAC catalog.
    """
    description="Static collections of EERIE Experiment output from the eerie.cloud. Recent versions are shown."
    description+=(
        "\n\n[Imprint](https://www.dkrz.de/en/about-en/contact/impressum) "+
        "and \n[Privacy Policy](https://www.dkrz.de/en/about-en/contact/en-datenschutzhinweise).\n"
    )
    cat = pystac.Catalog(
        id="EERIE-static",
        title="EERIE Experiment collections",
        description=description,
        href=intakeurl_root+"catalog-experiments.json"
    )
    for col in staccols:
        # Look the self link up without indexing, so a collection that has
        # none is skipped instead of raising IndexError.
        selflink=next(
            (link for link in col.get("links",[]) if link.get("rel")=="self"),
            None
        )
        if not selflink:
            continue
        # Work on a copy: the collection dict itself must stay untouched.
        child=copy(selflink)
        child["rel"]=pystac.RelType.CHILD
        # Rename the serialized keys to the keyword names of pystac.Link.
        child["target"] = child.pop("href")
        child["media_type"] = child.pop("type",None)
        cat.add_link(pystac.Link(**child))
    cat.add_link(
        pystac.Link(
            "parent",
            "https://swift.dkrz.de/v1/dkrz_7fa6baba-db43-4d12-a295-8e3ebb1a01ed/catalogs/stac-catalog-eeriecloud.json",
            media_type=pystac.MediaType.JSON
        )
    )
    return cat

In [14]:
def define_collections(model,exppath):
    """Build the STAC collection for one model/experiment directory.

    Parameters
    ----------
    model : str
        Model name (lower-case directory name). Its upper-case form is
        looked up in the controlled vocabulary.
    exppath : str
        Path of the experiment directory; the part after the last "/" is
        used as the experiment name.

    Returns
    -------
    pystac.Collection
        Collection with a global spatial extent, a fixed temporal extent
        (1980-2024 for "ifs-amip" models, 1950-2050 otherwise), keywords,
        the default provider, the technical-documentation asset and the
        links added by ``addlinks``.

    Notes
    -----
    For "ifs-amip" models the README at ``ifsamipreadme`` is fetched, so the
    call needs network access in that case.
    """
    exp=exppath.split('/')[-1]
    modelupper=model.upper()

    # Model-level text: the IFS-AMIP README, or the "source" entry of the
    # controlled vocabulary when the model is listed there.
    model_description=None
    if "ifs-amip" in model:
        start,end='1980','2024'
        # Context manager closes the remote handle once the text is read.
        with fsspec.open(ifsamipreadme,"r") as readme:
            model_description=readme.read()
    else:
        start,end='1950','2050'
        if modelupper in cvmodels:
            model_description=cv["source_id"][modelupper]["source"]
    tempextent=pystac.TemporalExtent(
        [pd.to_datetime(start,format='%Y'),
         pd.to_datetime(end,format='%Y')]
    )

    # Assemble heading, model text, experiment text and legal footer.
    # The model text used to be overwritten by the heading and never made it
    # into the collection; it is now appended after the heading.
    description="# EERIE "+model+" "+exp+" data in Zarr format\n\n"
    if model_description:
        description+=model_description+"\n\n"
    if exp in cvexps:
        description+=cv["experiment_id"][exp]["experiment"]
    description+=(
        "\n\n[Imprint](https://www.dkrz.de/en/about-en/contact/impressum) "+
        "and \n[Privacy Policy](https://www.dkrz.de/en/about-en/contact/en-datenschutzhinweise).\n"
    )

    keywords=["EERIE", "cloud"]
    keywords+=modelupper.split('-')
    keywords+=[exp]

    collection = pystac.Collection(
        id="EERIE-"+model+"-"+exp,
        title=model+"-"+exp+" data in Zarr format",
        description=description,
        href=intakeurl_root+model+"/"+exp+"/stac_"+model+"_"+exp+".json",
        extent=pystac.Extent(
            spatial=pystac.SpatialExtent([-180, -90, 180, 90]),
            temporal=tempextent,
        ),
        keywords=keywords,
        # NOTE(review): the whole PROVIDER_DEFAULT dict is passed as the first
        # positional argument here. The driver loop compensates by replacing
        # sd["providers"][0] with sd["providers"][0]["name"] after to_dict(),
        # so this call is deliberately left as is; changing it requires
        # changing that loop at the same time.
        providers=[pystac.Provider(PROVIDER_DEFAULT)],
        # Deep copy so every collection owns its own asset object.
        assets=dict(doc=copy(TECHDOC)),
    )
    collection = addlinks(collection,model,exp=exp)
    return collection

In [15]:
# Driver: write one collection JSON per <model>/<experiment> directory, then
# the catalog that links to all of them.
staccols=[]
# Sorted so that the order of the generated child links does not depend on
# the arbitrary order in which glob returns directory entries.
for modpath in sorted(modelpaths):
    exppaths=sorted(
        a for a in glob.glob(modpath+"/*")
        if os.path.isdir(a)
    )
    model=modpath.split('/')[-1]
    for exppath in exppaths:
        exp=exppath.split('/')[-1]
        col=define_collections(model,exppath)
        if not col:
            continue
        sd=col.to_dict()
        # define_collections() hands the complete provider dict to
        # pystac.Provider as its name, so the serialized provider is nested
        # under "name". Unwrap it - but only when it really is nested.
        provider=sd["providers"][0]
        if isinstance(provider.get("name"),dict):
            sd["providers"][0] = provider["name"]
        # The context manager flushes and closes the output file.
        with fsspec.open(exppath+"/stac_"+model+"_"+exp+".json","w") as outfile:
            json.dump(sd,outfile)
        staccols.append(sd)

staccat=define_catalog(staccols)
sd=staccat.to_dict()
with fsspec.open("catalog-experiments.json","w") as outfile:
    json.dump(sd,outfile)