In [5]:
%%capture
!pip install intake xarray[complete] pystac

In [1]:
import pandas as pd
import intake
import xarray  as xr
import pystac
import json
from datetime import datetime,timezone

In [2]:
chirpsS3 = "s3://climate-action-datalake/zone=raw/source=chirps/variable=precipitation.zarr/"
datetime_utc = datetime.now(tz=timezone.utc)


In [3]:
catalog = pystac.Catalog(id='datacube-catalog', description='Datacube catalog stored in S3')

In [4]:
print(list(catalog.get_all_items()))
print(list(catalog.get_children()))

[]
[]


In [4]:
dataset_item = pystac.Item(
    id="chirps",
    geometry= None,
    bbox=None,
    datetime =datetime_utc,
    properties={
        "zarr_store": chirpsS3  # Reference the Zarr store location
    }
)

In [5]:
catalog.add_item(dataset_item)

In [6]:
dataset_item.get_parent()

In [7]:
catalog.describe()

* <Catalog id=datacube-catalog>
  * <Item id=chirps>


In [8]:
dataset_item.add_asset(
    key='chips-zarr',
    asset=pystac.Asset(
        title = "Data cube chirps",
        description = "Historical chirps transformed into cloud native format Zarr",
        href=chirpsS3,
        media_type=pystac.MediaType.ZARR 
    )
)
    

In [9]:
print(json.dumps(dataset_item.to_dict(), indent=4))

{
    "type": "Feature",
    "stac_version": "1.0.0",
    "id": "chirps",
    "properties": {
        "zarr_store": "s3://climate-action-datalake/zone=raw/source=chirps/variable=precipitation.zarr/",
        "datetime": "2024-03-14T21:16:16.142340Z"
    },
    "geometry": null,
    "links": [
        {
            "rel": "root",
            "href": null,
            "type": "application/json"
        },
        {
            "rel": "parent",
            "href": null,
            "type": "application/json"
        }
    ],
    "assets": {
        "chips-zarr": {
            "href": "s3://climate-action-datalake/zone=raw/source=chirps/variable=precipitation.zarr/",
            "type": "application/vnd+zarr",
            "title": "Data cube chirps",
            "description": "Historical chirps transformed into cloud native format Zarr"
        }
    },
    "stac_extensions": []
}


In [13]:
catalog.normalize_hrefs('../catalog')

In [14]:
catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)

In [24]:
help(pystac.MediaType)

Help on class MediaType in module pystac.media_type:

class MediaType(pystac.utils.StringEnum)
 |  MediaType(value, names=None, *, module=None, qualname=None, type=None, start=1)
 |  
 |  A list of common media types that can be used in STAC Asset and Link metadata.
 |  
 |  Method resolution order:
 |      MediaType
 |      pystac.utils.StringEnum
 |      builtins.str
 |      enum.Enum
 |      builtins.object
 |  
 |  Data and other attributes defined here:
 |  
 |  COG = image/tiff; application=geotiff; profile=cloud-optimized
 |  
 |  FLATGEOBUF = application/vnd.flatgeobuf
 |  
 |  GEOJSON = application/geo+json
 |  
 |  GEOPACKAGE = application/geopackage+sqlite3
 |  
 |  GEOTIFF = image/tiff; application=geotiff
 |  
 |  HDF = application/x-hdf
 |  
 |  HDF5 = application/x-hdf5
 |  
 |  HTML = text/html
 |  
 |  JPEG = image/jpeg
 |  
 |  JPEG2000 = image/jp2
 |  
 |  JSON = application/json
 |  
 |  KML = application/vnd.google-earth.kml+xml
 |  
 |  PDF = application/pdf
 |  


In [6]:
chirps = xr.open_zarr(chirpsS3)

severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.



In [7]:
chirps.bounds

AttributeError: 'Dataset' object has no attribute 'bounds'