In [None]:
%%capture
!pip install xstac

In [9]:
# List all zarr data in test-data
%load_ext autoreload
%autoreload
import json
import pystac
from pystac_client import Client
import s3fs
import sys; sys.path.append('..');
import helpers.eodc_hub_role as eodc_hub_role
import helpers.s3helpers as s3helpers
import helpers.stac as stachelpers

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
credentials = eodc_hub_role.fetch_and_set_credentials()
bucket = 'nasa-eodc-data-store'
prefix = "test-data"
s3_fs = s3fs.S3FileSystem(
    key=credentials['AccessKeyId'],
    secret=credentials['SecretAccessKey'],
    token=credentials['SessionToken'], 
    anon=False
)

In [10]:
# Create a new STAC Catalog
stac_catalog = pystac.Catalog(
    id='eodc-data-store-for-zarr-viz',
    description='EODC Data Store for Zarr Viz'
)

In [11]:
suffixes = [".zarr", ".json"]
matching_paths = s3helpers.list_s3_paths(bucket, prefix, suffixes)

for path in matching_paths:
    print(path)

nasa-eodc-data-store/test-data/cmip6-kerchunk/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk.json
nasa-eodc-data-store/test-data/cmip6-zarr/365_262_262_CMIP6_daily_GISS-E2-1-G_tas.zarr
nasa-eodc-data-store/test-data/cmip6-zarr/600_1440_1_CMIP6_daily_GISS-E2-1-G_tas.zarr
nasa-eodc-data-store/test-data/cmip6-zarr/600_1440_29_CMIP6_daily_GISS-E2-1-G_tas.zarr
nasa-eodc-data-store/test-data/fake-data/single_chunk/store_lat1024_lon2048.zarr
nasa-eodc-data-store/test-data/fake-data/single_chunk/store_lat1448_lon2896.zarr
nasa-eodc-data-store/test-data/fake-data/single_chunk/store_lat2048_lon4096.zarr
nasa-eodc-data-store/test-data/fake-data/single_chunk/store_lat2896_lon5792.zarr
nasa-eodc-data-store/test-data/fake-data/single_chunk/store_lat4096_lon8192.zarr
nasa-eodc-data-store/test-data/fake-data/single_chunk/store_lat512_lon1024.zarr
nasa-eodc-data-store/test-data/fake-data/single_chunk/store_lat724_lon1448.zarr
nasa-eodc-data-store/test-data/fake-data/with_chunks/store_lat1448_lon2896.zar

In [12]:
for path in matching_paths:
    reference = False
    if path.endswith('.json'):
        reference = True
    stac_collection = stachelpers.generate_stac(f's3://{path}', reference=reference)
    stac_collection.links = [pystac.Link(target="collection.json", rel="self")]
    stac_catalog.add_child(stac_collection)

  
  
  
  


In [13]:
with open('external-datasets.json', 'r') as f:
    external_datasets = json.loads(f.read())

for key, dataset in external_datasets.items():
    reference = False
    path = dataset['source']
    if path.endswith('.json'):
        reference = True
    stac_collection = stachelpers.generate_stac(path, idstr=key, reference=reference)
    stac_collection.links = [pystac.Link(target="collection.json", rel="self")]
    stac_catalog.add_child(stac_collection)

  


In [14]:
stac_catalog

In [15]:
stac_catalog.normalize_and_save(root_href="stac")

In [16]:
stac_catalog.save(pystac.CatalogType.SELF_CONTAINED)

In [17]:
STAC_API_URL = "stac/catalog.json"
catalog = Client.open(STAC_API_URL)

In [18]:
collections = list(catalog.get_collections())
for collection in sorted(collections, key=lambda x: x.id):
    print(collection.id)

365_262_262_CMIP6_daily_GISS-E2-1-G_tas.zarr
600_1440_1_CMIP6_daily_GISS-E2-1-G_tas.zarr
600_1440_29_CMIP6_daily_GISS-E2-1-G_tas.zarr
aws-noaa-oisst-feedstock_reference
cmip6-pds_GISS-E2-1-G_historical_tas
combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk.json
power_901_monthly_meteorology_utc.zarr
store_lat1024_lon2048.zarr
store_lat1448_lon2896.zarr
store_lat1448_lon2896.zarr
store_lat2048_lon4096.zarr
store_lat2048_lon4096.zarr
store_lat2896_lon5792.zarr
store_lat2896_lon5792.zarr
store_lat4096_lon8192.zarr
store_lat4096_lon8192.zarr
store_lat512_lon1024.zarr
store_lat5793_lon11586.zarr
store_lat724_lon1448.zarr
