In [1]:
%load_ext autoreload
%autoreload 2

# import os
# os.environ["EOTDL_API_URL"] = "http://localhost:8000/"

# STAC

When you ingest a dataset to the EOTDL, a `catalog.parquet` file is created with the metadata of the dataset. This metadata is STAC-compliant, so it can be used to query the dataset using the STAC API and generate STAC catalogs.

# STAC Catalogs

The following code will ingest a dataset to the EOTDL and create a `catalog.parquet` file with the metadata of the dataset.

In [2]:
from eotdl.datasets import ingest_dataset

path = "example_data/EuroSAT-small"
ingest_dataset(path)

Ingesting directory: example_data/EuroSAT-small


Ingesting files: 100%|██████████| 7/7 [00:02<00:00,  2.70it/s]

No new version was created, your dataset has not changed.





In [3]:
import geopandas as gpd

catalog = f"{path}/catalog.parquet"

gdf = gpd.read_parquet(catalog)
gdf.head()

Unnamed: 0,type,stac_version,stac_extensions,datetime,id,bbox,geometry,assets,links,repository
0,Feature,1.0.0,[],2025-04-02 16:00:51.782652,README.md,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b...,[],eotdl
1,Feature,1.0.0,[],2025-04-02 16:00:51.782784,Forest/Forest_1.tif,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': 'f3b8b9fef6b2df6f24792e...,[],eotdl
2,Feature,1.0.0,[],2025-04-02 16:00:51.782900,Forest/Forest_2.tif,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': '2e38dab64435bfbab25bab...,[],eotdl
3,Feature,1.0.0,[],2025-04-02 16:00:51.783005,Forest/Forest_3.tif,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': '3e7bb982f9db5f7dabc556...,[],eotdl
4,Feature,1.0.0,[],2025-04-02 16:00:51.783106,AnnualCrop/AnnualCrop_2.tif,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': 'c406cb8920858b98898b9e...,[],eotdl


Since the metadata generated by the EOTDL is STAC-compliant, it can be used to automatically generate STAC catalogs.

In [4]:
from eotdl.curation.stac import create_stac_catalog

items = create_stac_catalog(catalog)

items

  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:00<00:00, 229.49it/s]


[<Item id=README.md>,
 <Item id=Forest/Forest_1.tif>,
 <Item id=Forest/Forest_2.tif>,
 <Item id=Forest/Forest_3.tif>,
 <Item id=AnnualCrop/AnnualCrop_2.tif>,
 <Item id=AnnualCrop/AnnualCrop_3.tif>,
 <Item id=AnnualCrop/AnnualCrop_1.tif>]

Optionally, you can create a STAC catalog / collection and link the items to it.

In [5]:
from eotdl.curation.stac import create_stac_catalog
import pystac

stac_catalog = pystac.Catalog(
	id = "eotdl-catalog",
	description = "EOTDL Catalog",
	title = "EOTDL Catalog",
	stac_extensions = [],
	extra_fields = {},
)

stac_catalog = create_stac_catalog(catalog, stac_catalog)

stac_catalog

100%|██████████| 7/7 [00:00<00:00, 3785.47it/s]


Either way, once the STAC metadata is generated, can be saved to disk.

In [6]:
stac_catalog.normalize_and_save(
	root_href='data/stac',
	catalog_type=pystac.CatalogType.SELF_CONTAINED
)

Keep in mind that if the original dataset already has STAC metadata, it will be overwritten.

# STAC API

You can interact with EOTDL via its STAC API, both with the `eotdl` CLI and the Python API.

In [8]:
!eotdl stac status

{'message': 'Welcome to the STAC API'}


Explore collections

In [9]:
!eotdl stac collections

['Test-links', 'EuroSAT-RGB-small-STAC', 'EuroSAT-small']


In [10]:
from eotdl.curation.stac.api import retrieve_stac_collections

retrieve_stac_collections()

['Test-links', 'EuroSAT-RGB-small-STAC', 'EuroSAT-small']

Retrieve one collection

In [11]:
!eotdl stac collection EuroSAT-small

{'uid': 'auth0|616b0057af0c7500691a026e', 'id': '67e2be7c27175b2e6770a02f', 'name': 'EuroSAT-small', 'metadata': {'authors': ['Juan B. Pedro'], 'license': 'free', 'source': 'https://github.com/earthpulse/eotdl/blob/main/tutorials/notebooks/02_ingesting.ipynb', 'description': '# EuroSAT-small\n\nThis is a small subet of the EuroSAT dataset.', 'thumbnail': ''}, 'versions': [{'version_id': 1, 'createdAt': '2025-03-25T15:31:29.364000', 'size': 643691}, {'version_id': 2, 'createdAt': '2025-03-25T16:28:11.657000', 'size': 643710}], 'tags': [], 'createdAt': '2025-03-25T15:32:28.164000', 'updatedAt': '2025-03-25T15:32:28.164000', 'likes': 0, 'downloads': 0, 'quality': 0}


In [12]:
from eotdl.curation.stac.api import retrieve_stac_collection

retrieve_stac_collection("EuroSAT-small")

{'uid': 'auth0|616b0057af0c7500691a026e',
 'id': '67e2be7c27175b2e6770a02f',
 'name': 'EuroSAT-small',
 'metadata': {'authors': ['Juan B. Pedro'],
  'license': 'free',
  'source': 'https://github.com/earthpulse/eotdl/blob/main/tutorials/notebooks/02_ingesting.ipynb',
  'description': '# EuroSAT-small\n\nThis is a small subet of the EuroSAT dataset.',
  'thumbnail': ''},
 'versions': [{'version_id': 1,
   'createdAt': '2025-03-25T15:31:29.364000',
   'size': 643691},
  {'version_id': 2,
   'createdAt': '2025-03-25T16:28:11.657000',
   'size': 643710}],
 'tags': [],
 'createdAt': '2025-03-25T15:32:28.164000',
 'updatedAt': '2025-03-25T15:32:28.164000',
 'likes': 0,
 'downloads': 0,
 'quality': 0}

Retrieve items

In [13]:
!eotdl stac items EuroSAT-small

['README.md', 'Forest/Forest_1.tif', 'Forest/Forest_2.tif', 'Forest/Forest_3.tif', 'AnnualCrop/AnnualCrop_2.tif', 'AnnualCrop/AnnualCrop_3.tif', 'AnnualCrop/AnnualCrop_1.tif']


In [14]:
from eotdl.curation.stac.api import retrieve_stac_items

retrieve_stac_items("EuroSAT-small")

['README.md',
 'Forest/Forest_1.tif',
 'Forest/Forest_2.tif',
 'Forest/Forest_3.tif',
 'AnnualCrop/AnnualCrop_2.tif',
 'AnnualCrop/AnnualCrop_3.tif',
 'AnnualCrop/AnnualCrop_1.tif']

Retrieve one item

In [15]:
!eotdl stac item EuroSAT-small README.md

{'type': 'Feature', 'stac_version': '1.0.0', 'stac_extensions': {}, 'datetime': '2025-03-25T15:32:28.130806', 'id': 'README.md', 'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0}, 'geometry': '\x01\x03\x00\x00\x00\x00\x00\x00\x00', 'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c', 'href': 'http://localhost:8000/datasets/67e2be7c27175b2e6770a02f/stage/README.md', 'size': 227, 'timestamp': '2025-03-25T15:32:28.130902'}}, 'links': {}, 'repository': 'eotdl'}


In [16]:
from eotdl.curation.stac.api import retrieve_stac_item

retrieve_stac_item("EuroSAT-small", "README.md")

{'type': 'Feature',
 'stac_version': '1.0.0',
 'stac_extensions': {},
 'datetime': '2025-03-25T15:32:28.130806',
 'id': 'README.md',
 'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0},
 'geometry': '\x01\x03\x00\x00\x00\x00\x00\x00\x00',
 'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c',
   'href': 'http://localhost:8000/datasets/67e2be7c27175b2e6770a02f/stage/README.md',
   'size': 227,
   'timestamp': '2025-03-25T15:32:28.130902'}},
 'links': {},
 'repository': 'eotdl'}

Search items using SQL queries (duckdb)

In [28]:
!eotdl stac search EuroSAT-small --query "id IN ('README.md', 'Forest/Forest_3.tif')"

[{'type': 'Feature', 'stac_version': '1.0.0', 'stac_extensions': [], 'datetime': 1742916748130, 'id': 'README.md', 'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0}, 'geometry': {}, 'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c', 'href': 'http://localhost:8000/datasets/67e2be7c27175b2e6770a02f/stage/README.md', 'size': 227, 'timestamp': 1742916748130}}, 'links': [], 'repository': 'eotdl'}, {'type': 'Feature', 'stac_version': '1.0.0', 'stac_extensions': [], 'datetime': 1742916748131, 'id': 'Forest/Forest_3.tif', 'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0}, 'geometry': {}, 'assets': {'asset': {'checksum': '3e7bb982f9db5f7dabc556016c3d081dfb1fb73d', 'href': 'http://localhost:8000/datasets/67e2be7c27175b2e6770a02f/stage/Forest/Forest_3.tif', 'size': 107244, 'timestamp': 1742916748131}}, 'links': [], 'repository': 'eotdl'}]


In [29]:
from eotdl.curation.stac.api import search_stac_items

query = "id IN ('README.md', 'Forest/Forest_3.tif')"

search_stac_items("EuroSAT-small", query)

[{'type': 'Feature',
  'stac_version': '1.0.0',
  'stac_extensions': [],
  'datetime': 1742916748130,
  'id': 'README.md',
  'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0},
  'geometry': {},
  'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c',
    'href': 'http://localhost:8000/datasets/67e2be7c27175b2e6770a02f/stage/README.md',
    'size': 227,
    'timestamp': 1742916748130}},
  'links': [],
  'repository': 'eotdl'},
 {'type': 'Feature',
  'stac_version': '1.0.0',
  'stac_extensions': [],
  'datetime': 1742916748131,
  'id': 'Forest/Forest_3.tif',
  'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0},
  'geometry': {},
  'assets': {'asset': {'checksum': '3e7bb982f9db5f7dabc556016c3d081dfb1fb73d',
    'href': 'http://localhost:8000/datasets/67e2be7c27175b2e6770a02f/stage/Forest/Forest_3.tif',
    'size': 107244,
    'timestamp': 1742916748131}},
  'links': [],
  'repository': 'eotdl'}]

You can retrieve a list of fields available for a search query

In [31]:
!eotdl stac search EuroSAT-small

{'schema': None, 'type': 'BYTE_ARRAY', 'stac_version': 'BYTE_ARRAY', 'stac_extensions': None, 'list': None, 'element': 'INT32', 'datetime': 'INT64', 'id': 'BYTE_ARRAY', 'bbox': None, 'xmax': 'DOUBLE', 'xmin': 'DOUBLE', 'ymax': 'DOUBLE', 'ymin': 'DOUBLE', 'geometry': 'BYTE_ARRAY', 'assets': None, 'asset': None, 'checksum': 'BYTE_ARRAY', 'href': 'BYTE_ARRAY', 'size': 'INT64', 'timestamp': 'INT64', 'links': None, 'repository': 'BYTE_ARRAY'}


In [32]:
search_stac_items("EuroSAT-small")

{'schema': None,
 'type': 'BYTE_ARRAY',
 'stac_version': 'BYTE_ARRAY',
 'stac_extensions': None,
 'list': None,
 'element': 'INT32',
 'datetime': 'INT64',
 'id': 'BYTE_ARRAY',
 'bbox': None,
 'xmax': 'DOUBLE',
 'xmin': 'DOUBLE',
 'ymax': 'DOUBLE',
 'ymin': 'DOUBLE',
 'geometry': 'BYTE_ARRAY',
 'assets': None,
 'asset': None,
 'checksum': 'BYTE_ARRAY',
 'href': 'BYTE_ARRAY',
 'size': 'INT64',
 'timestamp': 'INT64',
 'links': None,
 'repository': 'BYTE_ARRAY'}

TODO: make spatial queries work

In [33]:
# Example bbox query (minx, miny, maxx, maxy)
bbox = [-122.5, 37.7, -122.3, 37.9]

query = f"ST_Intersects(geometry, ST_GeomFromWKB(ST_Envelope(ST_MakeEnvelope({bbox[0]}, {bbox[1]}, {bbox[2]}, {bbox[3]}, 4326))))"

search_stac_items("EuroSAT-small", query)

Exception: Binder Error: No function matches the given name and argument types 'ST_MakeEnvelope(DECIMAL(4,1), DECIMAL(3,1), DECIMAL(4,1), DECIMAL(3,1), INTEGER_LITERAL)'. You might need to add explicit type casts.
	Candidate functions:
	ST_MakeEnvelope(DOUBLE, DOUBLE, DOUBLE, DOUBLE) -> GEOMETRY


LINE 4: ...    WHERE ST_Intersects(geometry, ST_GeomFromWKB(ST_Envelope(ST_MakeEnvelope(-122.5, 37.7, -122.3, 37.9, 4326))))
                                                                        ^

In [63]:
# Combine with other filters
query = f"ST_Intersects(geometry, ST_GeomFromWKB(ST_Envelope(ST_MakeEnvelope({bbox[0]}, {bbox[1]}, {bbox[2]}, {bbox[3]}, 4326)))) AND datetime > '2023-01-01'"

search_stac_items("EuroSAT-RGBa123123", query)

Exception: Binder Error: No function matches the given name and argument types 'ST_MakeEnvelope(DECIMAL(4,1), DECIMAL(3,1), DECIMAL(4,1), DECIMAL(3,1), INTEGER_LITERAL)'. You might need to add explicit type casts.
	Candidate functions:
	ST_MakeEnvelope(DOUBLE, DOUBLE, DOUBLE, DOUBLE) -> GEOMETRY


LINE 4: ...    WHERE ST_Intersects(geometry, ST_GeomFromWKB(ST_Envelope(ST_MakeEnvelope(-122.5, 37.7, -122.3, 37.9, 4326)))) AND...
                                                                        ^