In [1]:
%load_ext autoreload
%autoreload 2

import os
os.environ["EOTDL_API_URL"] = "https://api.eotdl.com/"

# STAC

When you ingest a dataset to the EOTDL, a `catalog.parquet` file is created with the metadata of the dataset. This metadata is STAC-compliant, so it can be used to query the dataset using the STAC API and generate STAC catalogs.

# STAC Catalogs

The following code will ingest a dataset to the EOTDL and create a `catalog.parquet` file with the metadata of the dataset.

In [2]:
from eotdl.datasets import ingest_dataset

path = "example_data/EuroSAT-small"
ingest_dataset(path)

Ingesting directory: example_data/EuroSAT-small


Ingesting files: 100%|██████████| 7/7 [00:01<00:00,  5.04it/s]


No new version was created, your dataset has not changed.


In [3]:
import geopandas as gpd

catalog = f"{path}/catalog.parquet"

gdf = gpd.read_parquet(catalog)
gdf.head()

Unnamed: 0,type,stac_version,stac_extensions,datetime,id,bbox,geometry,assets,links,repository
0,Feature,1.0.0,[],2025-05-26 12:22:59.615086,README.md,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b...,[],eotdl
1,Feature,1.0.0,[],2025-05-26 12:22:59.615246,Forest/Forest_1.tif,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': 'f3b8b9fef6b2df6f24792e...,[],eotdl
2,Feature,1.0.0,[],2025-05-26 12:22:59.615871,Forest/Forest_2.tif,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': '2e38dab64435bfbab25bab...,[],eotdl
3,Feature,1.0.0,[],2025-05-26 12:22:59.616104,Forest/Forest_3.tif,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': '3e7bb982f9db5f7dabc556...,[],eotdl
4,Feature,1.0.0,[],2025-05-26 12:22:59.616305,AnnualCrop/AnnualCrop_2.tif,"{'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin'...",POLYGON EMPTY,{'asset': {'checksum': 'c406cb8920858b98898b9e...,[],eotdl


Since the metadata generated by the EOTDL is STAC-compliant, it can be used to automatically generate STAC catalogs.

In [4]:
from eotdl.curation.stac import create_stac_catalog

items = create_stac_catalog(catalog)

items

  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:00<00:00, 388.61it/s]


[<Item id=README.md>,
 <Item id=Forest/Forest_1.tif>,
 <Item id=Forest/Forest_2.tif>,
 <Item id=Forest/Forest_3.tif>,
 <Item id=AnnualCrop/AnnualCrop_2.tif>,
 <Item id=AnnualCrop/AnnualCrop_3.tif>,
 <Item id=AnnualCrop/AnnualCrop_1.tif>]

Optionally, you can create a STAC catalog / collection and link the items to it.

In [5]:
from eotdl.curation.stac import create_stac_catalog
import pystac

stac_catalog = pystac.Catalog(
	id = "eotdl-catalog",
	description = "EOTDL Catalog",
	title = "EOTDL Catalog",
	stac_extensions = [],
	extra_fields = {},
)

stac_catalog = create_stac_catalog(catalog, stac_catalog)

stac_catalog

  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:00<00:00, 3600.70it/s]


Either way, once the STAC metadata is generated, can be saved to disk.

In [6]:
stac_catalog.normalize_and_save(
	root_href='data/stac',
	catalog_type=pystac.CatalogType.SELF_CONTAINED
)

Keep in mind that if the original dataset already has STAC metadata, it will be overwritten.

# STAC API

You can interact with EOTDL via its STAC API, both with the `eotdl` CLI and the Python API.

In [7]:
!eotdl stac status

{'message': 'Welcome to the STAC API'}


Explore collections

In [8]:
!eotdl stac collections

[{'name': 'Test-links', 'id': '683440d487f8a6eed16b57fa'}, {'name': 'EuroSAT-RGB-small-STAC', 'id': '6834407a57638bef53669578'}, {'name': 'EuroSAT-small', 'id': '68343e3157638bef53669577'}, {'name': 'EuroCropsCloudNative', 'id': '682f2d186a29eac175867330'}, {'name': 'MSC-France', 'id': '682731d2180d79b848ab04f2'}, {'name': 'ESAWAAI', 'id': '6826ee856a29eac175867327'}, {'name': 'international-charter-earthquake-eo-data', 'id': '68260de258344526e140a298'}, {'name': 'JPL-CH4-detection', 'id': '680760267b05622170bef9ff'}, {'name': 'HYPERVIEW2', 'id': '68074b43c8575682bb134c3e'}, {'name': 'PASTIS-HD', 'id': '6800bb5bc8575682bb134c38'}, {'name': 'xView2', 'id': '67f65bc18940e45fe460232b'}, {'name': 'crop-type-mapping-south-sudan', 'id': '67f637f7c8575682bb134c37'}, {'name': 'Five-Billion-Pixels', 'id': '67f539a5c8575682bb134c36'}, {'name': 'CROPGRIDS', 'id': '67f535be7b05622170bef9f9'}, {'name': 'DynamicEarthNet', 'id': '67f44098c8575682bb134c35'}, {'name': 'sen1floods11', 'id': '67f3ceb3894

In [9]:
from eotdl.curation.stac.api import retrieve_stac_collections

retrieve_stac_collections()

[{'name': 'Test-links', 'id': '683440d487f8a6eed16b57fa'},
 {'name': 'EuroSAT-RGB-small-STAC', 'id': '6834407a57638bef53669578'},
 {'name': 'EuroSAT-small', 'id': '68343e3157638bef53669577'},
 {'name': 'EuroCropsCloudNative', 'id': '682f2d186a29eac175867330'},
 {'name': 'MSC-France', 'id': '682731d2180d79b848ab04f2'},
 {'name': 'ESAWAAI', 'id': '6826ee856a29eac175867327'},
 {'name': 'international-charter-earthquake-eo-data',
  'id': '68260de258344526e140a298'},
 {'name': 'JPL-CH4-detection', 'id': '680760267b05622170bef9ff'},
 {'name': 'HYPERVIEW2', 'id': '68074b43c8575682bb134c3e'},
 {'name': 'PASTIS-HD', 'id': '6800bb5bc8575682bb134c38'},
 {'name': 'xView2', 'id': '67f65bc18940e45fe460232b'},
 {'name': 'crop-type-mapping-south-sudan', 'id': '67f637f7c8575682bb134c37'},
 {'name': 'Five-Billion-Pixels', 'id': '67f539a5c8575682bb134c36'},
 {'name': 'CROPGRIDS', 'id': '67f535be7b05622170bef9f9'},
 {'name': 'DynamicEarthNet', 'id': '67f44098c8575682bb134c35'},
 {'name': 'sen1floods11', '

Retrieve one collection

In [10]:
!eotdl stac collection EuroSAT-small

{'uid': 'auth0|616b0057af0c7500691a026e', 'id': '68343e3157638bef53669577', 'name': 'EuroSAT-small', 'metadata': {'authors': ['Juan B. Pedro'], 'license': 'free', 'source': 'https://github.com/earthpulse/eotdl/blob/main/tutorials/notebooks/02_ingesting.ipynb', 'description': '# EuroSAT-small\n\nThis is a small subet of the EuroSAT dataset.', 'thumbnail': 'https://images.unsplash.com/photo-1658763440438-2391eba32774?q=80&w=2065&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D'}, 'versions': [{'version_id': 1, 'createdAt': '2025-05-26T10:15:31.216000', 'size': 643691}, {'version_id': 2, 'createdAt': '2025-05-26T12:22:36.231000', 'size': 643710}], 'tags': [], 'createdAt': '2025-05-26T12:10:57.377000', 'updatedAt': '2025-05-26T12:10:57.377000', 'likes': 0, 'downloads': 0, 'quality': 0, 'active': True, 'allowed_users': [], 'benchmark': None, 'visibility': 'public'}


In [11]:
from eotdl.curation.stac.api import retrieve_stac_collection

retrieve_stac_collection("EuroSAT-small")

{'uid': 'auth0|616b0057af0c7500691a026e',
 'id': '68343e3157638bef53669577',
 'name': 'EuroSAT-small',
 'metadata': {'authors': ['Juan B. Pedro'],
  'license': 'free',
  'source': 'https://github.com/earthpulse/eotdl/blob/main/tutorials/notebooks/02_ingesting.ipynb',
  'description': '# EuroSAT-small\n\nThis is a small subet of the EuroSAT dataset.',
  'thumbnail': 'https://images.unsplash.com/photo-1658763440438-2391eba32774?q=80&w=2065&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D'},
 'versions': [{'version_id': 1,
   'createdAt': '2025-05-26T10:15:31.216000',
   'size': 643691},
  {'version_id': 2,
   'createdAt': '2025-05-26T12:22:36.231000',
   'size': 643710}],
 'tags': [],
 'createdAt': '2025-05-26T12:10:57.377000',
 'updatedAt': '2025-05-26T12:10:57.377000',
 'likes': 0,
 'downloads': 0,
 'quality': 0,
 'active': True,
 'allowed_users': [],
 'benchmark': None,
 'visibility': 'public'}

Retrieve items

In [12]:
!eotdl stac items EuroSAT-small

[{'id': 'README.md', 'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c', 'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/README.md', 'size': 227, 'timestamp': '2025-05-26T12:10:57.099646'}}}, {'id': 'Forest/Forest_1.tif', 'assets': {'asset': {'checksum': 'f3b8b9fef6b2df6f24792ead860616186fe5efe0', 'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/Forest/Forest_1.tif', 'size': 107244, 'timestamp': '2025-05-26T12:10:57.100192'}}}, {'id': 'Forest/Forest_2.tif', 'assets': {'asset': {'checksum': '2e38dab64435bfbab25bab8c779ecad6c0764677', 'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/Forest/Forest_2.tif', 'size': 107244, 'timestamp': '2025-05-26T12:10:57.100401'}}}, {'id': 'Forest/Forest_3.tif', 'assets': {'asset': {'checksum': '3e7bb982f9db5f7dabc556016c3d081dfb1fb73d', 'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/Forest/Forest_3.tif', 'size': 107244, 'timestamp': '2025-0

In [13]:
from eotdl.curation.stac.api import retrieve_stac_items

retrieve_stac_items("EuroSAT-small")

[{'id': 'README.md',
  'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c',
    'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/README.md',
    'size': 227,
    'timestamp': '2025-05-26T12:10:57.099646'}}},
 {'id': 'Forest/Forest_1.tif',
  'assets': {'asset': {'checksum': 'f3b8b9fef6b2df6f24792ead860616186fe5efe0',
    'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/Forest/Forest_1.tif',
    'size': 107244,
    'timestamp': '2025-05-26T12:10:57.100192'}}},
 {'id': 'Forest/Forest_2.tif',
  'assets': {'asset': {'checksum': '2e38dab64435bfbab25bab8c779ecad6c0764677',
    'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/Forest/Forest_2.tif',
    'size': 107244,
    'timestamp': '2025-05-26T12:10:57.100401'}}},
 {'id': 'Forest/Forest_3.tif',
  'assets': {'asset': {'checksum': '3e7bb982f9db5f7dabc556016c3d081dfb1fb73d',
    'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/Forest/

Retrieve one item

In [14]:
!eotdl stac item EuroSAT-small README.md

{'type': 'Feature', 'stac_version': '1.0.0', 'stac_extensions': {}, 'datetime': '2025-05-26T12:10:57.099531', 'id': 'README.md', 'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0}, 'geometry': '\x01\x03\x00\x00\x00\x00\x00\x00\x00', 'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c', 'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/README.md', 'size': 227, 'timestamp': '2025-05-26T12:10:57.099646'}}, 'links': {}, 'repository': 'eotdl'}


In [15]:
from eotdl.curation.stac.api import retrieve_stac_item

retrieve_stac_item("EuroSAT-small", "README.md")

{'type': 'Feature',
 'stac_version': '1.0.0',
 'stac_extensions': {},
 'datetime': '2025-05-26T12:10:57.099531',
 'id': 'README.md',
 'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0},
 'geometry': '\x01\x03\x00\x00\x00\x00\x00\x00\x00',
 'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c',
   'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/README.md',
   'size': 227,
   'timestamp': '2025-05-26T12:10:57.099646'}},
 'links': {},
 'repository': 'eotdl'}

Search items using SQL queries (duckdb)

In [16]:
!eotdl stac search EuroSAT-small --query "id IN ('README.md', 'Forest/Forest_3.tif')"

[{'type': 'Feature', 'stac_version': '1.0.0', 'stac_extensions': [], 'datetime': 1748261457099, 'id': 'README.md', 'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0}, 'geometry': {}, 'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c', 'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/README.md', 'size': 227, 'timestamp': 1748261457099}}, 'links': [], 'repository': 'eotdl'}, {'type': 'Feature', 'stac_version': '1.0.0', 'stac_extensions': [], 'datetime': 1748261457100, 'id': 'Forest/Forest_3.tif', 'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0}, 'geometry': {}, 'assets': {'asset': {'checksum': '3e7bb982f9db5f7dabc556016c3d081dfb1fb73d', 'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/Forest/Forest_3.tif', 'size': 107244, 'timestamp': 1748261457100}}, 'links': [], 'repository': 'eotdl'}]


In [17]:
from eotdl.curation.stac.api import search_stac_items

query = "id IN ('README.md', 'Forest/Forest_3.tif')"

search_stac_items("EuroSAT-small", query)

[{'type': 'Feature',
  'stac_version': '1.0.0',
  'stac_extensions': [],
  'datetime': 1748261457099,
  'id': 'README.md',
  'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0},
  'geometry': {},
  'assets': {'asset': {'checksum': 'a6bb30a57d0f5ff0aaa65b46d7c1eb16cbced43c',
    'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/README.md',
    'size': 227,
    'timestamp': 1748261457099}},
  'links': [],
  'repository': 'eotdl'},
 {'type': 'Feature',
  'stac_version': '1.0.0',
  'stac_extensions': [],
  'datetime': 1748261457100,
  'id': 'Forest/Forest_3.tif',
  'bbox': {'xmax': 0.0, 'xmin': 0.0, 'ymax': 0.0, 'ymin': 0.0},
  'geometry': {},
  'assets': {'asset': {'checksum': '3e7bb982f9db5f7dabc556016c3d081dfb1fb73d',
    'href': 'https://api.eotdl.com/datasets/68343e3157638bef53669577/stage/Forest/Forest_3.tif',
    'size': 107244,
    'timestamp': 1748261457100}},
  'links': [],
  'repository': 'eotdl'}]

You can retrieve a list of fields available for a search query

In [18]:
!eotdl stac search EuroSAT-small

{'schema': None, 'type': 'BYTE_ARRAY', 'stac_version': 'BYTE_ARRAY', 'stac_extensions': None, 'list': None, 'element': 'INT32', 'datetime': 'INT64', 'id': 'BYTE_ARRAY', 'bbox': None, 'xmax': 'DOUBLE', 'xmin': 'DOUBLE', 'ymax': 'DOUBLE', 'ymin': 'DOUBLE', 'geometry': 'BYTE_ARRAY', 'assets': None, 'asset': None, 'checksum': 'BYTE_ARRAY', 'href': 'BYTE_ARRAY', 'size': 'INT64', 'timestamp': 'INT64', 'links': None, 'repository': 'BYTE_ARRAY'}


In [19]:
search_stac_items("EuroSAT-small")

{'schema': None,
 'type': 'BYTE_ARRAY',
 'stac_version': 'BYTE_ARRAY',
 'stac_extensions': None,
 'list': None,
 'element': 'INT32',
 'datetime': 'INT64',
 'id': 'BYTE_ARRAY',
 'bbox': None,
 'xmax': 'DOUBLE',
 'xmin': 'DOUBLE',
 'ymax': 'DOUBLE',
 'ymin': 'DOUBLE',
 'geometry': 'BYTE_ARRAY',
 'assets': None,
 'asset': None,
 'checksum': 'BYTE_ARRAY',
 'href': 'BYTE_ARRAY',
 'size': 'INT64',
 'timestamp': 'INT64',
 'links': None,
 'repository': 'BYTE_ARRAY'}

TODO: make spatial queries work

In [20]:
# Example bbox query (minx, miny, maxx, maxy)
bbox = [-122.5, 37.7, -122.3, 37.9]

query = f"ST_Intersects(geometry, ST_GeomFromWKB(ST_Envelope(ST_MakeEnvelope({bbox[0]}, {bbox[1]}, {bbox[2]}, {bbox[3]}, 4326))))"

search_stac_items("EuroSAT-small", query)

Exception: Binder Error: No function matches the given name and argument types 'ST_MakeEnvelope(DECIMAL(4,1), DECIMAL(3,1), DECIMAL(4,1), DECIMAL(3,1), INTEGER_LITERAL)'. You might need to add explicit type casts.
	Candidate functions:
	ST_MakeEnvelope(DOUBLE, DOUBLE, DOUBLE, DOUBLE) -> GEOMETRY


LINE 4: ...    WHERE ST_Intersects(geometry, ST_GeomFromWKB(ST_Envelope(ST_MakeEnvelope(-122.5, 37.7, -122.3, 37.9, 4326))))
                                                                        ^

In [63]:
# Combine with other filters
query = f"ST_Intersects(geometry, ST_GeomFromWKB(ST_Envelope(ST_MakeEnvelope({bbox[0]}, {bbox[1]}, {bbox[2]}, {bbox[3]}, 4326)))) AND datetime > '2023-01-01'"

search_stac_items("EuroSAT-RGBa123123", query)

Exception: Binder Error: No function matches the given name and argument types 'ST_MakeEnvelope(DECIMAL(4,1), DECIMAL(3,1), DECIMAL(4,1), DECIMAL(3,1), INTEGER_LITERAL)'. You might need to add explicit type casts.
	Candidate functions:
	ST_MakeEnvelope(DOUBLE, DOUBLE, DOUBLE, DOUBLE) -> GEOMETRY


LINE 4: ...    WHERE ST_Intersects(geometry, ST_GeomFromWKB(ST_Envelope(ST_MakeEnvelope(-122.5, 37.7, -122.3, 37.9, 4326)))) AND...
                                                                        ^