In [1]:
%load_ext autoreload
%autoreload 2

EOTDL uses GeoDB to store STAC metadata and query items.

Let's see how it works with a simple example.

In [2]:
import os 
import pystac 

# Directory holding the EuroSAT STAC dataset, relative to the notebook.
path = 'data/EuroSAT-STAC'
# The root catalog file sits directly inside that directory.
catalog_path = os.path.join(path, 'catalog.json')

# Read the catalog with pystac; the bare name on the last line displays it.
catalog = pystac.Catalog.from_file(catalog_path)
catalog

EOTDL offers the `STACDataFrame` class as an intermediate STAC representation between the raw `json` files and a `geopandas` `GeoDataFrame`.

In [3]:
from eotdl.curation.stac import STACDataFrame

# Load the same catalog file into a STACDataFrame; the catalog, its collections
# and their items each become one row.
sdf = STACDataFrame.from_stac_file(catalog_path)

# Preview the first rows.
sdf.head()

Unnamed: 0,type,id,stac_version,description,links,extent,license,stac_extensions,summaries,properties,geometry,assets,bbox,collection
0,Catalog,eurosat-rgb,1.0.0,EuroSAT-RGB dataset,"[{'rel': 'self', 'href': '/home/juan/Desktop/e...",,,,,,POLYGON EMPTY,,,
1,Collection,source,1.0.0,Collection,"[{'rel': 'root', 'href': '../catalog.json', 't...","{'spatial': {'bbox': [[0, 0, 0, 0]]}, 'tempora...",proprietary,,,,POLYGON EMPTY,,,
2,Collection,labels,1.0.0,Labels,"[{'rel': 'root', 'href': '../catalog.json', 't...","{'spatial': {'bbox': [[0, 0, 0, 0]]}, 'tempora...",proprietary,[https://stac-extensions.github.io/label/v1.0....,"{'label:classes': [{'classes': ['Industrial', ...",,POLYGON EMPTY,,,
3,Feature,Industrial_1743,1.0.0,,"[{'rel': 'root', 'href': '../../catalog.json',...",,,[],,{'datetime': '2000-01-01T00:00:00Z'},"POLYGON ((0.00000 0.00000, 0.00000 0.00000, 0....",{'Industrial_1743': {'href': '/home/juan/Deskt...,"[0, 0, 0, 0]",source
4,Feature,Industrial_1273,1.0.0,,"[{'rel': 'root', 'href': '../../catalog.json',...",,,[],,{'datetime': '2000-01-01T00:00:00Z'},"POLYGON ((0.00000 0.00000, 0.00000 0.00000, 0....",{'Industrial_1273': {'href': '/home/juan/Deskt...,"[0, 0, 0, 0]",source


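We can ingest the dataframe into GeoDB. First we prepare it as a plain `GeoDataFrame`, then we connect to the server and insert it into a collection.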

In [4]:
from shapely.geometry import Polygon
import json 
import geopandas as gpd

# Round-trip the STAC dataframe through GeoJSON before building the GeoDataFrame:
# ingesting it without this parsing step produced errors.
stac_features = json.loads(sdf.to_json())["features"]
values = gpd.GeoDataFrame.from_features(stac_features, crs="4326")

# geodb sets its own `id` field automatically, so the STAC identifier is kept
# under a different column name to avoid a conflict.
values = values.rename(columns={"id": "stac_id"})

# geodb complains about missing geometries, so any None becomes an empty Polygon.
# NOTE(review): the geometries are taken from `sdf`, which presumably shares
# its row order/index with `values` - confirm.
values.geometry = sdf.geometry.apply(
    lambda geom: Polygon() if geom is None else geom
)

values.head()

Unnamed: 0,geometry,type,stac_id,stac_version,description,links,extent,license,stac_extensions,summaries,properties,assets,bbox,collection
0,POLYGON EMPTY,Catalog,eurosat-rgb,1.0.0,EuroSAT-RGB dataset,"[{'rel': 'self', 'href': '/home/juan/Desktop/e...",,,,,,,,
1,POLYGON EMPTY,Collection,source,1.0.0,Collection,"[{'rel': 'root', 'href': '../catalog.json', 't...","{'spatial': {'bbox': [[0, 0, 0, 0]]}, 'tempora...",proprietary,,,,,,
2,POLYGON EMPTY,Collection,labels,1.0.0,Labels,"[{'rel': 'root', 'href': '../catalog.json', 't...","{'spatial': {'bbox': [[0, 0, 0, 0]]}, 'tempora...",proprietary,[https://stac-extensions.github.io/label/v1.0....,"{'label:classes': [{'classes': ['Industrial', ...",,,,
3,"POLYGON ((0.00000 0.00000, 0.00000 0.00000, 0....",Feature,Industrial_1743,1.0.0,,"[{'rel': 'root', 'href': '../../catalog.json',...",,,[],,{'datetime': '2000-01-01T00:00:00Z'},{'Industrial_1743': {'href': '/home/juan/Deskt...,"[0, 0, 0, 0]",source
4,"POLYGON ((0.00000 0.00000, 0.00000 0.00000, 0....",Feature,Industrial_1273,1.0.0,,"[{'rel': 'root', 'href': '../../catalog.json',...",,,[],,{'datetime': '2000-01-01T00:00:00Z'},{'Industrial_1273': {'href': '/home/juan/Deskt...,"[0, 0, 0, 0]",source


In [5]:
from xcube_geodb.core.geodb import GeoDBClient
from dotenv import load_dotenv

# Load environment variables (load_dotenv presumably reads a local .env file),
# so that no credentials need to be written in the notebook.
load_dotenv()

# GeoDBClient keyword argument -> environment variable that supplies its value.
geodb_settings = {
    "server_url": "GEODB_API_SERVER_URL",
    "server_port": "GEODB_API_SERVER_PORT",
    "client_id": "GEODB_AUTH_CLIENT_ID",
    "client_secret": "GEODB_AUTH_CLIENT_SECRET",
    "auth_aud": "GEODB_AUTH_DOMAIN",
}

# Indexing os.environ (rather than .get) raises KeyError for a missing variable.
geodb_client = GeoDBClient(
    **{argument: os.environ[variable] for argument, variable in geodb_settings.items()}
)

# Display the identity the client reports for itself.
geodb_client.whoami

'geodb_616b0057af0c7500691a026e'

In [6]:
# Target collection and database; every geodb call below receives both so that
# changing `database` here redirects the whole cell consistently.
collection = 'geodb-demo'
database = None

# Start from a clean slate so the cell can be re-run: drop the collection if it
# already exists. The drop must target the same database that was checked.
if geodb_client.collection_exists(collection, database):
    geodb_client.drop_collection(collection, database=database)

collections = {
    collection: {
        'crs': 4326,
        # Every column is stored as text. This is a limitation: appropriate
        # types need to be set for richer querying.
        'properties': {
            column: 'text'
            for column in values.columns
            if column not in ['geometry', 'id']
        },
    }
}

geodb_client.create_collections(collections, database=database)

# Last expression of the cell: the returned message object is displayed.
geodb_client.insert_into_collection(
    collection, database=database, values=values
)

Processing rows from 0 to 203


<xcube_geodb.core.message.Message at 0x7fb3bd198700>

We can retrieve the dataframe from GeoDB.

In [7]:
# Read the whole collection back as a dataframe, from the same database it was
# written to in the ingestion cell.
gdf = geodb_client.get_collection(collection, database=database)
gdf.head()

Unnamed: 0,id,created_at,modified_at,geometry,type,stac_id,stac_version,description,links,extent,license,stac_extensions,summaries,properties,assets,bbox,collection
0,1,2023-09-15T11:38:31.660271+00:00,,POLYGON EMPTY,Catalog,eurosat-rgb,1.0.0,EuroSAT-RGB dataset,"[{""rel"": ""self"", ""href"": ""/home/juan/Desktop/e...",,,,,,,,
1,2,2023-09-15T11:38:31.660271+00:00,,POLYGON EMPTY,Collection,source,1.0.0,Collection,"[{""rel"": ""root"", ""href"": ""../catalog.json"", ""t...","{""spatial"": {""bbox"": [[0, 0, 0, 0]]}, ""tempora...",proprietary,,,,,,
2,3,2023-09-15T11:38:31.660271+00:00,,POLYGON EMPTY,Collection,labels,1.0.0,Labels,"[{""rel"": ""root"", ""href"": ""../catalog.json"", ""t...","{""spatial"": {""bbox"": [[0, 0, 0, 0]]}, ""tempora...",proprietary,"[""https://stac-extensions.github.io/label/v1.0...","{""label:classes"": [{""classes"": [""Industrial"", ...",,,,
3,4,2023-09-15T11:38:31.660271+00:00,,"POLYGON ((0.00000 0.00000, 0.00000 0.00000, 0....",Feature,Industrial_1743,1.0.0,,"[{""rel"": ""root"", ""href"": ""../../catalog.json"",...",,,[],,"{""datetime"": ""2000-01-01T00:00:00Z""}","{""Industrial_1743"": {""href"": ""/home/juan/Deskt...","[0, 0, 0, 0]",source
4,5,2023-09-15T11:38:31.660271+00:00,,"POLYGON ((0.00000 0.00000, 0.00000 0.00000, 0....",Feature,Industrial_1273,1.0.0,,"[{""rel"": ""root"", ""href"": ""../../catalog.json"",...",,,[],,"{""datetime"": ""2000-01-01T00:00:00Z""}","{""Industrial_1273"": {""href"": ""/home/juan/Deskt...","[0, 0, 0, 0]",source


And we can query it locally, filtering the retrieved dataframe with pandas.

In [8]:
# Select the Catalog row and unwrap the properties of its GeoJSON feature.
# Note: this rebinds `catalog`, which earlier held the pystac Catalog object.
is_catalog = gdf['type'] == 'Catalog'
catalog = json.loads(gdf[is_catalog].to_json())['features'][0]['properties']
catalog

{'id': 1,
 'created_at': '2023-09-15T11:38:31.660271+00:00',
 'modified_at': None,
 'type': 'Catalog',
 'stac_id': 'eurosat-rgb',
 'stac_version': '1.0.0',
 'description': 'EuroSAT-RGB dataset',
 'links': '[{"rel": "self", "href": "/home/juan/Desktop/eotdl/demos/ipynbs/data/EuroSAT-STAC/catalog.json", "type": "application/json"}, {"rel": "root", "href": "./catalog.json", "type": "application/json"}, {"rel": "child", "href": "./source/collection.json", "type": "application/json"}, {"rel": "child", "href": "./labels/collection.json", "type": "application/json"}]',
 'extent': None,
 'license': None,
 'stac_extensions': None,
 'summaries': None,
 'properties': None,
 'assets': None,
 'bbox': None,
 'collection': None}

In [9]:
# Keep only the rows that belong to the `labels` collection and count them.
in_labels = gdf['collection'].eq('labels')
labels = gdf.loc[in_labels]
len(labels)

100

Or, better, query GeoDB directly by passing a filter to `get_collection`, so that only the matching rows are returned.

In [10]:
# Ask geodb for the rows whose `type` column equals "Catalog". The filter is a
# plain string (no placeholders, so no f-string), and the query is sent to the
# same database that was used for ingestion.
geodb_client.get_collection(collection, query="type=eq.Catalog", database=database)

Unnamed: 0,id,created_at,modified_at,geometry,type,stac_id,stac_version,description,links,extent,license,stac_extensions,summaries,properties,assets,bbox,collection
0,1,2023-09-15T11:38:31.660271+00:00,,POLYGON EMPTY,Catalog,eurosat-rgb,1.0.0,EuroSAT-RGB dataset,"[{""rel"": ""self"", ""href"": ""/home/juan/Desktop/e...",,,,,,,,


In [11]:
# Ask geodb for the rows of the `labels` collection (plain string filter, same
# database as ingestion), then preview three of them. The fixed random_state
# makes the preview identical on every run.
geodb_client.get_collection(
    collection, query="collection=eq.labels", database=database
).sample(3, random_state=42)

Unnamed: 0,id,created_at,modified_at,geometry,type,stac_id,stac_version,description,links,extent,license,stac_extensions,summaries,properties,assets,bbox,collection
59,163,2023-09-15T11:38:31.660271+00:00,,"POLYGON ((0.00000 0.00000, 0.00000 0.00000, 0....",Feature,Residential_733,1.0.0,,"[{""rel"": ""source"", ""href"": ""../../source/Resid...",,,"[""https://stac-extensions.github.io/label/v1.0...",,"{""label:classes"": [{""classes"": [""Industrial"", ...","{""labels"": {""href"": ""/home/juan/Desktop/eotdl/...","[0, 0, 0, 0]",labels
80,184,2023-09-15T11:38:31.660271+00:00,,"POLYGON ((0.00000 0.00000, 0.00000 0.00000, 0....",Feature,AnnualCrop_1033,1.0.0,,"[{""rel"": ""source"", ""href"": ""../../source/Annua...",,,"[""https://stac-extensions.github.io/label/v1.0...",,"{""label:classes"": [{""classes"": [""Industrial"", ...","{""labels"": {""href"": ""/home/juan/Desktop/eotdl/...","[0, 0, 0, 0]",labels
49,153,2023-09-15T11:38:31.660271+00:00,,"POLYGON ((0.00000 0.00000, 0.00000 0.00000, 0....",Feature,Highway_765,1.0.0,,"[{""rel"": ""source"", ""href"": ""../../source/Highw...",,,"[""https://stac-extensions.github.io/label/v1.0...",,"{""label:classes"": [{""classes"": [""Industrial"", ...","{""labels"": {""href"": ""/home/juan/Desktop/eotdl/...","[0, 0, 0, 0]",labels


We need to expose this query functionality through the EOTDL library.

> TODO: GeoDB STAC API