In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before executing each cell, so edits to the package source are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed

from pystac import Item

from stactools.hotosm.exceptions import AssetNotFoundError
from stactools.hotosm.oam_metadata import OamMetadata
from stactools.hotosm.oam_metadata_client import OamMetadataClient
from stactools.hotosm.stac import create_collection, create_item

# Give the root logger a default stream handler (no-op if it already has one)
# so that log records emitted by imported libraries are shown in the notebook.
logging.basicConfig()


class QuietLogFormatter(logging.Formatter):
    """Quieter exception logging for the notebook.

    Reduces the traceback attached to a log record to its final line so that
    errors logged with exception information do not flood the cell output.
    Records without exception information are formatted normally.
    """

    def format(self, record):
        """Format ``record``, keeping only the last line of any exception text.

        Args:
            record: The ``logging.LogRecord`` to format.

        Returns:
            The formatted message. When the record carries exception
            information, the message is followed by the last line of the
            formatted traceback only.
        """
        # Records logged without an exception have no ``exc_info``; passing
        # ``None`` to ``formatException`` raises, so only render a traceback
        # when one is actually attached and not already cached.
        if record.exc_info and not record.exc_text:
            record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            # ``exc_text`` is cached on the record, so handlers that format
            # this record afterwards reuse the shortened text as well.
            record.exc_text = record.exc_text.split("\n")[-1]
        return super().format(record)


# Route records from the "stactools.hotosm" logger through a dedicated stream
# handler that uses the quiet formatter defined in this cell.
handler = logging.StreamHandler()
handler.setFormatter(QuietLogFormatter())
hotosm_logger = logging.getLogger("stactools.hotosm")
hotosm_logger.addHandler(handler)

In [3]:
# Build the collection; the return value is not stored here.
# NOTE(review): presumably returns a pystac Collection that the notebook
# displays as the cell result — confirm.
create_collection()

In [4]:
# Client used in the following cell to count and fetch the OAM metadata entries.
client = OamMetadataClient.new()

In [5]:
# Count reported by the client; used only as the denominator in the progress
# and summary messages.
total = client.get_count()

# Metadata entries to convert to STAC Items.
# NOTE(review): the client logs entries it cannot parse, so this may yield
# fewer entries than `total` — confirm.
oam_items = client.get_all_items()


def process_metadata_to_stac(metadata: OamMetadata) -> Item:
    """Worker function to convert metadata to STAC.

    Args:
        metadata: The OAM metadata entry to convert.

    Returns:
        The STAC Item created from the sanitized metadata.

    Exceptions raised by ``sanitize`` or ``create_item`` propagate to the
    caller.
    """
    # Operate on the argument rather than the notebook-level ``oam_item`` loop
    # variable: that global is rebound while worker threads are running, so
    # reading it here could convert a different entry than the one this task
    # was submitted for.
    # Sanitizing corrects common issues in metadata, e.g., incorrect SPDX
    # license identifiers.
    metadata_sanitized = metadata.sanitize()
    return create_item(metadata_sanitized)


# Successfully created STAC Items, in completion order.
stac_items: list[Item] = []
# Metadata entries for which no STAC Item could be generated (the entries
# themselves are stored, not their IDs).
failures: list[OamMetadata] = []
with ThreadPoolExecutor() as pool:
    # Maps each submitted future back to the metadata entry it was created for.
    futures = {}

    for oam_item in oam_items:
        future = pool.submit(process_metadata_to_stac, oam_item)
        futures[future] = oam_item

    # Futures are yielded as they finish, not in submission order.
    for i, future in enumerate(as_completed(futures)):
        # Report progress every 1000 completed futures.
        if i % 1000 == 0:
            print(f"Item {i}/{total}")

        oam_item = futures[future]

        try:
            # Re-raises any exception raised inside the worker.
            stac_item = future.result()
        except AssetNotFoundError:
            # Only this error is tolerated; any other exception propagates and
            # stops the cell.
            print(f"Could not generate STAC Item for {oam_item.id}")
            failures.append(oam_item)
        else:
            stac_items.append(stac_item)

Could not parse id=59e62b773d6412ef72209353
TypeError: fromisoformat: argument must be str
ERROR:stactools.hotosm.oam_metadata_client:Could not parse id=59e62b773d6412ef72209353
TypeError: fromisoformat: argument must be str
Could not parse id=59e62b773d6412ef722093b7
TypeError: fromisoformat: argument must be str
ERROR:stactools.hotosm.oam_metadata_client:Could not parse id=59e62b773d6412ef722093b7
TypeError: fromisoformat: argument must be str
Could not parse id=59e62b743d6412ef722091bd
TypeError: fromisoformat: argument must be str
ERROR:stactools.hotosm.oam_metadata_client:Could not parse id=59e62b743d6412ef722091bd
TypeError: fromisoformat: argument must be str


Item 0/17750
Item 1000/17750
Item 2000/17750
Item 3000/17750
Item 4000/17750
Item 5000/17750
Item 6000/17750
Item 7000/17750
Item 8000/17750
Item 9000/17750
Item 10000/17750
Item 11000/17750
Item 12000/17750
Item 13000/17750
Item 14000/17750
Item 15000/17750
Item 16000/17750
Item 17000/17750
Could not generate STAC Item for 5bcf4a365a9ef7cb5d8a2bc2
Could not generate STAC Item for 5bcf4a375a9ef7cb5d8a2c05
Could not generate STAC Item for 5bcf4a375a9ef7cb5d8a2c2b
Could not generate STAC Item for 5bcf4a365a9ef7cb5d8a2bdc
Could not generate STAC Item for 5bcf4a355a9ef7cb5d8a2baa
Could not generate STAC Item for 5bcf4a375a9ef7cb5d8a2c1f
Could not generate STAC Item for 5bcf4a355a9ef7cb5d8a2ba0
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c3e
Could not generate STAC Item for 5bcf4a365a9ef7cb5d8a2beb
Could not generate STAC Item for 5bcf4a375a9ef7cb5d8a2c24
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c47
Could not generate STAC Item for 5bcf4a375a9ef7cb5d8a2bff
Could not g

In [6]:
# Summarize how many STAC Items were created relative to the reported total.
print(f"Converted {len(stac_items)} of {total} metadata entries to STAC Items")

Converted 17694 of 17750 metadata entries to STAC Items


### Write to NDJSON for ingestion into a (PgSTAC) STAC catalog

In [7]:
import json

# Newline-delimited JSON: each STAC Item is serialized onto its own line.
with open("openaerialmap.ndjson", "w") as dst:
    for item in stac_items:
        dst.write(json.dumps(item.to_dict()) + "\n")