# Convert the OpenAerialMap (OAM) catalog into STAC

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so source edits are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import datetime as dt
import json
import logging
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

from pystac import Item

from stactools.hotosm.exceptions import AssetNotFoundError
from stactools.hotosm.oam_metadata import OamMetadata
from stactools.hotosm.oam_metadata_client import OamMetadataClient
from stactools.hotosm.stac import create_collection, create_item

# Attach a default stderr handler to the root logger (does nothing if the root
# logger already has handlers).
logging.basicConfig()


class QuietLogFormatter(logging.Formatter):
    """Quieter exception logging for the notebook.

    Records that carry exception information are rendered with only the final
    line of the traceback (normally ``ExceptionType: message``) instead of the
    full stack. Records without exception information are formatted normally.
    """

    def format(self, record):
        """Format ``record``, keeping only the last line of any exception text.

        Args:
            record: The ``logging.LogRecord`` to render.

        Returns:
            The formatted message, followed by the last traceback line when the
            record has exception information.

        Note:
            The shortened text is stored on ``record.exc_text``, which
            ``logging`` treats as a cache, so handlers that format the same
            record afterwards reuse the shortened text.
        """
        # Only render a traceback when there is one: calling
        # ``formatException(None)`` for a plain record raises ``TypeError``.
        if record.exc_info and not record.exc_text:
            record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            # Ignore trailing newlines so the last non-empty line is kept.
            record.exc_text = record.exc_text.rstrip("\n").rsplit("\n", 1)[-1]
        return super().format(record)


# Route "stactools.hotosm" log records through the quiet formatter.
hotosm_logger = logging.getLogger("stactools.hotosm")

# Loggers are process-wide singletons, so re-running this cell would otherwise
# stack another handler on the same logger and repeat every message once more.
# Drop the handler installed by a previous run before adding a fresh one.
for stale_handler in [
    h for h in hotosm_logger.handlers if h.get_name() == "hotosm-quiet"
]:
    hotosm_logger.removeHandler(stale_handler)

handler = logging.StreamHandler()
handler.set_name("hotosm-quiet")
handler.setFormatter(QuietLogFormatter())
hotosm_logger.addHandler(handler)

In [3]:
# Build the collection with `create_collection()`; the bare name on the last
# line displays it as the cell's output.
collection = create_collection()
collection

In [4]:
# Persist the collection's dict form as pretty-printed (2-space indent) JSON.
with open("./openaerialmap-collection.json", "w") as collection_file:
    collection_json = json.dumps(collection.to_dict(), indent=2)
    collection_file.write(collection_json)

In [5]:
# Client used below to list the catalog's metadata entries and their count.
client = OamMetadataClient.new()

In [6]:
def process_metadata_to_stac(metadata: OamMetadata) -> Item:
    """Convert one OAM metadata record into a STAC Item (thread-pool worker)."""
    # Sanitizing first corrects common metadata problems, such as incorrect
    # SPDX license identifiers, before the Item is built.
    return create_item(metadata.sanitize())


# Fetch the metadata entries to convert and the catalog's reported item count.
oam_items = client.get_all_items()
total = client.get_count()

print(f"Going to process {total} items from entire catalog")

stac_items: list[Item] = []
# Every entry that could not be converted, paired with the exception raised.
failures: list[tuple[OamMetadata, Exception]] = []

# perf_counter() is monotonic, so the elapsed time cannot be skewed by system
# clock adjustments while this long-running conversion is in progress.
time1 = time.perf_counter()
with ThreadPoolExecutor() as pool:
    # Map each future back to its source metadata so failures can be reported
    # against the entry that caused them.
    futures = {
        pool.submit(process_metadata_to_stac, oam_item): oam_item
        for oam_item in oam_items
    }

    # Results are collected in completion order, not submission order.
    for i, future in enumerate(as_completed(futures)):
        if i % 1000 == 0:
            print(f"Item {i}/{total}")

        oam_item = futures[future]

        try:
            stac_item = future.result()
        except AssetNotFoundError as e:
            print(f"Could not generate STAC Item for {oam_item.id}")
            failures.append((oam_item, e))
        except Exception as e:
            # Include the exception itself: the cause of an unexpected failure
            # is otherwise invisible unless `failures` is inspected afterwards.
            print(f"Unexpected error occurred for {oam_item.id}! {e!r}")
            failures.append((oam_item, e))
        else:
            stac_items.append(stac_item)

time2 = time.perf_counter()
print(f"Took {(time2 - time1) / 60.0:0.3f} minutes to convert catalog to STAC")

# Naive local-time timestamp of when the conversion finished.
converted_at = dt.datetime.now()

Going to process 17930 items from entire catalog


Could not parse id=59e62b743d6412ef722091bd
TypeError: fromisoformat: argument must be str
ERROR:stactools.hotosm.oam_metadata_client:Could not parse id=59e62b743d6412ef722091bd
TypeError: fromisoformat: argument must be str
Could not parse id=59e62b773d6412ef72209353
TypeError: fromisoformat: argument must be str
ERROR:stactools.hotosm.oam_metadata_client:Could not parse id=59e62b773d6412ef72209353
TypeError: fromisoformat: argument must be str
Could not parse id=59e62b773d6412ef722093b7
TypeError: fromisoformat: argument must be str
ERROR:stactools.hotosm.oam_metadata_client:Could not parse id=59e62b773d6412ef722093b7
TypeError: fromisoformat: argument must be str


Item 0/17930
Item 1000/17930
Item 2000/17930
Item 3000/17930
Item 4000/17930




Item 5000/17930
Item 6000/17930
Item 7000/17930
Item 8000/17930




Item 9000/17930




Item 10000/17930
Item 11000/17930
Item 12000/17930
Item 13000/17930
Item 14000/17930
Item 15000/17930
Item 16000/17930
Could not generate STAC Item for 5bcf4a395a9ef7cb5d8a2c96
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c4d
Could not generate STAC Item for 5bcf4a395a9ef7cb5d8a2c7a
Could not generate STAC Item for 5bcf4a395a9ef7cb5d8a2c9e
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c5f
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c53
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c47
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c6e
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c72
Could not generate STAC Item for 5bcf4a395a9ef7cb5d8a2c86
Could not generate STAC Item for 5bcf4a395a9ef7cb5d8a2c81
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c66
Could not generate STAC Item for 5bcf4a395a9ef7cb5d8a2c92
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c3e
Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c37
Could not g

In [7]:
# Report how many STAC Items were produced relative to the catalog's reported
# item count.
print(f"Converted {len(stac_items)} of {total} metadata entries to STAC Items")

Converted 17861 of 17930 metadata entries to STAC Items


## Write to NDJSON for ingestion into a PgSTAC-backed STAC catalog

In [8]:
# Newline-delimited JSON: one serialized STAC Item per line, named after the
# conversion timestamp.
destination = f"openaerialmap-{converted_at.strftime('%Y%m%dT%H%M%S')}.ndjson"

with open(destination, "w") as dst:
    dst.writelines(f"{json.dumps(item.to_dict())}\n" for item in stac_items)

print(f"Wrote {len(stac_items)} STAC Items to {destination}")

Wrote 17861 STAC Items to openaerialmap-20250512T180412.ndjson


## Write to STAC GeoParquet

In [9]:
import rustac

destination = f"openaerialmap-{converted_at.strftime('%Y%m%dT%H%M%S')}.parquet"

await rustac.write(destination, [item.to_dict() for item in stac_items])
print(f"Wrote {len(stac_items)} STAC Items to {destination}")

Wrote 17861 STAC Items to openaerialmap-20250512T180412.parquet
