In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed

from pystac import Item

from stactools.hotosm.exceptions import AssetNotFoundError
from stactools.hotosm.oam_metadata import OamMetadata
from stactools.hotosm.oam_metadata_client import OamMetadataClient
from stactools.hotosm.stac import create_collection, create_item

# Configure the root logger with the default handler/format so that log records
# emitted by the cells below are shown in the notebook output.
logging.basicConfig()

In [3]:
# Call stactools.hotosm.stac.create_collection(). The return value is not
# assigned to a name, so it is only available as this cell's displayed result.
create_collection()

In [4]:
# Client used by the following cell to fetch the metadata count and entries.
client = OamMetadataClient.new()

In [5]:
# Count reported by the client; used below as the denominator in the progress
# and summary messages.
total = client.get_count()

# Metadata entries that the thread pool below converts to STAC Items.
# NOTE(review): the client logs "Could not parse id=..." for some entries, so it
# presumably skips them and returns fewer than `total` entries — confirm.
oam_items = client.get_all_items()


def process_metadata_to_stac(metadata: OamMetadata) -> Item:
    """Worker function to convert one OAM metadata entry to a STAC Item.

    This runs inside a ``ThreadPoolExecutor``, so it must only operate on the
    entry it was handed. It deliberately does not read any notebook-level
    variable: the module-level ``oam_item`` name is rebound by the submitting
    and collecting loops while workers are still running, so reading it here
    could convert a different entry than the one this call was submitted for.

    Args:
        metadata: The OAM metadata entry to convert.

    Returns:
        The STAC Item created from the sanitized metadata.

    Raises:
        Any exception raised by ``metadata.sanitize()`` or ``create_item()``
        propagates to the caller (and therefore surfaces from
        ``Future.result()`` when run in a pool).
    """
    # correct common issues in metadata, e.g., incorrect SPDX license identifiers
    sanitized = metadata.sanitize()
    return create_item(sanitized)


# Successfully converted STAC Items, in completion (not submission) order.
stac_items = []
with ThreadPoolExecutor() as executor:
    # Remember which metadata entry each future belongs to so that failures
    # can be reported by entry id.
    futures = {}
    for oam_item in oam_items:
        futures[executor.submit(process_metadata_to_stac, oam_item)] = oam_item

    # Drain the futures as they finish, printing progress every 1000 results.
    for i, finished in enumerate(as_completed(futures)):
        if i % 1000 == 0:
            print(f"Item {i}/{total}")

        oam_item = futures[finished]

        try:
            # .result() re-raises whatever the worker raised.
            stac_items.append(finished.result())
        except AssetNotFoundError:
            # Log the failure with traceback and move on to the next entry;
            # any other exception type still aborts the run.
            logging.exception(f"Could not generate STAC Item for {oam_item.id}")

ERROR:stactools.hotosm.oam_metadata_client:Could not parse id=59e62b773d6412ef72209353
Traceback (most recent call last):
  File "/Users/ceholden/Documents/HOTOSM/stactools-hotosm/src/stactools/hotosm/oam_metadata_client.py", line 101, in get_items
    results.append(self._parse_result(result))
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/ceholden/Documents/HOTOSM/stactools-hotosm/src/stactools/hotosm/oam_metadata_client.py", line 46, in _parse_result
    acquisition_start=dt.datetime.fromisoformat(result["acquisition_start"]),
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: fromisoformat: argument must be str
ERROR:stactools.hotosm.oam_metadata_client:Could not parse id=59e62b773d6412ef722093b7
Traceback (most recent call last):
  File "/Users/ceholden/Documents/HOTOSM/stactools-hotosm/src/stactools/hotosm/oam_metadata_client.py", line 101, in get_items
    results.append(self._parse_result(result))
                   ^^^^^^^^^^^

Item 0/17745
Item 1000/17745
Item 2000/17745
Item 3000/17745
Item 4000/17745
Item 5000/17745
Item 6000/17745
Item 7000/17745
Item 8000/17745
Item 9000/17745
Item 10000/17745




Item 11000/17745
Item 12000/17745
Item 13000/17745
Item 14000/17745
Item 15000/17745
Item 16000/17745
Item 17000/17745


ERROR:root:Could not generate STAC Item for 5bcf4a385a9ef7cb5d8a2c66
Traceback (most recent call last):
  File "rasterio/_base.pyx", line 310, in rasterio._base.DatasetBase.__init__
  File "rasterio/_base.pyx", line 221, in rasterio._base.open_dataset
  File "rasterio/_err.pyx", line 359, in rasterio._err.exc_wrap_pointer
rasterio._err.CPLE_HttpResponseError: HTTP response code: 404

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ceholden/Documents/HOTOSM/stactools-hotosm/src/stactools/hotosm/stac.py", line 202, in _add_projection_extension
    with rasterio.open(href) as src:
         ^^^^^^^^^^^^^^^^^^^
  File "/Users/ceholden/Documents/HOTOSM/stactools-hotosm/.venv/lib/python3.12/site-packages/rasterio/env.py", line 463, in wrapper
    return f(*args, **kwds)
           ^^^^^^^^^^^^^^^^
  File "/Users/ceholden/Documents/HOTOSM/stactools-hotosm/.venv/lib/python3.12/site-packages/rasterio/__init__.py", line 356, i

In [8]:
# Summarize how many STAC Items were produced relative to the count reported by
# the client (`total`); entries whose conversion was logged as failed above are
# not in `stac_items`.
print(f"Converted {len(stac_items)} of {total} metadata entries to STAC Items")

Converted 17690 of 17745 metadata entries to STAC Items


### Write to NDJSON for ingestion into (PgSTAC) STAC Catalog

In [9]:
import json

# Newline-delimited JSON: one serialized STAC Item per line.
with open("openaerialmap.ndjson", "w") as dst:
    # Serialize lazily so only one item's JSON text is held at a time.
    serialized = (json.dumps(item.to_dict()) for item in stac_items)
    dst.writelines(f"{item_json}\n" for item_json in serialized)