# Process Maxar's open data catalog into OAM-specific STAC

In [1]:
import datetime as dt
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

import pystac
import requests

from stactools.hotosm.maxar.stac import create_collection, create_item

In [2]:
# URL of the root STAC catalog in the Maxar open data S3 bucket; this is the
# entry point handed to pystac when the catalog is read.
MAXAR_ROOT_CATALOG = "https://maxar-opendata.s3.amazonaws.com/events/catalog.json"
# URL of a JSON document with per-event metadata; this notebook only reads the
# "date" field of each entry.
MAXAR_EVENT_INFO = "https://maxar-opendata.s3.amazonaws.com/event_info.json"

In [3]:
# Download the Maxar event metadata and collect the event dates in ascending
# order; their min/max are passed to `create_collection` further down.
#
# The timeout keeps the notebook from hanging forever on a stalled
# connection, and raise_for_status() reports an HTTP error right here rather
# than as a confusing JSON/KeyError failure a few lines later.
resp = requests.get(MAXAR_EVENT_INFO, timeout=60)
resp.raise_for_status()
maxar_event_info = resp.json()

# Each entry's "date" is parsed as YYYY-MM-DD (naive datetimes, no timezone).
maxar_event_dates = sorted(
    dt.datetime.strptime(info["date"], "%Y-%m-%d") for info in maxar_event_info
)

In [4]:
# Load the STAC object found at MAXAR_ROOT_CATALOG with pystac. The bare name
# on the last line makes the notebook display it as the cell output.
maxar_catalog = pystac.read_file(MAXAR_ROOT_CATALOG)
maxar_catalog

In [5]:
# Where the generated Collection JSON is written (relative to the working dir).
MAXAR_COLLECTION_PATH = "maxar-opendata-collection.json"

# The collection is built from the Maxar root catalog plus the earliest and
# latest known event dates.
first_event_date = min(maxar_event_dates)
last_event_date = max(maxar_event_dates)
collection = create_collection(maxar_catalog, first_event_date, last_event_date)

# Serialise the collection as a single compact JSON document.
with open(MAXAR_COLLECTION_PATH, "w") as dst:
    dst.write(json.dumps(collection.to_dict()))

# Display the collection as the cell output.
collection

In [8]:
# Translate every Item of the Maxar catalog with `create_item`.
#
# The work runs in two phases that share one thread pool:
#   1. discover the Items of every event sub-collection, one task per
#      sub-collection
#   2. run `create_item` on every discovered Item
# Converted Items are appended to `maxar_items` in completion order, and any
# conversion failure is reported and re-raised so the cell fails loudly.
maxar_items = []
with ThreadPoolExecutor() as pool:
    futures_find = {}
    futures_translate = {}

    # Find all items up front in threads since it takes a while to
    # scan through static catalog.
    # get_collections() may hand back a lazy iterable, so materialise it to
    # make sure the progress message below is only printed once the top level
    # collections have actually been read.
    event_collections = list(maxar_catalog.get_collections())

    print("Found all top level Collections")

    # The loop variable is deliberately not called `collection`, so that the
    # notebook-level `collection` created in an earlier cell is not overwritten.
    for event_collection in event_collections:
        subcollections = list(event_collection.get_collections())
        print(f"Found {len(subcollections)} in {event_collection}")
        for subcollection in subcollections:
            find_future = pool.submit(
                lambda coln: list(coln.get_all_items()), subcollection
            )
            # Value is a "<collection id>/<subcollection id>" label, kept for debugging.
            futures_find[find_future] = f"{event_collection.id}/{subcollection.id}"

    print("Submitted all Collections")

    # Phase 2 tasks are queued as soon as each discovery task finishes.
    item_i = 0
    for find_future in as_completed(futures_find):
        for item_found in find_future.result():
            item_i += 1
            if item_i % 2000 == 0:
                print(f"Submitting item {item_i}")
            translate_future = pool.submit(create_item, item_found)
            # Remember the source Item so a failure can be traced back to it.
            futures_translate[translate_future] = item_found

    print("Found all Items")

    for rewrite_i, translate_future in enumerate(as_completed(futures_translate)):
        if rewrite_i % 2000 == 0:
            print(f"Processed item {rewrite_i} of {item_i}")
        try:
            result = translate_future.result()
        except Exception as exc:
            original_item = futures_translate[translate_future]
            print(
                f"Unexpected exception! {exc} "
                f"(while converting Item {original_item.id})"
            )
            raise
        else:
            maxar_items.append(result)

# Naive local timestamp of the conversion run; later used in the output file name.
converted_at = dt.datetime.now()

Found all top level Collections
Found 5 in <Collection id=BayofBengal-Cyclone-Mocha-May-23>
Found 7 in <Collection id=Belize-Wildfires-June24>
Found 36 in <Collection id=Brazil-Flooding-May24>
Found 26 in <Collection id=Cyclone-Chido-Dec15>
Found 29 in <Collection id=Earthquake-Myanmar-March-2025>
Found 11 in <Collection id=Emilia-Romagna-Italy-flooding-may23>
Found 10 in <Collection id=Floods-Spain-Oct24>
Found 6 in <Collection id=Gambia-flooding-8-11-2022>
Found 16 in <Collection id=Hurricane-Fiona-9-19-2022>
Found 89 in <Collection id=Hurricane-Ian-9-26-2022>
Found 14 in <Collection id=Hurricane-Idalia-Florida-Aug23>
Found 42 in <Collection id=HurricaneHelene-Oct24>
Found 15 in <Collection id=HurricaneMilton-Oct24>
Found 1 in <Collection id=Iceland-Volcano_Eruption-Dec-2023>
Found 6 in <Collection id=India-Floods-Oct-2023>
Found 8 in <Collection id=Indonesia-Earthquake22>
Found 11 in <Collection id=Japan-Earthquake-Jan-2024>
Found 76 in <Collection id=Kahramanmaras-turkey-earthquake

In [9]:
# Spot check: pretty-print one converted Item picked by a fixed index.
sample_item = maxar_items[24_000]
sample_item_json = json.dumps(sample_item.to_dict(), indent=2)
print(sample_item_json)

{
  "type": "Feature",
  "stac_version": "1.1.0",
  "stac_extensions": [
    "https://stac-extensions.github.io/view/v1.0.0/schema.json",
    "https://stac-extensions.github.io/projection/v2.0.0/schema.json",
    "https://stac-extensions.github.io/eo/v1.1.0/schema.json",
    "https://stac-extensions.github.io/raster/v1.1.0/schema.json",
    "https://stac-extensions.github.io/grid/v1.1.0/schema.json",
    "https://stac-extensions.github.io/alternate-assets/v1.2.0/schema.json"
  ],
  "id": "29-031313133003-104001004A435000",
  "geometry": {
    "type": "Polygon",
    "coordinates": [
      [
        [
          -9.367070778987003,
          30.688662710556297
        ],
        [
          -9.366889628824723,
          30.64072532898949
        ],
        [
          -9.311449280676847,
          30.640869956882003
        ],
        [
          -9.311603058583081,
          30.688807613114765
        ],
        [
          -9.367070778987003,
          30.688662710556297
        ]
     

## Write to NDJSON for ingestion into a (PgSTAC) STAC Catalog

In [10]:
# The output file name carries the conversion timestamp so that separate runs
# do not overwrite each other.
converted_stamp = converted_at.strftime('%Y%m%dT%H%M%S')
destination = f"maxar-opendata-{converted_stamp}.ndjson"

# NDJSON: one compact JSON document per line.
with open(destination, "w") as dst:
    for item in maxar_items:
        dst.write(json.dumps(item.to_dict()))
        dst.write("\n")

print(f"Wrote {len(maxar_items)} STAC Items to {destination}")

Wrote 29228 STAC Items to maxar-opendata-20250501T141434.ndjson
