In [1]:
import datetime as dt
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from pystac import (
    Catalog,
)
from pystac_client import Client

from stactools.hotosm.maxar.stac import create_collection, create_item

In [2]:
# Root STAC catalog of the Maxar Open Data bucket; opened with pystac-client
# further down to discover collections and items.
MAXAR_ROOT_CATALOG = "https://maxar-opendata.s3.amazonaws.com/events/catalog.json"
# JSON document describing the events; each entry's "date" field
# (YYYY-MM-DD) is parsed in the next cell.
MAXAR_EVENT_INFO = "https://maxar-opendata.s3.amazonaws.com/event_info.json"

In [3]:
# Download the event metadata document.
# - The timeout keeps the cell from hanging forever on a stalled connection
#   (requests applies no timeout by default).
# - raise_for_status() surfaces HTTP errors here rather than as a confusing
#   failure while decoding or indexing the response body.
resp = requests.get(MAXAR_EVENT_INFO, timeout=60)
resp.raise_for_status()
maxar_event_info = resp.json()

# Parse every entry's "date" (YYYY-MM-DD) and order the dates chronologically.
maxar_event_dates = sorted(
    dt.datetime.strptime(info["date"], "%Y-%m-%d") for info in maxar_event_info
)

In [4]:
# Open the root catalog with pystac-client. The catalog does not advertise any
# conformance classes, so pystac-client emits a NoConformsTo warning here.
client = Client.open(MAXAR_ROOT_CATALOG)
# Bare expression: display the client as the cell output.
client

/Users/ceholden/Documents/HOTOSM/stactools-hotosm/.venv/lib/python3.12/site-packages/pystac_client/client.py:186: NoConformsTo: Server does not advertise any conformance classes.


In [5]:
# Round-trip the client through its dict form to get a plain pystac Catalog,
# then build the collection from it and the sorted event dates.
maxar_catalog = Catalog.from_dict(client.to_dict())
collection = create_collection(maxar_catalog, maxar_event_dates)

# Save the collection as JSON (path is relative to the working directory).
MAXAR_COLLECTION_PATH = "maxar-opendata-collection.json"
with open(MAXAR_COLLECTION_PATH, "w") as collection_file:
    collection_file.write(json.dumps(collection.to_dict()))

# Bare expression: display the collection as the cell output.
collection

In [6]:
# Translated Items, filled in at the end of this cell in a deterministic order
# (catalog traversal order) so that indexing into the list and the exported
# file are reproducible across runs.
maxar_items = []

# How often (in number of items) to print a progress line.
PROGRESS_EVERY = 2000

with ThreadPoolExecutor() as pool:
    # find future -> (submission index, "<parent id>/<subcollection id>" label)
    futures_find = {}
    # translate future -> ((find index, index within that find), original Item)
    futures_translate = {}

    # Find all items up front in threads since it takes a while to
    # scan through static catalog
    #
    # NOTE: the loop variable is deliberately not called `collection`, so it
    # does not clobber the `collection` object built in an earlier cell.
    for parent_collection in client.get_collections():
        for subcollection in parent_collection.get_collections():
            find_future = pool.submit(
                lambda coln: list(coln.get_all_items()), subcollection
            )
            futures_find[find_future] = (
                len(futures_find),
                f"{parent_collection.id}/{subcollection.id}",
            )

    # Submit translation work as soon as each scan finishes (keeps the pool
    # busy), but remember where every item came from so the final ordering
    # does not depend on thread timing.
    item_i = 0
    for find_future in as_completed(futures_find):
        find_i, collection_path = futures_find[find_future]
        try:
            items_found = find_future.result()
        except Exception as exc:
            print(f"Unexpected exception scanning {collection_path}! {exc}")
            raise
        for found_i, item_found in enumerate(items_found):
            item_i += 1
            if item_i % PROGRESS_EVERY == 0:
                print(f"Submitting item {item_i}")
            translate_future = pool.submit(create_item, item_found)
            futures_translate[translate_future] = ((find_i, found_i), item_found)

    print("Found all Items")

    # Collect results keyed by their deterministic sort key; as_completed is
    # still used so progress is reported and failures surface promptly.
    translated = {}
    for rewrite_i, translate_future in enumerate(as_completed(futures_translate)):
        if rewrite_i % PROGRESS_EVERY == 0:
            print(f"Processed item {rewrite_i} of {item_i}")
        order_key, original_item = futures_translate[translate_future]
        try:
            translated[order_key] = translate_future.result()
        except Exception as exc:
            # Name the offending source Item so the failure can be reproduced.
            print(f"Unexpected exception translating Item {original_item.id}! {exc}")
            raise

    # Only reached when every translation succeeded.
    maxar_items.extend(translated[key] for key in sorted(translated))

/Users/ceholden/Documents/HOTOSM/stactools-hotosm/.venv/lib/python3.12/site-packages/pystac_client/client.py:441: FallbackToPystac: Falling back to pystac. This might be slow.
  self._warn_about_fallback("COLLECTIONS", "FEATURES")
/Users/ceholden/Documents/HOTOSM/stactools-hotosm/.venv/lib/python3.12/site-packages/pystac_client/collection_client.py:149: FallbackToPystac: Falling back to pystac. This might be slow.
  root._warn_about_fallback("ITEM_SEARCH")


Submitting item 2000
Submitting item 4000
Submitting item 6000
Submitting item 8000
Submitting item 10000
Submitting item 12000
Submitting item 14000
Submitting item 16000
Submitting item 18000
Submitting item 20000
Submitting item 22000
Submitting item 24000
Submitting item 26000
Submitting item 28000
Found all Items
Processed item 0 of 29228
Processed item 2000 of 29228
Processed item 4000 of 29228
Processed item 6000 of 29228
Processed item 8000 of 29228
Processed item 10000 of 29228
Processed item 12000 of 29228
Processed item 14000 of 29228
Processed item 16000 of 29228
Processed item 18000 of 29228
Processed item 20000 of 29228
Processed item 22000 of 29228
Processed item 24000 of 29228
Processed item 26000 of 29228
Processed item 28000 of 29228


In [7]:
# Spot check: pretty-print the translated Item at index 24,000.
sample_item_dict = maxar_items[24_000].to_dict()
print(json.dumps(sample_item_dict, indent=2))

{
  "type": "Feature",
  "stac_version": "1.1.0",
  "stac_extensions": [
    "https://stac-extensions.github.io/view/v1.0.0/schema.json",
    "https://stac-extensions.github.io/projection/v2.0.0/schema.json",
    "https://stac-extensions.github.io/eo/v1.1.0/schema.json",
    "https://stac-extensions.github.io/raster/v1.1.0/schema.json",
    "https://stac-extensions.github.io/grid/v1.1.0/schema.json",
    "https://stac-extensions.github.io/alternate-assets/v1.2.0/schema.json"
  ],
  "id": "17-120200210200-10300100D0636800",
  "geometry": {
    "type": "Polygon",
    "coordinates": [
      [
        [
          -80.08813725389182,
          33.02875182648932
        ],
        [
          -80.12189397855221,
          33.02899394951969
        ],
        [
          -80.14499775094758,
          33.03129597014829
        ],
        [
          -80.14488666659524,
          33.04278540028727
        ],
        [
          -80.08769734971874,
          33.07141530288402
        ],
        

In [8]:
# Export every Item as newline-delimited JSON (one Item per line) into a file
# named after the current local time, with second resolution.
run_stamp = dt.datetime.now().strftime('%Y%m%dT%H%M%S')
items_ndjson = f"maxar-items-{run_stamp}.ndjson"
with open(items_ndjson, "w") as ndjson_file:
    for item in maxar_items:
        # print() appends the trailing newline that separates records.
        print(json.dumps(item.to_dict()), file=ndjson_file)