In [None]:
import rustac
import json
import obstore
from typing import Any

In [None]:
def find_stac_item_objects_s3(
    bucket: str,
    prefix: str = "",
    suffix: str = ".stac-item.json",
    chunk_size: int = 200,
    **s3_kwargs: Any,
) -> list[str]:
    """List the S3 URIs of objects under ``bucket``/``prefix`` whose key ends with ``suffix``.

    Args:
        bucket: Name of the S3 bucket to search.
        prefix: Key prefix that restricts the listing; leading slashes are stripped.
        suffix: Ending a key must have for the object to be included.
        chunk_size: Forwarded to ``obstore.list`` as ``chunk_size``.
        **s3_kwargs: Extra keyword arguments forwarded to ``obstore.store.S3Store``
            (for example ``region``).

    Returns:
        ``s3://<bucket>/<key>`` URIs, in the order the listing yields them.
    """
    key_prefix = prefix.lstrip("/")
    s3_store = obstore.store.S3Store(bucket=bucket, **s3_kwargs)
    pages = obstore.list(s3_store, prefix=key_prefix, chunk_size=chunk_size)

    # Entries without a "path" value are treated as having an empty key.
    keys = (meta.get("path", "") for page in pages for meta in page)
    return [f"s3://{bucket}/{key}" for key in keys if key.endswith(suffix)]

# Configuration shared by the cells below.
aws_region = "us-west-2"  # AWS region handed to every S3 store in this notebook
bucket = "data.ldn.auspatious.com"
version = "0-0-2"
# `region` is the first segment of the key prefix and the stem of the output
# file name; it is not the AWS region. Keep exactly one assignment active.
# region = "ausp_ls_geomad" # Pacific
region = "ci_ls_geomad" # Non-Pacific
prefix = f"{region}/{version}"  # key prefix that gets listed: <region>/<version>

# Collect the s3:// URI of every object under the prefix that ends with the
# function's default suffix (".stac-item.json"), then report the count.
stac_items = find_stac_item_objects_s3(bucket=bucket, prefix=prefix, region=aws_region)
print(f"Found {len(stac_items)} matching objects")

In [None]:
# Load STAC item JSON docs into memory
def load_stac_docs_into_memory(
    s3_uris: list[str], region: str = aws_region
) -> list[dict]:
    """Download and parse the JSON document behind each ``s3://`` URI.

    Args:
        s3_uris: Object URIs of the form ``s3://<bucket>/<key>``. Entries that do
            not start with ``s3://`` are skipped.
        region: AWS region used when creating each ``obstore`` S3 store.

    Returns:
        One parsed JSON document per ``s3://`` URI, in input order.

    Raises:
        ValueError: If an ``s3://`` URI is missing its bucket or its object key.
    """
    docs: list[dict] = []
    # One store per bucket, created on first use, rather than one per object.
    stores: dict[str, Any] = {}

    for uri in s3_uris:
        if not uri.startswith("s3://"):
            continue

        bucket_name, _, key = uri.removeprefix("s3://").partition("/")
        if not bucket_name or not key:
            # Fail before any network call, with the offending URI in the message.
            raise ValueError(f"Expected s3://<bucket>/<key>, got {uri!r}")

        store = stores.get(bucket_name)
        if store is None:
            store = obstore.store.S3Store(bucket=bucket_name, region=region)
            stores[bucket_name] = store

        payload = obstore.get(store, key).bytes()
        # Normalise whatever buffer type obstore returned into something that
        # has `.decode`.
        if hasattr(payload, "to_bytes"):
            payload = payload.to_bytes()
        elif not isinstance(payload, (bytes, bytearray)):
            payload = bytes(payload)

        docs.append(json.loads(payload.decode("utf-8")))

    return docs

# Fetch and parse every listed STAC item, keeping all documents in memory,
# then report how many were loaded.
stac_docs = load_stac_docs_into_memory(stac_items, region=aws_region)
print(f"Loaded {len(stac_docs)} STAC docs into memory")

In [None]:
# Write all loaded STAC docs to <prefix>/<region>.parquet in the same bucket
# (presumably as stac-geoparquet, inferred from the .parquet extension).
# Top-level `await` works here because the notebook kernel runs an event loop.
# NOTE(review): the href is a full s3:// URI while `store` is already scoped to
# the bucket; confirm how rustac resolves the href when a store is passed.
await rustac.write(
    f"s3://{bucket}/{prefix}/{region}.parquet",
    stac_docs,
    store=obstore.store.S3Store(bucket=bucket, region=aws_region),
)