# 03 Create STAC

See [STAC architecture](./STAC_architecture.md) for details on the STAC structure we have chosen. Implementation is found in [ns_staccer.py](./ns_staccer.py).

In [1]:
from ns_configs import get_rgb_item_config, get_ms_item_config
from ns_staccer import create_item
import pystac

In [2]:
from pathlib import Path
# Root directory that is searched for imagery; hrefs are built relative to it.
input_dir = Path("/home/jamesg/test_data/naturescan")
# The STAC output lives in a "stac" folder directly inside the input directory.
stac_out_dir = input_dir / "stac"
stac_out_dir.mkdir(exist_ok=True)
# Every file below the input directory whose name matches *.cog.tif.
cogs = [*input_dir.rglob("*.cog.tif")]

# Base URL used for asset hrefs, and the STAC base URL derived from it.
ASSET_BASE_HREF = "http://127.0.0.1:8010"
STAC_BASE_HREF = ASSET_BASE_HREF + "/stac"

def get_href(p: Path) -> str:
    """Return the URL of a file located under ``input_dir``.

    The path is made relative to ``input_dir`` and appended to
    ``ASSET_BASE_HREF``.

    Args:
        p: Path of a file inside ``input_dir``.

    Returns:
        The href of the file below ``ASSET_BASE_HREF``.

    Raises:
        ValueError: If ``p`` is not located under ``input_dir``.
    """
    # as_posix() keeps forward slashes in the URL on every platform, whereas
    # str() would emit backslashes for Windows paths.
    return f"{ASSET_BASE_HREF}/{p.relative_to(input_dir).as_posix()}"

In [3]:
# Root catalog of the NatureScan STAC, created as an absolute-published catalog.
catalog = pystac.Catalog(
    id="naturescan",
    title="NatureScan Catalog",
    description="TerraLuma NatureScan Catalog",
    catalog_type=pystac.CatalogType.ABSOLUTE_PUBLISHED,
)

# Provider record for TerraLuma, listed with all four roles it fulfils.
terra_luma_provider = pystac.Provider(
    name="TerraLuma",
    description="TerraLuma University of Tasmania",
    url="https://www.utas.edu.au/research/projects/terraluma",
    roles=[
        pystac.ProviderRole.HOST,
        pystac.ProviderRole.LICENSOR,
        pystac.ProviderRole.PRODUCER,
        pystac.ProviderRole.PROCESSOR,
    ],
)

## RGB Collection

In [4]:
# Find the RGB COGs below the input directory (case-insensitive match).
# Sorting makes the item order, and therefore the saved STAC, reproducible.
rgb_cogs = sorted(input_dir.rglob('*rgb.cog.tif', case_sensitive=False))
if not rgb_cogs:
    # Fail early: a collection extent cannot be computed from zero items.
    raise FileNotFoundError(f"No '*rgb.cog.tif' files found under {input_dir}")

# Pair every COG with its thumbnail path in the same directory.
# The suffix is sliced off by length because the glob above ignores case:
# a case-sensitive str.replace() would leave e.g. "X_RGB.COG.TIF" unchanged
# and make the thumbnail path identical to the COG path.
rgb_cogs_with_thumbnails = [
    (cog, cog.with_name(cog.name[:-len('.cog.tif')] + '.thumbnail.png'))
    for cog in rgb_cogs
]

# One STAC item per COG, with asset and thumbnail hrefs built by get_href.
rgb_items = [
    create_item(cog, thumbnail, get_rgb_item_config(asset_href=get_href(cog), thumbnail_href=get_href(thumbnail)))
    for cog, thumbnail in rgb_cogs_with_thumbnails
]

# The collection extent is derived from the items it contains.
rgb_collection = pystac.Collection(
    id="naturescan-rgb",
    title='NatureScan RGB Orthomosaics',
    description='NatureScan RGB Orthomosaics',
    extent=pystac.Extent.from_items(rgb_items),
    providers=[terra_luma_provider],
)
# Assigning the result keeps the notebook from echoing the returned value.
_links = rgb_collection.add_items(rgb_items)

## MS Orthomosaics

In [5]:
# Find the multispectral (MS) COGs below the input directory
# (case-insensitive match). Sorting makes the item order, and therefore the
# saved STAC, reproducible.
ms_cogs = sorted(input_dir.rglob('*ms.cog.tif', case_sensitive=False))
if not ms_cogs:
    # Fail early: a collection extent cannot be computed from zero items.
    raise FileNotFoundError(f"No '*ms.cog.tif' files found under {input_dir}")

# Pair every COG with its thumbnail path in the same directory.
# The suffix is sliced off by length because the glob above ignores case:
# a case-sensitive str.replace() would leave e.g. "X_MS.COG.TIF" unchanged
# and make the thumbnail path identical to the COG path.
ms_cogs_with_thumbnails = [
    (cog, cog.with_name(cog.name[:-len('.cog.tif')] + '.thumbnail.png'))
    for cog in ms_cogs
]

# One STAC item per COG, with asset and thumbnail hrefs built by get_href.
ms_items = [
    create_item(cog, thumbnail, get_ms_item_config(asset_href=get_href(cog), thumbnail_href=get_href(thumbnail)))
    for cog, thumbnail in ms_cogs_with_thumbnails
]

# The collection extent is derived from the items it contains.
ms_collection = pystac.Collection(
    id="naturescan-ms",
    title='NatureScan MS Orthomosaics',
    description='NatureScan MS Orthomosaics',
    extent=pystac.Extent.from_items(ms_items),
    providers=[terra_luma_provider],
)
# Assigning the result keeps the notebook from echoing the returned value.
_links = ms_collection.add_items(ms_items)

In [6]:
# Attach both collections to the root catalog.
catalog.add_child(rgb_collection)
catalog.add_child(ms_collection)
# Lay out the hrefs of the catalog and its children below the STAC base URL.
catalog.normalize_hrefs(STAC_BASE_HREF)
# Write the files to the local output directory instead of the (HTTP) hrefs.
# pystac types dest_href as a string, so the Path is converted explicitly.
catalog.save(catalog_type=pystac.CatalogType.ABSOLUTE_PUBLISHED, dest_href=str(stac_out_dir))

# STAC GeoParquet

We also want to save our STAC in geoparquet format. We will do so with [rustac-py](https://stac-utils.github.io/rustac-py/latest/). It's compatible with `stac-geoparquet` but has [some benefits](https://github.com/stac-utils/rustac-py?tab=readme-ov-file#stac-geoparquet).

A limitation of stac-fastapi-geoparquet is that it supports only one collection at a time, so the following script creates one stac-geoparquet file for each collection, plus a collections.json file to be used by stac-fastapi-geoparquet.

In [7]:
import rustac
import json

collections_out = []

for collection in [rgb_collection, ms_collection]:
    parquet_file_name = f"{collection.id}.parquet"
    items = collection.get_all_items()
    item_dicts = [item.to_dict() for item in items]
    await rustac.write(str(stac_out_dir / parquet_file_name), item_dicts)

    collection_dict = collection.to_dict()
    # Don't need links for geoparquet
    collection_dict["links"] = []
    # Add the parquet items as an asset on the collection
    collection_dict["assets"] = {
        "data": {
            "href": f"{STAC_BASE_HREF}/{parquet_file_name}",
            "type": "application/vnd.apache.parquet"
        }
    }

    collections_out.append(collection_dict)

collections_out
with open(stac_out_dir / "collections.json", "w") as f:
    json.dump(collections_out, f, indent=2)



ModuleNotFoundError: No module named 'rustac'