# 03 Create STAC

See [STAC architecture](./STAC_architecture.md) for details on the STAC structure we have chosen. Implementation is found in [ns_staccer.py](./ns_staccer.py).

In [1]:
from ns_configs import RGB_ITEM_CONFIG, MS_ITEM_CONFIG
from ns_staccer import create_item
import pystac

In [2]:
from pathlib import Path
# Where the source imagery is read from, and where the generated STAC is written.
input_dir = Path("/Volumes/Sammy/terra-luma/stac-input")
stac_out_dir = Path("/Volumes/Sammy/terra-luma/stac-output")

# Create the output directory if it is missing. `parents` is not set, so the
# parent directory must already exist.
stac_out_dir.mkdir(exist_ok=True)

# Recursively collect every `*.cog.tif` file beneath the input directory.
cogs = [*input_dir.rglob('*.cog.tif')]

In [7]:
# TerraLuma provider metadata, attached to each collection built below.
terra_luma_roles = [
    pystac.ProviderRole.HOST,
    pystac.ProviderRole.LICENSOR,
    pystac.ProviderRole.PRODUCER,
    pystac.ProviderRole.PROCESSOR,
]
terra_luma_provider = pystac.Provider(
    name="TerraLuma",
    description="TerraLuma University of Tasmania",
    url='https://www.utas.edu.au/research/projects/terraluma',
    roles=terra_luma_roles,
)

# Root catalog that the collections are added to before saving.
# NOTE(review): the save cell passes CatalogType.RELATIVE_PUBLISHED to
# normalize_and_save(), while ABSOLUTE_PUBLISHED is set here - confirm which
# catalog type is intended.
catalog = pystac.Catalog(
    id='naturescan',
    title='NatureScan Catalog',
    description='TerraLuma NatureScan Catalog',
    catalog_type=pystac.CatalogType.ABSOLUTE_PUBLISHED,
)

## RGB Collection

In [None]:
# Suffix carried by every COG file; the thumbnail is expected next to the COG
# with this suffix swapped for `.thumbnail.png`.
COG_SUFFIX = '.cog.tif'

# Sort the matches so the item order does not depend on the order in which
# the filesystem happens to be traversed.
rgb_cogs = sorted(input_dir.rglob('*rgb.cog.tif', case_sensitive=False))

# Fail early with a clear message instead of building a collection (and its
# extent) from zero items.
if not rgb_cogs:
    raise FileNotFoundError(f"No '*rgb.cog.tif' files found under {input_dir}")

# The glob above is case-insensitive, so the suffix is stripped by length
# rather than with str.replace(): replace() is case-sensitive and would leave
# a name such as `X_RGB.COG.TIF` unchanged, making the "thumbnail" path equal
# to the COG path. Slicing also only ever touches the trailing suffix.
rgb_cogs_with_thumbnails = [
    (cog, cog.with_name(cog.name[:-len(COG_SUFFIX)] + '.thumbnail.png'))
    for cog in rgb_cogs
]

# One STAC item per (COG, thumbnail) pair, built with the RGB item config.
rgb_items = [
    create_item(cog, thumbnail, RGB_ITEM_CONFIG)
    for cog, thumbnail in rgb_cogs_with_thumbnails
]

# The collection extent is derived from the items themselves.
rgb_collection = pystac.Collection(
    id="naturescan-rgb",
    title='NatureScan RGB Orthomosaics',
    description='NatureScan RGB Orthomosaics',
    extent=pystac.Extent.from_items(rgb_items),
    providers=[terra_luma_provider],
)
# Assigned to a throwaway name so the returned links are not echoed as cell output.
_links = rgb_collection.add_items(rgb_items)

[<Link rel=item target=<Item id=20241207_SANSSTP002_m3m_50mAGL_ortho_rgb>>,
 <Link rel=item target=<Item id=20241208_SANSSTP005_m3m_60mAGL_ortho_rgb>>,
 <Link rel=item target=<Item id=20241210_SANSSTP020_m3m_50mAGL_ortho_rgb>>,
 <Link rel=item target=<Item id=20241209_SANSSTP010_m3m_50mAGL_ortho_rgb>>,
 <Link rel=item target=<Item id=20240812_SANSSTP009_m3m_70mAGL_ortho_rgb>>,
 <Link rel=item target=<Item id=20241208_SANSSTP006_m3m_110mAGL_ortho_rgb>>,
 <Link rel=item target=<Item id=20241210_SANSSTP014_m3m_50mAGL_ortho_rgb>>,
 <Link rel=item target=<Item id=20241001_SAAGAW0004_m3m_50mAGL_ortho_RGB>>,
 <Link rel=item target=<Item id=20241002_SAAASTP0033_m3m_50mAGL_ortho_RGB>>,
 <Link rel=item target=<Item id=20241002_SAASTP0033_m3m_100mAGL_ortho_RGB>>,
 <Link rel=item target=<Item id=20241001_SAAGAW0009_m3m_50mAGL_ortho_RGB>>,
 <Link rel=item target=<Item id=20241002_SAAGAW0007_m3m_50mAGL_ortho_RGB>>,
 <Link rel=item target=<Item id=20241001_SAAGAW0008_m3m_100mAGL_ortho_RGB>>,
 <Link r

## MS Orthomosaics

In [9]:
# Suffix carried by every COG file; the thumbnail is expected next to the COG
# with this suffix swapped for `.thumbnail.png`.
COG_SUFFIX = '.cog.tif'

# Sort the matches so the item order does not depend on the order in which
# the filesystem happens to be traversed.
ms_cogs = sorted(input_dir.rglob('*ms.cog.tif', case_sensitive=False))

# Fail early with a clear message instead of building a collection (and its
# extent) from zero items.
if not ms_cogs:
    raise FileNotFoundError(f"No '*ms.cog.tif' files found under {input_dir}")

# The glob above is case-insensitive, so the suffix is stripped by length
# rather than with str.replace(): replace() is case-sensitive and would leave
# a name such as `X_MS.COG.TIF` unchanged, making the "thumbnail" path equal
# to the COG path. Slicing also only ever touches the trailing suffix.
ms_cogs_with_thumbnails = [
    (cog, cog.with_name(cog.name[:-len(COG_SUFFIX)] + '.thumbnail.png'))
    for cog in ms_cogs
]

# One STAC item per (COG, thumbnail) pair, built with the MS item config.
ms_items = [
    create_item(cog, thumbnail, MS_ITEM_CONFIG)
    for cog, thumbnail in ms_cogs_with_thumbnails
]

# The collection extent is derived from the items themselves.
ms_collection = pystac.Collection(
    id="naturescan-ms",
    title='NatureScan MS Orthomosaics',
    description='NatureScan MS Orthomosaics',
    extent=pystac.Extent.from_items(ms_items),
    providers=[terra_luma_provider],
)
# Assigned to a throwaway name so the returned links are not echoed as cell output.
_links = ms_collection.add_items(ms_items)

In [12]:
# Attach both collections to the root catalog ...
for child_collection in (rgb_collection, ms_collection):
    catalog.add_child(child_collection)

# ... then normalise hrefs under the output directory and save the catalog
# as RELATIVE_PUBLISHED.
catalog.normalize_and_save(
    str(stac_out_dir),
    catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED,
)

# STAC GeoParquet

We also want to save our STAC in geoparquet format. We will do so with [rustac-py](https://stac-utils.github.io/rustac-py/latest/). It's compatible with `stac-geoparquet` but has [some benefits](https://github.com/stac-utils/rustac-py?tab=readme-ov-file#stac-geoparquet).

A limitation of stac-fastapi-geoparquet is that it handles only one collection at a time. So the following cell creates one stac-geoparquet file for each collection, plus a `collections.json` file to be used by stac-fastapi-geoparquet.

In [11]:
import rustac
import json

collections_out = []

for collection in [rgb_collection, ms_collection]:
    parquet_file_name = f"{collection.id}.parquet"
    items = collection.get_all_items()
    item_dicts = [item.to_dict() for item in items]
    await rustac.write(str(stac_out_dir / parquet_file_name), item_dicts)

    collection_dict = collection.to_dict()
    # Don't need links for geoparquet
    collection_dict["links"] = []
    # Add the parquet items as an asset on the collection
    collection_dict["assets"] = {
        "data": {
            "href": f"./{parquet_file_name}",
            "type": "application/vnd.apache.parquet"
        }
    }

    collections_out.append(collection_dict)

collections_out
with open(stac_out_dir / "collections.json", "w") as f:
    json.dump(collections_out, f, indent=2)

