# Create STAC

Working on creating STAC items from COGs that are compliant with STAC 1.1.
Some of the latest changes in 1.1, specifically those around [common metadata](https://github.com/radiantearth/stac-spec/blob/master/commons/common-metadata.md#bands), are not yet included in the pystac library.

Todo:
- [ ] Split up RGB and MS process for both this and the geotiff to cog part

In [34]:
from pathlib import Path
from osgeo import gdal
import pystac

import shapely
from datetime import datetime
from zoneinfo import ZoneInfo

import rasterio

# Import extension version
from rio_stac.stac import PROJECTION_EXT_VERSION, RASTER_EXT_VERSION, EO_EXT_VERSION

# Import rio_stac methods
from rio_stac.stac import (
    get_dataset_geom,
    get_projection_info,
    get_raster_info,
    get_eobands_info,
    bbox_to_geom,
    create_stac_item
)

In [35]:
# Directory that is searched recursively for COG files.
input_dir = Path("/Volumes/Sammy/terra-luma/stac-input")
# rglob yields matches in an arbitrary, filesystem-dependent order; sorting
# makes cogs[0] (used by the cells below) refer to the same file on every run.
cogs = sorted(input_dir.rglob('*.cog.tif'))

In [36]:
# def create_main_asset(input: Path) -> pystac.Asset:
#     cog_info = gdal.alg.raster.info(input).Output()
#     bands_info = cog_info['bands']

#     # Some values from GDAL are missing here (specifically no data for MS)
#     # eo_bands = cog_info['stac']['eo:bands']
#     # raster_bands = cog_info['stac']['raster:bands']

#     # Lift data_type and nodata from bands as they should be consistent
#     # across all bands
#     data_type = bands_info[0]["type"]
#     nodata = bands_info[0]["noDataValue"]

#     # Assert that they are consistent across all bands
#     assert all(band["type"] == data_type for band in bands_info), "All bands must share the same data_type"
#     assert all(band["noDataValue"] == nodata for band in bands_info), "All bands must share the same nodata"

#     # Change datatype from GDAL naming to STAC naming
#     if data_type == 'Byte':
#         data_type = pystac.extensions.raster.DataType.UINT8
#     elif data_type == 'Float32':
#         data_type = pystac.extensions.raster.DataType.FLOAT32
#     else:
#         raise Exception(f"Unhandled data type {data_type}")

#     # Just pull name and description from eo:bands
#     bands = [{ "name": f"b{b["band"]}", "description": b["colorInterpretation"] } for b in bands_info]

#     # Use number of bands to create title and description
#     if len(bands_info) == 3:
#         title = "RGB Ortho"
#         description = "3-band RGB orthomosaic COG."
#     elif len(bands_info) == 4:
#         title = "MS Ortho"
#         description = '4-band MS orthomsaic COG.'
#     else:
#         raise Exception("COG does not have 3 or 4 bands")

#     asset = pystac.Asset(
#         href=str(input),
#         title=title,
#         description=description,
#         roles=["data"],
#         media_type=pystac.MediaType.COG,
#         extra_fields={
#             "data_type": data_type,
#             "nodata": nodata,
#             "bands": bands,
#         },
#     )

#     return asset

# def create_stac_item(input: Path):
#     id = input.name.replace('.cog.tif', '')
#     name_parts = input.name.split('_')
    
#     item_date = datetime.strptime(name_parts[0], '%Y%m%d')
#     item_date = item_date.replace(tzinfo=ZoneInfo('Australia/Hobart'), hour=12)
#     item_date = item_date.astimezone(ZoneInfo('UTC'))

#     cog_info = gdal.alg.raster.info(input).Output()
#     geometry = cog_info['wgs84Extent'] 
#     bbox = pystac.utils.geometry_to_bbox(geometry)

#     item = pystac.Item(
#         id=id,
#         geometry=geometry,
#         bbox=bbox,
#         datetime=item_date,
#         properties={}
#     )

#     main_asset = create_main_asset(input)

#     item.add_asset('main', main_asset)

#     return item

In [37]:
def create_rgb_asset(input: Path) -> pystac.Asset:
    """Build the STAC asset describing a 3-band RGB orthomosaic COG.

    Band metadata is read with ``gdal.alg.raster.info``. The file is expected
    to hold exactly three ``Byte`` bands that all use ``255`` as nodata; any
    deviation trips an assertion.

    Args:
        input: Path to the COG; its string form becomes the asset ``href``.

    Returns:
        A ``pystac.Asset`` with the ``data`` role and COG media type, carrying
        ``data_type``, ``nodata`` and ``bands`` as extra fields.

    Raises:
        AssertionError: If the band count, data type or nodata value is not
            the one expected for an RGB ortho.
    """
    band_records = gdal.alg.raster.info(input).Output()['bands']

    band_count = len(band_records)
    assert band_count == 3, f'Expected 3 bands. Found {band_count} bands instead.'

    # The first band serves as the reference; the checks below confirm every
    # band agrees with it, so the values can be recorded once on the asset.
    reference_band = band_records[0]
    gdal_type = reference_band["type"]
    nodata_value = reference_band["noDataValue"]

    assert all(rec["type"] == gdal_type for rec in band_records), "All bands must share the same data_type"
    assert all(rec["noDataValue"] == nodata_value for rec in band_records), "All bands must share the same nodata"
    assert gdal_type == "Byte"
    assert nodata_value == 255

    # GDAL calls the type "Byte"; STAC calls it uint8.
    stac_type = pystac.extensions.raster.DataType.UINT8

    # One entry per band: name from the GDAL band number, description from
    # the GDAL colour interpretation.
    band_entries = [
        {"name": f"b{rec['band']}", "description": rec["colorInterpretation"]}
        for rec in band_records
    ]

    return pystac.Asset(
        href=str(input),
        title="RGB Ortho",
        description="3-band RGB orthomosaic COG.",
        roles=["data"],
        media_type=pystac.MediaType.COG,
        extra_fields={
            "data_type": stac_type,
            "nodata": nodata_value,
            "bands": band_entries,
        },
    )

def create_rgb_item(input: Path):
    """Create a STAC item for an RGB orthomosaic COG.

    The item id is the file name without its ``.cog.tif`` extension. Further
    metadata is parsed from the underscore-separated file name: field 0 is the
    date as ``YYYYMMDD``, field 1 the site, and field 3 the flight height
    written as ``<n>mAGL`` (presumably metres above ground level).

    The item datetime is set to 12:00 in the ``Australia/Hobart`` time zone on
    the parsed date and then converted to UTC. Geometry comes from the
    ``wgs84Extent`` reported by ``gdal.alg.raster.info``.

    Args:
        input: Path to the RGB COG.

    Returns:
        A ``pystac.Item`` with the RGB asset attached under the key ``main``.

    Raises:
        ValueError: If the date or height field cannot be parsed.
        IndexError: If the file name has fewer underscore-separated fields
            than expected.
        AssertionError: Propagated from ``create_rgb_asset`` when the raster
            is not a valid RGB ortho.
    """
    file_name = input.name
    # removesuffix strips only the trailing extension; str.replace would also
    # delete any earlier '.cog.tif' occurrence inside the name. Using item_id
    # rather than id avoids shadowing the builtin.
    item_id = file_name.removesuffix('.cog.tif')
    name_parts = file_name.split('_')

    # The file name carries only a date, so midday local time is used as the
    # nominal acquisition time before converting to UTC.
    item_date = datetime.strptime(name_parts[0], '%Y%m%d')
    item_date = item_date.replace(tzinfo=ZoneInfo('Australia/Hobart'), hour=12)
    item_date = item_date.astimezone(ZoneInfo('UTC'))

    site = name_parts[1]
    agl = int(name_parts[3].replace('mAGL', ''))
    platform = 'M3M'    # Maybe should be individual drone name
    sensor = ['RGB Camera']

    cog_info = gdal.alg.raster.info(input).Output()
    geometry = cog_info['wgs84Extent']
    bbox = pystac.utils.geometry_to_bbox(geometry)

    item = pystac.Item(
        id=item_id,
        geometry=geometry,
        bbox=bbox,
        datetime=item_date,
        properties={
            "site": site,
            "agl_m": agl,
            "platform": platform,
            "sensor": sensor,
        },
    )

    item.add_asset('main', create_rgb_asset(input))

    return item

def create_ms_asset(input: Path) -> pystac.Asset:
    """Build the STAC asset describing a 4-band multispectral orthomosaic COG.

    Band metadata is read with ``gdal.alg.raster.info``. The file must hold
    exactly four ``Float32`` bands that all use ``-32767.0`` as nodata.

    Args:
        input: Path to the COG; its string form becomes the asset ``href``.

    Returns:
        A ``pystac.Asset`` with the ``data`` role and COG media type whose
        extra fields carry ``data_type``, ``nodata`` and ``bands``.

    Raises:
        AssertionError: If the band count, data type or nodata value differs
            from what is expected for an MS ortho.
    """
    cog_info = gdal.alg.raster.info(input).Output()
    bands_info = cog_info['bands']

    num_bands = len(bands_info)
    assert num_bands == 4, f'Expected 4 bands. Found {num_bands} bands instead.'

    # Lift data_type and nodata from the first band; they are recorded once
    # on the asset, so every band has to agree with them.
    data_type = bands_info[0]["type"]
    nodata = bands_info[0]["noDataValue"]

    # Assert that they are consistent across all bands
    assert all(band["type"] == data_type for band in bands_info), "All bands must share the same data_type"
    assert all(band["noDataValue"] == nodata for band in bands_info), "All bands must share the same nodata"
    assert data_type == "Float32"
    assert nodata == -32767.0

    # Change datatype from GDAL naming to STAC naming
    data_type = pystac.extensions.raster.DataType.FLOAT32

    # One entry per band: name from the GDAL band number, description from
    # the GDAL colour interpretation.
    bands = [
        {"name": f"b{b['band']}", "description": b["colorInterpretation"]}
        for b in bands_info
    ]

    asset = pystac.Asset(
        href=str(input),
        title="MS Ortho",
        # Spelling fixed: this previously read "orthomsaic" and was written
        # into the published metadata.
        description="4-band MS orthomosaic COG.",
        roles=["data"],
        media_type=pystac.MediaType.COG,
        extra_fields={
            "data_type": data_type,
            "nodata": nodata,
            "bands": bands,
        },
    )

    return asset

def create_ms_item(input: Path):
    """Create a STAC item for a multispectral orthomosaic COG.

    The item id is the file name without its ``.cog.tif`` extension. Further
    metadata is parsed from the underscore-separated file name: field 0 is the
    date as ``YYYYMMDD``, field 1 the site, and field 3 the flight height
    written as ``<n>mAGL`` (presumably metres above ground level).

    The item datetime is set to 12:00 in the ``Australia/Hobart`` time zone on
    the parsed date and then converted to UTC. Geometry comes from the
    ``wgs84Extent`` reported by ``gdal.alg.raster.info``.

    Args:
        input: Path to the multispectral COG.

    Returns:
        A ``pystac.Item`` with the MS asset attached under the key ``main``.

    Raises:
        ValueError: If the date or height field cannot be parsed.
        IndexError: If the file name has fewer underscore-separated fields
            than expected.
        AssertionError: Propagated from ``create_ms_asset`` when the raster
            is not a valid MS ortho.
    """
    file_name = input.name
    # removesuffix strips only the trailing extension; str.replace would also
    # delete any earlier '.cog.tif' occurrence inside the name. Using item_id
    # rather than id avoids shadowing the builtin.
    item_id = file_name.removesuffix('.cog.tif')
    name_parts = file_name.split('_')

    # The file name carries only a date, so midday local time is used as the
    # nominal acquisition time before converting to UTC.
    item_date = datetime.strptime(name_parts[0], '%Y%m%d')
    item_date = item_date.replace(tzinfo=ZoneInfo('Australia/Hobart'), hour=12)
    item_date = item_date.astimezone(ZoneInfo('UTC'))

    site = name_parts[1]
    agl = int(name_parts[3].replace('mAGL', ''))
    platform = 'M3M'    # Maybe should be individual drone name
    sensor = ["MS Camera"]

    cog_info = gdal.alg.raster.info(input).Output()
    geometry = cog_info['wgs84Extent']
    bbox = pystac.utils.geometry_to_bbox(geometry)

    item = pystac.Item(
        id=item_id,
        geometry=geometry,
        bbox=bbox,
        datetime=item_date,
        properties={
            "site": site,
            "agl_m": agl,
            "platform": platform,
            "sensor": sensor,
        },
    )

    item.add_asset('main', create_ms_asset(input))

    return item

In [38]:
# Copied from pystac asset class
# Don't really know how it works

from pystac.html.jinja_env import get_jinja_env
from html import escape
from IPython.display import HTML

def pretty_out(obj: dict):
    """Render *obj* through the ``JSON.jinja2`` template for notebook display.

    The template is looked up in the Jinja environment returned by
    ``get_jinja_env`` and receives the object itself plus an HTML-escaped
    ``repr`` of it as the ``plain`` value.

    Args:
        obj: The dictionary to display.

    Returns:
        An ``IPython.display.HTML`` object wrapping the rendered markup.
    """
    json_template = get_jinja_env().get_template("JSON.jinja2")
    markup = json_template.render(dict=obj, plain=escape(repr(obj)))
    return HTML(str(markup))

In [39]:
# Try the multispectral item builder on the first discovered COG; as the last
# expression of the cell, the resulting item is displayed by the notebook.
# NOTE(review): create_ms_item asserts a 4-band Float32 raster - confirm that
# cogs[0] is an MS ortho and not an RGB one.
create_ms_item(cogs[0])

In [40]:
# Fetch the raw GDAL raster info for the first COG and display it with
# pretty_out so the structure the builders above read from can be inspected.
info = gdal.alg.raster.info(cogs[0]).Output()
pretty_out(info)

In [41]:
# Build an item with rio_stac's create_stac_item (imported at the top of the
# notebook) with its eo, raster and proj options switched on - presumably to
# compare against the hand-built items above.
item = create_stac_item(str(cogs[0]), with_eo=True, with_raster=True, with_proj=True)
# Bare expression so the notebook displays the item.
item