In [None]:
import os
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path

import rio_stac
from odc.stac import load

In [None]:
# Download the original zip archive from
# https://drive.google.com/uc?id=1xrr1VDS_AcFEaufVBGFHVyRdbZgCyJ-V&export=download
# and extract it into the data folder so the GeoTIFF exists at the path below.
in_file = "data/TD1_004930_20230205_L2A_20230224_03001065/TD1_004930_20230205_L2A_20230224_03001065.tif"
filename = Path(in_file)

# Stop early when the input is missing. FileNotFoundError is still an
# Exception subclass, and the message names the path that was checked.
if not filename.exists():
    raise FileNotFoundError(
        f"You need to download the file first. Expected it at: {filename}"
    )

In [None]:
# Build an in-memory STAC item describing the GeoTIFF; ODC STAC uses it
# to load the data.

# Swap only the final suffix. A plain str.replace would rewrite every ".tif"
# occurrence in the path and would change nothing for e.g. ".TIF", in which
# case the item's self href below would point at the image file itself.
stac_filename = filename.with_suffix(".stac-item.json")

# The third underscore-separated token of the file name is parsed as a
# YYYYMMDD date; strptime raises ValueError if it is not a valid date.
day_string = filename.name.split("_")[2]
file_datetime = datetime.strptime(day_string, "%Y%m%d")

item = rio_stac.create_stac_item(
    filename,
    input_datetime=file_datetime,
    with_proj=True,
    with_raster=True,
    with_eo=True,
    asset_name="reflectance",
    asset_href=os.path.abspath(filename),
    collection="hsi"
)
item.set_self_href(os.path.abspath(stac_filename))

def deconstruct_xml(field_name, xml_tree=None):
    """Return the comma-separated entries of a brace-wrapped XML text field.

    The element's text is expected to look like ``{a, b, c}``. The braces
    are removed and each entry is returned with surrounding whitespace
    stripped.

    Args:
        field_name: Tag name or path handed to ``find`` on the tree.
        xml_tree: Parsed tree (or element) to search. Defaults to the
            notebook-level ``tree`` variable, which must be assigned before
            the call.

    Returns:
        A list of strings; empty when the field holds no entries.

    Raises:
        ValueError: If no element matches ``field_name``.
    """
    source = tree if xml_tree is None else xml_tree
    element = source.find(field_name)
    if element is None:
        raise ValueError(f"No element named {field_name!r} in the XML metadata")

    # Trim whitespace before removing the braces: text padded with newlines or
    # indentation would otherwise shield the braces from lstrip/rstrip and
    # leave them attached to the first and last entries.
    xml_string = (element.text or "").strip().lstrip("{").rstrip("}")
    if not xml_string.strip():
        # "".split(",") yields [""], so report an empty field as no entries.
        return []
    return [s.strip() for s in xml_string.split(",")]

# Read the band identifiers and wavelengths from the XML file that sits
# next to the GeoTIFF. with_suffix swaps only the final extension, whereas
# str.replace would rewrite every ".tif" occurrence in the path.
xml_file = filename.with_suffix(".xml")
tree = ET.parse(xml_file)

bands = deconstruct_xml("Bands_UID")
wavelengths = deconstruct_xml("Wavelength_list")

# zip() stops at the shorter list, so a count mismatch would silently drop
# the unmatched entries; fail loudly instead.
if len(bands) != len(wavelengths):
    raise ValueError(
        f"Band/wavelength count mismatch in {xml_file}: "
        f"{len(bands)} band ids vs {len(wavelengths)} wavelengths"
    )

new_eo_bands = [
    {"name": band, "description": wavelength}
    for band, wavelength in zip(bands, wavelengths)
]

# Replace the automatically generated band info with the XML-derived values
reflectance_asset = item.assets["reflectance"]
reflectance_asset.extra_fields["eo:bands"] = new_eo_bands

# Set nodata to 0 on a copy of every raster band entry, leaving the
# original dictionaries untouched.
new_raster_bands = [
    {**band, "nodata": 0}
    for band in reflectance_asset.extra_fields["raster:bands"]
]
reflectance_asset.extra_fields["raster:bands"] = new_raster_bands

# You can write out the STAC object if you want.
# item.save_object()

In [None]:
# Optional: uncomment to point the item and its asset at their S3 locations before saving.
# item.set_self_href("s3://files.auspatious.com/hsi_example/TD1_004930_20230205_L2A_20230224_03001065_COG.stac-item.json")
# item.assets["reflectance"].href = "s3://files.auspatious.com/hsi_example/TD1_004930_20230205_L2A_20230224_03001065_COG.tif"
# item.save_object()

In [None]:
# Keep only the first five bands so loading stays quick
eo_bands_subset = item.assets["reflectance"].extra_fields["eo:bands"][:5]
subset_band_names = [band_info["name"] for band_info in eo_bands_subset]

# Load just those bands through ODC STAC, then drop the "time" dimension,
# which is not needed here
data = load([item], measurements=subset_band_names).squeeze("time")

# Combine the per-band variables into one array along a new "band" dimension
data_stacked = data.to_array("band")

# Re-label the "band" coordinate with the numeric wavelengths taken from each
# band's "description" and rebind `data` to the indexed array
subset_wavelengths = [float(band_info["description"]) for band_info in eo_bands_subset]
data = data_stacked.assign_coords(band=subset_wavelengths)


In [None]:
# Pick the band whose wavelength coordinate lies closest to 450 and plot it.
# NOTE(review): 450 is in whatever units the XML wavelength list uses
# (presumably nanometres) - confirm.
nearest_band = data.sel(band=450, method="nearest")
nearest_band.plot()