In [None]:
from dep_tools.loaders import OdcLoader
from dep_tools.searchers import PystacSearcher
from odc.stac import configure_s3_access
from ldn.geomad import (
    GeoMADLandsatProcessor,
    USGS_CATALOG,
    USGS_COLLECTION
)
from dask.distributed import Client as DaskClient

from ldn.grids import get_gridspec

In [None]:
# Enable IPython's autoreload extension so that edits to imported modules
# are picked up on the next cell execution without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Configure S3 access up front: cloud defaults enabled, requester-pays on.
# The return value is not needed, so it is bound to a throwaway name.
_ = configure_s3_access(
    cloud_defaults=True,
    requester_pays=True,
)

In [None]:
# When True, the tile geobox is zoomed out by a factor of 10 (see below).
decimated = False

# Search period and processing parameters; the latter are passed to the
# GeoMAD processor options further down.
datetime = "2025"
xy_chunk_size = 1000
geomad_threads = 10
scale = 0.0000275
offset = -0.2
drop_vars = ["qa_pixel"]

# Tile to process, written as a comma-separated pair of integer grid indices.
tile_id = "136,142"

grid = get_gridspec()
tile_index = tuple(map(int, tile_id.split(",")))
geobox = grid.tile_geobox(tile_index)
geobox = geobox.zoom_out(10) if decimated else geobox

# Visual check of the area that will be processed.
geobox.explore()


In [None]:
# Restrict the search to items whose "landsat:collection_category" is "T1".
category_filter = {"landsat:collection_category": {"in": ["T1"]}}
search_kwargs = {"query": category_filter}

searcher = PystacSearcher(
    catalog=USGS_CATALOG,
    collections=[USGS_COLLECTION],
    datetime=datetime,
    **search_kwargs,
)

# Search using the tile geobox defined above.
items = searcher.search(geobox)

print(f"Found {len(items)} items")

In [None]:
# Bands to load; "qa_pixel" is also listed in `drop_vars` above.
load_bands = ["qa_pixel", "red", "green", "blue"]
# Chunk sizes handed to the loader.
load_chunks = {"x": 2048, "y": 2048, "time": 1}

loader = OdcLoader(
    bands=load_bands,
    chunks=load_chunks,
    groupby="solar_day",
    fail_on_error=False,
)

# Load the found items onto the tile geobox and display the result.
data = loader.load(items, geobox)
data

In [None]:
# Options forwarded to the processor as `geomad_options`; the values come
# from the parameters cell above.
geomad_options = {
    "work_chunks": (xy_chunk_size, xy_chunk_size),
    "num_threads": geomad_threads,
    "maxiters": 100,
    "scale": scale,
    "offset": offset,
    "nodata": 0,
}

processor = GeoMADLandsatProcessor(
    geomad_options=geomad_options,
    min_timesteps=5,
    drop_vars=drop_vars,
)

# Do the processing using a local dask cluster. The client only exists for
# the duration of the `with` block.
with DaskClient(n_workers=1, threads_per_worker=32, memory_limit="16GB"):
    output_data = processor.process(data)

output_data

In [None]:
# Visual check of the processed output via the `.odc` accessor's explore().
output_data.odc.explore()

In [None]:
# End-to-end test: read a published STAC item and load it.
# Note that this rebinds `data`, replacing the dataset loaded earlier.
from pystac import Item
from odc.stac import load

item_url = "https://data.ldn.auspatious.com/ausp_ls_geomad/0-0-0/136/142/2025/ausp_ls_geomad_136_142_2025.stac-item.json"
item = Item.from_file(item_url)
data = load([item])
data

In [None]:
# Visual check of the dataset loaded from the published STAC item.
data.odc.explore()

## The next section tests GeoMAD for many spatially diverse tiles in 5-yearly increments

These tests run at full resolution (`decimated = False`).

https://github.com/auspatious/ldn-lulc/issues/

In [None]:
from odc.stac import configure_s3_access
import os

# AWS credentials must come from the environment: export them in the terminal
# and restart the kernel. Do not paste secrets into this notebook, because
# they would end up in version control. Assigning placeholder empty strings
# here would also overwrite credentials that are already exported.
aws_credential_vars = (
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "AWS_SESSION_TOKEN",
)

# Report only whether each variable is set. Printing the values themselves
# would leak the secrets into the saved cell output.
for var_name in aws_credential_vars:
    status = "set" if os.environ.get(var_name) else "NOT set"
    print(f"{var_name}: {status}")

# Make sure we can access S3
_ = configure_s3_access(cloud_defaults=True, requester_pays=True)

# Years to test, kept as a comma-separated string and split into a list.
years = "2000,2005,2010,2015,2020,2025"
years_list = years.split(sep=",")

# Spatially diverse test tiles.
grid_tiles = [
    # Pacific island (atolls). Kiribati.
    "53_113",
    # Pacific islands (volcanic). Fiji.
    "413_86",
    # Fiji antimeridian crossing. Fiji.
    "29_86",
    # Caribbean (atolls). Belize.
    "127_134",
    # Caribbean (land). Suriname.
    "162_117",
    # Africa. Cape Verde.
    "197_133",
    # Indian Ocean. Comoros.
    "268_94",
]

In [None]:
from ldn.grids import get_gridspec

grid = get_gridspec()

# One geobox per (year, tile) pair, ordered year-major. The geobox itself
# does not depend on the year, so each tile's geobox is repeated once per
# year; later cells slice this list by position.
geoboxes = []

for year in years_list:
    for grid_tile in grid_tiles:
        # Tile ids are written "a_b" here; accept "a,b" as well.
        # A separate loop name is used so the notebook-level `tile_id` and
        # `datetime` parameters are not overwritten by this cell.
        index_parts = grid_tile.replace("_", ",").split(",")
        grid_tile_index = tuple(int(part) for part in index_parts)
        geoboxes.append(grid.tile_geobox(grid_tile_index))

print(len(geoboxes))
assert len(geoboxes) == len(years_list) * len(grid_tiles)

In [None]:
import geopandas as gpd
from shapely.geometry import box

# Only the first year's geoboxes are needed for exploration: they occupy the
# first len(grid_tiles) positions of `geoboxes`.
first_year = years_list[0]
start, end = 0, len(grid_tiles)
first_year_geoboxes = geoboxes[start:end]

# Build one rectangle per tile from its geobox bounding box.
geoms = [box(*tile_geobox.extent.boundingbox) for tile_geobox in first_year_geoboxes]
tile_table = {"year": [first_year] * len(grid_tiles), "tile": grid_tiles}
gdf = gpd.GeoDataFrame(tile_table, geometry=geoms, crs=geoboxes[0].crs)

# A tile wraps the antimeridian. Hopefully geomad works.
gdf.explore()

In [None]:
from ldn.cli import geomad
import time

# Set to True for a faster test run.
decimated = False

# Subset to test the testing
years_list_test = ["2020"]
grid_tiles_test = ["268_94"]

# Which years/tiles this cell processes. Swap in `years_list` / `grid_tiles`
# to run every year and tile.
years_to_run = years_list_test
tiles_to_run = grid_tiles_test

# TODO: Don't rerun the full res test tile-year that I ran.

# TODO: Test AWS Put before running to check env vars have come through.

for year in years_to_run:
    for tile_id in tiles_to_run:
        print(f"Processing tile {tile_id} for year {year}")
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments during a long run.
        start_time = time.perf_counter()
        # Run geomad processing for this tile and year
        geomad(
            tile_id,
            year,
            "0.0.0",
            bucket="data.ldn.auspatious.com",
            overwrite=True,
            decimated=decimated,
            all_bands=True,
            memory_limit="6GB",
            n_workers=4,
            threads_per_worker=4,
            xy_chunk_size=8192, # TODO: align with internal COG tiling for performance.
            geomad_threads=1, # TODO: Validate this. Advice is to not double-thread. Let Dask do parallelism. Nested threading + I/O is slow.
        )
        elapsed = time.perf_counter() - start_time
        # Split the elapsed seconds into whole minutes and remaining seconds.
        whole_minutes, seconds = divmod(elapsed, 60)
        minutes = int(whole_minutes)
        print(f"Tile {tile_id} year {year} took {minutes}m {seconds:.1f}s.")


In [None]:
# End-to-end test: read a published STAC item and load it.
# NOTE(review): by default the processing cell above only runs tile 268_94
# for 2020, so this 053/113/2000 item presumably comes from an earlier run.
# Confirm it exists before relying on this cell.
from pystac import Item
from odc.stac import load

item_url = "https://data.ldn.auspatious.com/ausp_ls_geomad/0-0-0/053/113/2000/ausp_ls_geomad_053_113_2000.stac-item.json"
item = Item.from_file(item_url)
data = load([item])
data

In [None]:
# Visual check of the dataset loaded from the published STAC item.
data.odc.explore()