In [None]:
from dep_tools.loaders import OdcLoader
from dep_tools.searchers import PystacSearcher
from odc.stac import configure_s3_access
from ldn.geomad import (
    GeoMADLandsatProcessor,
    USGS_CATALOG,
    USGS_COLLECTION
)
from dask.distributed import Client as DaskClient

from ldn.grids import get_gridspec

In [None]:
# Reload functions
%load_ext autoreload
%autoreload 2

In [None]:
# Make sure we can access S3: configure S3 reads with cloud defaults and
# requester-pays enabled. The return value is not needed, hence `_`.
_ = configure_s3_access(cloud_defaults=True, requester_pays=True)

In [None]:
# --- What to process ---------------------------------------------------------
tile_id = "136,142"  # grid tile index written as "x,y"
datetime = "2025"  # passed to the STAC search as the time filter
decimated = False  # True zooms the geobox out by a factor of 10 for quick runs

# --- GeoMAD options (consumed by the processor cell below) -------------------
xy_chunk_size = 1000
geomad_threads = 10
# NOTE(review): presumably the Landsat Collection 2 surface-reflectance
# scale/offset - confirm against the product documentation.
scale = 0.0000275
offset = -0.2
drop_vars = ["qa_pixel"]

# --- Resolve the tile id to a geobox -----------------------------------------
grid = get_gridspec()
tile_index = tuple(map(int, tile_id.split(",")))
geobox = grid.tile_geobox(tile_index)
if decimated:
    geobox = geobox.zoom_out(10)

geobox.explore()


In [None]:
# Only keep scenes whose collection category is "T1"
search_kwargs = dict(query={"landsat:collection_category": {"in": ["T1"]}})

searcher = PystacSearcher(
    catalog=USGS_CATALOG,
    collections=[USGS_COLLECTION],
    datetime=datetime,
    **search_kwargs,
)

# Search over the footprint of the tile geobox defined above
items = searcher.search(geobox)
print(f"Found {len(items)} items")

In [None]:
# qa_pixel is loaded alongside the RGB bands; it is listed in `drop_vars`
# for the processor further down.
load_bands = ["qa_pixel", "red", "green", "blue"]

loader = OdcLoader(
    bands=load_bands,
    chunks=dict(x=2048, y=2048, time=1),
    groupby="solar_day",
    fail_on_error=False,
)

# Load the found items onto the tile geobox
data = loader.load(items, geobox)
data

In [None]:
# Options handed to the GeoMAD computation; values come from the parameter cell
geomad_options = {
    "work_chunks": (xy_chunk_size, xy_chunk_size),
    "num_threads": geomad_threads,
    "maxiters": 100,
    "scale": scale,
    "offset": offset,
    "nodata": 0,
}

processor = GeoMADLandsatProcessor(
    geomad_options=geomad_options,
    min_timesteps=5,
    drop_vars=drop_vars,
)

# Do the processing using a local dask cluster (one worker, 32 threads);
# the client is closed again when the `with` block exits.
with DaskClient(n_workers=1, threads_per_worker=32, memory_limit="16GB"):
    output_data = processor.process(data)

output_data

In [None]:
# Visual check of the result returned by processor.process() above
output_data.odc.explore()

In [None]:
# End-to-end test an item
from pystac import Item
from odc.stac import load

# STAC item document for tile 136/142, year 2025.
# Note: this rebinds `data`, replacing the input dataset loaded earlier.
item_url = "https://data.ldn.auspatious.com/ausp_ls_geomad/0-0-0/136/142/2025/ausp_ls_geomad_136_142_2025.stac-item.json"
item = Item.from_file(item_url)
data = load([item])
data

In [None]:
# Visual check of the dataset loaded from the STAC item above
data.odc.explore()

## The next section tests GeoMAD for many spatially diverse tiles in 5-yearly increments

The tiles are processed at full resolution (not decimated).

https://github.com/auspatious/ldn-lulc/issues/

In [None]:
from odc.stac import configure_s3_access, configure_rio, load

import os

# AWS credentials are NOT set here. They must come from the environment or the
# standard AWS credential chain (exported variables, a profile, SSO, a role).
# Assigning them in the notebook would overwrite working credentials and risk
# committing secrets, so this cell only reports whether each variable is set.
# Secret values are deliberately never printed: notebook output is saved with
# the file and would leak them.
for credential_var in (
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "AWS_SESSION_TOKEN",
):
    state = "set" if os.environ.get(credential_var) else "NOT set"
    print(f"{credential_var}: {state}")

# Not a secret: flag the session as requester-pays.
os.environ["AWS_REQUEST_PAYER"] = "requester"
print("AWS_REQUEST_PAYER:", os.environ.get("AWS_REQUEST_PAYER"))

# Make sure we can access S3 (signed requests, requester pays)
_ = configure_s3_access(cloud_defaults=True, requester_pays=True, aws_unsigned=False)

configure_rio(
    cloud_defaults=True,
    # aws={"aws_unsigned": True, "aws_request_payer": "requester"},
)

# Define years and tiles to test
# (the commented-out line is the fuller list that also includes 2000 and 2005)
# years = "2000,2005,2010,2015,2020,2025"
years = "2010,2015,2020,2025"
years_list = years.split(",")

# Tile ids are "x_y" grid indices; the trailing comment on each entry records
# why that tile was picked and which country it covers.
grid_tiles = [
    "53_113", # Pacific island (atolls). Kiribati.
    "413_86", # Pacific islands (volcanic). Fiji.
    "29_86", # Fiji antimeridian crossing. Fiji.
    "127_134", # Caribbean (atolls). Belize.
    "162_117", # Caribbean (land). Suriname.
    "197_133", # Africa. Cape Verde.
    "268_94", # Indian Ocean. Comoros.
]

In [None]:
from ldn.grids import get_gridspec

grid = get_gridspec()

# One geobox per (year, tile) pair, year-major, so the first len(grid_tiles)
# entries belong to the first year. The geobox depends only on the tile, so
# each tile's geobox is repeated once per year.
# Built with a comprehension so the loop does not overwrite the notebook-level
# `datetime`, `tile_id`, `tile_index` and `geobox` that earlier cells rely on.
geoboxes = [
    grid.tile_geobox(tuple(int(part) for part in grid_tile.split("_")))
    for _year in years_list
    for grid_tile in grid_tiles
]

print(len(geoboxes))
assert len(geoboxes) == len(years_list)*len(grid_tiles)

In [None]:
import geopandas as gpd
from shapely.geometry import box

# The geobox list is ordered year by year, so its first len(grid_tiles)
# entries are the tiles of the first year - enough to preview the footprints.
first_year = years_list[0]
first_year_geoboxes = geoboxes[: len(grid_tiles)]

geoms = [box(*gbox.extent.boundingbox) for gbox in first_year_geoboxes]
gdf = gpd.GeoDataFrame(
    {"year": [first_year] * len(grid_tiles), "tile": grid_tiles},
    geometry=geoms,
    crs=geoboxes[0].crs,
)

# A tile wraps the antimeridian. Hopefully geomad works.
gdf.explore()

In [None]:
from ldn.cli import geomad
import time

# decimated = True # For testing faster
decimated = False

# Tile-years already produced at full resolution in an earlier run.
already_processed = {("2020", "268_94"), ("2010", "53_113")}

# To test the loop itself, iterate over a subset instead,
# e.g. years ["2020"] and tiles ["268_94"].

# TODO: Test AWS Put before running to check env vars have come through.

for year in years_list:
    for tile_id in grid_tiles:
        # Don't rerun the full res test tile-year that I ran.
        if (year, tile_id) in already_processed:
            print(f"Skipping already processed tile-year {tile_id}_{year}.")
            continue

        print(f"Processing tile {tile_id} for year {year}")

        start_time = time.time()
        try:
            # Run geomad processing for this tile and year
            geomad(
                tile_id,
                year,
                "0.0.0",
                bucket="data.ldn.auspatious.com",
                overwrite=True,
                decimated=decimated,
                all_bands=True,
                memory_limit="6GB", # We aren't close to hitting this.
                n_workers=4,
                threads_per_worker=4,
                xy_chunk_size=8192, # TODO: align with internal COG tiling for performance.
                geomad_threads=1, # TODO: Validate this. Advice is to not double-thread. Let Dask do parallelism. Nested threading + I/O is slow.
            )
        except Exception as e:
            print(f"Error processing tile {tile_id} for year {year}: {e}")
            # Stop on the first error, but re-raise rather than call quit():
            # quit() asks the kernel to exit, which loses the traceback and
            # every variable needed for debugging.
            raise

        elapsed = time.time() - start_time
        minutes = int(elapsed // 60)
        seconds = elapsed % 60
        print(f"Tile {tile_id} year {year} took {minutes}m {seconds:.1f}s.")


In [None]:
import boto3
import os

s3 = boto3.client("s3")

bucket = "data.ldn.auspatious.com"
prefix = "ausp_ls_geomad/0-0-0"

# HTTPS URLs of every STAC item document found under the prefix
results = []

# list_objects_v2 is called with MaxKeys=1000, so keep requesting pages
# until the response is no longer marked as truncated.
continuation_token = None
while True:
    kwargs = dict(Bucket=bucket, Prefix=prefix, MaxKeys=1000)
    if continuation_token:
        kwargs["ContinuationToken"] = continuation_token

    response = s3.list_objects_v2(**kwargs)

    for obj in response.get("Contents", []):
        key = obj["Key"]
        filename = os.path.basename(key)
        created = obj["LastModified"]

        # Extra filters (e.g. a year in the filename, or a creation date)
        # can be added to this condition when narrowing the listing.
        if not filename.endswith("stac-item.json"):
            continue

        path = f"https://{bucket}/{key}"
        results.append(path)
        print(
            f"Name: {filename}",
            f"Created: {created}",
            f"Path: {path}",
            "-" * 50,
            sep="\n",
        )

    if not response.get("IsTruncated"):
        break
    # more pages to fetch
    continuation_token = response.get("NextContinuationToken")

print(len(results))
# assert len(results) == 7

In [None]:
# End-to-end test an item
from pystac import Item
from odc.stac import load


# Load a single published item (index 6 of the S3 listing) and preview it.
# To load every tile instead: load([Item.from_file(p) for p in results]).
# A known-good item for comparison:
# https://data.ldn.auspatious.com/ausp_ls_geomad/0-0-0/268/094/2020/ausp_ls_geomad_268_094_2020.stac-item.json
stac_item_url = results[6]
print(stac_item_url)

item = Item.from_file(stac_item_url)
data = load([item])

data.odc.explore(tiles="CartoDB positron", alpha=0.5, bands=["red", "green", "blue"])

In [None]:
# Same dataset as the previous cell, shown without the explicit RGB band
# selection or transparency
data.odc.explore(tiles="CartoDB positron")

In [None]:
# QA QC on count and valid percent of pixels.

# count = number of valid observations contributing to that median

# valid_percent

import csv
csv_path = "geomad_qa_results.csv"

# QA thresholds: an item FAILS if any check below falls under these values.
VALID_PERCENT_MIN = 5.0  # minimum valid_percent for every raster asset
COUNT_MEAN_MIN = 2.0  # minimum mean of the "count" asset
COUNT_MAX_MIN = 5  # minimum maximum of the "count" asset


def assess_stac_item(item: dict) -> tuple[str, list[str]]:
    """Run the QA checks on a STAC item given as a dict.

    Only the statistics of the first entry in each asset's ``raster:bands``
    list are inspected.

    Checks:
      * every asset that reports ``valid_percent`` must be at least
        ``VALID_PERCENT_MIN``;
      * a ``count`` asset must exist and carry raster statistics;
      * its ``mean`` must be at least ``COUNT_MEAN_MIN`` and its ``maximum``
        at least ``COUNT_MAX_MIN``, when those statistics are present.

    Args:
        item: STAC item dictionary (for example ``pystac.Item.to_dict()``).

    Returns:
        ``(status, reasons)``, where ``status`` is ``"PASS"`` or ``"FAIL"`` and
        ``reasons`` lists one message per failed check (empty on PASS).
    """
    reasons = []

    assets = item.get("assets", {})

    # --- Check valid_percent across all raster assets ---
    for name, asset in assets.items():
        bands = asset.get("raster:bands", [])
        if not bands:
            continue

        vp = bands[0].get("statistics", {}).get("valid_percent")
        if vp is not None and vp < VALID_PERCENT_MIN:
            reasons.append(
                f"{name}: valid_percent {vp:.2f}% < {VALID_PERCENT_MIN}%"
            )

    # --- Count-specific checks ---
    if "count" in assets:
        # Read the statistics defensively: a count asset published without
        # them should be reported as a QA failure, not abort the whole run.
        count_bands = assets["count"].get("raster:bands") or []
        count_stats = count_bands[0].get("statistics") if count_bands else None

        if count_stats is None:
            reasons.append("count asset has no raster statistics")
        else:
            mean_count = count_stats.get("mean")
            max_count = count_stats.get("maximum")

            if mean_count is not None and mean_count < COUNT_MEAN_MIN:
                reasons.append(
                    f"count mean {mean_count:.2f} < {COUNT_MEAN_MIN}"
                )

            if max_count is not None and max_count < COUNT_MAX_MIN:
                reasons.append(
                    f"count max {max_count} < {COUNT_MAX_MIN}"
                )
    else:
        reasons.append("missing count asset")

    status = "FAIL" if reasons else "PASS"
    return status, reasons

# Print the thresholds first so the log that follows is self-explanatory
separator = "-" * 150
for banner_line in (
    "QA Assessment Criteria:",
    f" - valid_percent >= {VALID_PERCENT_MIN}% for all assets",
    f" - count mean >= {COUNT_MEAN_MIN}",
    f" - count max >= {COUNT_MAX_MIN}",
    "",
    separator,
    separator,
):
    print(banner_line)

# qa_results[year][x_y] -> {"status": ..., "reasons": [...]}
qa_results = {}

with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["year", "x_y", "status", "reasons"])  # header

    for stac_item_path in results:
        print(f"Assessing {stac_item_path}")

        # The URL ends in .../<x>/<y>/<year>/<file name>
        path_parts = stac_item_path.split("/")
        year = path_parts[-2]
        x = path_parts[-4]
        y = path_parts[-3]
        xy = f"{x}_{y}"

        item = Item.from_file(stac_item_path)
        item_dict = item.to_dict()
        status, reasons = assess_stac_item(item_dict)

        print(f"QA STATUS: {status}")
        if reasons:
            print("Reasons:")
            for reason in reasons:
                print(f" - {reason}")

        qa_results.setdefault(year, {})[xy] = {
            "status": status,
            "reasons": reasons,
        }

        # Write row to CSV (an empty reasons list joins to an empty string)
        reasons_str = "; ".join(reasons)
        writer.writerow([year, xy, status, reasons_str])

        print(separator)


In [None]:
# Display the nested QA results: year -> "x_y" tile -> status and reasons
qa_results

In [None]:
import csv
from pystac import Item  # assuming you are using pystac

# ----------------------------
# QA thresholds
# ----------------------------
VALID_PERCENT_MIN = 5.0  # minimum valid_percent for every raster asset
COUNT_MEAN_MIN = 2.0  # minimum mean of the "count" asset
COUNT_MAX_MIN = 5  # minimum maximum of the "count" asset

# ----------------------------
# QA function
# ----------------------------
def assess_stac_item(item: dict) -> tuple[str, list[str]]:
    """Run the QA checks on a STAC item given as a dict.

    Only the statistics of the first entry in each asset's ``raster:bands``
    list are inspected.

    Checks:
      * every asset that reports ``valid_percent`` must be at least
        ``VALID_PERCENT_MIN``;
      * a ``count`` asset must exist and carry raster statistics;
      * its ``mean`` must be at least ``COUNT_MEAN_MIN`` and its ``maximum``
        at least ``COUNT_MAX_MIN``, when those statistics are present.

    Args:
        item: STAC item dictionary (for example ``pystac.Item.to_dict()``).

    Returns:
        ``(status, reasons)``, where ``status`` is ``"PASS"`` or ``"FAIL"`` and
        ``reasons`` lists one message per failed check (empty on PASS).
    """
    reasons = []

    assets = item.get("assets", {})

    # --- Check valid_percent across all raster assets ---
    for name, asset in assets.items():
        bands = asset.get("raster:bands", [])
        if not bands:
            continue

        stats = bands[0].get("statistics", {})
        vp = stats.get("valid_percent")

        if vp is not None and vp < VALID_PERCENT_MIN:
            reasons.append(f"{name}: valid_percent {vp:.2f}% < {VALID_PERCENT_MIN}%")

    # --- Count-specific checks ---
    if "count" in assets:
        # Read the statistics defensively: a count asset published without
        # them should be reported as a QA failure, not abort the whole run.
        count_bands = assets["count"].get("raster:bands") or []
        count_stats = count_bands[0].get("statistics") if count_bands else None

        if count_stats is None:
            reasons.append("count asset has no raster statistics")
        else:
            mean_count = count_stats.get("mean")
            max_count = count_stats.get("maximum")

            if mean_count is not None and mean_count < COUNT_MEAN_MIN:
                reasons.append(f"count mean {mean_count:.2f} < {COUNT_MEAN_MIN}")
            if max_count is not None and max_count < COUNT_MAX_MIN:
                reasons.append(f"count max {max_count} < {COUNT_MAX_MIN}")
    else:
        reasons.append("missing count asset")

    status = "FAIL" if reasons else "PASS"
    return status, reasons

# ----------------------------
# Paths and storage
# ----------------------------
csv_path_flat = "geomad_qa_results.csv"  # one row per tile-year
csv_path_matrix = "geomad_qa_matrix.csv"  # years as rows, tiles as columns

# qa_results[year][x_y] -> {"status": ..., "reasons": [...]}
qa_results = {}

# ----------------------------
# Process all STAC items
# ----------------------------
with open(csv_path_flat, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["year", "x_y", "status", "reasons"])  # flat CSV header

    for stac_item_path in results:
        print(f"Assessing {stac_item_path}")

        # The URL ends in .../<x>/<y>/<year>/<file name>
        path_parts = stac_item_path.split("/")
        year = path_parts[-2]
        x = path_parts[-4]
        y = path_parts[-3]
        xy = f"{x}_{y}"

        item = Item.from_file(stac_item_path)
        item_dict = item.to_dict()
        status, reasons = assess_stac_item(item_dict)

        print(f"QA STATUS: {status}")
        for reason in reasons:
            print(f" - {reason}")

        # Save in dictionary for matrix
        qa_results.setdefault(year, {})[xy] = {
            "status": status,
            "reasons": reasons,
        }

        # Write flat CSV row (an empty reasons list joins to an empty string)
        reasons_str = "; ".join(reasons)
        writer.writerow([year, xy, status, reasons_str])

        print("-" * 150)

print(f"Flat CSV written to {csv_path_flat}")

# ----------------------------
# Write matrix CSV
# ----------------------------
# Rows are years, columns are every x_y tile seen in any year; a cell holds
# "STATUS" or "STATUS: reason; reason", or is blank when that tile-year has
# no result.
all_years = sorted(qa_results)
all_xy = sorted({xy for tiles_in_year in qa_results.values() for xy in tiles_in_year})

with open(csv_path_matrix, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["year", *all_xy])  # header row

    for year in all_years:
        tiles_in_year = qa_results[year]
        row = [year]
        for xy in all_xy:
            entry = tiles_in_year.get(xy)
            if entry is None:
                row.append("")
                continue

            cell = entry["status"]
            if entry["reasons"]:
                cell += ": " + "; ".join(entry["reasons"])
            row.append(cell)
        writer.writerow(row)

print(f"Matrix CSV written to {csv_path_matrix}")


In [None]:
# Searching for items for area GeoBox((3000, 3000), Affine(30.0, 0.0, -17390000.0,
#        0.0, -30.0, -2170000.0), CRS('EPSG:6933'))
# Found 92 items for area
antimeridian_items = [
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20241229_20241230_02_T1/LC09_L2SR_073072_20241229_20241230_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20241221_20241228_02_T1/LC08_L2SR_073073_20241221_20241228_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20241220_20241221_02_T1/LC09_L2SR_074072_20241220_20241221_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20241213_20241214_02_T1/LC09_L2SR_073073_20241213_20241214_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20241213_20241214_02_T1/LC09_L2SR_073072_20241213_20241214_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20241212_20241218_02_T1/LC08_L2SR_074072_20241212_20241218_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20241205_20241210_02_T1/LC08_L2SR_073072_20241205_20241210_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20241127_20241128_02_T1/LC09_L2SR_073073_20241127_20241128_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20241127_20241128_02_T1/LC09_L2SR_073072_20241127_20241128_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20241119_20241127_02_T1/LC08_L2SR_073073_20241119_20241127_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20241119_20241127_02_T1/LC08_L2SR_073072_20241119_20241127_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20241111_20241116_02_T1/LC09_L2SR_073073_20241111_20241116_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20241110_20241118_02_T1/LC08_L2SR_074072_20241110_20241118_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20241103_20241112_02_T1/LC08_L2SR_073073_20241103_20241112_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20241103_20241112_02_T1/LC08_L2SR_073072_20241103_20241112_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20241102_20241103_02_T1/LC09_L2SR_074072_20241102_20241103_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20241026_20241027_02_T1/LC09_L2SR_073073_20241026_20241027_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20241026_20241027_02_T1/LC09_L2SR_073072_20241026_20241027_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20241018_20241028_02_T1/LC08_L2SR_073073_20241018_20241028_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20241018_20241028_02_T1/LC08_L2SR_073072_20241018_20241028_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20241010_20241011_02_T1/LC09_L2SR_073073_20241010_20241011_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20241010_20241011_02_T1/LC09_L2SR_073072_20241010_20241011_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20241009_20241015_02_T1/LC08_L2SR_074072_20241009_20241015_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20241002_20241008_02_T1/LC08_L2SR_073073_20241002_20241008_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20241002_20241008_02_T1/LC08_L2SR_073072_20241002_20241008_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20241001_20241002_02_T1/LC09_L2SR_074072_20241001_20241002_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20240923_20240928_02_T1/LC08_L2SR_074072_20240923_20240928_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240916_20240921_02_T1/LC08_L2SR_073072_20240916_20240921_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20240915_20240916_02_T1/LC09_L2SR_074072_20240915_20240916_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240908_20240909_02_T1/LC09_L2SR_073073_20240908_20240909_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240908_20240909_02_T1/LC09_L2SR_073072_20240908_20240909_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240831_20240906_02_T1/LC08_L2SR_073073_20240831_20240906_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240831_20240906_02_T1/LC08_L2SR_073072_20240831_20240906_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240823_20240824_02_T1/LC09_L2SR_073073_20240823_20240824_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240823_20240824_02_T1/LC09_L2SR_073072_20240823_20240824_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240815_20240822_02_T1/LC08_L2SR_073072_20240815_20240822_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240807_20240808_02_T1/LC09_L2SR_073073_20240807_20240808_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240807_20240808_02_T1/LC09_L2SR_073072_20240807_20240808_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20240806_20240814_02_T1/LC08_L2SR_074072_20240806_20240814_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240730_20240807_02_T1/LC08_L2SR_073073_20240730_20240807_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240730_20240807_02_T1/LC08_L2SR_073072_20240730_20240807_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240722_20240723_02_T1/LC09_L2SR_073073_20240722_20240723_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240722_20240723_02_T1/LC09_L2SR_073072_20240722_20240723_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20240627_20240629_02_T1/LC09_L2SR_074072_20240627_20240629_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240620_20240621_02_T1/LC09_L2SR_073073_20240620_20240621_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240620_20240621_02_T1/LC09_L2SR_073072_20240620_20240621_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240612_20240628_02_T1/LC08_L2SR_073073_20240612_20240628_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240612_20240628_02_T1/LC08_L2SR_073072_20240612_20240628_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20240611_20240612_02_T1/LC09_L2SR_074072_20240611_20240612_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240604_20240605_02_T1/LC09_L2SR_073073_20240604_20240605_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240604_20240605_02_T1/LC09_L2SR_073072_20240604_20240605_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240519_20240522_02_T1/LC09_L2SR_073073_20240519_20240522_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240519_20240522_02_T1/LC09_L2SR_073072_20240519_20240522_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20240518_20240605_02_T1/LC08_L2SR_074072_20240518_20240605_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240511_20240520_02_T1/LC08_L2SR_073073_20240511_20240520_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240511_20240520_02_T1/LC08_L2SR_073072_20240511_20240520_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20240510_20240511_02_T1/LC09_L2SR_074072_20240510_20240511_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240503_20240504_02_T1/LC09_L2SR_073072_20240503_20240504_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20240502_20240513_02_T1/LC08_L2SR_074072_20240502_20240513_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240425_20240510_02_T1/LC08_L2SR_073073_20240425_20240510_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240425_20240510_02_T1/LC08_L2SR_073072_20240425_20240510_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20240424_20240425_02_T1/LC09_L2SR_074072_20240424_20240425_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240417_20240418_02_T1/LC09_L2SR_073073_20240417_20240418_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240417_20240418_02_T1/LC09_L2SR_073072_20240417_20240418_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20240416_20240423_02_T1/LC08_L2SR_074072_20240416_20240423_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240409_20240418_02_T1/LC08_L2SR_073073_20240409_20240418_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240409_20240419_02_T1/LC08_L2SR_073072_20240409_20240419_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240401_20240403_02_T1/LC09_L2SR_073073_20240401_20240403_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240401_20240403_02_T1/LC09_L2SR_073072_20240401_20240403_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20240331_20240410_02_T1/LC08_L2SR_074072_20240331_20240410_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240324_20240403_02_T1/LC08_L2SR_073073_20240324_20240403_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240324_20240403_02_T1/LC08_L2SR_073072_20240324_20240403_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240308_20240316_02_T1/LC08_L2SR_073073_20240308_20240316_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240308_20240316_02_T1/LC08_L2SR_073072_20240308_20240316_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240229_20240301_02_T1/LC09_L2SR_073073_20240229_20240301_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240229_20240301_02_T1/LC09_L2SR_073072_20240229_20240301_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240221_20240229_02_T1/LC08_L2SR_073072_20240221_20240229_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20240220_20240222_02_T1/LC09_L2SR_074072_20240220_20240222_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240213_20240215_02_T1/LC09_L2SR_073072_20240213_20240215_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240205_20240212_02_T1/LC08_L2SR_073072_20240205_20240212_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240128_20240130_02_T1/LC09_L2SR_073073_20240128_20240130_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240128_20240130_02_T1/LC09_L2SR_073072_20240128_20240130_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20240127_20240207_02_T1/LC08_L2SR_074072_20240127_20240207_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240120_20240129_02_T1/LC08_L2SR_073073_20240120_20240129_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240120_20240129_02_T1/LC08_L2SR_073072_20240120_20240129_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20240119_20240120_02_T1/LC09_L2SR_074072_20240119_20240120_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC09_L2SR_073073_20240112_20240113_02_T1/LC09_L2SR_073073_20240112_20240113_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC09_L2SR_073072_20240112_20240113_02_T1/LC09_L2SR_073072_20240112_20240113_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC08_L2SR_074072_20240111_20240123_02_T1/LC08_L2SR_074072_20240111_20240123_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/073/LC08_L2SR_073073_20240104_20240113_02_T1/LC08_L2SR_073073_20240104_20240113_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20240104_20240113_02_T1/LC08_L2SR_073072_20240104_20240113_02_T1_SR_B4.TIF",
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/074/072/LC09_L2SR_074072_20240103_20240106_02_T1/LC09_L2SR_074072_20240103_20240106_02_T1_SR_B4.TIF",
]

antimeridian_items[0]

import boto3
import re

from pathlib import Path

s3 = boto3.client("s3")

# Download the first scene of the list above (no second copy of its URL)
url = antimeridian_items[0]

# Parse bucket and key from the S3 URL
match = re.match(r's3://([^/]+)/(.+)', url)
if not match:
    raise ValueError(f"Invalid S3 URL: {url}")

bucket = match.group(1)
key = match.group(2)

# To download the file to local disk: use the current user's Downloads folder
# rather than a hard-coded home directory, so the cell runs on any machine.
download_dir = Path.home() / "Downloads"
download_dir.mkdir(parents=True, exist_ok=True)
local_path = str(download_dir / key.split('/')[-1])

# The download is requested as requester-pays.
s3.download_file(bucket, key, local_path, ExtraArgs={'RequestPayer': 'requester'})

# DEBUGGING ANTIMERIDIAN

In [None]:
import pystac_client

from ldn.geomad import http_to_s3_url, USGS_CATALOG, USGS_COLLECTION
# Alternatives tried while debugging are kept commented out next to the call
# that is actually used (the public landsatlook STAC server).
# catalog = pystac_client.Client.open(http_to_s3_url("https://landsatlook.usgs.gov/stac-server"))
catalog = pystac_client.Client.open("https://landsatlook.usgs.gov/stac-server")
# catalog = pystac_client.Client.open(USGS_CATALOG)


# Set a bounding box
# [xmin, ymin, xmax, ymax] in degrees: x is longitude, y is latitude.
# The box lies just east of the antimeridian (longitudes close to -180).
bbox = [-179.78797886784525, -17.969338895119208, -179.3186512262919, -17.252353456473642]

# Set a start and end date
start_date = "2024-01-01"
end_date = "2024-12-31"

# Set product ID as the STAC "collection"
collections = ["landsat-c2l2-sr"]
# collections = [USGS_COLLECTION]
catalog

In [None]:
# Build a query with the parameters above
query = catalog.search(
    bbox=bbox,
    collections=collections,
    query={"landsat:collection_category": {"eq": "T1"}, "landsat:cloud_cover_land": {"lt": 50.00}},
    datetime=f"{start_date}/{end_date}",
)

# Search the STAC catalog for all items matching the query
items = list(query.items())
print(f"Found: {len(items):d} T1 datasets")

print(items[0])
print(items[0].assets['red'])

In [None]:
# from ldn.geomad import http_to_s3_url

def http_to_s3_url(http_url: str) -> str:
    """Convert a USGS HTTP URL to an S3 URL.

    Replaces the ``https://landsatlook.usgs.gov/data`` prefix with
    ``s3://usgs-landsat`` and drops a trailing ``":1"`` suffix if present.
    Each conversion is printed, which helps when debugging which assets are
    being read.

    Note: this local definition shadows the ``http_to_s3_url`` imported from
    ``ldn.geomad`` earlier in the notebook.

    Args:
        http_url: Asset URL as found in the STAC item.

    Returns:
        The rewritten URL. Input that does not contain the HTTP prefix is
        returned unchanged, apart from the suffix removal.
    """
    # removesuffix, not rstrip: rstrip(":1") strips ANY trailing ':' or '1'
    # characters, which corrupts keys that legitimately end in "1".
    # NOTE(review): the origin of the ":1" suffix is not visible here - confirm.
    s3_url = http_url.replace(
        "https://landsatlook.usgs.gov/data", "s3://usgs-landsat"
    ).removesuffix(":1")
    print(f"Converted {http_url} to {s3_url}")
    return s3_url

# Load options for the antimeridian debugging run.
# - Asset URLs are rewritten through http_to_s3_url before being read.
# - Errors are not suppressed (skip_broken_datasets=False, fail_on_error=True).
# - Add chunks={"x": 512, "y": 512} to make loading lazy, groupby="solar_day"
#   to merge same-day scenes, or bands=["red"] for a quicker single-band run.
load_kwargs = dict(
    bands=["red", "green", "blue"],
    crs="utm",
    resolution=30,
    bbox=bbox,
    skip_broken_datasets=False,
    fail_on_error=True,
    patch_url=http_to_s3_url,
)

ds = load(items, **load_kwargs)

ds



In [None]:
# import boto3
# s3 = boto3.client("s3")
# s3.head_object(Bucket="usgs-landsat", Key="collection02/level-2/standard/oli-tirs/2024/073/072/LC08_L2SR_073072_20241002_20241008_02_T1/LC08_L2SR_073072_20241002_20241008_02_T1_SR_B4.TIF", RequestPayer="requester")

In [None]:
# One image panel per time step, four panels per row, to pick a clear date by
# eye. to_array() combines the loaded bands into a single DataArray
# (presumably rendered as RGB - confirm).
ds.to_array().plot.imshow(col="time", col_wrap=4, robust=True, size=5)
# NOTE(review): newer xarray releases prefer Dataset.to_dataarray() over
# to_array() - confirm against the installed version before switching.

In [None]:
# Date of the scene to inspect in detail (presumably picked by eye from the
# panels above - confirm)
best_image_date = "2024-06-04"

# squeeze() drops every length-1 dimension, so `time` is only kept as a
# dimension if more than one scene matches the date.
best_image = ds.sel(time=best_image_date).squeeze()

best_image

In [None]:
import numpy as np
import xarray as xr

# Select RGB bands
rgb = best_image[['red', 'green', 'blue']]

# Conservative stretch and mild gamma correction
percentile_stretch = (2, 98)
gamma = 1.4  # Increase for darker, less saturated look

rgb_uint8 = xr.Dataset()
for band in rgb.data_vars:
    band_data = rgb[band]
    # Stretch limits from the 2nd/98th percentiles, ignoring NaNs
    vmin, vmax = np.nanpercentile(band_data.values, percentile_stretch)
    # Normalize to 0..1 (NaNs are preserved by clip)
    norm = ((band_data - vmin) / (vmax - vmin)).clip(0, 1)
    # Apply gamma correction
    norm_gamma = norm ** gamma
    # Scale to 0..220 (not 255) to avoid maxing out white. NaNs must be filled
    # BEFORE the integer cast: casting NaN to uint8 is undefined, and a
    # fillna() after the cast has nothing left to fill.
    rgb_uint8[band] = (norm_gamma * 220).fillna(0).astype('uint8')

# best_image was squeezed, so `time` is still a dimension only when several
# scenes share the selected date; pick the first one in that case.
preview = rgb_uint8.isel(time=0) if "time" in rgb_uint8.dims else rgb_uint8
preview.odc.explore()