In [None]:
import joblib
import geopandas as gpd
from pystac.client import Client
from odc.stac import load

import pandas as pd

from utils import calculate_band_indices, apply_masks, scale

from sklearn_evaluation.plot import confusion_matrix

In [None]:
# Load the previously saved classifier from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code,
# so only load model files that come from a trusted source.
model_path = "models/model-geomad-joined-data-rf-04032025.model"
model = joblib.load(model_path)

In [None]:
# Read the test features from GeoJSON and display them on an interactive map
test_data_path = "testing-data/utanglang_postcard.geojson"
region = gpd.read_file(test_data_path)
region.explore()

In [None]:
# Search parameters: the bounding box of the test features, one year of data,
# and the collection to query
bbox = list(region.total_bounds)
year = "2024"
collection = "dep_s2_geomad"

# Query the STAC catalogue for every item matching those parameters
client = Client.open("https://stac.digitalearthpacific.org")
search = client.search(
    collections=collection,
    bbox=bbox,
    datetime=year,
)
items = search.item_collection()

print(f"Found {len(items)} items")

In [None]:
# Load the requested bands for the search results, clipped to the bounding box
# of the test features. Passing `chunks` makes the load lazy (Dask-backed), so
# pixels are only read when a later step computes on them.
data = load(
    items,
    bbox=bbox,
    # Each band is requested exactly once; "green" was previously listed twice.
    # The order of the remaining bands is deliberately left unchanged.
    measurements=[
        "nir",
        "red",
        "blue",
        "green",
        "emad",
        "smad",
        "bcmad",
        "count",
        "nir08",
        "nir09",
        "swir16",
        "swir22",
        "coastal",
        "rededge1",
        "rededge2",
        "rededge3",
    ],
    chunks={"x": 2048, "y": 2048},
)

data

In [None]:
# Prepare the loaded data with the project helpers from `utils`, applied in
# this order: scale -> apply_masks -> calculate_band_indices
scaled_data = scale(data)
mask_applied = apply_masks(scaled_data)
masked = calculate_band_indices(mask_applied)
masked

In [None]:
# NOTE: the `training_*` names below hold the *test* data used for evaluation;
# the names are kept because later cells refer to them.

# Reproject the test points into the CRS of the loaded raster data
test = region.to_crs(data.odc.geobox.crs)

# Add each point's x and y coordinates as columns, then convert to xarray
training_da = test.assign(x=test.geometry.x, y=test.geometry.y).to_xarray()

# Sample the prepared dataset at the pixel nearest to every point
point_coords = training_da[["x", "y"]]
training_values = (
    masked.sel(point_coords, method="nearest")
    .squeeze()
    .compute()
    .to_pandas()
)

# Place the known class ids next to the sampled values, discard the coordinate
# bookkeeping columns, and remove rows where no data was available
training_array = (
    pd.concat([test["cc_id"], training_values], axis=1)
    .drop(columns=["y", "x", "spatial_ref"])
    .dropna()
)

# Preview the resulting table
training_array.head()

In [None]:
# Known class labels for the test points
coastal_class = training_array.loc[:, "cc_id"]

# Everything except the label and the time column is passed to the model
variables = training_array.drop(["cc_id", "time"], axis="columns")

# Predict a class for every test point so it can be compared with the labels
predictions = model.predict(variables)

In [None]:
# Plot known classes against predicted classes
confusion_matrix(y_true=coastal_class, y_pred=predictions)