# Validation

This notebook assesses classification accuracy by comparing photo-interpreted validation 
plots with classified salvage maps.

In [None]:
import ee
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
    precision_score,
    recall_score,
)

from pfh.scripts import config

# Initialize the Earth Engine client before any `ee.*` objects are created below.
ee.Initialize()

## Load Data

In [None]:
# Validation plots, loaded as an Earth Engine FeatureCollection from the asset
# referenced by `config.INTERPRETATIONS`.
plots = ee.FeatureCollection(config.INTERPRETATIONS)

## Extract Salvage

Extract predicted salvage and burn severity data at each plot from Earth Engine.

In [None]:
# Binary salvage prediction: 1 where the mosaicked harvest collection is > 0,
# 0 everywhere else (masked pixels are filled with 0).
harvest = ee.ImageCollection(config.HARVEST_COLLECTION).mosaic()
harvest = harvest.gt(0).unmask(0).rename("pred_salvage")
severity = ee.ImageCollection(config.SEVERITY_COLLECTION).mosaic()

# Sample both layers at every plot, taking the first pixel value at 30 m scale.
plot_fc = ee.Image.cat([harvest, severity]).reduceRegions(
    collection=plots,
    reducer=ee.Reducer.first(),
    scale=30,
)

plot_data = (
    # Pull the sampled plots client-side; rows with a missing value in any
    # column are dropped.
    gpd.GeoDataFrame.from_features(plot_fc.getInfo())
    .dropna()
    # Exclude plots in unmanaged areas from analysis
    .loc[lambda df: df.wilderness.eq(0) & df.nps.eq(0)]
    # Exclude low confidence interpretations
    .loc[lambda df: df.notes.ne("low confidence")]
    # Replace the integer severity codes with their `config.SEVERITY_CLASSES` values
    .assign(
        severity=lambda df: df.severity.astype(int).map(config.SEVERITY_CLASSES)
    )
)

## Assess Accuracy

Overall accuracy across all retained validation plots: a per-class classification report and a confusion matrix.

In [None]:
# Reference labels from the validation plots (`salvage`) versus the labels
# extracted from the classified map (`pred_salvage`).
y_true = plot_data["salvage"]
y_pred = plot_data["pred_salvage"]

print(classification_report(y_true, y_pred))

# Pin the label order so the matrix is always 2x2 and lines up with the display
# labels, even if one class happens to be absent from the data.
# NOTE(review): assumes `salvage` uses the same 0/1 coding as `pred_salvage` — confirm.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
ConfusionMatrixDisplay(cm, display_labels=["Not Salvaged", "Salvaged"]).plot()

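Accuracy by burn severity class: one confusion matrix per class, followed by a summary table of precision, recall, overall accuracy, and plot count (with an "Overall" row for all plots).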

In [None]:
# Class codes in the order of the display labels. Passing them explicitly keeps
# every confusion matrix 2x2, even when a severity class contains plots from
# only one of the two classes.
# NOTE(review): assumes `salvage` uses the same 0/1 coding as `pred_salvage` — confirm.
class_codes = [0, 1]
class_names = ["Not Salvaged", "Salvaged"]

severity_classes = list(config.SEVERITY_CLASSES.values())
# squeeze=False always yields a 2D axes array, so the indexing below also works
# when only a single severity class is configured.
fig, axes = plt.subplots(
    nrows=1,
    ncols=len(severity_classes),
    figsize=(20, 5),
    sharey="row",
    squeeze=False,
)

severity_records = []

# The loop uses its own names (`severity_class`, `class_true`, `class_pred`) so
# it does not overwrite the notebook-level `severity`, `y_true`, or `y_pred`.
for i, severity_class in enumerate(severity_classes):
    ax = axes[0, i]
    severity_plots = plot_data[plot_data.severity.eq(severity_class)]

    if severity_plots.empty:
        # No validation plots in this class: record an empty row and leave the
        # panel blank instead of failing inside the metric functions.
        severity_records.append({
            "severity": severity_class,
            "precision": float("nan"),
            "recall": float("nan"),
            "overall_accuracy": float("nan"),
            "n": 0,
        })
        ax.set_title(severity_class)
        ax.axis("off")
        continue

    class_true = severity_plots["salvage"]
    class_pred = severity_plots["pred_salvage"]
    cm = confusion_matrix(class_true, class_pred, labels=class_codes)

    severity_records.append({
        "severity": severity_class,
        # zero_division=0 reports 0 without a warning when the class has no
        # predicted (precision) or no actual (recall) salvage plots.
        "precision": precision_score(class_true, class_pred, zero_division=0),
        "recall": recall_score(class_true, class_pred, zero_division=0),
        "overall_accuracy": accuracy_score(class_true, class_pred),
        "n": len(severity_plots),
    })

    disp = ConfusionMatrixDisplay(cm, display_labels=class_names)
    disp.plot(ax=ax)
    disp.ax_.set_title(severity_class)
    disp.im_.colorbar.remove()
    # Per-panel x labels are replaced by the single shared label added below;
    # only the first panel keeps its y label.
    disp.ax_.set_xlabel("")
    if i > 0:
        disp.ax_.set_ylabel("")


fig.text(0.4, 0.1, "Predicted label", ha="left")
plt.subplots_adjust(wspace=0.40, hspace=0.1)


severity_results = pd.DataFrame(severity_records).set_index("severity")
y_true = plot_data["salvage"]
y_pred = plot_data["pred_salvage"]

# Values are listed in the same order as the table columns:
# precision, recall, overall_accuracy, n.
overall = [
    precision_score(y_true, y_pred, zero_division=0),
    recall_score(y_true, y_pred, zero_division=0),
    accuracy_score(y_true, y_pred),
    len(plot_data),
]
# Add an "Overall" row to the severity results
severity_results.loc["Overall"] = overall
severity_results