## Setup & Imports

In [None]:
from pathlib import Path
import sys

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

sys.path.append("../")

from src.readers.geom_reader import load_geodata
from src.readers.results_reader import read_conceptual_results, read_neural_results
from src.timeseries_stats.metrics import (
    calculate_bias,
    calculate_kge,
    calculate_mae,
    calculate_nse,
    calculate_rmse,
)
from src.utils.logger import setup_logger

# Figure typography shared by every plot produced in this notebook.
# NOTE(review): Matplotlib consults "font.serif" only when "font.family"
# contains the generic "serif" alias, so the Times New Roman entry is
# presumably unused with this configuration — confirm the intended font.
plt.rcParams.update(
    {
        "font.family": "DeJavu Serif",
        "font.serif": ["Times New Roman"],
    }
)

# Notebook-wide logger, created with the chapter log file path.
log = setup_logger("chapter_two", log_file="../logs/chapter_two.log")

## 1. Load Data

In [None]:
# Watershed geometries and gauge locations come from the project geodata loader.
ws, gauges = load_geodata(folder_depth="../")

# The gauge index is kept as a plain list and handed to the result readers.
common_index = gauges.index.tolist()

# Chapter 1 cluster assignments, indexed by gauge_id (parsed as a string).
gauge_mapping = pd.read_csv(
    "../res/chapter_one/gauge_hybrid_mapping.csv",
    dtype={"gauge_id": str},
    index_col="gauge_id",
)

print(f"Loaded {len(gauges)} gauges with hybrid classification")

## 2. Load Model Results

In [None]:
# Result locations, one directory per model.
gr4j_dir = Path("../test/gr4j")
hbv_dir = Path("../test/hbv")
rfr_dir = Path("../test/rfr")
lstm_dir = Path("../archive/neural_forecast")

# GR4J, HBV and RFR all go through the conceptual-results reader and differ
# only in directory and model name.
gr4j_results = read_conceptual_results(
    gr4j_dir,
    model_name="GR4J",
    common_index=common_index,
)
hbv_results = read_conceptual_results(
    hbv_dir,
    model_name="HBV",
    common_index=common_index,
)
rfr_results = read_conceptual_results(
    rfr_dir,
    model_name="RFR",
    common_index=common_index,
)

# LSTM output is read with the dedicated neural-results reader.
lstm_results = read_neural_results(lstm_dir, common_index=common_index)

# Report how many gauges each reader returned.
for label, loaded in (
    ("GR4J", gr4j_results),
    ("HBV", hbv_results),
    ("RFR", rfr_results),
    ("LSTM", lstm_results),
):
    print(f"Loaded results for {len(loaded)} gauges ({label})")

## 3. Calculate Performance Metrics

In [None]:
def compute_all_metrics(obs, sim):
    """Score one simulated series against its observations.

    Args:
        obs: Observed values, passed unchanged to every metric function.
        sim: Simulated values to evaluate against ``obs``.

    Returns:
        dict: Metric values keyed ``"nse"``, ``"kge"``, ``"rmse"``,
        ``"mae"`` and ``"bias"``, in that order.
    """
    # Table of (result key, metric function); evaluated top to bottom.
    scorers = (
        ("nse", calculate_nse),
        ("kge", calculate_kge),
        ("rmse", calculate_rmse),
        ("mae", calculate_mae),
        ("bias", calculate_bias),
    )
    return {key: scorer(obs, sim) for key, scorer in scorers}


# Compute metrics for all models.
#
# Observations are taken from the GR4J results and reused for every model.
# Each model is scored inside its own try block so that a gauge missing from
# one model's results (or a failing metric call) no longer discards the
# metrics of the models that come after it.
model_results = {
    "GR4J": gr4j_results,
    "HBV": hbv_results,
    "RFR": rfr_results,
    "LSTM": lstm_results,
}
# Models that are skipped without a warning when they have no entry for a gauge.
optional_models = {"LSTM"}

metrics_data = []

for gauge_id in common_index:
    try:
        # Extract observed data (same for all models)
        obs = gr4j_results[gauge_id]["obs"]
    except Exception as e:
        # Without observations nothing can be scored for this gauge.
        log.warning(f"Error processing {gauge_id}: {e}")
        continue

    for model_name, results in model_results.items():
        if model_name in optional_models and gauge_id not in results:
            continue
        try:
            scores = compute_all_metrics(obs, results[gauge_id]["sim"])
        except Exception as e:
            # Log and move on to the next model; one bad model must not
            # suppress the others for this gauge.
            log.warning(f"Error processing {gauge_id} ({model_name}): {e}")
            continue
        metrics_data.append({"gauge_id": gauge_id, "model": model_name, **scores})

# One row per successfully scored (gauge, model) pair.
metrics_df = pd.DataFrame(metrics_data)
print(f"\nCalculated metrics for {len(metrics_df)} gauge-model pairs")

## 4. Overall Performance Comparison

In [None]:
# Per-model mean, median and standard deviation of every metric column.
metric_columns = ["nse", "kge", "rmse", "mae", "bias"]
statistics = ["mean", "median", "std"]

aggregation_spec = {metric: list(statistics) for metric in metric_columns}
summary = metrics_df.groupby("model").agg(aggregation_spec)

print("\nOverall Model Performance (Mean ± Std):")
print(summary)

In [None]:
# Box plots of the per-gauge scores, one panel per metric.
# Each entry: (metric column, panel title, reference line value or None).
panels = [
    ("nse", "Nash-Sutcliffe Efficiency", 0.5),
    ("kge", "Kling-Gupta Efficiency", 0.5),
    ("rmse", "Root Mean Squared Error", None),
]

fig, axes = plt.subplots(1, len(panels), figsize=(15, 5))

for ax, (metric, title, threshold) in zip(axes, panels):
    sns.boxplot(data=metrics_df, x="model", y=metric, ax=ax)
    ax.set_title(title)
    if threshold is not None:
        # Dashed reference line; only the panels that have one get a legend.
        ax.axhline(threshold, color="red", linestyle="--", alpha=0.5, label="Threshold")
        ax.legend()

plt.tight_layout()

# savefig does not create missing directories, and this is the first write
# into res/chapter_two, so make sure the folder exists before saving.
boxplot_path = Path("../res/chapter_two/model_comparison_boxplots.png")
boxplot_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(boxplot_path, dpi=300)
plt.show()

## 5. Performance by Cluster

In [None]:
# Attach each gauge's hybrid class to its metric rows. The merge uses the
# pandas default (inner) join against the mapping's gauge_id index.
class_lookup = gauge_mapping[["hybrid_class"]]
metrics_with_clusters = metrics_df.merge(
    class_lookup,
    left_on="gauge_id",
    right_index=True,
)

# Mean NSE per (hybrid class, model), laid out with classes as rows and
# models as columns.
mean_nse = metrics_with_clusters.groupby(["hybrid_class", "model"])["nse"].mean()
cluster_performance = mean_nse.unstack("model")

print("\nMean NSE by Cluster:")
print(cluster_performance)

In [None]:
# Heatmap of mean NSE: hybrid classes as rows, models as columns.
# The colour scale is fixed to [0, 1] via vmin/vmax.
plt.figure(figsize=(12, 8))
sns.heatmap(
    cluster_performance,
    annot=True,
    fmt=".2f",
    cmap="RdYlGn",
    vmin=0,
    vmax=1,
    cbar_kws={"label": "Mean NSE"},
)
plt.title("Model Performance by Hybrid Class (NSE)")
plt.xlabel("Model")
plt.ylabel("Hybrid Class")
plt.tight_layout()

# savefig does not create missing directories; ensure the output folder
# exists so the figure is not lost to a FileNotFoundError.
heatmap_path = Path("../res/chapter_two/cluster_performance_heatmap.png")
heatmap_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(heatmap_path, dpi=300)
plt.show()

## 6. Best Model by Cluster

In [None]:
# For every hybrid class (row), pick the model (column) with the highest mean NSE.
best_models = cluster_performance.idxmax(axis="columns")
print("\nBest Model by Cluster (NSE):", best_models, sep="\n")

# Tally how many classes each model wins.
model_counts = best_models.value_counts()
print("\nModel Preference Summary:", model_counts, sep="\n")

## 7. Export Results

In [None]:
# to_csv does not create missing directories, so make sure the output folder
# exists before writing any of the tables.
output_dir = Path("../res/chapter_two")
output_dir.mkdir(parents=True, exist_ok=True)

# Full per-gauge, per-model metrics table (row index omitted).
metrics_df.to_csv(output_dir / "model_metrics_all.csv", index=False)

# Per-model summary statistics.
summary.to_csv(output_dir / "model_metrics_summary.csv")

# Mean NSE by hybrid class and model.
cluster_performance.to_csv(output_dir / "cluster_performance_nse.csv")

print("\n✓ Exported performance metrics to res/chapter_two/")

## Summary

- **Models evaluated:** GR4J, HBV, RFR, LSTM
- **Metrics:** NSE, KGE, RMSE, MAE, Bias
- **Cluster analysis:** Performance varies significantly across hybrid classes
- **Key finding:** No single model dominates all regions — cluster-specific model selection recommended