# 05 — Evaluation & Interpretation

**Purpose:** Summarize CV and hold‑out performance, prefer Ridge diagnostics if available, and show feature-importance views for RF and Ridge with non‑lag focus.

**Inputs:**
- `./reports/metrics/cv_summary.csv`, `cv_summary_ridge.csv` (optional)
- `./reports/predictions/cv_predictions.csv`
- `./reports/metrics/holdout_metrics.csv`, `holdout_metrics_ridge.csv` (optional)
- `./reports/predictions/holdout_predictions.csv`, `holdout_predictions_ridge.csv` (optional)
- `./reports/metrics/holdout_permutation_importance.csv`, `holdout_permutation_importance_ridge.csv` (optional)
- `./reports/predictions/residuals_all_data.csv` (optional)
- `./reports/metrics/holdout_shap_mean_abs.csv` (optional)

**Outputs:** none (tables and figures are displayed inline; no files are written)

## 1) Load artifacts

In [None]:
from pathlib import Path
import pandas as pd, numpy as np, matplotlib.pyplot as plt
# Default figure size and resolution applied to every plot in this notebook.
plt.rcParams.update({"figure.figsize": (8, 5), "figure.dpi": 120})

# Root report directory and the two sub-directories the loaders read from.
BASE = Path("./reports")
MET = BASE / "metrics"
PRED = BASE / "predictions"

def sread(p):
    """Best-effort CSV reader.

    Returns the parsed DataFrame when ``p`` exists and can be read.
    Returns ``None`` when the file is missing, or when checking/reading it
    raises; in the latter case a "Read failed" message is printed first.
    """
    try:
        frame = pd.read_csv(p) if p.exists() else None
    except Exception as e:
        # Deliberately non-fatal: optional artifacts may be absent or broken.
        print("Read failed:", p, e)
        return None
    return frame

# Metrics: CV summaries and hold-out metrics for RF and Ridge.
# Each name is None when its file is missing or unreadable.
rf_cv, rg_cv, rf_ho, rg_ho = (
    sread(MET / fname)
    for fname in (
        "cv_summary.csv",
        "cv_summary_ridge.csv",
        "holdout_metrics.csv",
        "holdout_metrics_ridge.csv",
    )
)

# Predictions: hold-out predictions for RF and Ridge.
rf_ho_p, rg_ho_p = (
    sread(PRED / fname)
    for fname in ("holdout_predictions.csv", "holdout_predictions_ridge.csv")
)

# Importance: permutation importance per model, plus mean |SHAP| (typically RF).
imp_rf, imp_rg, shapabs = (
    sread(MET / fname)
    for fname in (
        "holdout_permutation_importance.csv",
        "holdout_permutation_importance_ridge.csv",
        "holdout_shap_mean_abs.csv",
    )
)

## 2) Executive summary

In [None]:
def summary_line(df, label):
    """Format the first row of a metrics table as a one-line summary.

    Reads the ``R2``, ``MAE`` and ``RMSE`` columns of the first row and, if
    present, the ``years`` column (falling back to the text "hold-out").
    Returns ``None`` when ``df`` is ``None`` or has no rows.
    """
    if df is None or df.empty:
        return None
    first = df.iloc[0]
    r2, mae, rmse = (float(first[key]) for key in ("R2", "MAE", "RMSE"))
    span = first.get("years", "hold-out")
    return f"{label}: R² {r2:.3f}; MAE {mae:.1f}; RMSE {rmse:.1f} ({span})"

# One human-readable line per metrics table that was loaded and is non-empty.
lines = []

# CV tables: average only the rows whose "fold" value parses as a number.
for cv_label, cv_df in (("RF CV", rf_cv), ("Ridge CV", rg_cv)):
    if cv_df is None or cv_df.empty:
        continue
    is_fold_row = pd.to_numeric(cv_df["fold"], errors="coerce").notna()
    m = cv_df.loc[is_fold_row, ["R2", "MAE", "RMSE"]].astype(float)
    lines.append(
        f"{cv_label}: R² {m['R2'].mean():.3f} ± {m['R2'].std():.3f}; "
        f"MAE {m['MAE'].mean():.1f}; RMSE {m['RMSE'].mean():.1f}"
    )

# Hold-out tables: summarised from their first row by summary_line.
for ho_label, ho_df in (("RF Hold-out", rf_ho), ("Ridge Hold-out", rg_ho)):
    if ho_df is not None and not ho_df.empty:
        lines.append(summary_line(ho_df, ho_label))

print("\n".join(lines) if lines else "No metrics available.")

## 3) Hold‑out diagnostics (prefer Ridge, else RF)

In [None]:
# Pick the predictions to diagnose: Ridge when its hold-out predictions were
# loaded and are non-empty, otherwise Random Forest.
pred_df = None; model_name = None

if rg_ho_p is not None and not rg_ho_p.empty:
    pred_df = rg_ho_p.copy(); model_name = "Ridge"
elif rf_ho_p is not None and not rf_ho_p.empty:
    pred_df = rf_ho_p.copy(); model_name = "Random Forest"

if pred_df is None:
    print("No hold-out predictions found for Ridge or RF.")
else:
    x = pred_df["y_true"].to_numpy(dtype=float)
    y = pred_df["y_pred"].to_numpy(dtype=float)

    # Keep only rows where both values are finite. ndarray.min()/max()
    # propagate NaN, so a single missing value would otherwise distort the
    # diagonal limits, and NaN residuals make the histogram binning fail.
    finite = np.isfinite(x) & np.isfinite(y)
    x, y = x[finite], y[finite]

    if x.size == 0:
        print(f"No finite hold-out predictions to plot for {model_name}.")
    else:
        # Scatter of observed vs predicted with the y = x reference line.
        plt.figure(); plt.scatter(x, y, alpha=0.4)
        lim = [min(x.min(), y.min()), max(x.max(), y.max())]
        plt.plot(lim, lim); plt.xlabel("y_true (kg/ha)"); plt.ylabel("y_pred (kg/ha)")
        plt.title(f"Hold-out: y_true vs y_pred ({model_name})")
        plt.tight_layout(); plt.show()

        # Residuals are y_true - y_pred; positive means under-prediction.
        resid = x - y
        plt.figure(); plt.hist(resid, bins=30)
        plt.title(f"Hold-out residuals ({model_name})"); plt.xlabel("Residual"); plt.ylabel("Count")
        plt.tight_layout(); plt.show()

## 4) Feature importance — non‑lag focus and category summaries

In [None]:
def is_lag_or_roll(feat: str) -> bool:
    """Return True for features excluded from the non-lag views.

    A feature is excluded when its name (after ``str()`` coercion) starts with
    ``cereal_yield`` or contains ``_lag`` or ``_roll``. Matching is
    case-sensitive.
    """
    name = str(feat)
    # Yield-derived columns are excluded explicitly, whatever their suffix.
    return name.startswith("cereal_yield") or "_lag" in name or "_roll" in name

def feature_category(feat: str) -> str:
    """Map a feature name to a coarse category label.

    The name is coerced with ``str()`` (as ``is_lag_or_roll`` does) and
    lower-cased, so non-string values such as NaN from a blank CSV cell fall
    through to "Other" instead of raising. The first matching substring wins,
    in the order listed below.
    """
    f = str(feat).lower()
    # Ordered (substrings, label) rules; order decides ties between rules.
    rules = (
        (("temp",), "Climate: temperature"),
        (("precip",), "Climate: precipitation"),
        (("fertilizer",), "Inputs: fertilizer"),
        (("gdp",), "Macro: income"),
        (("co2",), "Macro: emissions"),
        (("rural_pop", "population"), "Demography"),
    )
    for needles, category in rules:
        if any(needle in f for needle in needles):
            return category
    return "Other"

def show_nonlag(df, value_col, label):
    """Display the top 15 non-lag features ranked by ``value_col``.

    Rows whose ``feature`` satisfies ``is_lag_or_roll`` are dropped and the
    rest are sorted by ``value_col`` in descending order. Returns the full
    filtered, sorted table, or ``None`` (after printing a notice) when ``df``
    is ``None`` or empty.
    """
    if df is None or df.empty:
        print(f"{label}: not available.")
        return None
    keep = ~df["feature"].apply(is_lag_or_roll)
    ranked = df[keep].copy().sort_values(value_col, ascending=False)
    print(label + " — top non-lag features:")
    display(ranked.head(15))
    return ranked

def cat_table(df, value_col, label):
    """Display the mean of ``value_col`` per feature category.

    Each row's ``feature`` is mapped through ``feature_category``; categories
    are shown in descending order of their mean. Does nothing when ``df`` is
    ``None`` or empty.
    """
    if df is None or df.empty:
        return
    with_cat = df.assign(category=df["feature"].apply(feature_category))
    means = with_cat.groupby("category", as_index=False)[value_col].mean()
    tab = means.sort_values(value_col, ascending=False)
    print(label + " — category averages:")
    display(tab)

# Permutation importance with lag/rolling features removed; each result is
# None when the corresponding table was not loaded or is empty.
perm_rf_nonlag = show_nonlag(imp_rf, "perm_importance", "RF Permutation importance")
perm_rg_nonlag = show_nonlag(imp_rg, "perm_importance", "Ridge Permutation importance")

# Bind shap_nonlag unconditionally so it mirrors perm_*_nonlag (None when
# SHAP values are unavailable) instead of being undefined in that case.
shap_nonlag = None
if shapabs is not None and not shapabs.empty:
    shap_nonlag = (
        shapabs[~shapabs["feature"].apply(is_lag_or_roll)]
        .copy()
        .sort_values("mean_abs_shap", ascending=False)
    )
    print("RF SHAP (mean |SHAP|) — top non-lag features:")
    display(shap_nonlag.head(15))
else:
    print("RF SHAP: not available.")

# Category-level averages of the non-lag permutation importances
# (cat_table is a no-op for None or empty input).
cat_table(perm_rf_nonlag, "perm_importance", "RF permutation")
cat_table(perm_rg_nonlag, "perm_importance", "Ridge permutation")

## 5) Notes & environment

In [None]:
import sys, platform, numpy, pandas, matplotlib
# Print interpreter, platform and library versions, one "Name: value" per line.
for _name, _value in (
    ("Python", sys.version.split()[0]),
    ("Platform", platform.platform()),
    ("NumPy", numpy.__version__),
    ("Pandas", pandas.__version__),
    ("Matplotlib", matplotlib.__version__),
):
    print(f"{_name}:", _value)