# Recuperación de GW usando series sintéticas de precipitación
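Se ajusta un modelo de regresión lineal por pozo (variable GW) y se simulan escenarios sintéticos de precipitación futura; la segunda celda repite el análisis para caudal (Q). Se usan funciones del script `fun_LR_hydro_memory.py`.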

In [20]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import sys
from pathlib import Path
import os


sys.path.append(str(Path('..').resolve()))
from fun_LR_hydro_memory import make_lag_ranges, rolling_predictors

# Directory that receives one GW recovery figure per well; created if missing.
out_figs = 'figs_gw_recovery'
os.makedirs(out_figs, exist_ok=True)

# === Configuration ===
# Label of the lag configuration; it is not referenced again in the cells
# shown here (presumably kept for bookkeeping -- confirm before removing).
selected_lag_type_gw = 'type2_incr4'
# make_lag_ranges returns three values; only the first (the lag ranges passed
# to rolling_predictors below) is used.
lag_ranges, _, _ = make_lag_ranges(lag_increase=4, n_windows=6, incr_type=2)

# === Load observed series ===
# Both tables are later indexed by 'date' and addressed by well id as the
# column name, i.e. one column per well.
df_obs = pd.read_csv('../../data/ms_data/wells_gw.csv', parse_dates=['date'])
df_pr = pd.read_csv('../../data/ms_data/wells_pr.csv', parse_dates=['date'])

# === Load previous results (used only to select which wells to process) ===
# well_id is read as str so the ids can be used directly as column labels of
# df_obs / df_pr.
df_r2 = pd.read_csv('../4_GW_Q_memory/csv/gw_memory.csv', dtype={'well_id': str})
eligible_wells = df_r2['well_id'].unique()

# === for loop through wells ===
# well_id = '5120013'  # another example well
# well_id = '5415008'
# well_id = '3434005'

def _predict_series(model, features_df):
    """Predict with *model* on the fully finite rows of *features_df*.

    Rows containing NaN or +/-inf are dropped before predicting, so the
    result may be shorter than the input.

    Args:
        model: Fitted estimator exposing ``predict``.
        features_df: Predictor table, one row per time step.

    Returns:
        Series of predictions indexed by the retained rows. When no row
        survives, an empty float Series that keeps the index type of
        *features_df* (so ``.index.month`` still works downstream).
    """
    features_df = features_df.dropna()
    finite_mask = np.all(np.isfinite(features_df.values), axis=1)
    features_clean = features_df.loc[finite_mask]
    if features_clean.empty:
        # predict() rejects zero-sample input, so short-circuit here.
        return pd.Series(index=features_clean.index, dtype=float)
    return pd.Series(model.predict(features_clean.values), index=features_clean.index)


def _add_monthly_mean_offset(series, monthly_means):
    """Turn anomalies back into absolute values by adding the monthly mean.

    Args:
        series: Anomaly series with a DatetimeIndex.
        monthly_means: Series indexed by calendar month number.

    Returns:
        ``series`` plus the mean of the calendar month of each timestamp.

    Raises:
        ValueError: If any month present in ``series`` has no (or a NaN)
            entry in ``monthly_means``.
    """
    offsets = pd.Series(series.index.month, index=series.index).map(monthly_means)
    if offsets.isna().any():
        raise ValueError('Monthly mean missing for at least one month in series index.')
    return series + offsets


def _simulate_scenario(model, pr_an, lag_ranges, future_index, winter_months, winter_anomaly=None):
    """Simulate the target anomaly over *future_index* for one scenario.

    The future precipitation anomaly is zero everywhere, except in
    *winter_months* where it equals *winter_anomaly* (when given). History and
    future are concatenated before building predictors so that the lagged
    windows of the first future months can draw on observed precipitation.

    Args:
        model: Fitted estimator exposing ``predict``.
        pr_an: Historical precipitation anomaly series.
        lag_ranges: Lag windows forwarded to ``rolling_predictors``.
        future_index: DatetimeIndex of the months to simulate.
        winter_months: Calendar month numbers that receive the surplus.
        winter_anomaly: Constant anomaly for those months; ``None`` keeps the
            whole future at exactly zero anomaly.

    Returns:
        Predicted anomalies restricted to dates on or after the first
        future month.
    """
    pr_future = pd.Series(0.0, index=future_index)
    if winter_anomaly is not None:
        pr_future[future_index.month.isin(winter_months)] = winter_anomaly
    pr_full = pd.concat([pr_an, pr_future])
    features = rolling_predictors(pr_full, lag_ranges, standardize=False).dropna()
    sim_full = _predict_series(model, features)
    return sim_full[sim_full.index >= future_index[0]]


# (label, winter surplus as a fraction of mean winter precipitation, colour).
# None means "no surplus": the future anomaly stays at exactly zero.
scenario_specs = (
    ('S1: avg precip', None, 'tab:orange'),
    ('S2: +10% winter surplus', 0.1, 'tab:green'),
    ('S3: +30% winter surplus', 0.3, 'tab:purple'),
)
winter_months = [6, 7, 8]
future_months = 120                     # 10-year scenario horizon
window_months = 6                       # rolling-mean window applied before plotting
pre_md_end = pd.Timestamp('2010-01-01')  # reference mean uses data strictly before this date

# Loop-invariant: index and sort the tables by date once instead of per well.
obs_by_date = df_obs.set_index('date').sort_index()
pr_by_date = df_pr.set_index('date').sort_index()

for well_id in eligible_wells:
    # The well list comes from a separate results file; skip ids that have no
    # column in either table instead of aborting the whole batch.
    if well_id not in obs_by_date.columns or well_id not in pr_by_date.columns:
        print(f"Skipping well {well_id}: no matching column in GW or precipitation data")
        continue

    obs_abs = obs_by_date[well_id].asfreq('MS')
    pr_abs = pr_by_date[well_id].asfreq('MS')

    # === Monthly climatology and anomalies ===
    mon_mean_obs_abs = obs_abs.groupby(obs_abs.index.month).mean()
    mon_mean_pr = pr_abs.groupby(pr_abs.index.month).mean()
    obs_an = obs_abs - obs_abs.groupby(obs_abs.index.month).transform('mean')
    pr_an = pr_abs - pr_abs.groupby(pr_abs.index.month).transform('mean')

    # Reference level drawn as the horizontal dashed line in the figure.
    mean_obs_abs_pre_MD = obs_abs[obs_abs.index < pre_md_end].mean()

    # === Build historical predictors & model ===
    X_hist_df = rolling_predictors(pr_an, lag_ranges, standardize=False).dropna()
    y_hist_full = obs_an.reindex(X_hist_df.index)
    valid_mask = y_hist_full.notna() & np.isfinite(y_hist_full.values)
    valid_mask &= np.all(np.isfinite(X_hist_df.values), axis=1)

    X_hist_clean = X_hist_df.loc[valid_mask]
    y_hist_clean = y_hist_full.loc[valid_mask]

    # Fitting on zero samples raises; skip the well, as the streamflow loop does.
    if X_hist_clean.empty or y_hist_clean.empty:
        print(f"Skipping well {well_id}: insufficient training data")
        continue

    model = LinearRegression().fit(X_hist_clean.values, y_hist_clean.values)
    r2_hist = r2_score(y_hist_clean.values, model.predict(X_hist_clean.values))

    # === Predictions for the historical period ===
    obs_sim_hist = _predict_series(model, X_hist_df)

    # === Future precipitation scenarios ===
    last_hist_date = pr_an.index.max()
    future_index = pd.date_range(last_hist_date + pd.offsets.MonthBegin(1), periods=future_months, freq='MS')
    future_start = future_index[0]

    # The surplus is a fraction of the mean over the winter months' climatology
    # (one constant for all winter months, not a per-month value).
    winter_mean_pr = mon_mean_pr.loc[winter_months].mean()
    obs_sim_scenarios = [
        _simulate_scenario(
            model, pr_an, lag_ranges, future_index, winter_months,
            winter_anomaly=None if fraction is None else fraction * winter_mean_pr,
        )
        for _, fraction, _ in scenario_specs
    ]

    # === Compute absolute values ===
    try:
        obs_sim_abs_hist = _add_monthly_mean_offset(obs_sim_hist, mon_mean_obs_abs)
        obs_sim_abs_scenarios = [
            _add_monthly_mean_offset(sim, mon_mean_obs_abs) for sim in obs_sim_scenarios
        ]
    except ValueError as exc:
        print(f"Skipping well {well_id}: {exc}")
        continue

    # === Plot smoothed absolute values ===
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.scatter(obs_abs.index, obs_abs.values, label='Obs GW ', color='black', alpha=0.8, s=6, zorder=5)
        hist_smooth = obs_sim_abs_hist.rolling(window=window_months, min_periods=1).mean()
        ax.plot(hist_smooth.index, hist_smooth.values, label=f'Sim GW (R²={r2_hist:.2f})', color='tab:blue', lw=.8)
        for (label, _, color), sim_abs in zip(scenario_specs, obs_sim_abs_scenarios):
            sim_smooth = sim_abs.rolling(window=window_months, min_periods=1).mean()
            ax.plot(sim_smooth.index, sim_smooth.values, label=label, color=color, lw=.8)
        ax.axvline(future_start, color='gray', linestyle='--', alpha=0.6)
        ax.axhline(mean_obs_abs_pre_MD, color='k', linestyle='--', linewidth=0.8)
        ax.legend()
        ax.set_xlabel('Date')
        ax.set_ylabel('GW (m)')
        ax.set_title(f'Well {well_id} GW recovery')
        fig.tight_layout()
        fig.savefig(os.path.join(out_figs, f"gw_recovery_{well_id}.png"), bbox_inches='tight')
    finally:
        # Always release the figure so a failed save does not leak it.
        plt.close(fig)


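# === Recovery time diagnostics (disabled) ===
# NOTE: the code below references `scenario_splits`, which is not defined in the
# cells shown here; rebuild it from the scenario series before re-enabling.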
# def first_recovery(series, threshold=0.05):
#     mask = np.abs(series.values) < threshold
#     if not mask.any():
#         return None
#     return series.index[mask.argmax()]

# print('Recovery (|GW| < 0.05 m)')
# rec_hist = first_recovery(obs_sim_hist)
# print(f"  Historical model: {rec_hist.date() if rec_hist else 'No recovery in historical period'}")
# rec_zero = first_recovery(scenario_splits['zero_future'])
# print(f"  Future scenario (zero anomaly): {rec_zero.date() if rec_zero else 'No recovery in next 10 years'}")
# rec_surplus = first_recovery(scenario_splits['surplus_future'])
# print(f"  Future scenario (+30% winter surplus): {rec_surplus.date() if rec_surplus else 'No recovery in next 10 years'}")


Skipping well 2942006: Monthly mean missing for at least one month in series index.
Skipping well 5713014: Monthly mean missing for at least one month in series index.
Skipping well 5730036: Monthly mean missing for at least one month in series index.
Skipping well 5731006: Monthly mean missing for at least one month in series index.
Skipping well 5732008: Monthly mean missing for at least one month in series index.
Skipping well 5732011: Monthly mean missing for at least one month in series index.
Skipping well 5733013: Monthly mean missing for at least one month in series index.
Skipping well 5734010: Monthly mean missing for at least one month in series index.
Skipping well 5735011: Monthly mean missing for at least one month in series index.
Skipping well 5737014: Monthly mean missing for at least one month in series index.
Skipping well 5744008: Monthly mean missing for at least one month in series index.
Skipping well 6130004: Monthly mean missing for at least one month in series

In [22]:
# === Streamflow recovery analysis ===
# Directory that receives one streamflow recovery figure per gauge; created if missing.
out_figs_q = 'figs_q_recovery'
os.makedirs(out_figs_q, exist_ok=True)

# Label of the lag configuration; it is not referenced again in the cells
# shown here (presumably kept for bookkeeping -- confirm before removing).
selected_lag_type_q = 'type1_incr1'
# make_lag_ranges returns three values; only the first (the lag ranges passed
# to rolling_predictors below) is used.
lag_ranges_q, _, _ = make_lag_ranges(lag_increase=1, n_windows=6, incr_type=1)

# Streamflow and precipitation tables; both are later indexed by 'date' and
# addressed by gauge id as the column name, i.e. one column per gauge.
df_q = pd.read_csv('../../data/camels/camels_q_mm.csv', parse_dates=['date'])
df_pr_q = pd.read_csv('../../data/camels/camels_pr_mm.csv', parse_dates=['date'])

# Previous regression results: keep only gauges whose 'r2_all' exceeds 0.4.
# gauge_id is read as str so the ids can be used directly as column labels.
df_r2_q = pd.read_csv('../3_run_LR_Q/csv/r2_LR_rest_pos_False_trained_all.csv', dtype={'gauge_id': str})
eligible_gauges = set(df_r2_q[df_r2_q['r2_all'] > 0.4]['gauge_id'].unique())

def _predict_series(model, features_df):
    """Predict with *model* on the fully finite rows of *features_df*.

    Rows containing NaN or +/-inf are dropped before predicting, so the
    result may be shorter than the input.

    Args:
        model: Fitted estimator exposing ``predict``.
        features_df: Predictor table, one row per time step.

    Returns:
        Series of predictions indexed by the retained rows. When no row
        survives, an empty float Series that keeps the index type of
        *features_df* (so ``.index.month`` still works downstream).
    """
    features_df = features_df.dropna()
    finite_mask = np.all(np.isfinite(features_df.values), axis=1)
    features_clean = features_df.loc[finite_mask]
    if features_clean.empty:
        # predict() rejects zero-sample input, so short-circuit here. Reusing
        # the (empty) source index keeps it a DatetimeIndex; a bare
        # pd.Series(dtype=float) would break the later ``.index.month`` access.
        return pd.Series(index=features_clean.index, dtype=float)
    return pd.Series(model.predict(features_clean.values), index=features_clean.index)


def _add_monthly_mean_offset(series, monthly_means):
    """Turn anomalies back into absolute values by adding the monthly mean.

    Args:
        series: Anomaly series with a DatetimeIndex.
        monthly_means: Series indexed by calendar month number.

    Returns:
        ``series`` plus the mean of the calendar month of each timestamp.

    Raises:
        ValueError: If any month present in ``series`` has no (or a NaN)
            entry in ``monthly_means``.
    """
    offsets = pd.Series(series.index.month, index=series.index).map(monthly_means)
    if offsets.isna().any():
        raise ValueError('Monthly mean missing for at least one month in series index.')
    return series + offsets


def _simulate_scenario(model, pr_an, lag_ranges, future_index, winter_months, winter_anomaly=None):
    """Simulate the target anomaly over *future_index* for one scenario.

    The future precipitation anomaly is zero everywhere, except in
    *winter_months* where it equals *winter_anomaly* (when given). History and
    future are concatenated before building predictors so that the lagged
    windows of the first future months can draw on observed precipitation.

    Args:
        model: Fitted estimator exposing ``predict``.
        pr_an: Historical precipitation anomaly series.
        lag_ranges: Lag windows forwarded to ``rolling_predictors``.
        future_index: DatetimeIndex of the months to simulate.
        winter_months: Calendar month numbers that receive the surplus.
        winter_anomaly: Constant anomaly for those months; ``None`` keeps the
            whole future at exactly zero anomaly.

    Returns:
        Predicted anomalies restricted to dates on or after the first
        future month.
    """
    pr_future = pd.Series(0.0, index=future_index)
    if winter_anomaly is not None:
        pr_future[future_index.month.isin(winter_months)] = winter_anomaly
    pr_full = pd.concat([pr_an, pr_future])
    features = rolling_predictors(pr_full, lag_ranges, standardize=False).dropna()
    sim_full = _predict_series(model, features)
    return sim_full[sim_full.index >= future_index[0]]


# (label, winter surplus as a fraction of mean winter precipitation, colour).
# None means "no surplus": the future anomaly stays at exactly zero.
scenario_specs = (
    ('S1: avg precip', None, 'tab:orange'),
    ('S2: +10% winter surplus', 0.1, 'tab:green'),
    ('S3: +30% winter surplus', 0.3, 'tab:purple'),
)
winter_months = [6, 7, 8]
future_months = 120                     # 10-year scenario horizon
window_months = 1                       # a window of 1 leaves the series unsmoothed
pre_md_end = pd.Timestamp('2010-01-01')  # reference mean uses data strictly before this date

# Loop-invariant: index and sort the tables by date once instead of per gauge.
q_by_date = df_q.set_index('date').sort_index()
pr_q_by_date = df_pr_q.set_index('date').sort_index()

# eligible_gauges is a set; sort it so processing and log order are reproducible.
for gauge_id in sorted(eligible_gauges, key=str):
    # The gauge list comes from a separate results file; skip ids that have no
    # column in either table instead of aborting the whole batch.
    if gauge_id not in q_by_date.columns or gauge_id not in pr_q_by_date.columns:
        print(f'Skipping gauge {gauge_id}: no matching column in streamflow or precipitation data')
        continue

    q_abs = q_by_date[gauge_id].asfreq('MS')
    pr_abs = pr_q_by_date[gauge_id].asfreq('MS')

    # === Monthly climatology and anomalies ===
    mon_mean_q_abs = q_abs.groupby(q_abs.index.month).mean()
    mon_mean_pr = pr_abs.groupby(pr_abs.index.month).mean()
    q_an = q_abs - q_abs.groupby(q_abs.index.month).transform('mean')
    pr_an = pr_abs - pr_abs.groupby(pr_abs.index.month).transform('mean')

    # Reference level drawn as the horizontal dashed line in the figure.
    mean_q_abs_pre_MD = q_abs[q_abs.index < pre_md_end].mean()

    # === Build historical predictors & model ===
    X_hist_df = rolling_predictors(pr_an, lag_ranges_q, standardize=False).dropna()
    y_hist_full = q_an.reindex(X_hist_df.index)
    valid_mask = y_hist_full.notna() & np.isfinite(y_hist_full.values)
    valid_mask &= np.all(np.isfinite(X_hist_df.values), axis=1)

    X_hist_clean = X_hist_df.loc[valid_mask]
    y_hist_clean = y_hist_full.loc[valid_mask]

    if X_hist_clean.empty or y_hist_clean.empty:
        print(f'Skipping gauge {gauge_id}: insufficient training data')
        continue

    model = LinearRegression().fit(X_hist_clean.values, y_hist_clean.values)
    r2_hist = r2_score(y_hist_clean.values, model.predict(X_hist_clean.values))

    # === Predictions for the historical period ===
    obs_sim_hist = _predict_series(model, X_hist_df)

    # === Future precipitation scenarios ===
    last_hist_date = pr_an.index.max()
    future_index = pd.date_range(last_hist_date + pd.offsets.MonthBegin(1), periods=future_months, freq='MS')
    future_start = future_index[0]

    # The surplus is a fraction of the mean over the winter months' climatology
    # (one constant for all winter months, not a per-month value).
    winter_mean_pr = mon_mean_pr.loc[winter_months].mean()
    obs_sim_scenarios = [
        _simulate_scenario(
            model, pr_an, lag_ranges_q, future_index, winter_months,
            winter_anomaly=None if fraction is None else fraction * winter_mean_pr,
        )
        for _, fraction, _ in scenario_specs
    ]

    # === Compute absolute values (streamflow cannot be negative, so clip at 0) ===
    try:
        obs_sim_abs_hist = _add_monthly_mean_offset(obs_sim_hist, mon_mean_q_abs).clip(lower=0)
        obs_sim_abs_scenarios = [
            _add_monthly_mean_offset(sim, mon_mean_q_abs).clip(lower=0)
            for sim in obs_sim_scenarios
        ]
    except ValueError as exc:
        print(f'Skipping gauge {gauge_id}: {exc}')
        continue

    # === Plot absolute values ===
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.scatter(q_abs.index, q_abs.values, label='Obs Q', color='black', alpha=0.8, s=5, zorder=5)
        hist_smooth = obs_sim_abs_hist.rolling(window=window_months, min_periods=1).mean()
        ax.plot(hist_smooth.index, hist_smooth.values, label=f'Sim Q (R²={r2_hist:.2f})', color='tab:blue', lw=.8)
        for (label, _, color), sim_abs in zip(scenario_specs, obs_sim_abs_scenarios):
            sim_smooth = sim_abs.rolling(window=window_months, min_periods=1).mean()
            ax.plot(sim_smooth.index, sim_smooth.values, label=label, color=color, lw=.8)
        ax.axvline(future_start, color='gray', linestyle='--', alpha=0.6)
        ax.axhline(mean_q_abs_pre_MD, color='k', linestyle='--', linewidth=0.8)
        ax.legend()
        ax.set_xlabel('Date')
        ax.set_ylabel('Streamflow (mm)')
        ax.set_title(f'Gauge {gauge_id} streamflow recovery')
        fig.tight_layout()
        fig.savefig(os.path.join(out_figs_q, f"q_recovery_{gauge_id}.png"), bbox_inches='tight')
    finally:
        # Always release the figure so a failed save does not leak it.
        plt.close(fig)
