In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
import os
import re
from scipy.interpolate import UnivariateSpline, PchipInterpolator
from sklearn.isotonic import isotonic_regression
import sys
from pathlib import Path

sys.path.append(str(Path('..').resolve()))
from fun_LR_hydro_memory import *

# Lag-type labels that the two configurations below correspond to
# (kept commented out; the per-site cells set the one they need):
# selected_lag_type_q = 'type1_incr1'
# selected_lag_type_gw = 'type2_incr4'

# Six lag windows each; make_lag_ranges comes from fun_LR_hydro_memory.
# Groundwater configuration.
lag_ranges_gw, widths_gw, incr_name_gw = make_lag_ranges(lag_increase=4, n_windows=6, incr_type=2)

# Streamflow configuration.
lag_ranges_q, widths_q, incr_name_q = make_lag_ranges(lag_increase=1, n_windows=6, incr_type=1)

# Show the resulting windows so the reader can check them.
print('Groundwater lag ranges:\n', lag_ranges_gw)
print('Streamflow lag ranges:\n', lag_ranges_q)
 

Groundwater lag ranges:
 OrderedDict([('lag_0_0', (0, 0)), ('lag_1_6', (1, 6)), ('lag_7_17', (7, 17)), ('lag_18_33', (18, 33)), ('lag_34_54', (34, 54)), ('lag_55_80', (55, 80))])
Streamflow lag ranges:
 OrderedDict([('lag_0_0', (0, 0)), ('lag_1_2', (1, 2)), ('lag_3_5', (3, 5)), ('lag_6_9', (6, 9)), ('lag_10_14', (10, 14)), ('lag_15_20', (15, 20))])


In [None]:
       
        
def _empty_memory_result():
    """Return the result dict used when there is no usable (memory, R²) pair to fit."""
    return {
        'lag_type': None,
        'memory_spline': np.nan,
        'memory_pchip': np.nan,
        'memory_iso': np.nan,
        'x_vals': None,
        'y_spline': None,
        'y_pchip': None,
        'y_iso': None,
    }


def _first_crossing(xvals, yvals, thr):
    """Return the first x whose y is >= thr, or NaN when the curve never reaches thr."""
    above = yvals >= thr
    return xvals[np.argmax(above)] if np.any(above) else np.nan


def _mean_y_per_unique_x(x_sorted, y_sorted):
    """Collapse repeated x values (x already sorted) into one point holding the mean y."""
    x_unique, inverse = np.unique(x_sorted, return_inverse=True)
    inverse = inverse.ravel()
    y_mean = np.bincount(inverse, weights=y_sorted) / np.bincount(inverse)
    return x_unique, y_mean


def compute_memory_from_spline_variants(df_config, perc_max_r2=0.9, spline_smoothing=0.5):
    """
    Estimate the predictor memory at which R² first reaches a fraction of its maximum.

    All available (pred_memory1..6, r2_pred1..6) pairs are collected across the rows of
    `df_config`, pairs containing NaN are dropped, and three curves are fitted through the
    remaining points: a smoothing `UnivariateSpline`, a PCHIP interpolant and an isotonic
    (non-decreasing) fit. Each curve is evaluated on a 500-point grid spanning the observed
    memory range, and the memory estimate is the first grid value at which the curve is
    >= `perc_max_r2` times the maximum observed R².

    Parameters
    ----------
    df_config : pandas.DataFrame
        Rows holding `pred_memory{i}` / `r2_pred{i}` columns (i = 1..6). Missing columns
        are treated as NaN.
    perc_max_r2 : float, default 0.9
        Fraction of the maximum observed R² used as the crossing threshold.
    spline_smoothing : float, default 0.5
        Smoothing factor `s` passed to `UnivariateSpline`.

    Returns
    -------
    dict
        Keys: 'lag_type' (always None here), 'memory_spline', 'memory_pchip', 'memory_iso'
        (NaN when the fit failed or the threshold is never reached), 'x_vals' (the dense
        grid, or None when nothing could be fitted), and 'y_spline', 'y_pchip', 'y_iso'
        (curve values on the grid, or None when that fit failed).

    Notes
    -----
    A fit that raises is reported as NaN/None rather than propagated, so that one failing
    variant does not prevent the others from being used.
    """
    if df_config is None or len(df_config) == 0:
        return _empty_memory_result()

    # reindex() supplies all-NaN columns for any pair that is absent from df_config;
    # ravel() keeps the row-by-row pairing between memory and R² values.
    x_cols = [f'pred_memory{i}' for i in range(1, 7)]
    y_cols = [f'r2_pred{i}' for i in range(1, 7)]
    x = df_config.reindex(columns=x_cols).to_numpy(dtype=float).ravel()
    y = df_config.reindex(columns=y_cols).to_numpy(dtype=float).ravel()

    # Remove NaNs and sort by x
    valid = ~(np.isnan(x) | np.isnan(y))
    x, y = x[valid], y[valid]
    if x.size == 0:
        return _empty_memory_result()

    order = np.argsort(x, kind='stable')
    x_sorted, y_sorted = x[order], y[order]
    x_dense = np.linspace(x_sorted.min(), x_sorted.max(), 500)
    r2_target = perc_max_r2 * np.nanmax(y_sorted)

    # Spline
    try:
        spline = UnivariateSpline(x_sorted, y_sorted, s=spline_smoothing)
        y_spline = spline(x_dense)
        mem_spline = _first_crossing(x_dense, y_spline, r2_target)
    except Exception:
        y_spline = None
        mem_spline = np.nan

    # PCHIP
    try:
        # PchipInterpolator requires strictly increasing x. Several rows sharing the same
        # memory values would otherwise make it raise and silently yield NaN, so repeated
        # x are averaged first (a no-op when every x is already unique).
        x_unique, y_unique = _mean_y_per_unique_x(x_sorted, y_sorted)
        pchip = PchipInterpolator(x_unique, y_unique)
        y_pchip = pchip(x_dense)
        mem_pchip = _first_crossing(x_dense, y_pchip, r2_target)
    except Exception:
        y_pchip = None
        mem_pchip = np.nan

    # Isotonic
    try:
        # prefer the functional API if available
        try:
            y_iso = isotonic_regression(y_sorted)
        except Exception:
            # fallback to estimator API
            from sklearn.isotonic import IsotonicRegression
            ir = IsotonicRegression()
            # use x_sorted as X so the fit is monotone along the memory axis
            y_iso = ir.fit_transform(x_sorted, y_sorted)
        # the isotonic fit only exists at the data points; interpolate onto the grid
        y_iso_dense = np.interp(x_dense, x_sorted, y_iso)
        mem_iso = _first_crossing(x_dense, y_iso_dense, r2_target)
    except Exception:
        y_iso_dense = None
        mem_iso = np.nan

    return {
        'lag_type': None,
        'memory_spline': mem_spline,
        'memory_pchip': mem_pchip,
        'memory_iso': mem_iso,
        'x_vals': x_dense,
        'y_spline': y_spline,
        'y_pchip': y_pchip,
        'y_iso': y_iso_dense,
    }


In [5]:
######## Memory computation for GW ########
# For every well: plot observed vs simulated groundwater anomalies (left panel) and
# R² against predictor memory (right panel), estimate the groundwater memory with
# compute_memory_from_spline_variants, and write one summary CSV.

# === Load R2 results ===
df_gw = pd.read_csv('../2_run_LR_gw/csv/r2_LR_rest_pos_False_trained_all.csv', dtype={"well_id": str})
# NOTE(review): df_q is not used in this cell and the streamflow cell reloads it with
# dtype={"gauge_id": str}; kept only so the cell's variables stay the same.
df_q = pd.read_csv('../3_run_LR_Q/csv/r2_LR_rest_pos_False_trained_all.csv', dtype={"well_id": str})

# Work on groundwater results in this cell. Take an explicit copy of the filtered rows so
# the columns added below are written to an independent frame, not to a slice of df_gw.
mask = (df_gw['target'] == 'gw_an') & (df_gw['predictor'] == 'pr_an')
df = df_gw.loc[mask].copy()

# === Load predictions (used for time series plotting) ===
predictions_file = '../2_run_LR_gw/csv/gw_sim_LR_trained_all.csv'
# load only necessary columns if file large; adjust as needed
df_pred = pd.read_csv(predictions_file, parse_dates=['date'], dtype={"well_id": str})
mask = (df_pred['target'] == 'gw_an') & (df_pred['predictor'] == 'pr_an')
df_pred = df_pred[mask]

# selected_lag_type_q = 'type1_incr1'
selected_lag_type_gw = 'type2_incr4'

# === Create pred_memory1..6 columns from lag_ranges ===
# The pattern captures the second number of every "(a, b)" pair in the stored text.
pattern = r"\(?\s*[-\d.]+\s*,\s*([-\d.]+)\s*\)?"
n_lags = 6
mem_cols = [f'pred_memory{i}' for i in range(1, n_lags + 1)]
r2_cols = [f'r2_pred{i}' for i in range(1, n_lags + 1)]

lag_range_values = df['lag_ranges'] if 'lag_ranges' in df.columns else [None] * len(df)
parsed_rows = []
for lr in lag_range_values:
    ends = [] if lr is None else re.findall(pattern, str(lr))
    vals = []
    for i in range(n_lags):
        try:
            vals.append(float(ends[i]))
        except (IndexError, ValueError):
            # fewer than six pairs, or a token such as "-" that is not a number
            vals.append(np.nan)
    parsed_rows.append(vals)

mem_arr = np.array(parsed_rows, dtype=float).reshape(len(df), n_lags)
for j, col in enumerate(mem_cols):
    df[col] = mem_arr[:, j]

# also keep an array column for backward compatibility
if 'pred_memory' not in df.columns:
    df['pred_memory'] = df.apply(lambda r: np.array([r[c] for c in mem_cols]), axis=1)

# x-limits of the R²-vs-memory panel are the same for every well: compute them once.
if np.isfinite(mem_arr).any():
    memory_xlim = (np.nanmin(mem_arr) - 5, np.nanmax(mem_arr) + 5)
else:
    memory_xlim = None


# === Output ===
out_figs = 'figs_gw'
os.makedirs(out_figs, exist_ok=True)

out_results = 'csv'
os.makedirs(out_results, exist_ok=True)


# === Model configurations and colors ===
color_palette = plt.get_cmap('tab10')

# only selected models with 6 predictors to avoid overfitting and misinterpreting results
memory_summary = []

# Create a deterministic color mapping from the available lag types
lag_types_all = sorted(df['lag_type'].unique()) if 'lag_type' in df.columns else []
# Specify colors by hand for the first three lag types; fallback to colormap for others
specified_colors = ['blue', 'purple', 'green']
model_colors = {}
for i, name in enumerate(lag_types_all):
    if i < len(specified_colors):
        model_colors[name] = specified_colors[i]
    else:
        model_colors[name] = color_palette(i % 10)

# === Plot per well ===
# for well in ['3450005']:
for well in sorted(df['well_id'].unique()):
    well_data = df[df['well_id'] == well]
    df_fit = well_data[well_data['lag_type'] == selected_lag_type_gw]
    max_r2_selected = df_fit['r2_all'].max() if 'r2_all' in df_fit.columns else np.nan

    # Only proceed with plotting if the selected model configs show sufficient performance
    if np.isnan(max_r2_selected) or max_r2_selected <= 0.4:
        print(f"Skipping well {well}: max_r2_selected={max_r2_selected}")
        continue

    fig = plt.figure(figsize=(13, 4))
    gs = gridspec.GridSpec(1, 2, width_ratios=[3, 2])  # left panel wider
    ax1 = fig.add_subplot(gs[0])
    ax2 = fig.add_subplot(gs[1])

    # Left plot: Observed vs predicted time series
    pred_data = df_pred[df_pred['well_id'] == well].copy()

    for name in sorted(well_data['lag_type'].unique()):
        pred_subset = pred_data[pred_data['lag_type'] == name]
        ax1.plot(
            pred_subset['date'], pred_subset['pred'],
            label=f'{name}',
            color=model_colors.get(name),
            linestyle='-',
            linewidth=1,
            zorder=1,
        )

    # Plot observed values. pred_data holds one copy of the observations per lag type,
    # so exact duplicates are dropped instead of being drawn on top of each other.
    obs = pred_data[pred_data['target'] == 'gw_an']
    if not obs.empty and 'target_val' in obs.columns:
        obs = obs.drop_duplicates(subset=['date', 'target_val'])
        ax1.scatter(obs['date'], obs['target_val'], color='black', s=6, zorder=2)
    ax1.set_title(f"Well {well} – observed vs simulated GWL, R² = {np.round(max_r2_selected,2)}", loc='left')
    ax1.set_xlabel("Date")
    ax1.set_ylabel("GW anom (m)")
    ax1.grid(True, linestyle='--', alpha=0.5)

    # Add legend only if prediction lines exist
    handles, labels = ax1.get_legend_handles_labels()
    if labels:
        ax1.legend(fontsize=8)

    # Right plot: R² vs memory
    for name in sorted(well_data['lag_type'].unique()):
        subset_mask = (
            (well_data['lag_type'] == name)
            & (well_data['target'] == 'gw_an')
            & (well_data['predictor'] == 'pr_an')
        )
        pred_subset = well_data[subset_mask]
        # transpose to shape (6, n_rows), one plotted series per row of pred_subset
        xm = pred_subset[mem_cols].to_numpy(dtype=float).T
        ym = pred_subset[r2_cols].to_numpy(dtype=float).T

        ax2.plot(xm, ym, label=name, marker='o', linestyle='None',
                 color=model_colors.get(name, 'red'), zorder=2)

    # plot fitted curves (same order as before: isotonic, spline, PCHIP)
    fit_results = compute_memory_from_spline_variants(df_fit)
    for curve_key in ('y_iso', 'y_spline', 'y_pchip'):
        if fit_results['x_vals'] is not None and fit_results[curve_key] is not None:
            ax2.plot(fit_results['x_vals'], fit_results[curve_key], '--',
                     color='grey', alpha=0.7, label=None, linewidth=.6)

    # Average the available estimates. np.nanmean on an all-NaN input warns and returns
    # NaN, so that case is handled explicitly and no line is drawn for it.
    memory_estimates = np.array([fit_results.get('memory_iso', np.nan),
                                 fit_results.get('memory_spline', np.nan),
                                 fit_results.get('memory_pchip', np.nan)], dtype=float)
    has_memory = bool(np.isfinite(memory_estimates).any())
    avg_memory = np.nanmean(memory_estimates) if has_memory else np.nan

    if has_memory:
        ax2.axvline(avg_memory, color='black', linestyle='-', linewidth=.9, label='GW memory')
        memory_text = f"{avg_memory:.0f} months"
    else:
        memory_text = "n/a"

    ax2.set_title(f"R² with increasing # of predictors\nGW memory = {memory_text}", loc='left')
    ax2.set_xlabel("Pr_an lags (months)")
    ax2.set_ylabel("R²")
    ax2.grid(True, linestyle='--', alpha=0.5)
    ax2.legend(fontsize=8)
    if memory_xlim is not None:
        ax2.set_xlim(*memory_xlim)
    # ax2.set_ylim(0, 1)

    # save figure per well, then close it so figures do not accumulate in memory
    fig.tight_layout()
    fig.savefig(os.path.join(out_figs, f"gw_memory_{well}.png"), bbox_inches='tight')
    plt.close(fig)

    # === Store memory results if at least one is valid ===
    if has_memory:
        memory_summary.append({
            'well_id': well,
            'gw_memory': avg_memory,
            'max_r2_selected_configs': max_r2_selected
        })


# write summary
if memory_summary:
    df_memory_summary = pd.DataFrame(memory_summary)
    out_csv = os.path.join(out_results, 'gw_memory.csv')
    # to_csv opens the file in write mode, so an existing file is replaced
    df_memory_summary.to_csv(out_csv, index=False)
else:
    print('No memory results to save')

print("\nLR models Finished! \n+Finito! \n+Terminado!\n")


Skipping well 1110003: max_r2_selected=0.1974195693329598
Skipping well 1110004: max_r2_selected=0.1367242917953338
Skipping well 1110005: max_r2_selected=0.126131562041706
Skipping well 1110006: max_r2_selected=0.1338070478573109
Skipping well 1110007: max_r2_selected=0.1540253968775451
Skipping well 1110008: max_r2_selected=0.3545518758903567
Skipping well 1110009: max_r2_selected=0.2699826824854185
Skipping well 1110010: max_r2_selected=0.2470590991696076
Skipping well 1211010: max_r2_selected=0.219686055301868
Skipping well 1310024: max_r2_selected=0.2417607884151632
Skipping well 1310025: max_r2_selected=0.129484438479177
Skipping well 1310026: max_r2_selected=0.3250459604277125
Skipping well 1310027: max_r2_selected=0.3412423243315973
Skipping well 1310033: max_r2_selected=0.2920052447680535
Skipping well 1310036: max_r2_selected=0.2394999627220056
Skipping well 1310064: max_r2_selected=0.2334122440103773
Skipping well 1310069: max_r2_selected=0.3202398320275157
Skipping well 170

In [6]:
######## Memory computation for STREAMFLOW ########
# For every gauge: plot observed vs simulated streamflow anomalies (left panel) and
# R² against predictor memory (right panel), estimate the streamflow memory with
# compute_memory_from_spline_variants, and write one summary CSV.

# === Load R2 results ===
# df_gw = pd.read_csv('../2_run_LR_gw/csv/r2_LR_rest_pos_False_trained_all.csv', dtype={"well_id": str})
df_q = pd.read_csv('../3_run_LR_Q/csv/r2_LR_rest_pos_False_trained_all.csv', dtype={"gauge_id": str})

# Work on streamflow results in this cell. Take an explicit copy of the filtered rows so
# the columns added below are written to an independent frame, not to a slice of df_q.
mask = (df_q['target'] == 'q_an') & (df_q['predictor'] == 'pr_an')
df = df_q.loc[mask].copy()

# === Load predictions (used for time series plotting) ===
predictions_file = '../3_run_LR_Q/csv/q_sim_LR_trained_all.csv'
# load only necessary columns if file large; adjust as needed
df_pred = pd.read_csv(predictions_file, parse_dates=['date'], dtype={"gauge_id": str})
mask = (df_pred['target'] == 'q_an') & (df_pred['predictor'] == 'pr_an')
df_pred = df_pred[mask]

selected_lag_type_q = 'type1_incr1'
# selected_lag_type_gw = 'type2_incr4'

# === Create pred_memory1..6 columns from lag_ranges ===
# The pattern captures the second number of every "(a, b)" pair in the stored text.
pattern = r"\(?\s*[-\d.]+\s*,\s*([-\d.]+)\s*\)?"
n_lags = 6
mem_cols = [f'pred_memory{i}' for i in range(1, n_lags + 1)]
r2_cols = [f'r2_pred{i}' for i in range(1, n_lags + 1)]

lag_range_values = df['lag_ranges'] if 'lag_ranges' in df.columns else [None] * len(df)
parsed_rows = []
for lr in lag_range_values:
    ends = [] if lr is None else re.findall(pattern, str(lr))
    vals = []
    for i in range(n_lags):
        try:
            vals.append(float(ends[i]))
        except (IndexError, ValueError):
            # fewer than six pairs, or a token such as "-" that is not a number
            vals.append(np.nan)
    parsed_rows.append(vals)

mem_arr = np.array(parsed_rows, dtype=float).reshape(len(df), n_lags)
for j, col in enumerate(mem_cols):
    df[col] = mem_arr[:, j]

# also keep an array column for backward compatibility
if 'pred_memory' not in df.columns:
    df['pred_memory'] = df.apply(lambda r: np.array([r[c] for c in mem_cols]), axis=1)

# x-limits of the R²-vs-memory panel are the same for every gauge: compute them once.
if np.isfinite(mem_arr).any():
    memory_xlim = (np.nanmin(mem_arr) - 5, np.nanmax(mem_arr) + 5)
else:
    memory_xlim = None


# === Output ===
out_figs = 'figs_q'
os.makedirs(out_figs, exist_ok=True)

out_results = 'csv'
os.makedirs(out_results, exist_ok=True)


# === Model configurations and colors ===
color_palette = plt.get_cmap('tab10')

# only selected models with 6 predictors to avoid overfitting and misinterpreting results
memory_summary = []

# Create a deterministic color mapping from the available lag types
lag_types_all = sorted(df['lag_type'].unique()) if 'lag_type' in df.columns else []
# Specify colors by hand for the first three lag types; fallback to colormap for others
specified_colors = ['blue', 'purple', 'green']
model_colors = {}
for i, name in enumerate(lag_types_all):
    if i < len(specified_colors):
        model_colors[name] = specified_colors[i]
    else:
        model_colors[name] = color_palette(i % 10)

# === Plot per gauge ===
# (the loop variable keeps the name `well` used by the groundwater cell)
# for well in ['5715001']:
for well in sorted(df['gauge_id'].unique()):
    well_data = df[df['gauge_id'] == well]
    df_fit = well_data[well_data['lag_type'] == selected_lag_type_q]
    max_r2_selected = df_fit['r2_all'].max() if 'r2_all' in df_fit.columns else np.nan

    # Only proceed with plotting if the selected model configs show sufficient performance
    if np.isnan(max_r2_selected) or max_r2_selected <= 0.4:
        print(f"Skipping basin {well}: max_r2_selected={max_r2_selected}")
        continue

    fig = plt.figure(figsize=(13, 4))
    gs = gridspec.GridSpec(1, 2, width_ratios=[3, 2])  # left panel wider
    ax1 = fig.add_subplot(gs[0])
    ax2 = fig.add_subplot(gs[1])

    # Left plot: Observed vs predicted time series
    pred_data = df_pred[df_pred['gauge_id'] == well].copy()

    for name in sorted(well_data['lag_type'].unique()):
        pred_subset = pred_data[pred_data['lag_type'] == name]
        ax1.plot(
            pred_subset['date'], pred_subset['pred'],
            label=f'{name}',
            color=model_colors.get(name),
            linestyle='-',
            linewidth=1,
            zorder=1,
        )

    # Plot observed values. pred_data holds one copy of the observations per lag type,
    # so exact duplicates are dropped instead of being drawn on top of each other.
    obs = pred_data[pred_data['target'] == 'q_an']
    if not obs.empty and 'target_val' in obs.columns:
        obs = obs.drop_duplicates(subset=['date', 'target_val'])
        ax1.scatter(obs['date'], obs['target_val'], color='black', s=6, zorder=2)
    ax1.set_title(f"Basin {well} – observed vs simulated Q, R² = {np.round(max_r2_selected,2)}", loc='left')
    ax1.set_xlabel("Date")
    ax1.set_ylabel("Q anom (m3/s)")
    ax1.grid(True, linestyle='--', alpha=0.5)

    # Add legend only if prediction lines exist
    handles, labels = ax1.get_legend_handles_labels()
    if labels:
        ax1.legend(fontsize=8)

    # Right plot: R² vs memory
    for name in sorted(well_data['lag_type'].unique()):
        subset_mask = (
            (well_data['lag_type'] == name)
            & (well_data['target'] == 'q_an')
            & (well_data['predictor'] == 'pr_an')
        )
        pred_subset = well_data[subset_mask]
        # transpose to shape (6, n_rows), one plotted series per row of pred_subset
        xm = pred_subset[mem_cols].to_numpy(dtype=float).T
        ym = pred_subset[r2_cols].to_numpy(dtype=float).T

        ax2.plot(xm, ym, label=name, marker='o', linestyle='None',
                 color=model_colors.get(name, 'red'), zorder=2)

    # plot fitted curves (same order as before: isotonic, spline, PCHIP)
    fit_results = compute_memory_from_spline_variants(df_fit)
    for curve_key in ('y_iso', 'y_spline', 'y_pchip'):
        if fit_results['x_vals'] is not None and fit_results[curve_key] is not None:
            ax2.plot(fit_results['x_vals'], fit_results[curve_key], '--',
                     color='grey', alpha=0.7, label=None, linewidth=.6)

    # Average the available estimates. np.nanmean on an all-NaN input warns and returns
    # NaN, so that case is handled explicitly and no line is drawn for it.
    memory_estimates = np.array([fit_results.get('memory_iso', np.nan),
                                 fit_results.get('memory_spline', np.nan),
                                 fit_results.get('memory_pchip', np.nan)], dtype=float)
    has_memory = bool(np.isfinite(memory_estimates).any())
    avg_memory = np.nanmean(memory_estimates) if has_memory else np.nan

    if has_memory:
        # this is the streamflow figure, so the legend entry reads "Q memory"
        ax2.axvline(avg_memory, color='black', linestyle='-', linewidth=.9, label='Q memory')
        memory_text = f"{avg_memory:.0f} months"
    else:
        memory_text = "n/a"

    ax2.set_title(f"R² with increasing # of predictors\nQ memory = {memory_text}", loc='left')
    ax2.set_xlabel("Pr_an lags (months)")
    ax2.set_ylabel("R²")
    ax2.grid(True, linestyle='--', alpha=0.5)
    ax2.legend(fontsize=8)
    if memory_xlim is not None:
        ax2.set_xlim(*memory_xlim)
    # ax2.set_ylim(0, 1)

    # save figure per gauge, then close it so figures do not accumulate in memory
    fig.tight_layout()
    fig.savefig(os.path.join(out_figs, f"q_memory_{well}.png"), bbox_inches='tight')
    plt.close(fig)

    # === Store memory results if at least one is valid ===
    if has_memory:
        memory_summary.append({
            'gauge_id': well,
            'q_memory': avg_memory,
            'max_r2_selected_configs': max_r2_selected
        })


# write summary
if memory_summary:
    df_memory_summary = pd.DataFrame(memory_summary)
    out_csv = os.path.join(out_results, 'q_memory.csv')
    # to_csv opens the file in write mode, so an existing file is replaced
    df_memory_summary.to_csv(out_csv, index=False)
else:
    print('No memory results to save')

print("\nLR models Finished! \n+Finito! \n+Terminado!\n")


Skipping basin 1001001: max_r2_selected=0.1143571391325802
Skipping basin 1001002: max_r2_selected=0.2904962954806629
Skipping basin 1001003: max_r2_selected=0.2520159223870031
Skipping basin 10100006: max_r2_selected=0.34605145004728
Skipping basin 1020002: max_r2_selected=0.1569166417149786
Skipping basin 1020003: max_r2_selected=0.1320164217913983
Skipping basin 1021001: max_r2_selected=0.2563801403920459
Skipping basin 1021002: max_r2_selected=0.0213146861442199
Skipping basin 10311001: max_r2_selected=0.3761883802760139
Skipping basin 10322003: max_r2_selected=0.2478212480028519
Skipping basin 10401001: max_r2_selected=0.2557416545343098
Skipping basin 1041002: max_r2_selected=0.1109798253685421
Skipping basin 10431001: max_r2_selected=0.1040518076345923
Skipping basin 1044001: max_r2_selected=0.2834439604862138
Skipping basin 1050002: max_r2_selected=0.0401917357919165
Skipping basin 1050004: max_r2_selected=0.0857540329177164
Skipping basin 10503001: max_r2_selected=0.3261655420