# RNN Weight Replications Analysis

Examine variability in RMSE for training and validation sets.

NOTE: The RNN was trained on a training set from 2023-2024, and the validation set was used only to control early stopping. Forecast accuracy was estimated with a full train/val/test split, with the test set covering all of 2024. The final model is trained on all available data and relies on the forecast accuracy estimated in the spatiotemporal analysis.

The replications for this analysis varied the random seed for the order of training samples (batch or mini-batch order), the random sample of physical stations for the train vs. val sets, and the initial weights of the RNN. The training set is fixed, so the variability across replications is meant to estimate the uncertainty introduced by the gradient descent algorithm.

The purpose of this is to examine whether the RNN is estimating the same function over replications, and to make stronger conclusions about what the time warping does. 

In [None]:
import h5py
import os
import numpy as np
import tensorflow as tf
import pandas as pd
import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from src.models import moisture_rnn as mrnn
from src.utils import read_yml, time_intp, plot_styles, str2time

In [None]:
# Restore the trained model artifacts saved under models/:
# the object stored in scaler.joblib, the parameter file, and the RNN itself.
scaler = joblib.load("models/scaler.joblib")

# Build the network from its parameter file, then load the saved weights.
params = read_yml("models/params.yaml")
rnn = mrnn.RNN_Flexible(params=params)
rnn.load_weights("models/rnn.keras")

In [None]:
# Weight replications: one sub-directory of models/reps per replication.
# os.listdir returns entries in arbitrary order and also lists stray files
# (e.g. hidden OS files), which would break the loops that build a path
# from every entry. Keep directories only and sort them so the replication
# order (and therefore any positional replication index) is reproducible.
rep_dirs = sorted(
    name
    for name in os.listdir("models/reps")
    if os.path.isdir(os.path.join("models/reps", name))
)

## Fitting Accuracy

In [None]:
# Collect, for every replication, the fitting MSE and the sample count of the
# train and val sets from its fitting_mse.csv. Replications that have no such
# file are skipped.
base_path = "models/reps"

train_mse, val_mse = [], []
n_train, n_val = [], []

for rep_name in rep_dirs:
    csv_path = os.path.join(base_path, rep_name, "fitting_mse.csv")
    if not os.path.isfile(csv_path):
        continue

    fit = pd.read_csv(csv_path)
    is_train = fit["set"] == "train"
    is_val = fit["set"] == "val"

    train_vals = fit.loc[is_train, "mse"].values
    train_counts = fit.loc[is_train, "n_samples"].values
    val_vals = fit.loc[is_val, "mse"].values
    val_counts = fit.loc[is_val, "n_samples"].values

    # Only the first matching row of each set is recorded.
    train_mse.append(train_vals[0])
    n_train.append(train_counts[0])
    val_mse.append(val_vals[0])
    n_val.append(val_counts[0])

# Convert the accumulated lists to arrays for the summary statistics.
train_mse, n_train = np.array(train_mse), np.array(n_train)
val_mse, n_val = np.array(val_mse), np.array(n_val)

In [None]:
import numpy as np
import pandas as pd

# ---- RMSE stats across replications ----
# Convert each replication's MSE to an RMSE first and aggregate afterwards.
# Taking sqrt(std(MSE)) does not give the standard deviation of the RMSE
# (the spread of a squared quantity is not the square of the spread), so the
# reported "Mean +/- SD" must be computed on the RMSE values themselves.
# min/max are unaffected because sqrt is monotone.
if train_mse.size == 0:
    raise ValueError("no replication fitting results loaded; cannot summarise RMSE")

train_rmse = np.sqrt(train_mse)
train_mean = np.mean(train_rmse)
train_std  = np.std(train_rmse)
train_low  = np.min(train_rmse)
train_high = np.max(train_rmse)


# ---- N stats (sample counts, rounded to whole samples) ----
train_n_mean = int(np.round(np.mean(n_train)))
train_n_std  = int(np.round(np.std(n_train)))

# Formatted cells of the summary table, in the same order as "Metric" below.
train_row = [
    f"{train_mean:.2f} $\\pm$ {train_std:.2f}",
    f"({train_low:.2f}, {train_high:.2f})",
    f"{train_n_mean:,} $\\pm$ {train_n_std:,}"
]

table = pd.DataFrame({
    "Metric": [
        "RMSE (Mean $\\pm$ SD)",
        "RMSE Range (Low, High)",
        "N (Mean $\\pm$ SD)"
    ],
    "Values": train_row
})

table

## Parameter Distributions

Forget Gate and Input Gate bias terms are a random realization of a 64-length vector, and should be permutation invariant.

In [None]:
# Collect the forget-gate (bf) and input-gate (bi) bias vectors of every
# replication from its saved weight file.
bf = []
bi = []

for d in rep_dirs:
    weight_path = os.path.join(base_path, d, "rnn.weights.h5")
    # Open the HDF5 file in a context manager so the handle is closed after
    # each replication; `[()]` copies the dataset into memory before close.
    with h5py.File(weight_path, "r") as f:
        # Variable '2' of the LSTM cell is treated as the bias vector.
        b = f["layers"]["lstm"]["cell"]["vars"]["2"][()]
    # NOTE(review): assumes the bias is four equal gate blocks with the input
    # gate first and the forget gate second (Keras LSTM order) -- confirm.
    units = b.shape[0] // 4
    bf.append(b[units:2*units])
    bi.append(b[0:units])

In [None]:
# Turn the per-replication bias vectors into arrays (one row per replication
# when all vectors have equal length) and take their element-wise difference.
bf, bi = np.array(bf), np.array(bi)
diff = bf - bi

In [None]:
# Summary Stats
def _bias_summary_row(label, values):
    """Return the formatted summary cells for one bias array.

    ``values`` is indexed as (replication, unit). Statistics are first taken
    within each replication (axis=1) and then summarised across replications.

    Returns a list: the label followed by four formatted strings --
    mean +/- SD of the per-model means, range of the per-model means,
    mean +/- SD of the per-model SDs, and range of the per-model SDs.
    """
    model_means = np.mean(values, axis=1)
    model_sds = np.std(values, axis=1)
    return [
        label,
        f"{np.mean(model_means):.2f} $\\pm$ {np.std(model_means):.2f}",
        f"({np.min(model_means):.2f}, {np.max(model_means):.2f})",
        f"{np.mean(model_sds):.2f} $\\pm$ {np.std(model_sds):.2f}",
        f"({np.min(model_sds):.2f}, {np.max(model_sds):.2f})",
    ]


# One row per quantity; the same statistics are applied to each array.
bf_row = _bias_summary_row("Forget Gate Bias", bf)
bi_row = _bias_summary_row("Input Gate Bias", bi)
diff_row = _bias_summary_row("Difference", diff)

# table (the leading label of each row becomes the column header)
table = pd.DataFrame({
    "Metric": [
        "Mean of per-model means ($\\pm$ SD across reps)",
        "Range of per-model means (low, high)",
        "Mean of per-model SDs ($\\pm$ SD across reps)",
        "Range of per-model SDs (low, high)",
    ],
    "Forget Gate Bias": bf_row[1:],
    "Input Gate Bias": bi_row[1:],
    "Difference": diff_row[1:]
})

table

In [None]:
# Shared figure settings, chosen so exported plots stay legible in documents.
# Font sizes:
LABEL_SIZE = 14        # titles and axis labels
TICK_SIZE = 12         # tick labels
CBAR_LABEL_SIZE = 13   # presumably colorbar labels; not used by the cells shown here
# Resolution used when figures are created/saved:
DPI = 300

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

def _draw_ridges(ax, data, row_order, grid, title, xlabel):
    """Draw one ridgeline panel: a filled KDE curve for each row of ``data``.

    Rows are drawn bottom-to-top in ``row_order``. Each curve is rescaled to a
    peak height of 0.35 and placed 0.4 above the previous one. A dashed
    vertical line marks the mean over all entries of ``data``.
    """
    ax.axvline(data.mean(), linestyle='--', linewidth=1.5, alpha=.4)

    baseline = 0
    for row in row_order:
        curve = gaussian_kde(data[row, :])(grid)
        curve = curve / curve.max() * 0.35
        ax.fill_between(grid, baseline, curve + baseline, alpha=0.5)
        baseline += 0.4

    ax.set_xlim(-3.5, 3.5)
    ax.set_title(title, fontsize=LABEL_SIZE)
    ax.set_xlabel(xlabel, fontsize=LABEL_SIZE)
    ax.set_yticks([])
    ax.tick_params(labelsize=TICK_SIZE)


# bi assumed shape (n_reps, n_units)
n_reps, n_units = bi.shape

# SD of each replication. argsort is ascending, so the lowest-SD replication
# is drawn first (bottom of the panel) and the highest-SD one last (top).
rep_sds = bi.std(axis=1)
sorted_idx = np.argsort(rep_sds)

# Common evaluation grid spanning the observed range of bi
x_grid = np.linspace(np.min(bi), np.max(bi), 400)

fig, axes = plt.subplots(1, 2, figsize=(14, 10), sharex=True, dpi=DPI)

# Left: replications in their original order
_draw_ridges(
    axes[0], bi, range(n_reps), x_grid,
    "Input Gate Bias Distribution Across Replications",
    r"Input Gate Bias ($b_i$)",
)

# Right: replications ordered by their SD
_draw_ridges(
    axes[1], bi, sorted_idx, x_grid,
    "Sorted by Replication SD\n(High Variance to Low Variance)",
    r"Input Gate Bias ($b_i$)",
)

plt.tight_layout()
plt.savefig("outputs/bi_hist.png", dpi=DPI)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

def _draw_ridges(ax, data, row_order, grid, title, xlabel):
    """Draw one ridgeline panel: a filled KDE curve for each row of ``data``.

    Rows are drawn bottom-to-top in ``row_order``. Each curve is rescaled to a
    peak height of 0.35 and placed 0.4 above the previous one. A dashed
    vertical line marks the mean over all entries of ``data``.
    """
    ax.axvline(data.mean(), linestyle='--', linewidth=1.5, alpha=.4)

    baseline = 0
    for row in row_order:
        curve = gaussian_kde(data[row, :])(grid)
        curve = curve / curve.max() * 0.35
        ax.fill_between(grid, baseline, curve + baseline, alpha=0.5)
        baseline += 0.4

    ax.set_xlim(-3.5, 3.5)
    ax.set_title(title, fontsize=LABEL_SIZE)
    ax.set_xlabel(xlabel, fontsize=LABEL_SIZE)
    ax.set_yticks([])
    ax.tick_params(labelsize=TICK_SIZE)


# bf assumed shape (n_reps, n_units)
n_reps, n_units = bf.shape

# SD of each replication, used for ordering. argsort is ascending, so the
# lowest-SD replication is drawn first (bottom) and the highest-SD last (top).
rep_sds = bf.std(axis=1)
sorted_idx = np.argsort(rep_sds)

# Common evaluation grid spanning the observed range of bf
x_grid = np.linspace(np.min(bf), np.max(bf), 400)

fig, axes = plt.subplots(1, 2, figsize=(14, 10), sharex=True)

# Left: replications in their original order
_draw_ridges(
    axes[0], bf, range(n_reps), x_grid,
    "Forget Gate Bias Distributions Across Replications",
    r"Forget Gate Bias ($b_f$)",
)

# Right: replications ordered by their SD
_draw_ridges(
    axes[1], bf, sorted_idx, x_grid,
    "Sorted by Replication SD\n(High Variance to Low Variance)",
    r"Forget Gate Bias ($b_f$)",
)

plt.tight_layout()
plt.savefig("outputs/bf_hist.png", dpi=DPI)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

def _draw_ridges(ax, data, row_order, grid, title, xlabel):
    """Draw one ridgeline panel: a filled KDE curve for each row of ``data``.

    Rows are drawn bottom-to-top in ``row_order``. Each curve is rescaled to a
    peak height of 0.35 and placed 0.4 above the previous one. A dashed
    vertical line marks the mean over all entries of ``data``.
    """
    ax.axvline(data.mean(), linestyle='--', linewidth=1.5, alpha=.4)

    baseline = 0
    for row in row_order:
        curve = gaussian_kde(data[row, :])(grid)
        curve = curve / curve.max() * 0.35
        ax.fill_between(grid, baseline, curve + baseline, alpha=0.5)
        baseline += 0.4

    ax.set_xlim(-3.5, 3.5)
    ax.set_title(title, fontsize=LABEL_SIZE)
    ax.set_xlabel(xlabel, fontsize=LABEL_SIZE)
    ax.set_yticks([])
    ax.tick_params(labelsize=TICK_SIZE)


# Difference between forget-gate and input-gate biases
diff = bf - bi
n_reps, n_units = diff.shape

# SD of each replication, with indices ordered from highest to lowest SD
rep_sds = diff.std(axis=1)
sorted_idx = np.argsort(rep_sds)[::-1]

# Common evaluation grid spanning the observed range of diff
x_grid = np.linspace(np.min(diff), np.max(diff), 400)

fig, axes = plt.subplots(1, 2, figsize=(14, 10), sharex=True)

# Left: replications in their original order
_draw_ridges(
    axes[0], diff, range(n_reps), x_grid,
    "Bias Difference Distributions Across Replications",
    r"Difference ($b_f - b_i$)",
)

# Right: sorted_idx is walked in reverse, so the lowest-SD replication is
# drawn at the bottom and the highest-SD one ends up at the top.
_draw_ridges(
    axes[1], diff, sorted_idx[::-1], x_grid,
    "Sorted by Replication SD\n(High Variance to Low Variance)",
    r"Difference ($b_f - b_i$)",
)

plt.tight_layout()
plt.savefig("outputs/diff_hist.png", dpi=DPI)
plt.show()

## Other Metrics

In [None]:
from scipy.stats import anderson_ksamp

# X: shape (n_reps, n_units), e.g. diff, bf, or bi
X = bf
n_reps, n_units = X.shape

# Each replication (one row of X) is one sample
samples = [X[r, :] for r in range(n_reps)]

# Run k-sample Anderson–Darling test
result = anderson_ksamp(samples)

print("AD statistic:", result.statistic)
# Per the SciPy docs the critical values correspond to significance levels
# 25%, 10%, 5%, 2.5%, 1%, 0.5% and 0.1%.
print("Critical values:", result.critical_values)
# `significance_level` is an approximate p-value given as a fraction (SciPy
# floors/caps it at 0.001 / 0.25), not a percentage, so label it as such.
print("Approximate p-value:", result.significance_level)

In [None]:
# X: shape (n_reps, n_units), e.g. diff, bf, or bi
X = bi
n_reps, n_units = X.shape

# Each replication (one row of X) is one sample
samples = [X[r, :] for r in range(n_reps)]

# Run k-sample Anderson–Darling test
result = anderson_ksamp(samples)

print("AD statistic:", result.statistic)
# Per the SciPy docs the critical values correspond to significance levels
# 25%, 10%, 5%, 2.5%, 1%, 0.5% and 0.1%.
print("Critical values:", result.critical_values)
# `significance_level` is an approximate p-value given as a fraction (SciPy
# floors/caps it at 0.001 / 0.25), not a percentage, so label it as such.
print("Approximate p-value:", result.significance_level)

In [None]:
# X: shape (n_reps, n_units), e.g. diff, bf, or bi
X = diff
n_reps, n_units = X.shape

# Each replication (one row of X) is one sample
samples = [X[r, :] for r in range(n_reps)]

# Run k-sample Anderson–Darling test
result = anderson_ksamp(samples)

print("AD statistic:", result.statistic)
# Per the SciPy docs the critical values correspond to significance levels
# 25%, 10%, 5%, 2.5%, 1%, 0.5% and 0.1%.
print("Critical values:", result.critical_values)
# `significance_level` is an approximate p-value given as a fraction (SciPy
# floors/caps it at 0.001 / 0.25), not a percentage, so label it as such.
print("Approximate p-value:", result.significance_level)

In [None]:
from scipy.stats import ks_2samp
# Two-sample KS comparison of every replication against the pooled values.
X = diff
n_reps, n_units = X.shape

# Pooled empirical distribution: all entries of X flattened together
pooled = X.ravel()

# NOTE(review): the pooled sample contains each replication's own values, so
# the two samples passed to ks_2samp are not independent -- treat the p-values
# as descriptive and confirm whether a leave-one-out pool is wanted.
rows = [
    (r, *ks_2samp(X[r, :], pooled, alternative="two-sided", method="auto"))
    for r in range(n_reps)
]

# One row per replication, largest KS distance first
ks_results = (
    pd.DataFrame(rows, columns=["rep", "KS_distance", "p_value"])
    .sort_values("KS_distance", ascending=False)
    .reset_index(drop=True)
)

ks_results