In [3]:
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.stats.multitest import multipletests

In [4]:
# ------------------------
# Utilities
# ------------------------

def _make_bins(frac, bin_size=0.25):
    # 0-1 inclusive end handling
    idx = np.floor(np.clip(frac - 1e-12, 0, 1) / bin_size).astype(int)
    idx[idx >= int(1/bin_size)] = int(1/bin_size) - 1
    return idx + 1  # bins 1..4

def cliffs_delta(a, b):
    """
    Cliff's delta for two samples, derived from the Mann-Whitney U statistic
    through the identity delta = 2*U/(m*n) - 1, where m = len(a), n = len(b).

    NOTE(review): this relies on scipy reporting U for the first sample `a`
    (the behaviour of current SciPy releases) -- confirm if pinning an old one.
    """
    mw = stats.mannwhitneyu(a, b, alternative="two-sided")
    n_pairs = len(a) * len(b)
    return (2 * mw.statistic) / n_pairs - 1

def common_language_effect(a, b):
    """
    Common-language effect size: the Mann-Whitney U statistic of `a` versus `b`
    divided by the number of (a, b) pairs, used as an estimate of P(A > B).

    Because U is rank based, tied pairs contribute one half each.
    """
    mw = stats.mannwhitneyu(a, b, alternative="two-sided")
    n_pairs = len(a) * len(b)
    return mw.statistic / n_pairs

def cluster_bootstrap_ci(df, group_col, vec1, vec2, stat_func, B=4000, seed=123,
                         bin_col="bin", bins_x=(1, 2, 3), bins_y=(4,)):
    """
    Percentile (2.5%, 97.5%) cluster-bootstrap CI for a two-group contrast.

    Whole clusters (unique values of `group_col`) are resampled with
    replacement.  In every replicate
        x = non-missing `vec1` values of rows whose `bin_col` is in `bins_x`
        y = non-missing `vec2` values of rows whose `bin_col` is in `bins_y`
    and the replicate statistic is stat_func(x, y).

    df        -> DataFrame containing group_col, bin_col, vec1 and vec2.
    stat_func -> callable taking two 1D arrays (x, y) and returning a float
                 (e.g., a quantile difference).
    B, seed   -> number of replicates and seed of the random generator.
    bin_col, bins_x, bins_y -> column and bin labels defining the two groups;
                 the defaults reproduce the original "bins 1-3 vs bin 4" split.

    Returns (ci_low, ci_high).  Replicates in which either group ends up with
    no observations are skipped (stored as NaN and ignored by the percentiles),
    so the result is NaN only if no replicate was usable.

    Raises ValueError if df contains no clusters.
    """
    rng = np.random.default_rng(seed)
    clusters = df[group_col].unique()
    n_clusters = len(clusters)
    if n_clusters == 0:
        raise ValueError("cluster_bootstrap_ci: no clusters to resample")

    # Extract each cluster's x / y values once.  Rebuilding a DataFrame with
    # pd.concat inside the loop costs O(n_clusters * n_rows) per replicate.
    in_x = df[bin_col].isin(list(bins_x))
    in_y = df[bin_col].isin(list(bins_y))
    x_parts, y_parts = [], []
    for g in clusters:
        member = df[group_col] == g
        x_parts.append(df.loc[member & in_x, vec1].dropna().to_numpy())
        y_parts.append(df.loc[member & in_y, vec2].dropna().to_numpy())

    boot = np.full(B, np.nan)
    for b in range(B):
        # Drawing positions instead of labels consumes the generator in the
        # same way, so a given seed still selects the same clusters.
        draw = rng.choice(n_clusters, size=n_clusters, replace=True)
        # Concatenating in draw order keeps x and y in the order the
        # concatenated DataFrame would have produced.
        x = np.concatenate([x_parts[k] for k in draw])
        y = np.concatenate([y_parts[k] for k in draw])
        if x.size and y.size:
            boot[b] = stat_func(x, y)
    return (np.nanpercentile(boot, 2.5), np.nanpercentile(boot, 97.5))

def quantile_diff_func(tau):
    """
    Build a two-sample statistic for the given quantile level.

    tau -> quantile level expressed as a fraction (0.5 = median).
    Returns a function (x, y) -> tau-quantile of x minus tau-quantile of y,
    both computed with np.percentile at tau*100.
    """
    def _shift(x, y):
        pct = tau * 100.0
        q_x = np.percentile(x, pct)
        q_y = np.percentile(y, pct)
        return q_x - q_y
    return _shift

# Simple Dunn pairwise test (pooled ranks, z-scores from mean-rank differences); apply the BH correction outside.
def dunn_pairwise(groups, values):
    """
    Pairwise Dunn-style comparisons on pooled ranks.

    groups: array of group labels (e.g., 1..4)
    values: array of metric values (e.g., NSE)

    Rows with a missing label or value are dropped, all remaining values are
    ranked together, and for each pair of groups the difference in mean rank
    is divided by sqrt(N(N+1)/12 * (1/n_i + 1/n_j)).

    Returns a DataFrame with columns g1, g2, z, p (two-sided, unadjusted),
    one row per pair.  The columns are present even when there are fewer than
    two groups (zero rows), so callers can always index "p".
    No multiple-testing adjustment is applied here.
    """
    columns = ["g1", "g2", "z", "p"]
    df = pd.DataFrame({"g": groups, "x": values}).dropna()
    # rank all observations together
    df["r"] = stats.rankdata(df["x"])
    N = len(df)
    stats_by_g = df.groupby("g").agg(n=("x", "size"), R=("r", "sum"))
    glabels = stats_by_g.index.to_list()

    pairs = []
    for i, gi in enumerate(glabels):
        ni, Ri = stats_by_g.loc[gi, "n"], stats_by_g.loc[gi, "R"]
        Ti = Ri/ni  # mean rank of group i
        for gj in glabels[i+1:]:
            nj, Rj = stats_by_g.loc[gj, "n"], stats_by_g.loc[gj, "R"]
            Tj = Rj/nj
            # Dunn z (no tie correction, usually fine for large N)
            S = np.sqrt((N*(N+1)/12.0)*(1/ni + 1/nj))
            z = (Ti - Tj)/S
            p = 2*stats.norm.sf(abs(z))
            pairs.append({"g1": gi, "g2": gj, "z": z, "p": p})
    # Passing columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(pairs, columns=columns)

In [5]:
# ------------------------
# Main analysis function
# ------------------------

def _cluster_boot_spearman_ci(df, cluster_col, metric, B, seed=42):
    """Percentile 95% cluster-bootstrap CI for Spearman rho of df["bin"] vs df[metric]."""
    rng = np.random.default_rng(seed)
    clusters = df[cluster_col].unique()
    n_clusters = len(clusters)
    # Row positions of every cluster, found once instead of re-filtering the
    # DataFrame for every drawn cluster in every replicate.
    row_blocks = [np.flatnonzero((df[cluster_col] == c).to_numpy()) for c in clusters]
    bin_idx = df["bin"].to_numpy()
    values = df[metric].to_numpy()

    boot = np.empty(B)
    for b in range(B):
        # Drawing positions instead of labels consumes the generator in the
        # same way, so a given seed still selects the same clusters.
        draw = rng.choice(n_clusters, size=n_clusters, replace=True)
        rows = np.concatenate([row_blocks[k] for k in draw])
        boot[b] = stats.spearmanr(bin_idx[rows], values[rows], nan_policy="omit").correlation
    return (np.percentile(boot, 2.5), np.percentile(boot, 97.5))


def analyze_no_flow_effect(
    csv_file,
    metric="NSE",                      # or "F1"
    noflow_col="no_flow_threshold",    # or "no_flow_strict"
    bin_size=0.25,
    alpha=0.05,
    B_boot=4000,
    verbose=False
):
    """
    Test whether `metric` depends on the no-flow fraction in `noflow_col`.

    Parameters
      csv_file   -> CSV with columns "prediction_huc", `metric`, `noflow_col`;
                    rows with a missing value in any of them are dropped.
      metric     -> name of the skill-score column.
      noflow_col -> name of the no-flow-fraction column, binned with
                    _make_bins(bin_size).
                    NOTE(review): the runs in this notebook pass
                    "no_flow_frac_threshold" / "no_flow_frac_strict"; confirm
                    that the default name exists in the CSVs you use.
      bin_size   -> bin width.  The comparisons below are written for bins
                    1-3 versus bin 4, i.e. for bin_size=0.25.
      alpha      -> currently unused; all intervals are 95% percentile CIs.
      B_boot     -> number of cluster-bootstrap replicates (clusters are
                    "prediction_huc" values).
      verbose    -> print a summary of the results.

    Produces:
      - Global Kruskal-Wallis across all bins
      - Spearman rho (bin index vs metric) with cluster-boot CI
      - KW among bins 1-3
      - Dunn pairwise among bins 1-3 (BH-FDR)
      - R(=bins1-3) vs bin4: Brunner–Munzel (one-sided), Cliff's delta, CLES (+ cluster-boot CI for delta)
      - Quantile shift R vs bin4 with cluster-boot CIs for tau in {0.1,0.25,0.5,0.75,0.9}

    Returns a dict with keys "kw_4bins", "spearman_trend", "kw_bins123",
    "dunn_bins123", "bm_R_vs_bin4", "effect_R_vs_bin4", "qshift_R_vs_bin4".
    """
    huc_col = "prediction_huc"
    df = pd.read_csv(csv_file)
    df = df[[huc_col, metric, noflow_col]].dropna().copy()
    df["bin"] = _make_bins(df[noflow_col].to_numpy(), bin_size=bin_size)

    # ---------------- Global: KW across all bins ----------------
    # (metric has no missing values left after the dropna above)
    groups = [g[metric].to_numpy() for _, g in df.groupby("bin")]
    kw_stat, kw_p = stats.kruskal(*groups)

    # ---------------- Trend: Spearman (bin index vs metric) ----------------
    spearman_rho, spearman_p = stats.spearmanr(df["bin"], df[metric], nan_policy="omit")
    rho_ci = _cluster_boot_spearman_ci(df, huc_col, metric, B=B_boot, seed=42)

    # ---------------- Similarity among bins 1-3 ----------------
    in_rest = df["bin"].isin([1, 2, 3])
    df_123 = df[in_rest]
    groups_123 = [g[metric].to_numpy() for _, g in df_123.groupby("bin")]
    kw123_stat, kw123_p = stats.kruskal(*groups_123)

    # Dunn pairwise among 1-3 with BH-FDR
    dunn_123 = dunn_pairwise(df_123["bin"].to_numpy(), df_123[metric].to_numpy())
    dunn_123["q_BH"] = multipletests(dunn_123["p"].to_numpy(), method="fdr_bh")[1]

    # ---------------- Bin4 vs pooled(1-3) ----------------
    R = df_123[metric].to_numpy()
    B4 = df.loc[df["bin"].isin([4]), metric].to_numpy()

    # Brunner–Munzel one-sided: H1: R > B4 (higher NSE better)
    bm = stats.brunnermunzel(R, B4, alternative="greater")

    # Effect sizes
    delta = cliffs_delta(R, B4)
    cles = common_language_effect(R, B4)  # P(R>B4)

    # Cluster-boot CI for delta: cliffs_delta already has the (x, y) -> float
    # signature the helper expects, so it is passed directly.
    delta_ci = cluster_bootstrap_ci(
        df, huc_col, metric, metric, stat_func=cliffs_delta, B=B_boot, seed=202
    )

    # ---------------- Quantile shifts R vs B4 ----------------
    taus = [0.10, 0.25, 0.50, 0.75, 0.90]
    q_rows = []
    for tau in taus:
        shift = quantile_diff_func(tau)
        # One seed per tau keeps each interval reproducible on its own.
        ci = cluster_bootstrap_ci(
            df, huc_col, metric, metric, stat_func=shift,
            B=B_boot, seed=100+int(tau*1000)
        )
        q_rows.append({"tau": tau, "diff": shift(R, B4), "ci_low": ci[0], "ci_high": ci[1]})
    qshift = pd.DataFrame(q_rows)

    if verbose:
        print("KW(4 bins):", kw_stat, kw_p)
        print("Spearman rho (bin vs metric):", spearman_rho, spearman_p, "CI:", rho_ci)
        print("KW(bins 1-3):", kw123_stat, kw123_p)
        print("Brunner–Munzel R>bin4:", bm.statistic, bm.pvalue)
        print("Cliff's delta:", delta, "CI:", delta_ci, "CLES P(R>B4):", cles)
        print(qshift)

    results = {
        "kw_4bins": {"H": kw_stat, "p": kw_p},
        "spearman_trend": {"rho": spearman_rho, "p": spearman_p, "ci95": rho_ci},
        "kw_bins123": {"H": kw123_stat, "p": kw123_p},
        "dunn_bins123": dunn_123,  # has BH q-values
        "bm_R_vs_bin4": {"stat": bm.statistic, "p_one_sided": bm.pvalue},
        "effect_R_vs_bin4": {
            "cliffs_delta": delta, "cliffs_delta_ci95": delta_ci, "cles": cles
        },
        "qshift_R_vs_bin4": qshift
    }
    return results

In [6]:
# Directory holding the Figure02 inputs for the revised statistical testing.
csv_path = '/home/sarth/rootdir/workdir/projects/Paper_Data_Latency/Revised_Statistical_Testing/Figure02'
# Column list noted for this CSV: ,prediction_huc,NSE,F1,no_flow_frac_strict,no_flow_frac_threshold
# Run 1: NSE against the thresholded no-flow definition (Q<1).
res = analyze_no_flow_effect(
    csv_file=f"{csv_path}/continental_with_flow_frac.csv",
    noflow_col="no_flow_frac_threshold",  # the other definition is "no_flow_frac_strict"
    metric="NSE",
    B_boot=1000,
    bin_size=0.25,
    verbose=True,
)
res

KW(4 bins): 2.111993933499434 0.5494901225322859
Spearman rho (bin vs metric): -0.03483813682955301 0.48993666553556203 CI: (np.float64(-0.11976938306674517), np.float64(0.056626563823235436))
KW(bins 1-3): 1.6046961802038595 0.44827513694883125
Brunner–Munzel R>bin4: -0.7825644256125297 0.21719792677786165
Cliff's delta: 0.046097599745668516 CI: (np.float64(-0.07487741412354412), np.float64(0.14868704353150547)) CLES P(R>B4): 0.5230487998728343
    tau      diff    ci_low   ci_high
0  0.10  0.003110 -0.210690  0.124321
1  0.25 -0.014237 -0.065481  0.042009
2  0.50  0.007601 -0.017800  0.044022
3  0.75  0.002787 -0.007537  0.016334
4  0.90  0.003528 -0.007880  0.014747


{'kw_4bins': {'H': np.float64(2.111993933499434),
  'p': np.float64(0.5494901225322859)},
 'spearman_trend': {'rho': np.float64(-0.03483813682955301),
  'p': np.float64(0.48993666553556203),
  'ci95': (np.float64(-0.11976938306674517),
   np.float64(0.056626563823235436))},
 'kw_bins123': {'H': np.float64(1.6046961802038595),
  'p': np.float64(0.44827513694883125)},
 'dunn_bins123':    g1  g2         z         p      q_BH
 0   1   2 -1.251773  0.210653  0.453042
 1   1   3 -0.848127  0.396367  0.453042
 2   2   3  0.750353  0.453042  0.453042,
 'bm_R_vs_bin4': {'stat': np.float64(-0.7825644256125297),
  'p_one_sided': np.float64(0.21719792677786165)},
 'effect_R_vs_bin4': {'cliffs_delta': np.float64(0.046097599745668516),
  'cliffs_delta_ci95': (np.float64(-0.07487741412354412),
   np.float64(0.14868704353150547)),
  'cles': np.float64(0.5230487998728343)},
 'qshift_R_vs_bin4':     tau      diff    ci_low   ci_high
 0  0.10  0.003110 -0.210690  0.124321
 1  0.25 -0.014237 -0.065481  0.

In [7]:
# Directory holding the Figure02 inputs for the revised statistical testing.
csv_path = '/home/sarth/rootdir/workdir/projects/Paper_Data_Latency/Revised_Statistical_Testing/Figure02'
# Column list noted for this CSV: ,prediction_huc,NSE,F1,no_flow_frac_strict,no_flow_frac_threshold
# Run 2: NSE against the strict no-flow definition (column "no_flow_frac_strict").
res1 = analyze_no_flow_effect(
    csv_file=f"{csv_path}/continental_with_flow_frac.csv",
    noflow_col="no_flow_frac_strict",  # the other definition is "no_flow_frac_threshold"
    metric="NSE",
    B_boot=1000,
    bin_size=0.25,
    verbose=True,
)
res1

KW(4 bins): 3.4180091641992476 0.331552778893218
Spearman rho (bin vs metric): -0.01825003719433364 0.717656245679341 CI: (np.float64(-0.10164996426903009), np.float64(0.07710401549947192))
KW(bins 1-3): 0.7378382904337286 0.6914813177620813
Brunner–Munzel R>bin4: -1.3958093631984319 0.09115532279541295
Cliff's delta: 0.24142480211081785 CI: (np.float64(-0.016483423300920716), np.float64(0.5258267974284107)) CLES P(R>B4): 0.6207124010554089
    tau      diff    ci_low   ci_high
0  0.10  0.169384 -0.088020  7.261137
1  0.25  0.257701  0.002260  0.464755
2  0.50  0.109388  0.016869  0.392400
3  0.75  0.005137 -0.026755  0.138015
4  0.90 -0.000861 -0.013788  0.064100


{'kw_4bins': {'H': np.float64(3.4180091641992476),
  'p': np.float64(0.331552778893218)},
 'spearman_trend': {'rho': np.float64(-0.01825003719433364),
  'p': np.float64(0.717656245679341),
  'ci95': (np.float64(-0.10164996426903009), np.float64(0.07710401549947192))},
 'kw_bins123': {'H': np.float64(0.7378382904337286),
  'p': np.float64(0.6914813177620813)},
 'dunn_bins123':    g1  g2         z         p      q_BH
 0   1   2 -0.858825  0.390437  0.955265
 1   1   3 -0.026964  0.978489  0.978489
 2   2   3  0.472117  0.636843  0.955265,
 'bm_R_vs_bin4': {'stat': np.float64(-1.3958093631984319),
  'p_one_sided': np.float64(0.09115532279541295)},
 'effect_R_vs_bin4': {'cliffs_delta': np.float64(0.24142480211081785),
  'cliffs_delta_ci95': (np.float64(-0.016483423300920716),
   np.float64(0.5258267974284107)),
  'cles': np.float64(0.6207124010554089)},
 'qshift_R_vs_bin4':     tau      diff    ci_low   ci_high
 0  0.10  0.169384 -0.088020  7.261137
 1  0.25  0.257701  0.002260  0.464755
 