
# Aggregate LRT JSON → Chi-square Comparison (REF vs DATA)

This notebook scans your output tree for `lrt_outputs.json` under both:
- `calibration/toy_*/lrt_outputs.json` (REF)
- `comparison/toy_*/lrt_outputs.json` (DATA)

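It builds the two distributions from the `test` field of each file (a list-valued `test` is summed; toys whose value is missing or non-finite are dropped), overlays them as normalized histograms, and reports the empirical p-value and Z-score of the DATA median with respect to the REF toys.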


In [5]:

import os, glob, json, math
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
from scipy.stats import norm, chi2, beta
import re
from pathlib import Path
# --- add this import near your others ---
from scipy.stats import beta  # for Clopper–Pearson interval


# --- Style to match your example ---
# The style sheet must be applied FIRST: 'classic' defines its own font.family,
# so setting the serif family before plt.style.use() would be overwritten.
plt.style.use('classic')
plt.rcParams["font.family"] = "serif"

# --- User config ---
# NOTE(review): absolute, user-specific path; edit this when running elsewhere.
BASE_DIR = "/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/LRT_with_unc/results/N_100000_dim_2_seeds_60_4_16_128_15_toys_100_N_sampled_100k_20_kernels_with_unc"    # <- set this to the base folder containing calibration/ and comparison/
FIG_DIR  = os.path.join(BASE_DIR, "plots")
os.makedirs(FIG_DIR, exist_ok=True)  # side effect: creates <BASE_DIR>/plots

# Defaults mirroring your example
NBINS   = 30      # number of histogram bins passed to the plot function
XMIN    = 0       # requested lower x-limit
XMAX    = 300     # requested upper x-limit
YMAX    = None    # None -> leave the y-range to matplotlib
PRINT_Z = True    # forwarded as print_Zscore to the plot function


In [6]:
def _as_scalar_test(x):
    """Convert JSON 'test' to a float. If list or array, sum it."""
    if isinstance(x, (int, float)):
        return float(x)
    if isinstance(x, (list, tuple, np.ndarray)):
        if len(x) == 0:
            return float("nan")
        try:
            return float(np.nansum(np.asarray(x, dtype=float)))
        except Exception:
            return float("nan")
    return float("nan")


def _collect_mode_tests(base_dir, mode):
    """Return a 1D numpy array of toy-level test values for a mode."""
    patt = os.path.join(base_dir, mode, "toy_*", "lrt_outputs.json")
    tests = []
    for fp in sorted(glob.glob(patt)):
        try:
            with open(fp, "r") as f:
                obj = json.load(f)
            t = _as_scalar_test(obj.get("test"))
            if np.isfinite(t):
                tests.append(t)
        except Exception:
            pass
    return np.asarray(tests, dtype=float)


In [7]:
import os, numpy as np, matplotlib.pyplot as plt
from matplotlib import font_manager
from scipy.stats import norm


# Draw filled patches (histogram bars) without an outline by default.
# NOTE(review): a later plt.style.use(...) call resets rcParams it defines,
# so these defaults may not survive it; confirm they are still needed.
plt.rcParams.update({
    'patch.edgecolor': 'none',
    'patch.linewidth': 0.0,
})

from scipy.stats import norm, chi2

# --- helper: empirical Z (one-sided) with Clopper–Pearson interval ---
def empirical_Z_from_toys(T_null, T_obs, alternative="greater", alpha=0.32):
    """Empirical Z-score of ``T_obs`` against a sample of null toys.

    The p-value is the add-one estimate ``(k + 1) / (B + 1)``, where ``B`` is
    the number of null toys and ``k`` the number of toys at least as extreme
    as ``T_obs``. Its uncertainty is the exact (Clopper–Pearson) binomial
    interval at confidence level ``1 - alpha``, mapped to Z-score offsets.

    Parameters
    ----------
    T_null : array-like
        Null (reference) test statistics; must contain at least one value.
    T_obs : float
        Observed test statistic.
    alternative : str
        "greater" counts toys ``>= T_obs``; "less" counts toys ``<= T_obs``;
        any other value uses a two-sided count obtained by folding around the
        median of ``T_null``.
    alpha : float
        Total tail probability left outside the interval (0.32 ~ 1 sigma).

    Returns
    -------
    (Z, Z_plus, Z_minus, p_hat) : tuple of float
        ``Z`` is the one-sided Gaussian significance of ``p_hat``; ``Z_plus``
        and ``Z_minus`` are the upward / downward offsets from the interval.

    Raises
    ------
    ValueError
        If ``T_null`` is empty (no p-value can be estimated).
    """
    T_null = np.asarray(T_null, float)
    B = T_null.size
    if B == 0:
        raise ValueError("empirical_Z_from_toys: T_null is empty; at least one null toy is required")

    if alternative == "greater":
        k = int(np.count_nonzero(T_null >= T_obs))
    elif alternative == "less":
        k = int(np.count_nonzero(T_null <= T_obs))
    else:  # two-sided via folding around the null median
        T0 = float(np.median(T_null))
        k = int(np.count_nonzero(np.abs(T_null - T0) >= np.abs(T_obs - T0)))

    # add-one estimate: never 0, but equals 1 when k == B
    p_hat = (k + 1) / (B + 1)

    # exact CP interval for Binomial(k|B,p)
    if k == 0:
        p_lo = 0.0
        p_hi = beta.ppf(1 - alpha/2, 1, B)
    elif k == B:
        p_lo = beta.ppf(alpha/2, B, 1)
        p_hi = 1.0
    else:
        p_lo = beta.ppf(alpha/2, k, B - k + 1)
        p_hi = beta.ppf(1 - alpha/2, k + 1, B - k)

    eps = 1e-16

    def _p_to_z(p):
        # Clip to the open interval (0, 1) so that every Z is finite, also for
        # p_hat == 1 (k == B). norm.isf(p) is the survival-function inverse,
        # i.e. norm.ppf(1 - p) without the precision loss of forming 1 - p.
        return float(norm.isf(min(max(p, eps), 1.0 - eps)))

    # map p and its interval to Z (one-sided)
    Z       = _p_to_z(p_hat)
    Z_plus  = _p_to_z(p_lo) - Z
    Z_minus = Z - _p_to_z(p_hi)
    return Z, Z_plus, Z_minus, p_hat


In [8]:

def fd_bins(x):
    """Number of histogram bins according to the Freedman–Diaconis rule.

    The bin width is ``h = 2 * IQR / n**(1/3)`` and the bin count is
    ``ceil((max - min) / h)``, never less than 5.

    Parameters
    ----------
    x : array-like
        Sample values. Non-finite entries (NaN, +/-inf) are ignored; they
        would otherwise make the bin width NaN and the integer conversion
        raise.

    Returns
    -------
    int
        5 when fewer than two finite values are available, 10 when the
        inter-quartile range is zero, otherwise ``max(5, FD bin count)``.
    """
    x = np.asarray(x, float).ravel()
    x = x[np.isfinite(x)]
    n = x.size
    if n < 2:
        return 5
    q75, q25 = np.percentile(x, [75, 25])
    # the small constant only guards the division
    h = 2 * (q75 - q25) / (n ** (1/3) + 1e-12)
    if h <= 0:
        return 10
    nb = int(np.ceil((x.max() - x.min()) / h))
    return max(5, nb)

def _safe_xlim_from_data(t_all, xmin, xmax, pad_frac=0.05):
    """If provided x-lims don't cover the data, replace with padded data range."""
    t_all = np.asarray(t_all, float)
    dmin, dmax = np.min(t_all), np.max(t_all)
    if not np.isfinite(dmin) or not np.isfinite(dmax) or dmin == dmax:
        return xmin, xmax  # nothing to do
    # if user limits are None, or exclude the data range, recompute
    bad = (xmin is None or xmax is None or xmin >= xmax or dmax < xmin or dmin > xmax)
    if bad:
        span = dmax - dmin
        pad  = pad_frac * span if span > 0 else 1.0
        return dmin - pad, dmax + pad
    return xmin, xmax


# ===== REF vs DATA overlay plot (main histogram + side panel) =====
def plot_2distribution_overlap_new(
    t1, t2, output_path, nbins=None, save=True,
    label1='REF (ensemble)', label2='DATA (ground truth)',
    save_name='probability_plot_ensemble_uq',
    print_Zscore=True, ymax=None, show_expectation=True,
    title='Ensemble with uncertainty modeling',
    xmin=None, xmax=None
):
    """Overlay the normalized distributions of two sets of test statistics.

    The main axis shows density histograms of ``t1`` (REF) and ``t2`` (DATA)
    on common bins, with per-bin error bars. A side panel lists the number of
    toys, the median/std of each sample and, when ``t2`` is non-empty, the
    empirical p-value and Z-score of ``median(t2)`` with respect to ``t1``
    (see ``empirical_Z_from_toys``).

    Parameters
    ----------
    t1, t2 : array-like
        REF and DATA test statistics. ``t1`` must be non-empty; ``t2`` may be
        empty, in which case only the REF sample is drawn.
    output_path : str
        Folder for the output files (created if missing when ``save`` is True).
    nbins : int or None
        Number of bins; ``None`` uses ``fd_bins`` on the pooled sample.
    save : bool
        Write ``<save_name>.pdf`` and ``<save_name>.png`` into ``output_path``.
    label1, label2 : str
        Legend / side-panel labels of the two samples.
    save_name : str
        Output file name without extension.
    print_Zscore : bool
        Show the p-value / empirical-Z text in the side panel.
    ymax : float or None
        Upper y-limit; ``None`` leaves it to matplotlib.
    show_expectation : bool
        Accepted for interface compatibility; currently has no effect.
    title : str
        Title of the main axis (skipped when empty).
    xmin, xmax : float or None
        Requested x-limits, sanitized through ``_safe_xlim_from_data``.

    Raises
    ------
    ValueError
        If ``t1`` is empty.

    Notes
    -----
    Applies the 'classic' matplotlib style and the serif font family to the
    global rcParams. The figure is closed before returning; nothing is
    returned.
    """
    from matplotlib.patches import Rectangle

    t1 = np.asarray(t1, float)
    t2 = np.asarray(t2, float)
    if t1.size == 0:
        raise ValueError("plot_2distribution_overlap_new: t1 (REF sample) is empty; nothing to plot")
    has_data = len(t2) > 0

    # --- figure: main axis + side panel ------------------------------------
    # Style first, font second: the 'classic' sheet sets font.family itself and
    # would otherwise override the serif choice.
    plt.style.use('classic')
    plt.rcParams["font.family"] = "serif"
    fig = plt.figure(figsize=(12, 9))
    fig.patch.set_facecolor('white')
    ax  = fig.add_axes([0.10, 0.12, 0.62, 0.78])   # main plot (left)
    axp = fig.add_axes([0.76, 0.12, 0.20, 0.78])   # side panel (right)
    axp.axis('off')

    # --- bins ---------------------------------------------------------------
    t_all = np.concatenate([t1, t2]) if has_data else t1
    if nbins is None:
        nbins = fd_bins(t_all)
    lo, hi = float(np.min(t_all)), float(np.max(t_all))
    if hi <= lo:
        # all values identical: widen the range so the bin width is not zero
        lo, hi = lo - 0.5, hi + 0.5
    bins = np.linspace(lo, hi, nbins + 1)
    binswidth = (hi - lo) / nbins
    xcenters = 0.5 * (bins[1:] + bins[:-1])

    def _density_hist(sample, face_color, marker_color, label):
        """Draw one density histogram with sqrt(N)-based error bars."""
        norm_factor = sample.shape[0] * binswidth
        density = ax.hist(
            sample,
            weights=np.ones_like(sample) / norm_factor,
            color=face_color, alpha=0.5, bins=bins, label=label,
            edgecolor='none', linewidth=0
        )[0]
        # density = counts / norm_factor, so sqrt(counts) / norm_factor
        # equals sqrt(density / norm_factor)
        err = np.sqrt(density / norm_factor)
        ax.errorbar(xcenters, density, yerr=err,
                    color=marker_color, marker='o', ls='', alpha=0.6,
                    markersize=5, capsize=2, elinewidth=0.8)

    # ========================== REF (ensemble) ==============================
    _density_hist(t1, "#e186ed", "#8a2be2", label1)

    # ========================== DATA (ground truth) =========================
    p_emp = np.nan
    Z_emp_val = Z_emp_p = Z_emp_m = np.nan
    if has_data:
        _density_hist(t2, "#68aedc", "#004c99", label2)

        # Empirical Z computed against T_obs = median(DATA)
        T_obs = float(np.median(t2))
        Z_emp_val, Z_emp_p, Z_emp_m, p_emp = empirical_Z_from_toys(
            t1, T_obs, alternative="greater", alpha=0.32
        )

    # ============================ side panel ================================
    panel_fp = font_manager.FontProperties(family='serif', size=20)

    # N_toys on top (the smaller sample size when the two differ)
    if has_data and len(t1) != len(t2):
        n_toys_shown = min(len(t1), len(t2))
    else:
        n_toys_shown = len(t1)
    axp.text(0.03, 0.98,
             rf'$N_{{\mathrm{{toys}}}} = {n_toys_shown}$',
             va='top', ha='left', fontproperties=panel_fp)

    # layout constants, in axes-fraction coordinates of the side panel
    Y_REF_HDR  = 0.86
    Y_REF_TXT  = 0.815
    Y_DATA_HDR = 0.66
    Y_DATA_TXT = 0.615
    DY = 0.075  # vertical step below DATA median for Z/p block

    SW = 0.032   # side length of the colour swatch
    GAP = 0.012  # gap between swatch and header text

    def _header(y, color_hex, text):
        """Draw a colour swatch followed by the sample label at height y."""
        axp.add_patch(Rectangle((0.03, y - SW/2), SW, SW,
                                transform=axp.transAxes,
                                facecolor=color_hex, edgecolor='none', alpha=0.5))
        axp.text(0.03 + SW + GAP, y, text,
                 va='center', ha='left', fontproperties=panel_fp, color='black')

    # REF block: median/std in right panel
    _header(Y_REF_HDR, "#e186ed", label1)
    axp.text(0.03 + 0.036, Y_REF_TXT,
             rf'$\mathrm{{median}} = {np.median(t1):.2f}$, $\mathrm{{std}} = {np.std(t1):.2f}$',
             va='top', ha='left', fontproperties=panel_fp)

    # DATA block: median/std + empirical Z (and p) right below
    if has_data:
        _header(Y_DATA_HDR, "#68aedc", label2)
        axp.text(0.03 + 0.036, Y_DATA_TXT,
                 rf'$\mathrm{{median}} = {np.median(t2):.2f}$, $\mathrm{{std}} = {np.std(t2):.2f}$',
                 va='top', ha='left', fontproperties=panel_fp)

        if print_Zscore:
            axp.text(
                0.03 + 0.036, Y_DATA_TXT - DY,
                rf'p-value = {p_emp:.5f}' + '\n' +           # outside math -> roman
                rf'empirical $Z = {Z_emp_val:.2f}\,^{{+{Z_emp_p:.2f}}}_{{-{Z_emp_m:.2f}}}$',
                va='top', ha='left', fontproperties=panel_fp
            )

    # ============================ styling ==================================
    legend_fp = font_manager.FontProperties(family='serif', size=16.5)
    ax.legend(ncol=1, loc='upper right', prop=legend_fp, frameon=False,
              handlelength=1.8, borderpad=0.3, labelspacing=0.3)

    ax.set_xlabel(r'$t$', fontsize=32, fontname="serif")
    ax.set_ylabel('Probability', fontsize=32, fontname="serif")

    if ymax is not None:
        ax.set_ylim(0., ymax)
    xmin, xmax = _safe_xlim_from_data(t_all, xmin, xmax)
    if (xmin is not None) or (xmax is not None):
        ax.set_xlim(xmin if xmin is not None else ax.get_xlim()[0],
                    xmax if xmax is not None else ax.get_xlim()[1])

    ax.tick_params(axis='x', labelsize=22)
    ax.tick_params(axis='y', labelsize=22)

    if title:
        ax.set_title(title, fontsize=30, fontname="serif", pad=14)

    # No plt.tight_layout() here: both axes are placed manually with
    # fig.add_axes, which tight_layout cannot manage (it only warns).
    if save:
        os.makedirs(output_path, exist_ok=True)
        base = os.path.join(output_path, save_name)

        # vector (keep it)
        fig.savefig(base + ".pdf", bbox_inches="tight", pad_inches=0)

        # high-resolution raster for photos/screens
        fig.savefig(
            base + ".png",
            dpi=1200,                 # 900–1200 is ideal for posters
            bbox_inches="tight",
            pad_inches=0,             # no extra white border in photos
            transparent=False,
            facecolor="white",
        )

        print("Saved:", base + ".pdf")
        print("Saved:", base + ".png")

    plt.close(fig)

In [9]:
if __name__ == "__main__":
    # Load the toy-level test statistics of both modes.
    t_ref = _collect_mode_tests(BASE_DIR, "calibration")
    t_data = _collect_mode_tests(BASE_DIR, "comparison")

    # Fail early with a readable message instead of an opaque numpy error
    # from inside the plotting code (typical cause: wrong BASE_DIR).
    if t_ref.size == 0:
        raise FileNotFoundError(
            "No usable calibration/toy_*/lrt_outputs.json found under " + BASE_DIR
        )

    os.makedirs(FIG_DIR, exist_ok=True)

    plot_2distribution_overlap_new(
        t_ref, t_data,
        output_path=FIG_DIR,
        xmin=XMIN, xmax=XMAX, ymax=YMAX, nbins=NBINS,
        save=True,
        label1='REF (ensemble)',
        label2='DATA (target distribution samples)',
        save_name='probability_plot_ensemble_uq',
        print_Zscore=PRINT_Z,
        title='Ensemble with uncertainty modeling',
    )



  plt.tight_layout()


Saved: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/LRT_with_unc/results/N_100000_dim_2_seeds_60_4_16_128_15_toys_100_N_sampled_100k_20_kernels_with_unc/plots/probability_plot_ensemble_uq.pdf
Saved: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/LRT_with_unc/results/N_100000_dim_2_seeds_60_4_16_128_15_toys_100_N_sampled_100k_20_kernels_with_unc/plots/probability_plot_ensemble_uq.png


In [16]:
import glob, os

# List every calibration (REF) JSON on disk, before any validity filtering,
# so the raw file count can be compared with the number of usable toys.
calib_files = glob.glob(
    os.path.join(BASE_DIR, "calibration", "toy_*", "lrt_outputs.json")
)
calib_files.sort()
print("Found calibration JSON files:", len(calib_files))

Found calibration JSON files: 100


In [17]:
# Split the calibration files into usable test values and bad files.
# The conversion goes through _as_scalar_test so this check applies exactly
# the rule used when the distributions are built; the former inline
# np.nansum(...) turned a missing key or an empty list into 0.0 and counted
# it as a good value.
tests = []       # finite test statistics, one per usable toy
bad_files = []   # paths whose 'test' entry is missing or non-finite
for fp in calib_files:
    with open(fp, "r") as f:
        obj = json.load(f)
    val = _as_scalar_test(obj.get("test"))
    if np.isfinite(val):
        tests.append(val)
    else:
        bad_files.append(fp)

print("Total good values:", len(tests))
print("Bad/missing values:", len(bad_files))
for bf in bad_files:
    print("Bad file:", bf)

Total good values: 99
Bad/missing values: 1
Bad file: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/LRT_with_unc/results/N_100000_dim_2_seeds_60_4_16_128_15_toys_100_N_sampled_100k_20_kernels_with_unc/calibration/toy_82/lrt_outputs.json


In [18]:
# Rebuild the REF array from the values collected by the diagnostic loop.
# NOTE: this rebinds the notebook-level name `t_ref`.
t_ref = np.asarray(tests, dtype=float)
print("Final REF length:", t_ref.shape[0])

Final REF length: 99


In [None]:
import re, json, numpy as np, os, glob

# Re-list the calibration JSON files and start a fresh bad-file record.
# In this cell `bad_files` holds (path, parsed JSON) pairs rather than bare paths.
calib_files = glob.glob(
    os.path.join(BASE_DIR, "calibration", "toy_*", "lrt_outputs.json")
)
calib_files.sort()
bad_files = list()

def to_float_or_nan(t):
    """Reduce a JSON ``test`` payload to a float, or NaN when unusable.

    Thin alias of ``_as_scalar_test``: the body used to be a copy of that
    helper, and delegating keeps this diagnostic from drifting away from the
    conversion used to build the plotted distributions.

    Parameters
    ----------
    t : object
        Raw value stored under the ``"test"`` key.

    Returns
    -------
    float
        Whatever ``_as_scalar_test(t)`` returns.
    """
    return _as_scalar_test(t)

# Keep every file whose 'test' entry does not reduce to a finite number,
# together with its parsed content for inspection below.
for fp in calib_files:
    with open(fp, "r") as f:
        obj = json.load(f)
    val = to_float_or_nan(obj.get("test"))
    if np.isfinite(val):
        continue
    bad_files.append((fp, obj))

print("Bad count:", len(bad_files))

# One short report per bad file: path, toy id parsed from the path, keys, raw value.
for fp, obj in bad_files:
    m = re.search(r"(toy_\d+)", fp)
    if m:
        toy_id = m.group(1)
    else:
        toy_id = "toy_???"
    print("\nBAD:", fp)
    print("Toy id:", toy_id)
    print("JSON keys:", list(obj.keys()))
    print("test raw value:", obj.get("test"))

Bad count: 1

BAD: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/LRT_with_unc/results/N_100000_dim_2_seeds_60_4_16_128_15_toys_100_N_sampled_100k_20_kernels_no_softmax/calibration/toy_82/lrt_outputs.json
Toy id: toy_82
JSON keys: ['num', 'den', 'test']
test raw value: nan
