This script produces the same LRT plot as the one made for ensemble + uncertainty modeling (exact same layout), but for the single NF model. It still needs to be improved to get the plot that Phil wants.

In [None]:
# ======= styled overlap plot (same format as your first script) =======
import os, re, h5py, json, math, time, glob
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
from scipy.stats import norm, chi2
from matplotlib.patches import Rectangle
from pathlib import Path

# --- Style ---
# Global matplotlib appearance shared by every figure created below.
plt.rcParams["font.family"] = "serif"
# NOTE(review): the style sheet is applied *after* the font.family assignment
# above and may reset it -- confirm that the serif default actually survives.
plt.style.use('classic')
# Draw patches (e.g. histogram bars) without an outline.
plt.rcParams['patch.edgecolor'] = 'none'
plt.rcParams['patch.linewidth'] = 0.0

# --- helpers (same as before) ---
def fd_bins(x):
    """Pick a histogram bin count for ``x`` with a Freedman-Diaconis-style rule.

    The bin width is ``2 * IQR / n**(1/3)``; the data range divided by that
    width, rounded up, gives the bin count, which is never allowed below 5.

    Fallbacks: 5 bins when fewer than two values are supplied, and 10 bins
    when the computed bin width is not positive (e.g. zero interquartile
    range).
    """
    values = np.asarray(x, float)
    n = len(values)
    if n < 2:
        return 5

    q75, q25 = np.percentile(values, [75, 25])
    # The tiny epsilon keeps the denominator strictly positive.
    width = 2 * (q75 - q25) / (n ** (1/3) + 1e-12)
    if width <= 0:
        return 10

    span = values.max() - values.min()
    n_bins = int(np.ceil(span / width))
    return n_bins if n_bins > 5 else 5

def _safe_xlim_from_data(t_all, xmin, xmax, pad_frac=0.05):
    t_all = np.asarray(t_all, float)
    dmin, dmax = np.min(t_all), np.max(t_all)
    if not np.isfinite(dmin) or not np.isfinite(dmax) or dmin == dmax:
        return xmin, xmax
    bad = (xmin is None or xmax is None or xmin >= xmax or dmax < xmin or dmin > xmax)
    if bad:
        span = dmax - dmin
        pad  = pad_frac * span if span > 0 else 1.0
        return dmin - pad, dmax + pad
    return xmin, xmax

def _parse_NR_NG_from_path(p):
    # looks for "..._NR50000_NG10000_..."
    m = re.search(r'_NR(\d+)_NG(\d+)', p)
    if m:
        return int(m.group(1)), int(m.group(2))
    return None, None

def Z_score_chi2(t, df):
    """Convert a test statistic into a chi-square p-value and a Gaussian Z.

    Parameters
    ----------
    t : value of the test statistic (cast to float).
    df : degrees of freedom of the reference chi-square (cast to float).

    Returns
    -------
    ``(Z, p)`` where ``p = chi2.sf(t, df)`` is the upper-tail probability and
    ``Z = -norm.ppf(p)`` is the matching one-sided standard-normal quantile.
    """
    p_value = chi2.sf(float(t), float(df))
    return -norm.ppf(p_value), p_value

# allow external override if desired
# When left as None, the plotting code below falls back to mean(t1) for the
# chi-square degrees of freedom.
DF_OVERRIDE = None

# Default figure title; it is bound as the default `title` argument of the
# plotting functions below at the time they are defined.
title='Ensemble (without uncertainty modeling)'

def plot_2distribution_overlap_new(
    t1, t2, output_path, nbins=None, save=True,
    label1='REF (ground truth)', label2='DATA (ensemble)',
    save_name='probability_plot_single_NF_uq',
    print_Zscore=True, ymax=None, show_expectation=True,
    title=title,
    xmin=None, xmax=None, df_override=None
):
    """Overlay two unit-area histograms of test statistics with a stats panel.

    The left axes show the REF (``t1``) and, when non-empty, DATA (``t2``)
    distributions on a common binning, each with per-bin Poisson error bars
    and a legend entry giving the chi-square p-value and asymptotic Z of the
    sample median.  The right-hand panel lists the number of toys, the median
    and standard deviation of each sample and, for DATA, an empirical Z
    obtained from the fraction of REF toys above the DATA median.

    Parameters
    ----------
    t1, t2 : array-like
        Reference and data test statistics.  ``t1`` must be non-empty;
        ``t2`` may be empty, in which case only REF is drawn.
    output_path : str
        Directory that receives ``<save_name>.pdf`` (created if missing).
    nbins : int, optional
        Number of histogram bins; chosen by ``fd_bins`` when ``None``.
    save : bool
        Write the PDF when true.  The figure is closed in either case.
    label1, label2 : str
        Legend / panel labels for REF and DATA.
    save_name : str
        Output file name without extension.
    print_Zscore, show_expectation : bool
        Accepted for call compatibility; not used by this function.
    ymax : float, optional
        Upper y-limit; the lower limit is then fixed at 0.
    title : str
        Axes title; nothing is drawn when falsy.
    xmin, xmax : float, optional
        Requested x-limits, sanitised by ``_safe_xlim_from_data``.
    df_override : float, optional
        Degrees of freedom of the reference chi-square.  Takes precedence
        over the module-level ``DF_OVERRIDE``; when both are ``None`` the
        mean of ``t1`` is used.

    Raises
    ------
    ValueError
        If ``t1`` is empty.
    """
    t1 = np.asarray(t1, float)
    t2 = np.asarray(t2, float)
    if len(t1) == 0:
        raise ValueError("t1 (reference sample) must contain at least one value")
    has_data = len(t2) > 0

    fig = plt.figure(figsize=(12, 9))
    fig.patch.set_facecolor('white')
    ax  = fig.add_axes([0.10, 0.12, 0.62, 0.78])
    axp = fig.add_axes([0.76, 0.12, 0.20, 0.78])
    axp.axis('off')

    # --- common binning for both samples ---
    t_all = np.concatenate([t1, t2]) if has_data else t1
    if nbins is None:
        nbins = fd_bins(t_all)
    lo, hi = np.min(t_all), np.max(t_all)
    if not hi > lo:
        # All values identical: widen the range so the bin width (used as a
        # divisor in the density weights below) cannot be zero.
        lo, hi = lo - 0.5, hi + 0.5
    bins = np.linspace(lo, hi, nbins + 1)
    bw   = (hi - lo) / nbins
    xctr = 0.5 * (bins[1:] + bins[:-1])

    # --- chi-square degrees of freedom: explicit argument wins, then the
    # module-level override, then the mean of the reference sample ---
    if df_override is not None:
        df_local = float(df_override)
    elif DF_OVERRIDE is not None:
        df_local = float(DF_OVERRIDE)
    else:
        df_local = float(np.mean(t1))

    def _draw_sample(sample, label, face_color, marker_color):
        """Histogram one sample as a density with Poisson error bars."""
        median = np.median(sample)
        Z, p = Z_score_chi2(median, df_local)
        # 1.2533 ~ sqrt(pi/2): asymptotic standard error of the median for
        # Gaussian data, in units of std / sqrt(n).
        med_err = 1.2533 * np.std(sample) / np.sqrt(len(sample))
        Z_up, _ = Z_score_chi2(median + med_err, df_local)
        Z_dn, _ = Z_score_chi2(median - med_err, df_local)
        legend = (f'{label}\np-value: {p:.5f}\n'
                  f'asymptotic Z: {Z:.2f} (+{Z_up-Z:.2f}/-{Z-Z_dn:.2f})')
        norm_factor = sample.shape[0] * bw
        density = ax.hist(sample, weights=np.ones_like(sample)/norm_factor,
                          color=face_color, alpha=0.5, bins=bins, label=legend,
                          edgecolor='none', linewidth=0)[0]
        # density = counts / norm_factor, so sqrt(counts) / norm_factor
        # equals sqrt(density / norm_factor).
        ax.errorbar(xctr, density, yerr=np.sqrt(density / norm_factor),
                    color=marker_color, marker='o', ls='', alpha=0.6,
                    markersize=5, capsize=2, elinewidth=0.8)

    # --- REF ---
    _draw_sample(t1, label1, "#e186ed", "#8a2be2")

    # --- DATA ---
    emp_line = ""
    if has_data:
        _draw_sample(t2, label2, "#68aedc", "#004c99")

        # empirical tail vs REF (match other scripts' ±Z convention)
        k = int(np.sum(t1 > np.median(t2)))  # exceedances
        B = int(len(t1))
        # Floor at one toy so the empirical p-value is never exactly zero.
        t_emp = max(k / B, 1.0 / B)

        # match: p_err = p * sqrt( 1 / (k + 1/B) )
        t_emp_err = t_emp * np.sqrt(1.0 / (k + 1.0 / B))

        Z_emp = norm.ppf(1.0 - t_emp)
        # Clamp the shifted p-values into (0, 1) so ppf stays finite.
        Z_emp_m = norm.ppf(1.0 - min(0.999999, max(1e-6, t_emp + t_emp_err)))
        Z_emp_p = norm.ppf(1.0 - min(0.999999, max(1e-6, t_emp - t_emp_err)))

        emp_line = (rf'$\mathrm{{empirical}}\ Z = {Z_emp:.2f}\ '
                    rf'(+{Z_emp_p-Z_emp:.2f}/-{Z_emp-Z_emp_m:.2f})$')

    # --- side panel ---
    panel_fp = font_manager.FontProperties(family='serif', size=20)
    SW, GAP = 0.032, 0.012

    def _header(y, color_hex, text):
        """Draw a colour swatch followed by ``text`` at panel height ``y``."""
        axp.add_patch(Rectangle((0.03, y - SW/2), SW, SW,
                                transform=axp.transAxes, facecolor=color_hex,
                                edgecolor='none', alpha=0.5))
        axp.text(0.03 + SW + GAP, y, text, va='center', ha='left',
                 fontproperties=panel_fp, color='black')

    # Report the smaller sample size when REF and DATA differ in length.
    n_toys = min(len(t1), len(t2)) if has_data else len(t1)
    axp.text(0.03, 0.98, rf'$N_{{\mathrm{{toys}}}} = {n_toys}$',
             va='top', ha='left', fontproperties=panel_fp)

    Y_REF_HDR, Y_REF_TXT  = 0.86, 0.815
    Y_DATA_HDR, Y_DATA_TXT = 0.66, 0.615
    _header(Y_REF_HDR, "#e186ed", label1)
    axp.text(0.03 + 0.036, Y_REF_TXT,
             rf'$\mathrm{{median}} = {np.median(t1):.2f}$, $\mathrm{{std}} = {np.std(t1):.2f}$',
             va='top', ha='left', fontproperties=panel_fp)
    if has_data:
        # Skipped for an empty DATA sample, whose median/std would be NaN.
        _header(Y_DATA_HDR, "#68aedc", label2)
        axp.text(0.03 + 0.036, Y_DATA_TXT,
                 rf'$\mathrm{{median}} = {np.median(t2):.2f}$, $\mathrm{{std}} = {np.std(t2):.2f}$' +
                 (("\n" + emp_line) if emp_line else ""),
                 va='top', ha='left', fontproperties=panel_fp)

    # --- axes/legend ---
    legend_fp = font_manager.FontProperties(family='serif', size=16.5)
    ax.legend(ncol=1, loc='upper right', prop=legend_fp, frameon=False,
              handlelength=1.8, borderpad=0.3, labelspacing=0.3)
    ax.set_xlabel(r'$t$', fontsize=32, fontname="serif")
    ax.set_ylabel('Probability', fontsize=32, fontname="serif")
    if ymax is not None:
        ax.set_ylim(0., ymax)
    xmin, xmax = _safe_xlim_from_data(t_all, xmin, xmax)
    if (xmin is not None) or (xmax is not None):
        ax.set_xlim(xmin if xmin is not None else ax.get_xlim()[0],
                    xmax if xmax is not None else ax.get_xlim()[1])
    ax.tick_params(axis='x', labelsize=22)
    ax.tick_params(axis='y', labelsize=22)
    if title:
        ax.set_title(title, fontsize=30, fontname="serif", pad=14)
    # No plt.tight_layout() here: both axes are positioned by hand with
    # fig.add_axes, which tight_layout cannot manage (it only emits a
    # warning).  Cropping is handled by bbox_inches="tight" on save.

    try:
        if save:
            os.makedirs(output_path, exist_ok=True)
            fn = os.path.join(output_path, save_name + '.pdf')
            fig.savefig(fn, bbox_inches="tight", pad_inches=0.01)
            print("Saved figure to:", fn)
    finally:
        # Always release the figure, even if writing the file fails.
        plt.close(fig)


def plot_overlap_from_h5_styled(ref_h5, data_h5, save_dir, nbins=None,
                                label1='REF (ground truth)', label2='DATA (ensemble)',
                                title=title):
    """Load REF/DATA t-values from two HDF5 files and save the overlap plot.

    In each file the dataset name is whatever follows ``'flksigma'`` in the
    file path, with ``.h5`` removed.  The output file name is
    ``probability_plot_overlap_<tag>.pdf`` where ``<tag>`` joins
    underscore-separated fields 2..6 of the directory containing ``ref_h5``.

    Parameters
    ----------
    ref_h5, data_h5 : str
        Paths of the reference and data HDF5 files.
    save_dir : str
        Directory that receives the PDF.
    nbins : int, optional
        Histogram bin count forwarded to the plotter.
    label1, label2, title : str
        Labels and title forwarded to the plotter.

    Returns
    -------
    str
        Path of the PDF written by ``plot_2distribution_overlap_new``.
    """
    def _read_tvalues(h5_path):
        # Dataset name = text after 'flksigma' in the path, minus '.h5'.
        with h5py.File(h5_path, 'r') as handle:
            dataset = h5_path.split('flksigma')[-1].replace('.h5', '')
            return np.array(handle[dataset])

    t1 = _read_tvalues(ref_h5)
    t2 = _read_tvalues(data_h5)

    # Degrees of freedom: module-level override if set, else mean of REF.
    if DF_OVERRIDE is None:
        df_val = float(np.mean(t1))
    else:
        df_val = float(DF_OVERRIDE)

    # x-range spanning both samples.
    x_lo = float(min(np.min(t1), np.min(t2)))
    x_hi = float(max(np.max(t1), np.max(t2)))

    # Concise file-name tag taken from the run directory name.
    run_dir = os.path.basename(os.path.dirname(ref_h5))
    tag = '_'.join(run_dir.split('_')[2:7])
    save_name = f'probability_plot_overlap_{tag}'

    plot_2distribution_overlap_new(
        t1, t2,
        output_path=save_dir,
        nbins=nbins,
        save=True,
        label1=label1,
        label2=label2,
        save_name=save_name,
        print_Zscore=True,
        ymax=None,
        show_expectation=True,
        title=title,
        xmin=x_lo,
        xmax=x_hi,
        df_override=df_val,
    )

    return os.path.join(save_dir, save_name + '.pdf')


# ======= your model map & loop (unchanged paths) =======
# Map from run key to its pair of t-value files.  Each key equals the NG value
# appearing in the corresponding directory names (NG2000, NG5000, ...).
#   'ref'  : file under .../calibration/...
#   'data' : file under .../comparison/...
models = {
    2000: {
        'ref': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/calibration/seeds_60_4_16_128_15/nplm_ensemble_NR10000_NG2000_M500_lam1e-6_iter1000000_job233054/tvalues_flksigma1.0.h5',
        'data': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/comparison/seeds_60_4_16_128_15/nplm_ensemble_NR10000_NG2000_M500_lam1e-6_iter1000000_job233053/tvalues_flksigma1.0.h5'
    },
    5000: {
        'ref': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/calibration/seeds_60_4_16_128_15/nplm_ensemble_NR25000_NG5000_M700_lam1e-6_iter1000000_job233009/tvalues_flksigma1.0.h5',
        'data': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/comparison/seeds_60_4_16_128_15/nplm_ensemble_NR25000_NG5000_M700_lam1e-6_iter1000000_job233010/tvalues_flksigma1.0.h5'
    },
    10000: {
        'ref': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/calibration/seeds_60_4_16_128_15/nplm_ensemble_NR50000_NG10000_M1000_lam1e-6_iter1000000_job233008/tvalues_flksigma1.0.h5',
        'data': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/comparison/seeds_60_4_16_128_15/nplm_ensemble_NR50000_NG10000_M1000_lam1e-6_iter1000000_job233007/tvalues_flksigma1.0.h5'
    },
    15000: {
        'ref': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/calibration/seeds_60_4_16_128_15/nplm_ensemble_NR75000_NG15000_M1200_lam1e-6_iter1000000_job233005/tvalues_flksigma1.0.h5',
        'data': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/comparison/seeds_60_4_16_128_15/nplm_ensemble_NR75000_NG15000_M1200_lam1e-6_iter1000000_job233006/tvalues_flksigma1.0.h5'
    },
    20000: {
        'ref': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/calibration/seeds_60_4_16_128_15/nplm_ensemble_NR100000_NG20000_M1400_lam1e-6_iter1000000_job233004/tvalues_flksigma1.0.h5',
        'data': '/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/comparison/seeds_60_4_16_128_15/nplm_ensemble_NR100000_NG20000_M1400_lam1e-6_iter1000000_job233002/tvalues_flksigma1.0.h5'
    },
}

# Directory that receives the generated PDF figures (created if missing).
FIG_DIR = "/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs"
os.makedirs(FIG_DIR, exist_ok=True)

# choose ONE
# out_file = plot_overlap_from_h5_styled(models[10000]['ref'], models[10000]['data'], FIG_DIR, nbins=30)

# or ALL
# Produce one overlap figure per entry of `models`, using a fixed 30-bin
# histogram, and report where each PDF was written.
for key, p in models.items():
    out_file = plot_overlap_from_h5_styled(p['ref'], p['data'], FIG_DIR, nbins=30)
    print(f"[{key}] saved: {out_file}")


  plt.tight_layout()


Saved figure to: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR10000_NG2000_M500_lam1e-6_iter1000000.pdf
[2000] saved: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR10000_NG2000_M500_lam1e-6_iter1000000.pdf


  plt.tight_layout()


Saved figure to: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR25000_NG5000_M700_lam1e-6_iter1000000.pdf
[5000] saved: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR25000_NG5000_M700_lam1e-6_iter1000000.pdf


  plt.tight_layout()


Saved figure to: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR50000_NG10000_M1000_lam1e-6_iter1000000.pdf
[10000] saved: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR50000_NG10000_M1000_lam1e-6_iter1000000.pdf


  plt.tight_layout()


Saved figure to: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR75000_NG15000_M1200_lam1e-6_iter1000000.pdf
[15000] saved: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR75000_NG15000_M1200_lam1e-6_iter1000000.pdf
Saved figure to: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR100000_NG20000_M1400_lam1e-6_iter1000000.pdf
[20000] saved: /work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/NPLM/NPLM_NF_ensemble/seeds_60_4_16_128_15_pdfs/probability_plot_overlap_NR100000_NG20000_M1400_lam1e-6_iter1000000.pdf


  plt.tight_layout()
