In [None]:
# Move two directories up (presumably to the repository root -- confirm) so the
# relative `ml_hep_sim/...` paths used below resolve.
%cd ../../

In [None]:
# Common path prefix for every figure written by this notebook; a tag that
# depends on `use_class` is appended to it further down.
saved = "ml_hep_sim/analysis/results/cls/"

$q_0$ is used to test $\mu=0$ in a class of models where we assume $\mu \geq 0$. Rejecting the $\mu=0$ hypothesis leads to a discovery of a new signal.

# Get CLs pipeline and fit

In [None]:
from ml_hep_sim.analysis.cls_pipeline import get_cls_pipeline

import matplotlib.pyplot as plt
from ml_hep_sim.plotting.style import style_setup, set_size

import numpy as np

from ml_hep_sim.pipeline.pipeline_loggers import setup_logger

# Logger that is handed to the CLs pipeline below.
logger = setup_logger(log_name="cls", log_path="ml_pipeline/")

# Project plotting helpers; presumably they set the global matplotlib figure
# size and style -- confirm in ml_hep_sim.plotting.style.
set_size()
style_setup(seaborn_pallete=True)

In [None]:
# Notebook configuration.
use_class = False  # selects the classifier branch of the pipeline when True
pts = 40  # passed to the pipeline as `pts`; also sets the grid size used further down

# The binning range handed to the pipeline depends on the `use_class` switch.
bin_range = (0.5, 1.1) if use_class else (0.01, 3.0)

cls_pipeline = get_cls_pipeline(
    pts=pts,
    lumi=100,
    use_classifier=use_class,
    bin_range=bin_range,
    N_gen=10**6,
    logger=logger,
    scale_by_alpha=False,
)

In [None]:
# Append a tag to the output prefix depending on `use_class`.
# Kept idempotent on purpose: a plain `saved += ...` stacks another tag onto
# the prefix every time the cell is re-run (or re-run after flipping
# `use_class`), which silently redirects all figures to a wrong file name.
tag = "class_" if use_class else "mbb_"
for known_tag in ("class_", "mbb_"):
    if saved.endswith(known_tag):
        # Drop the tag left behind by a previous execution of this cell.
        saved = saved[: -len(known_tag)]
        break
saved += tag

In [None]:
# Display the final output prefix used by the `plt.savefig` calls below.
saved

In [None]:
# Run the pipeline; the returned object exposes the individual stages via
# `res.pipes`, which the next cell reads.
res = cls_pipeline.fit()

In [None]:
# Results live on the last pipe; pull out the two tables ("mc_res" and
# "ml_res") that every plot below is built from.
parsed_res = res.pipes[-1].parsed_results
mc_res, ml_res = (parsed_res[key] for key in ("mc_res", "ml_res"))

In [None]:
# Inspect the ML results (presumably a pandas DataFrame -- confirm); the cells
# below read its sig_frac, bkg_err, p_sb, p_b, p_s and teststat columns.
ml_res

In [None]:
# Hexbin map of the ML `p_sb` values over the (signal fraction, sys. error)
# plane. Both axes are scaled by 100 so they read in percent.
x = np.array([float(i) * 100 for i in ml_res["sig_frac"].values])
y = np.array([float(i) * 100 for i in ml_res["bkg_err"].values])
z = np.array([float(i) for i in ml_res["p_sb"].values])

# Raw strings: "\%" is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The text passed to matplotlib is unchanged.
plt.xlabel(r"signal fraction \%")
plt.ylabel(r"sys. error \%")

plt.hexbin(x, y, z, gridsize=29)
plt.colorbar(label="$p$-value")
plt.tight_layout()

plt.savefig(saved + "hexbin_clsb_ml.pdf")

In [None]:
# Hexbin map of the ML `p_s` values over the (signal fraction, sys. error)
# plane. Both axes are scaled by 100 so they read in percent.
x = np.array([float(i) * 100 for i in ml_res["sig_frac"].values])
y = np.array([float(i) * 100 for i in ml_res["bkg_err"].values])
z = np.array([float(i) for i in ml_res["p_s"].values])

# Raw strings: "\%" is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The text passed to matplotlib is unchanged.
plt.xlabel(r"signal fraction \%")
plt.ylabel(r"sys. error \%")

plt.hexbin(x, y, z, gridsize=29)
plt.colorbar(label="$p$-value")
plt.tight_layout()

plt.savefig(saved + "hexbin_cls_ml.pdf")

In [None]:
# Hexbin map of the MC `p_sb` values over the (signal fraction, sys. error)
# plane. Both axes are scaled by 100 so they read in percent.
# The coordinates are taken from `mc_res` itself: pairing MC p-values with the
# ML table's coordinates is only correct if both tables share the same row
# order, which nothing in this notebook guarantees.
x = np.array([float(i) * 100 for i in mc_res["sig_frac"].values])
y = np.array([float(i) * 100 for i in mc_res["bkg_err"].values])
z = np.array([float(i) for i in mc_res["p_sb"].values])

# Raw strings: "\%" is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The text passed to matplotlib is unchanged.
plt.xlabel(r"signal fraction \%")
plt.ylabel(r"sys. error \%")

plt.hexbin(x, y, z, gridsize=29)
plt.colorbar(label="$p$-value")
plt.tight_layout()

plt.savefig(saved + "hexbin_clsb_mc.pdf")

In [None]:
# Hexbin map of the MC `p_s` values over the (signal fraction, sys. error)
# plane. Both axes are scaled by 100 so they read in percent.
# The coordinates are taken from `mc_res` itself: pairing MC p-values with the
# ML table's coordinates is only correct if both tables share the same row
# order, which nothing in this notebook guarantees.
x = np.array([float(i) * 100 for i in mc_res["sig_frac"].values])
y = np.array([float(i) * 100 for i in mc_res["bkg_err"].values])
z = np.array([float(i) for i in mc_res["p_s"].values])

# Raw strings: "\%" is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The text passed to matplotlib is unchanged.
plt.xlabel(r"signal fraction \%")
plt.ylabel(r"sys. error \%")

plt.hexbin(x, y, z, gridsize=29)
plt.colorbar(label="$p$-value")
plt.tight_layout()

plt.savefig(saved + "hexbin_cls_mc.pdf")

In [None]:
# Hexbin map of the ML test statistic (`teststat`, labelled q_0) over the
# (signal fraction, sys. error) plane. Both axes are scaled by 100 so they
# read in percent.
x = np.array([float(i) * 100 for i in ml_res["sig_frac"].values])
y = np.array([float(i) * 100 for i in ml_res["bkg_err"].values])
z = np.array([float(i) for i in ml_res["teststat"].values])

# Raw strings: "\%" is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The text passed to matplotlib is unchanged.
plt.xlabel(r"signal fraction \%")
plt.ylabel(r"sys. error \%")

plt.hexbin(x, y, z, gridsize=29)
plt.colorbar(label="$q_0$")
plt.tight_layout()

plt.savefig(saved + "hexbin_q0_ml.pdf")

# Constant signal fraction

In [None]:
# Scan grids used to slice the result tables below: `pts` evenly spaced values
# from 0.01 to 0.1 for each axis (two independent arrays).
# NOTE(review): the exact `==` lookups further down assume these match the
# values stored by the pipeline -- confirm against get_cls_pipeline.
sig_fracs, bkg_errs = (np.linspace(0.01, 0.1, pts) for _ in range(2))

In [None]:
# Display both scan grids for a quick visual check.
sig_fracs, bkg_errs

In [None]:
# ML test statistic versus signal fraction for three fixed systematic errors:
# the first grid value, the one at index 18, and the last one.
# NOTE: the hard-coded index 18 requires pts > 18.
for e in [bkg_errs[0], bkg_errs[18], bkg_errs[-1]]:
    ml_ = ml_res[ml_res["bkg_err"] == e]
    # Raw (f-)strings: "\s", "\m" and "\%" are not valid Python escapes, so plain
    # literals trigger invalid-escape warnings. The rendered label is unchanged.
    plt.scatter(sig_fracs, ml_["teststat"], label=r'$\sigma_\mathrm{sys}=$' + rf"{100*e:.1f}\%")

plt.xlabel(r"$\alpha$")
plt.ylabel("$q_0$ ML")
plt.legend()
plt.tight_layout()

plt.savefig(saved + "q0_vs_sigfrac.pdf")

In [None]:
# ML test statistic versus systematic error, one curve for each of the first
# ten signal fractions of the grid.
for sf in sig_fracs[:10]:
    ml_ = ml_res[ml_res["sig_frac"] == sf]
    # Raw f-string: "\%" is not a valid Python escape, so a plain literal
    # triggers an invalid-escape warning. The rendered label is unchanged.
    plt.plot(bkg_errs, ml_["teststat"], label=r'$\alpha=$' + rf'{100*sf:.1f}\%', lw=3)

plt.legend(ncol=1, loc='upper left')
plt.xlim([0, 0.105])
plt.xlabel(r"sys. error")
plt.ylabel("$q_0$ ML")
plt.tight_layout()

plt.savefig(saved + "q0_vs_syserr.pdf")

In [None]:
# One figure per signal fraction: p-values as a function of the systematic
# error (in percent). ML results are drawn solid, MC results dashed, with
# matching colours for p_sb / p_b / p_s.
for i, f in enumerate(sig_fracs):
    ml_ = ml_res[ml_res["sig_frac"] == f]
    mc_ = mc_res[mc_res["sig_frac"] == f]

    plt.plot(ml_["bkg_err"] * 100, ml_["p_sb"], c="C0", lw=3)
    plt.plot(ml_["bkg_err"] * 100, ml_["p_b"], c="C1", lw=3)
    plt.plot(ml_["bkg_err"] * 100, ml_["p_s"], c="C2", lw=3)

    plt.plot(mc_["bkg_err"] * 100, mc_["p_sb"], ls='--', c='C0', lw=3)
    plt.plot(mc_["bkg_err"] * 100, mc_["p_b"], ls='--', c='C1', lw=3)
    plt.plot(mc_["bkg_err"] * 100, mc_["p_s"], ls='--', c='C2', lw=3)

    # Legend entries follow the plotting order above (ML first, then MC).
    plt.legend(["CLsb", "CLb", "CLs", "CLsb MC", "CLb MC", "CLs MC"], ncol=2)

    # `f` is a fraction, so it is scaled by 100 to agree with the percent sign
    # in the label (the unscaled value would read e.g. 0.010% instead of 1.0%).
    # Raw string: "\%" is not a valid Python escape sequence.
    plt.xlabel(r"sys. error \% (signal fraction {:.1f}\%)".format(100 * f), fontsize=22)
    plt.ylabel("$p$-value", fontsize=22)
    plt.tight_layout()

    # Save before showing so the written file contains the drawn figure.
    plt.savefig(saved + f"CLs_q0_mu0_bkg_errs_{i}.pdf")
    plt.show()

In [None]:
# NOTE(review): disabled plot of the MC-minus-ML p-value differences. As written
# it reads `mc_`, `ml_` and `f` left over from the final iteration of the loop in
# the previous cell, so it would only cover the last signal fraction.
# plt.plot(mc_["bkg_err"] * 100, mc_["p_sb"] - ml_["p_sb"], ls='-', c='C0', lw=3)
# plt.plot(mc_["bkg_err"] * 100, mc_["p_b"] - ml_["p_b"], ls='-', c='C1', lw=3)
# plt.plot(mc_["bkg_err"] * 100, mc_["p_s"] - ml_["p_s"], ls='-', c='C2', lw=3)

# plt.legend(["CLsb MC - CLsb", "CLb MC - CLb", "CLs MC - CLs"])

# plt.xlabel("sys. error \% (signal fraction {:.2f}\%)".format(f), fontsize=22)
# plt.ylabel("$p$-value difference", fontsize=22)
# plt.tight_layout()

# plt.savefig(saved + "CLs_q0_mu0_bkg_errs_diff.pdf")

# Constant background error

In [None]:
# One figure per systematic error: p-values as a function of the signal
# fraction (in percent). ML results are drawn solid, MC results dashed, with
# matching colours for p_sb / p_b / p_s.
for i, f in enumerate(bkg_errs):
    ml_ = ml_res[ml_res["bkg_err"] == f]
    mc_ = mc_res[mc_res["bkg_err"] == f]

    plt.plot(ml_["sig_frac"] * 100, ml_["p_sb"], c="C0", lw=3)
    plt.plot(ml_["sig_frac"] * 100, ml_["p_b"], c="C1", lw=3)
    plt.plot(ml_["sig_frac"] * 100, ml_["p_s"], c="C2", lw=3)

    plt.plot(mc_["sig_frac"] * 100, mc_["p_sb"], ls='--', c='C0', lw=3)
    plt.plot(mc_["sig_frac"] * 100, mc_["p_b"], ls='--', c='C1', lw=3)
    plt.plot(mc_["sig_frac"] * 100, mc_["p_s"], ls='--', c='C2', lw=3)

    # Legend entries follow the plotting order above (ML first, then MC).
    plt.legend(["CLsb", "CLb", "CLs", "CLsb MC", "CLb MC", "CLs MC"], ncol=2)

    # Raw f-string: "\%" is not a valid Python escape, so a plain literal
    # triggers an invalid-escape warning. The rendered label is unchanged.
    plt.xlabel(rf"signal fraction \% (sys.error {100*f:.1f}\%)", fontsize=22)
    plt.ylabel("$p$-value", fontsize=22)
    plt.tight_layout()

    # Save before showing so the written file contains the drawn figure.
    plt.savefig(saved + f"CLs_q0_mu0_sig_fracs_{i}.pdf")
    plt.show()