In [None]:
# Move the working directory two levels up so that the relative paths used
# below resolve (presumably to the repository root — confirm).
%cd ../../

In [None]:
# Base path prefix for every figure saved by this notebook; a
# configuration-specific tag is appended to it further down.
saved = "ml_hep_sim/analysis/results/spur/"

# Get pipeline

In [None]:
import numpy as np
from ml_hep_sim.analysis.spur_pipeline import get_spur_pipeline

In [None]:
# Scenario switches: forwarded to the pipeline builder and reused later when
# the output-file prefix is chosen.
bonly = False
mc_test = False
use_class = True

# Scan grid: 40 evenly spaced nu_B values and 10 evenly spaced alpha values.
nu_bs = np.linspace(10 ** 3, 10 ** 5, 40)
alphas = np.linspace(0.01, 0.1, 10)

# The bin range passed to the pipeline depends on the classifier switch.
if use_class:
    bin_range = (0.5, 1.1)
else:
    bin_range = (0.01, 3.0)

pipe = get_spur_pipeline(
    nu_bs,
    alphas,
    bin_range=bin_range,
    use_classifier=use_class,
    bonly=bonly,
    mc_test=mc_test,
    scale_by_alpha=True,
)
pipe.fit()

# The last stage of the fitted pipeline carries the results read below.
res = pipe.pipes[-1]

In [None]:
# Build the configuration tag that is appended to the output prefix:
#   classifier / mbb, optionally "_mc", optionally followed by "bonly_".
if use_class:
    prefix = "class_mc_" if mc_test else "class_"
else:
    prefix = "mbb_mc_" if mc_test else "mbb_"

if bonly:
    prefix += "bonly_"

# Only append when the tag is not there yet, so re-running this cell does not
# produce a doubled tag such as ".../class_class_".
if not saved.endswith(prefix):
    saved += prefix

In [None]:
# Display the final output prefix used by the savefig calls below.
saved

# Spurious signal

In [None]:
import matplotlib.pyplot as plt
from ml_hep_sim.plotting.style import style_setup, set_size

# Apply the project's plotting defaults (helpers from ml_hep_sim.plotting.style)
# before any figure is drawn.
set_size()
style_setup(seaborn_pallete=True)

In [None]:
# Parsed results of the last pipeline stage (presumably a pandas DataFrame,
# given the .head()/.groupby() calls below — confirm).
df = res.parsed_results

In [None]:
# Preview the first rows of the raw results.
df.head()

In [None]:
# Attach the scan coordinates to every result row.
# NOTE(review): this assumes one row per (nu_b, alpha) pair, ordered with nu_b
# as the outer loop and alpha as the inner loop — confirm against the pipeline.
df["total_B"] = np.repeat(nu_bs, len(alphas))
df["alpha"] = np.tile(alphas, len(nu_bs))

In [None]:
# Preview again, now including the total_B and alpha columns.
df.head()

In [None]:
# Aliases for the scan grids, used by the plotting cells below.
sig_fracs = alphas
lumis = nu_bs

In [None]:
# spur: mu * total_B minus alpha * total_B.
df["spur"] = df["mu"] * df["total_B"] - df["alpha"] * df["total_B"]
# spur_ratio: absolute value of spur divided by total_B.
# NOTE(review): the plots below label this quantity S_spur / S, yet the
# denominator here is total_B rather than alpha * total_B — confirm which one
# is intended.
df["spur_ratio"] = np.abs(df["spur"] / df["total_B"])

In [None]:
# Position in sig_fracs of the scenario inspected in the next cells
# (-1 selects the last value).
idx = -1

# Rows whose alpha equals the selected signal fraction.
df[df["alpha"] == sig_fracs[idx]]

In [None]:
# mu versus nu_B for the signal fraction selected by idx; the dashed red line
# marks sig_fracs[idx].
alpha_rows = df[df["alpha"] == sig_fracs[idx]]
mu_fit = alpha_rows["mu"].to_numpy()

plt.scatter(lumis, mu_fit)

if not bonly:
    # Error bars are read from the same slice as the points, so they stay
    # matched to the plotted scenario when idx is changed.
    plt.errorbar(lumis, mu_fit, alpha_rows["mu_err"].to_numpy(), capsize=4)

plt.axhline(sig_fracs[idx], c='r', ls='--')
#plt.xlim(-2000, 1.1e5)

plt.tight_layout()

In [None]:
# Spread estimator switch: True -> per-group standard deviation,
# False -> per-group root-mean-square.
use_std = True

# "spur" values of the selected scenario, grouped by total_B.
spur_by_b = df[df["alpha"] == sig_fracs[idx]].groupby("total_B")["spur"]

r = spur_by_b.mean(numeric_only=True).to_numpy()

if use_std:
    spread = spur_by_b.std(numeric_only=True)
else:
    spread = spur_by_b.apply(lambda x: np.sqrt(np.sum(x**2) / len(x)))

r_std = spread.to_numpy()

In [None]:
# Display the per-total_B mean of spur for the selected scenario.
r

In [None]:
# Mean spur of the selected scenario against the nu_B grid.
plt.scatter(lumis, r)
# Error bars (r_std) are currently disabled.
# plt.errorbar(lumis, r, r_std, ls="none", capsize=4)

# plt.yscale("log")

plt.xlabel("L")
plt.ylabel("spur")

In [None]:
# Mean mu versus nu_B for every third signal fraction. A dashed horizontal line
# marks each sf, drawn in colour "C<n>" so that it is meant to match the n-th
# scatter series.
# The counter is called color_idx so that the notebook-level `idx` selector
# defined earlier is not overwritten by this loop.
for color_idx, sf in enumerate(sig_fracs[::3]):
    mu_by_b = df[df["alpha"] == sf].groupby("total_B")["mu"]
    r = mu_by_b.mean(numeric_only=True).to_numpy()

    # Spread per total_B: standard deviation or root-mean-square (use_std).
    if use_std:
        r_std = mu_by_b.std(numeric_only=True).to_numpy()
    else:
        r_std = mu_by_b.apply(lambda x: np.sqrt(np.sum(x**2) / len(x))).to_numpy()

    plt.scatter(lumis, r, label="sf={:.2f}".format(sf), edgecolor='k')
    # plt.errorbar(lumis, r, r_std, ls="none", capsize=4)
    plt.axhline(sf, c=f"C{color_idx}", ls='--', zorder=10)

if bonly:
    plt.yscale("log")

plt.xlabel(r"$\nu_B=L\cdot\sigma$", loc="center")
# Raw string: "\m" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"$\mu$")
plt.legend(ncol=3)
plt.ylim(-0.02, 0.13)

plt.tight_layout()
plt.savefig(saved + "mu_vs_L.pdf")
plt.show()

In [None]:
# Mean spur versus nu_B, one scatter series per signal fraction.
# Iterating over the values directly (no index) keeps the notebook-level `idx`
# selector untouched and avoids re-indexing sig_fracs on every line.
for sf in sig_fracs:
    spur_by_b = df[df["alpha"] == sf].groupby("total_B")["spur"]
    r = spur_by_b.mean(numeric_only=True).to_numpy()

    # Spread per total_B: standard deviation or root-mean-square (use_std).
    if use_std:
        r_std = spur_by_b.std(numeric_only=True).to_numpy()
    else:
        r_std = spur_by_b.apply(lambda x: np.sqrt(np.sum(x**2) / len(x))).to_numpy()

    plt.scatter(lumis, r, label="sf={:.2f}".format(sf))
    # plt.plot(lumis, r, label="sf={:.2f}".format(sf))
    # plt.errorbar(lumis, r, r_std, ls="none", capsize=4)

#plt.yscale("symlog")
plt.xlabel(r"$\nu_B=L\cdot\sigma$", loc="center")
plt.ylabel(r"$S_{\text{spur}}$")
plt.legend(ncol=3)

plt.tight_layout()
plt.savefig(saved + "spur_vs_L_scenario.pdf")

In [None]:
# Mean spur_ratio versus nu_B on a log y-axis, one series per signal fraction.
# Iterating over the values directly keeps the notebook-level `idx` selector
# untouched; the column is selected before averaging so that only spur_ratio
# is aggregated.
for sf in sig_fracs:
    r = df[df["alpha"] == sf].groupby("total_B")["spur_ratio"].mean(numeric_only=True).to_numpy()
    plt.scatter(lumis, r, label="sf={:.2f}".format(sf))

plt.yscale("log")
plt.xlabel(r"$\nu_B=L\cdot\sigma$", loc="center")
plt.ylabel(r"$S_{\text{spur}} / S$")
plt.legend(ncol=3)

plt.tight_layout()
plt.savefig(saved + "ratio_vs_L.pdf")

In [None]:
# Mean mu versus signal fraction for every 8th nu_B value.
# The rows are filtered on the loop value itself: indexing lumis with the
# enumerate counter would select lumis[0], lumis[1], ... while the legend
# shows lumis[0], lumis[8], ...
for lumi in lumis[::8]:
    mu_by_alpha = df[df["total_B"] == lumi].groupby("alpha")["mu"]
    r = mu_by_alpha.mean(numeric_only=True).to_numpy()

    # Spread per alpha: standard deviation or root-mean-square (use_std).
    if use_std:
        r_std = mu_by_alpha.std(numeric_only=True).to_numpy()
    else:
        r_std = mu_by_alpha.apply(lambda x: np.sqrt(np.sum(x**2) / len(x))).to_numpy()

    plt.scatter(sig_fracs, r, label=r"$\nu_B=$" + "{}".format(int(lumi)))
    # plt.errorbar(sig_fracs, r, r_std, ls="none", capsize=4)

plt.xlabel(r"$S/B$")
# Raw string: "\m" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"$\mu$")

plt.yscale("log")
plt.legend()

plt.tight_layout()
plt.savefig(saved + "mu_vs_sig_frac.pdf")

In [None]:
# Mean spur versus signal fraction for every 8th nu_B value.
# The rows are filtered on the loop value itself: indexing lumis with the
# enumerate counter would select lumis[0], lumis[1], ... while the legend
# shows lumis[0], lumis[8], ...
for lumi in lumis[::8]:
    spur_by_alpha = df[df["total_B"] == lumi].groupby("alpha")["spur"]
    r = spur_by_alpha.mean(numeric_only=True).to_numpy()

    # Spread per alpha: standard deviation or root-mean-square (use_std).
    if use_std:
        r_std = spur_by_alpha.std(numeric_only=True).to_numpy()
    else:
        r_std = spur_by_alpha.apply(lambda x: np.sqrt(np.sum(x**2) / len(x))).to_numpy()

    plt.scatter(sig_fracs, r, label="L={:.1f}".format(lumi))
    plt.plot(sig_fracs, r)
    # plt.errorbar(sig_fracs, r, r_std, ls="none", capsize=4)

plt.xlabel(r"$S/B$")
plt.ylabel(r"$S_{\text{spur}}$")

# plt.yscale("log")
plt.legend()

plt.tight_layout()
plt.savefig(saved + "spur_vs_sig_frac.pdf")

In [None]:
# Mean spur_ratio versus signal fraction for every 8th nu_B value; skipped
# entirely when bonly is set.
if not bonly:
    # The rows are filtered on the loop value itself: indexing lumis with the
    # enumerate counter would select lumis[0], lumis[1], ... while the legend
    # shows lumis[0], lumis[8], ...
    for lumi in lumis[::8]:
        ratio_by_alpha = df[df["total_B"] == lumi].groupby("alpha")["spur_ratio"]
        r = ratio_by_alpha.mean(numeric_only=True).to_numpy()

        # Spread per alpha: standard deviation or root-mean-square (use_std).
        if use_std:
            r_std = ratio_by_alpha.std(numeric_only=True).to_numpy()
        else:
            r_std = ratio_by_alpha.apply(lambda x: np.sqrt(np.sum(x**2) / len(x))).to_numpy()

        plt.scatter(sig_fracs, r, label="L={:.1f}".format(lumi))
        plt.plot(sig_fracs, r)
        # plt.errorbar(sig_fracs, r, r_std, ls="none", capsize=4)

    plt.xlabel(r"$S/B$")
    plt.ylabel(r"$S_{\text{spur}}/S$")

    # plt.yscale("log")
    plt.legend()

    plt.tight_layout()
    plt.savefig(saved + "spur_vs_sig_frac_ratio.pdf")

In [None]:
# Hexbin map of mu over the (total_B, alpha) scan plane. With a C argument and
# no reduce function, hexbin colours each hexagon by the mean of the values
# that fall into it.
x = df["total_B"].to_numpy(dtype=float)
y = df["alpha"].to_numpy(dtype=float) * 100  # alpha expressed in percent
z = df["mu"].to_numpy(dtype=float)

# The y values are scaled by 100, so the axis label carries the percent unit
# (same "[\%]" notation as the spur-ratio colour bar in this notebook).
plt.ylabel(r"$S/B$ [\%]")
plt.xlabel(r"$\nu_B=L\cdot\sigma$", loc="center")

plt.hexbin(x, y, z, gridsize=6)
# Raw string: "\m" is not a valid escape sequence in a normal string literal.
plt.colorbar(label=r"$\mu$")
plt.tight_layout()

plt.savefig(saved + "hexbin_sig_frac_L_mu.pdf")

In [None]:
# Hexbin map of spur over the (total_B, alpha) scan plane. With a C argument
# and no reduce function, hexbin colours each hexagon by the mean of the
# values that fall into it.
x = df["total_B"].to_numpy(dtype=float)
y = df["alpha"].to_numpy(dtype=float) * 100  # alpha expressed in percent
z = df["spur"].to_numpy(dtype=float)

# The y values are scaled by 100, so the axis label carries the percent unit
# (same "[\%]" notation as the spur-ratio colour bar in this notebook).
plt.ylabel(r"$S/B$ [\%]")
plt.xlabel(r"$\nu_B=L\cdot\sigma$", loc="center")

plt.hexbin(x, y, z, gridsize=6)
plt.colorbar(label=r"$S_{\text{spur}}$")
plt.tight_layout()

plt.savefig(saved + "hexbin_sig_frac_L_spur.pdf")

In [None]:
# Hexbin map of spur_ratio (in percent) over the (total_B, alpha) scan plane;
# skipped when bonly is set. With a C argument and no reduce function, hexbin
# colours each hexagon by the mean of the values that fall into it.
if not bonly:
    x = df["total_B"].to_numpy(dtype=float)
    y = df["alpha"].to_numpy(dtype=float) * 100  # alpha expressed in percent
    z = df["spur_ratio"].to_numpy(dtype=float) * 100  # ratio expressed in percent

    # The y values are scaled by 100, so the axis label carries the percent
    # unit, matching the colour-bar label below.
    plt.ylabel(r"$S/B$ [\%]")
    plt.xlabel(r"$\nu_B=L\cdot\sigma$", loc="center")

    plt.hexbin(x, y, z, gridsize=6)
    plt.colorbar(label=r"$S_{\text{spur}}/S$ [\%]")
    plt.tight_layout()

    plt.savefig(saved + "hexbin_sig_frac_L_spur_ratio.pdf")