In [None]:
# Move the working directory up two levels (presumably to the repository root so
# that `ml_hep_sim` is importable -- confirm).
# NOTE: the path is relative, so re-running this cell moves up two more levels.
%cd ../../

# Get cut pipeline

In [None]:
from ml_hep_sim.analysis.cut_pipeline import get_cut_pipeline

In [None]:
# Sample size handed as `N_gen` to the cut pipeline and to the histogram pipeline
# (presumably the number of generated events -- confirm against those helpers).
N_gen = 1_000_000

In [None]:
# Build the cut pipeline; its last four blocks are picked up in the variable
# extraction cell, and the whole pipeline is composed into `pipe` in the
# non-classifier branch further down.
# Original author's note: this is different classifier...
cut_pipeline = get_cut_pipeline(
    cut_value=0.5,
    N_gen=N_gen,
    use_classifier=True,
)

# Extract variable

In [None]:
from ml_hep_sim.pipeline.pipes import Pipeline
from ml_hep_sim.pipeline.blocks import VariableExtractBlock
from ml_hep_sim.analysis.utils import get_colnames_dict

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from ml_hep_sim.plotting.style import style_setup, set_size

# Apply the plotting helpers from ml_hep_sim.plotting.style once, before any
# figure is drawn (presumably they set global matplotlib defaults -- confirm).
set_size()
style_setup(seaborn_pallete=True)

In [None]:
# Prefix prepended to the file names passed to plt.savefig below.  Empty means
# "current directory"; the trailing comment holds an alternative results folder.
# NOTE: this value is assigned again, identically, before it is first used.
saved = "" # "ml_hep_sim/analysis/results/hists/"

In [None]:
# Name of the observable to extract, and its entry in the column lookup.
var = "m bb"

dct = get_colnames_dict()
idx = dct[var]  # presumably the column index of `var` -- confirm

# The last four blocks of the cut pipeline, in the order
# (signal gen, background gen, signal MC, background MC).
cut_blocks = cut_pipeline.pipes[-4:]
b_sig_gen_data_cut, b_bkg_gen_data_cut, b_sig_mc_data_cut, b_bkg_mc_data_cut = cut_blocks

# One extraction block per cut block, created in the same order as above.
b_sig_gen_var, b_bkg_gen_var, b_sig_mc_var, b_bkg_mc_var = [
    VariableExtractBlock(idx, save_data=False)(cut_block) for cut_block in cut_blocks
]

# Make histograms

In [None]:
from ml_hep_sim.analysis.utils import SigBkgBlock
from ml_hep_sim.analysis.hists_pipeline import  MakeHistsFromSamples

In [None]:
# The same four extraction blocks are given to the SigBkgBlock constructor and
# then to the call that wires the new block to them.
var_blocks = (b_sig_gen_var, b_bkg_gen_var, b_sig_mc_var, b_bkg_mc_var)
b_sig_bkg_gen_mc = SigBkgBlock(*var_blocks)(*var_blocks)

In [None]:
# Switch between the classifier-output analysis (True) and the m_bb analysis (False).
use_class = True

# Histogram settings forwarded to the histogram-making step.
bins = 30  # number of histogram bins
N_sig = 100  # presumably the signal yield the histograms are scaled to -- confirm
N_bkg = 1000  # presumably the background yield the histograms are scaled to -- confirm

In [None]:
# Reset the output-file prefix; the next cell appends "class_" or "mbb_" to it.
# The trailing comment holds an alternative results folder.
saved = "" #"ml_hep_sim/analysis/results/hists/"

In [None]:
# Append the analysis tag to the output prefix.  A tag left over from an earlier
# run of this cell is dropped first, so re-running the cell (or flipping
# `use_class` and re-running) never yields prefixes such as "class_class_"
# or "class_mbb_".
_tag = "class_" if use_class else "mbb_"
for _old_tag in ("class_", "mbb_"):
    if saved.endswith(_old_tag):
        saved = saved[: -len(_old_tag)]
        break
saved += _tag

In [None]:
# Display the resulting output-file prefix.
saved

# Build and fit pipeline

In [None]:
from ml_hep_sim.analysis.hists_pipeline import get_hists_pipeline # TODO: write same for var
from ml_hep_sim.pipeline.pipeline_loggers import setup_logger

# Logger named "hists" with log path "ml_pipeline/"; it is passed to
# get_hists_pipeline in the classifier branch below.
logger = setup_logger(log_name="hists", log_path="ml_pipeline/")

In [None]:
# Histogram range: classifier output above the cut, or the m_bb window.
bin_range = (0.5, 1.1) if use_class else (0.01, 3.0)

# Settings shared by both ways of building the histogram step.
hist_settings = dict(bin_range=bin_range, bins=bins, N_sig=N_sig, N_bkg=N_bkg, N_gen=N_gen)

if use_class:
    # Ready-made pipeline that histograms the classifier output.
    pipe = get_hists_pipeline(var="", logger=logger, use_classifier=True, **hist_settings)
else:
    # Hand-assembled pipeline: cut -> variable extraction -> sig/bkg merge -> histograms.
    b_hists = MakeHistsFromSamples(**hist_settings)(b_sig_bkg_gen_mc)

    pipe = Pipeline()
    pipe.compose(
        cut_pipeline,
        b_sig_gen_var,
        b_bkg_gen_var,
        b_sig_mc_var,
        b_bkg_mc_var,
        b_sig_bkg_gen_mc,
        b_hists,
    )

# Both variants are fitted the same way.
pipe.fit()

In [None]:
# pipe.draw_pipeline_tree(to_graphviz_file="pipeline_gen_cut", block_idx=-1)

In [None]:
# Results of the four per-sample blocks that sit just before the last two blocks
# of the pipeline.  In the non-classifier branch those last two blocks are
# `b_sig_bkg_gen_mc` and `b_hists`; the classifier pipeline is assumed to end
# with the same layout -- confirm against get_hists_pipeline.
sig_gen, bkg_gen, sig_mc, bkg_mc = [pipe.pipes[pos].results for pos in (-6, -5, -4, -3)]

In [None]:
# Quick look at the size of the generated-signal result array.
sig_gen.shape

In [None]:
# Binning and line style shared by the histograms of this figure.
binning = dict(range=bin_range, bins=bins)
outline = dict(histtype="step", lw=2)

# Outlined histograms: generated signal, generated background, and both together.
for values in (sig_gen, bkg_gen, np.concatenate([sig_gen, bkg_gen])):
    plt.hist(values, **binning, **outline)

# Filled histogram: MC signal and background together.
plt.hist(np.concatenate([sig_mc, bkg_mc]), histtype="stepfilled", color="C7", alpha=0.5, **binning)

plt.legend(["ML sig", "ML bkg", "ML sig + bkg (data)", "MC sig + bkg (data)"], fontsize=15)
plt.ylabel("$N$", fontsize=24)

# Label the x axis with the quantity that was actually histogrammed.
if use_class:
    x_label = "class. output"
else:
    x_label = "$m_{bb}$"
plt.xlabel(x_label, fontsize=24)

plt.tight_layout()
plt.savefig(saved + "post_cut_dists.pdf")

In [None]:
# The last pipeline block carries the histogram outputs used by the plots below.
p = pipe.pipes[-1]
hists, errors, alpha, N = p.histograms, p.errors, p.alpha, p.N_gen

# Bin indices, used as the x coordinates of the step plots.
r = range(bins)

# Step plots of the binned histograms

In [None]:
# Overlay the binned histograms of the generated ("gen") and MC ("mc") samples.
# Signal histograms are scaled by `alpha`; background and data are drawn as stored.
for sample in ("gen", "mc"):
    plt.step(r, alpha * hists[f"sig_{sample}"], label=f"alpha x sig {sample}")
    plt.step(r, hists[f"bkg_{sample}"], label=f"bkg {sample}")
    plt.step(r, hists[f"data_{sample}"], label=f"data {sample}")

# The x axis is the bin index.  Name the binned quantity according to `use_class`,
# as the other figures in this notebook do, instead of always saying m_bb.
plt.xlabel("bins $m_{bb}$" if not use_class else "bins class. output")
plt.ylabel("$N$", fontsize=24)

plt.legend()
plt.tight_layout()

In [None]:
# Relative uncertainty multiplied bin-by-bin onto the generated background
# histogram when building `bkg_err` below.
sys_err = 0.1

In [None]:
# Inspect the "nu_b_ml" error term that enters `bkg_err` below.
errors["nu_b_ml"]

In [None]:
# Inspect the `sys_err`-scaled generated background that enters `bkg_err` below.
hists["bkg_gen"] * sys_err

In [None]:
# Error bars for the MC data points.
data_err = errors["data_mc"]

# Error on the generated background: the "nu_b_ml" term and the `sys_err`-scaled
# background histogram, combined in quadrature.
ml_bkg_err = errors["nu_b_ml"]
scaled_bkg_err = hists["bkg_gen"] * sys_err
bkg_err = np.sqrt(ml_bkg_err ** 2 + scaled_bkg_err ** 2)

In [None]:
from ml_hep_sim.plotting.hep_plots import StackPlot


# Stacked comparison of MC data against (alpha-scaled MC signal + generated
# background), with a ratio panel underneath.  The x coordinates are bin indices.
x = np.arange(0, bins, 1)

# NOTE(review): the third entry of `hists_lst` is the sum of the first two, and
# `data_hist` adds that same sum to the MC data.  Presumably this offsets the data
# points so that they line up with the top of the stack -- confirm against StackPlot.
sp = StackPlot(
    x,
    hists_lst=[alpha * hists["sig_mc"], hists["bkg_gen"], alpha * hists["sig_mc"] + hists["bkg_gen"]],
    data_hist=alpha * hists["sig_mc"] + hists["bkg_gen"] + hists["data_mc"],
)

sp.plot_stack(labels=["MC sig", "ML bkg", "MC sig + ML bkg"])

# data_err = np.sqrt(hists["sig_mc"] + hists["bkg_mc"])
sp.plot_data(label="MC data", err=data_err, fmt='.', capsize=1, lw=1)

# eff = len(bkg_gen) / N
# N_hist, _ = np.histogram(bkg_gen, bins=bins, range=bin_range)
# bkg_err = np.sqrt(N_hist * eff * (1 - eff))

# Error band of the prediction, using the combined background error computed earlier.
sp.plot_mc_errors(bkg_err)

# Ratio panel inputs: MC data (numerator) over the signal + background prediction
# (denominator), without the offset used for `data_hist` above.
counts_num, counts_den = hists["data_mc"], alpha * hists["sig_mc"] + hists["bkg_gen"]
counts_num_err = data_err
counts_den_err = bkg_err

sp.plot_lower_panel(counts_num, counts_den, counts_num_err, counts_den_err, ylabel="data$/$ML",
                    label_x_start=bin_range[0],
                    label_x_end=bin_range[1], ylim=[0.5, 1.5])

ax = sp.ax
ax_lower = sp.ax_lower

# Label the shared x axis with the quantity that was histogrammed.
if use_class:
    ax_lower.set_xlabel("class. output")
else:
    ax_lower.set_xlabel("$m_{bb}$")

ax.set_ylabel('$N$')

# NOTE(review): this is set after `ylim=[0.5, 1.5]` was passed to plot_lower_panel
# above, so it presumably overrides that range -- confirm which one is intended.
ax_lower.set_ylim(0.6, 1.4)

# NOTE(review): plt.legend targets matplotlib's current axes, which may not be
# `ax` after the lower panel was drawn -- confirm the legend lands on the intended panel.
plt.legend(loc='upper right')

plt.tight_layout()
plt.savefig(saved + f"stacked_{N_gen}.pdf")