# Templates

Author(s): Raghav Kansal

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import mplhep as hep
from matplotlib import colors

from boostedhh import utils, hh_vars
from boostedhh.utils import PAD_VAL, Cutflow
from bbtautau.postprocessing import Samples, Regions, postprocessing, plotting
from bbtautau.postprocessing.Samples import CHANNELS, SAMPLES, SIGNALS
import bbtautau.postprocessing.utils as putils
from bbtautau.postprocessing.utils import LoadedSample
from bbtautau.postprocessing.plotting import ratioHistPlot

import logging

# Root logging: timestamped "<time> - <level> - <message>" records at INFO and above.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# The "boostedhh.utils" logger is additionally lowered to DEBUG.
logger = logging.getLogger("boostedhh.utils")
logger.setLevel(logging.DEBUG)

In [None]:
# IPython-only cell (these are line magics, not plain Python):
# load the autoreload extension so imported files are automatically
# reloaded when they are edited, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# --- Run configuration -------------------------------------------------------
MAIN_DIR = Path("../../../")
CHANNEL = CHANNELS["he"]  # options: "hh", "he", "hm"

tag = "25Aug4_replayMay26Cuts"
year = "2022"

# Output locations: plots go under MAIN_DIR; templates (plus a per-year
# "cutflows" sub-directory) go under ./templates relative to the working dir.
plot_dir = MAIN_DIR / f"plots/Templates/{tag}/{CHANNEL.key}"
templates_dir = Path(f"./templates/{tag}/{CHANNEL.key}")
for out_dir in (plot_dir, templates_dir / "cutflows" / year):
    out_dir.mkdir(parents=True, exist_ok=True)

# --- Input skims --------------------------------------------------------------
# NOTE(review): "signal", "data" and "bg" all resolve to the same
# "..._private_signal" directory — confirm this is intended and not a
# copy-paste leftover.
base_dir = Path("/ceph/cms/store/user/rkansal/bbtautau/skimmer/")
skim_dir = base_dir / "25Apr17bbpresel_v12_private_signal"
data_paths = {"signal": skim_dir, "data": skim_dir, "bg": skim_dir}

# --- Sample selection ---------------------------------------------------------
# Signals: every entry of SIGNALS with the channel key appended.
sig_keys = [sig + CHANNEL.key for sig in SIGNALS]
sigs = {key: SAMPLES[key] for key in sig_keys}
# Backgrounds: every sample whose get_type() is "bg".
bgs = {name: sample for name, sample in SAMPLES.items() if sample.get_type() == "bg"}

In [None]:
from bbtautau.HLTs import HLTs

# Build the trigger dictionary for `year` in two steps, then display its
# "signal" entry as the cell output.
hlts_by_dtype = HLTs.hlts_list_by_dtype(year)
trigdict = postprocessing.trigger_filter(hlts_by_dtype, year)
trigdict["signal"]

## Load samples

In [None]:
# Filters handed to the loader (3 fat jets, bb cut of 0.8), built up-front to
# keep the load call short.
load_filters = postprocessing.bb_filters(num_fatjets=3, bb_cut=0.8)

# events_dict will contain all information, for all samples (signal, data and
# backgrounds are all requested here).
events_dict = postprocessing.load_samples(
    year,
    CHANNEL,
    data_paths,
    filters_dict=load_filters,
    load_data=True,
    load_bgs=True,
    loaded_samples=True,
)

# Start the cutflow and record the first row, "Preselection", using "finalWeight".
cutflow = utils.Cutflow(samples=events_dict)
cutflow.add_cut(events_dict, "Preselection", "finalWeight")
# Last expression of the cell: displays the cutflow table.
cutflow.cutflow

Triggers

In [None]:
# Apply the trigger selection for this year and channel.
# NOTE(review): the return value is unused, so this presumably modifies
# events_dict in place — confirm against postprocessing.apply_triggers.
postprocessing.apply_triggers(events_dict, year, CHANNEL)
# Record a "Triggers" row in the cutflow using "finalWeight", then display the table.
cutflow.add_cut(events_dict, "Triggers", "finalWeight")
cutflow.cutflow

bbtautau assignment

In [None]:
# Run the bb / tautau assignment for the selected channel on the loaded samples.
# NOTE(review): the return value is discarded, so the result is presumably
# stored on events_dict — confirm against postprocessing.bbtautau_assignment.
postprocessing.bbtautau_assignment(events_dict, CHANNEL)

Templates

In [None]:
# To pick up new cuts, re-bind CHANNEL before running this cell, e.g.:
# CHANNEL = Samples.CHANNELS["hh"]  # reload for new cuts

# Per-signal scale factors passed as sig_scale_dict.
sig_scales = {f"bbtt{CHANNEL.key}": 300, f"vbfbbtt-k2v0{CHANNEL.key}": 40}

templates = postprocessing.get_templates(
    events_dict,
    year,
    sigs,
    bgs,
    CHANNEL,
    postprocessing.shape_vars,
    {},  # empty dict passed positionally; its role is set by get_templates' signature
    plot_dir=plot_dir,
    template_dir=templates_dir,
    pass_ylim=150,
    fail_ylim=1e5,
    sig_scale_dict=sig_scales,
    show=True,
)

In [None]:
# Write this year's templates to "<templates_dir>/<year>_templates.pkl".
# The positional `True` is forwarded unchanged; its meaning is defined by
# postprocessing.save_templates' signature.
templates_path = templates_dir / f"{year}_templates.pkl"
postprocessing.save_templates(templates, templates_path, True, postprocessing.shape_vars)

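The cells below are independent of the sample loading and template creation above: they read previously saved template pickles from disk and only need the imports from the first cell.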

In [None]:
import pickle
from bbtautau.postprocessing.datacardHelpers import rem_neg, sum_templates

# Value substituted into the "bmin_<bmin>" component of the templates path.
bmin = 1

# For every channel: load the saved per-year templates, sum them over years and
# plot the "pass" region.
for ch in CHANNELS:
    channel = CHANNELS[ch]

    # Deliberately NOT named `templates_dir` / `year`: those notebook-level names
    # are used by the template-making cells earlier in this notebook, and
    # overwriting them here (with a str path and the last loop year) would break
    # re-running those cells after this one.
    saved_templates_dir = Path(
        f"/home/users/lumori/bbtautau/src/bbtautau/postprocessing/templates/25Aug4_replayMay26Cuts/bmin_{bmin}/{ch}"
    )

    templates_dict = {}
    for yr in hh_vars.years:
        # pickle.load executes arbitrary code from the file: only point this at
        # template files you produced yourself.
        with (saved_templates_dir / f"{yr}_templates.pkl").open("rb") as f:
            templates_dict[yr] = rem_neg(pickle.load(f))

    # `templates_summed` is intentionally left at notebook level: later cells read
    # it, and after the loop it holds the result for the last channel iterated.
    templates_summed = sum_templates(templates_dict, hh_vars.years)

    print(channel.key)

    ratioHistPlot(
        templates_summed["pass"],
        "2022-2023",
        channel,
        [f"bbtt{channel.key}"],
        ["ttbarhad", "ttbarsl", "ttbarll", "wjets", "zjets", "hbb", "qcd"],
        plot_ratio=True,
        plot_significance=False,
        show=True,
        sig_scale_dict={
            f"bbtt{channel.key}": 30.0,
        },
    )

In [None]:
# Inspect one sample of the summed "pass" histogram.
# NOTE(review): templates_summed is whatever the preceding per-channel loop left
# behind (its last iteration) — confirm that histogram is expected to contain
# the "bbtthh" sample checked below.
h_pass = templates_summed["pass"]

if "Sample" not in [ax.name for ax in h_pass.axes]:
    print("No 'Sample' axis found in histogram.")
else:
    # Locate the "Sample" axis by name and list what it contains
    sample_axis = h_pass.axes.name.index("Sample")
    sample_names = h_pass.axes[sample_axis]
    print("Available samples:", list(sample_names))

    # Pick a sample to inspect: "bbtthh"
    sample_to_check = "bbtthh"
    if sample_to_check not in sample_names:
        print(f"Sample '{sample_to_check}' not found in histogram.")
    else:
        # Select the sample, using "Sample" as the key
        h_sample = h_pass[{"Sample": sample_to_check}]

        # Sum of all weights in this sample
        total_weight = h_sample.values().sum()
        print(f"Total weight for sample '{sample_to_check}':", total_weight)
        print(f"Histogram shape for '{sample_to_check}':", h_sample.shape)
        print("Mass axis (bbFatJetParTmassResApplied) bins:", h_sample.axes[0].edges[5:9])

        # Weights in bins 5..8 (reported below as the "signal bins") and their sum
        sig_bin_weights = np.array(h_sample.values())[5:9]
        print(f"Weights per bin for '{sample_to_check}':", sig_bin_weights)

        tot_sig_w = sig_bin_weights.sum()
        print(f"Total weights in signal bins for '{sample_to_check}':", tot_sig_w)

In [None]:
from bbtautau.postprocessing.SensitivityStudy import FOMS

# Figure-of-merit function registered under the "2sqrtB_S_var" key.
fom = FOMS["2sqrtB_S_var"].fom_func

# Relies on `bmin`, `pickle`, `rem_neg` and `sum_templates` from the earlier
# "summed templates" cell of this notebook.
for ch in CHANNELS:

    # Deliberately NOT named `templates_dir` / `year`: those notebook-level names
    # are used by the template-making cells earlier in this notebook, and
    # overwriting them here would break re-running those cells after this one.
    saved_templates_dir = Path(
        f"/home/users/lumori/bbtautau/src/bbtautau/postprocessing/templates/25Aug4_replayMay26Cuts/bmin_{bmin}/{ch}"
    )
    # Alternative input that was kept alongside the path above:
    # f"/home/users/lumori/bbtautau/src/bbtautau/postprocessing/templates/25July31/nobbttpresel_noNormSig_noBkgConstraint_ParT/bmin_{bmin}/{ch}"

    templates_dict = {}
    for yr in hh_vars.years:
        # pickle.load executes arbitrary code from the file: only point this at
        # template files you produced yourself.
        with (saved_templates_dir / f"{yr}_templates.pkl").open("rb") as f:
            templates_dict[yr] = rem_neg(pickle.load(f))

    # Left at notebook level on purpose: the plotting cell that follows reads it.
    templates_summed = sum_templates(templates_dict, hh_vars.years)

    # Fetch each per-sample array once instead of re-selecting it per slice.
    h_pass = templates_summed["pass"]
    sig_vals = np.array(h_pass[{"Sample": "bbtt" + ch}].values())
    data_vals = np.array(h_pass[{"Sample": "data"}].values())

    # Signal: bins 5..8 (the "resonant region" in the printout below).
    sig_w_res = sig_vals[5:9].sum()
    # Background estimate: data in every other bin (the "sideband region").
    bkg_w_sb = data_vals[:5].sum() + data_vals[9:].sum()

    print(f"Signal weight in resonant region for {ch}: {sig_w_res}")
    print(f"Background weight in sideband region for {ch}: {bkg_w_sb}")

    # NOTE(review): the meaning of the third argument (0.295) is defined by the
    # FOM function and is not visible here — confirm before changing it.
    print(f"FOM for {ch}: {fom(bkg_w_sb, sig_w_res, 0.295)}")

In [None]:
# Plot the summed "pass" templates for the "hm" channel.
# NOTE(review): templates_summed comes from the last iteration of the preceding
# per-channel loop, while the channel here is hard-coded to "hm" — confirm the
# two match. Unlike the per-channel plots earlier in this notebook, "qcd" is not
# in the background list; verify that is intended.
hm_bg_keys = ["ttbarhad", "ttbarsl", "ttbarll", "wjets", "zjets", "hbb"]

plotting.ratioHistPlot(
    templates_summed["pass"],
    "2022-2023",
    CHANNELS["hm"],
    ["bbtthm"],
    hm_bg_keys,
    show=True,
    plot_ratio=True,
    plot_significance=False,
)