In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
import glob
import itertools

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_curve
from tqdm import tqdm
import xgboost as xgb #BDT
import pickle
import mplhep as hep

import concurrent.futures as futures

plt.rcParams.update({"figure.facecolor":  (1,1,1,0)})

from utils.analysis import Extrapolation

TAG = "abcdnet"
# TAG = "pnetfix"

  from pandas import MultiIndex, Int64Index


In [2]:
babies = sorted(glob.glob(f"../analysis/studies/vbsvvhjets/output_{TAG}/Run2/*.root"))
sig_babies = [baby for baby in babies if "VBSVVH" in baby]
bkg_babies = [baby for baby in babies if "VBS" not in baby and "data" not in baby]
data_babies = [baby for baby in babies if "data" in baby]
print("Signal:")
print("\n".join(sig_babies))
print("Background:")
print("\n".join(bkg_babies))
print("Data:")
print("\n".join(data_babies))

Signal:
../analysis/studies/vbsvvhjets/output_abcdnet/Run2/VBSVVH.root
Background:
../analysis/studies/vbsvvhjets/output_abcdnet/Run2/Bosons.root
../analysis/studies/vbsvvhjets/output_abcdnet/Run2/QCD.root
../analysis/studies/vbsvvhjets/output_abcdnet/Run2/SingleTop.root
../analysis/studies/vbsvvhjets/output_abcdnet/Run2/TT1L.root
../analysis/studies/vbsvvhjets/output_abcdnet/Run2/TTH.root
../analysis/studies/vbsvvhjets/output_abcdnet/Run2/TTHad.root
../analysis/studies/vbsvvhjets/output_abcdnet/Run2/TTW.root
Data:
../analysis/studies/vbsvvhjets/output_abcdnet/Run2/data.root


In [3]:
vbsvvh = Extrapolation(
    sig_root_files=sig_babies,
    bkg_root_files=bkg_babies,
    data_root_files=data_babies,
    ttree_name="tree",
    weight_columns=[
        "xsec_sf", 
        "pu_sf",
        "prefire_sf",
        "qcdnorm_sf"
    ]
)

# bdt_name = "bdt_mediumPresel"
# with open(f"../analysis/studies/vbsvvhjets/vbsvvhjets_bdt/{bdt_name}_features.txt", "r") as txt_file:
#     features = txt_file.read().splitlines()
# bst = pickle.load(open(f"../analysis/studies/vbsvvhjets/vbsvvhjets_bdt/{bdt_name}.pkl", "rb"))
# vbsvvh.df["bdt"] = bst.predict(xgb.DMatrix(vbsvvh.df[features]))

vbsvvh.df["objsel"] = True
vbsvvh.df["presel"] = vbsvvh.df.eval(
    "objsel and hbbfatjet_xbb > 0.5 and ld_vqqfatjet_xwqq > 0.3 and tr_vqqfatjet_xwqq > 0.3"
)
vbsvvh.make_selection("presel")

# bkg_count = vbsvvh.bkg_count()
# qcd_count = vbsvvh.sample_count("QCD")
# data_count = vbsvvh.data_count()

# vbsvvh.df.loc[vbsvvh.df.name == "QCD", "event_weight"] *= (data_count - (bkg_count - qcd_count))/(qcd_count)

# ORIG_EVENT_WEIGHT = vbsvvh.df.event_weight.values.copy()

Loading sig babies: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it]
Loading bkg babies: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:04<00:00,  1.61it/s]
Loading data babies: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s]


In [4]:
if TAG == "abcdnet":
    # --- Single DisCo ---
    # MAIN Single DisCo:
    # (By hand) For ABCDNet_30DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm_leakyReLU_dCorr
    # bf_rounded_SR = "abcdnet_score > 0.97 and hbbfatjet_xbb > 0.62 and ld_vqqfatjet_xwqq > 0.74 and tr_vqqfatjet_xwqq > 0.74 and abs_deta_jj > 4"
#     SR_like="hbbfatjet_xbb > 0.60 and ld_vqqfatjet_xwqq > 0.75 and tr_vqqfatjet_xwqq > 0.70"
#     dnn_up = "abcdnet_score > 0.97"
#     dnn_dn = "abcdnet_score <= 0.5"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "abs_deta_jj > 4"
#     vbs_dn = "abs_deta_jj <= 2"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"
    
    # MAIN Single DisCo Near-SR:
    # (By hand) For ABCDNet_30DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm_leakyReLU_dCorr
    # bf_rounded_SR = "abcdnet_score > 0.97 and hbbfatjet_xbb > 0.62 and ld_vqqfatjet_xwqq > 0.74 and tr_vqqfatjet_xwqq > 0.74 and abs_deta_jj > 4"
#     SR_like="hbbfatjet_xbb > 0.60 and ld_vqqfatjet_xwqq > 0.75 and tr_vqqfatjet_xwqq > 0.70 and abcdnet_score <= 0.97"
#     dnn_up = "abcdnet_score > 0.85"
#     dnn_dn = "abcdnet_score <= 0.5"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "abs_deta_jj > 4"
#     vbs_dn = "abs_deta_jj <= 2"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

    # Hbb mass ABCD (not very good!)
    # bf_rounded_SR = "abcdnet_score > 0.97 and hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.7"
#     SR_like="hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.7"
#     dnn_up = "abcdnet_score > 0.97"
#     dnn_dn = "abcdnet_score <= 0.7"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "hbbfatjet_mass >= 75 and hbbfatjet_mass < 150"
#     vbs_dn = f"not ({vbs_up})"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

    # TEST Single DisCo (longer training):
    # (By hand) For ABCDNet_30DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm_leakyReLU_dCorr
    # bf_rounded_SR = "abcdnet_score > 0.97 and hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.68 and abs_deta_jj > 4"
#     SR_like="hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.7"
#     dnn_up = "abcdnet_score > 0.97"
#     dnn_dn = "abcdnet_score <= 0.5"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "abs_deta_jj > 4"
#     vbs_dn = "abs_deta_jj <= 2"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

    # NEW Single DisCo (longer training):
    # (By hand) For ABCDNet_30DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm_leakyReLU_dCorr
    # bf_rounded_SR = "abcdnet_score > 0.96 and hbbfatjet_xbb > 0.60 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.75 and abs_deta_jj > 4"
#     SR_like="hbbfatjet_xbb > 0.60 and ld_vqqfatjet_xwqq > 0.75 and tr_vqqfatjet_xwqq > 0.70"
#     dnn_up = "abcdnet_score > 0.96"
#     dnn_dn = "abcdnet_score <= 0.5"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "abs_deta_jj > 4"
#     vbs_dn = "abs_deta_jj <= 2"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

#     # Fixed rescaling:
#     SR_like="hbbfatjet_xbb > 0.75 and ld_vqqfatjet_xwqq > 0.75 and tr_vqqfatjet_xwqq > 0.75"
#     dnn_up = "abcdnet_score > 0.97"
#     dnn_dn = "abcdnet_score <= 0.5"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "abs_deta_jj > 4"
#     vbs_dn = "abs_deta_jj <= 2"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"
    
    # MAIN Single DisCo Near-SR:
    # (By hand) For ABCDNet_30DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm_leakyReLU_dCorr
#     # bf_rounded_SR = "abcdnet_score > 0.96 and hbbfatjet_xbb > 0.60 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.75 and abs_deta_jj > 4"
#     SR_like="hbbfatjet_xbb > 0.60 and ld_vqqfatjet_xwqq > 0.75 and tr_vqqfatjet_xwqq > 0.70 and abcdnet_score < 0.96"
#     dnn_up = "abcdnet_score > 0.85"
#     dnn_dn = "abcdnet_score <= 0.5"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "abs_deta_jj > 4"
#     vbs_dn = "abs_deta_jj <= 2"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

    # NEW Single DisCo (with VBS BDT as disco target):
    # (By hand) For ABCDNet_30DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm_leakyReLU_dCorr
    # bf_rounded_SR = "abcdnet_score > 0.96 and hbbfatjet_xbb > 0.60 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.75 and abs_deta_jj > 4"
#     SR_like="not (hbbfatjet_xbb > 0.80 and ld_vqqfatjet_xwqq > 0.60 and tr_vqqfatjet_xwqq > 0.40) and (abcdnet_score < 0.95)" # PNet sideband
#     SR_like="not (hbbfatjet_xbb > 0.80) and ld_vqqfatjet_xwqq > 0.60 and tr_vqqfatjet_xwqq > 0.40" # Xbb sideband
#     SR_like="hbbfatjet_xbb > 0.80 and ld_vqqfatjet_xwqq > 0.60 and tr_vqqfatjet_xwqq > 0.40"
#     dnn_up = "abcdnet_score > 0.95"
#     dnn_dn = "abcdnet_score <= 0.5"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "vbs_bdt_score > 0.85"
#     vbs_dn = "vbs_bdt_score <= 0.5"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

#     SR_like="hbbfatjet_xbb > 0.80 and ld_vqqfatjet_xwqq > 0.60 and tr_vqqfatjet_xwqq > 0.40"
#     dnn_up = "abcdnet_score > 0.90"
#     dnn_dn = "abcdnet_score <= 0.5"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "vbs_bdt_score > 0.85"
#     vbs_dn = "vbs_bdt_score <= 0.5"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

    # Fixed rescaling
#     SR_like="hbbfatjet_xbb > 0.70 and ld_vqqfatjet_xwqq > 0.60 and tr_vqqfatjet_xwqq > 0.40"
#     dnn_up = "abcdnet_score > 0.93"
#     dnn_dn = "abcdnet_score <= 0.5"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "vbs_bdt_score > 0.88"
#     vbs_dn = "vbs_bdt_score <= 0.4"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

    # New signal sample
#     SR_like="hbbfatjet_xbb > 0.50 and ld_vqqfatjet_xwqq > 0.70 and tr_vqqfatjet_xwqq > 0.40"
#     dnn_up = "abcdnet_score > 0.85"
#     dnn_dn = "abcdnet_score <= 0.1"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "vbs_bdt_score > 0.93"
#     vbs_dn = "vbs_bdt_score <= 0.1"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"
    
    SR_like="hbbfatjet_xbb > 0.5 and ld_vqqfatjet_xwqq > 0.60 and tr_vqqfatjet_xwqq > 0.60"
    dnn_up = "abcdnet_score > 0.72"
    dnn_dn = "abcdnet_score <= 0.1"
    dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
    vbs_up = "vbs_bdt_score > 0.91"
    vbs_dn = "vbs_bdt_score <= 0.1"
    vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

    # NEW Single DisCo (with VBS DNN as disco target):
    # (By hand) For ABCDNet_30DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm_leakyReLU_dCorr
    # bf_rounded_SR = "abcdnet_score > 0.96 and hbbfatjet_xbb > 0.60 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.75 and abs_deta_jj > 4"
#     SR_like="hbbfatjet_xbb > 0.5 and not (ld_vqqfatjet_xwqq > 0.5 and tr_vqqfatjet_xwqq > 0.4)" # XWqq sideband
#     SR_like="hbbfatjet_xbb > 0.5 and not (ld_vqqfatjet_xwqq > 0.5 and tr_vqqfatjet_xwqq > 0.4) and abcdnet_score < 0.95" # XWqq and ABCDNet sideband
#     SR_like="hbbfatjet_xbb > 0.5 and ld_vqqfatjet_xwqq > 0.5 and tr_vqqfatjet_xwqq > 0.4"
#     dnn_up = "abcdnet_score > 0.85"
#     dnn_dn = "abcdnet_score <= 0.3"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "vbs_dnn_score > 0.9"
#     vbs_dn = "vbs_dnn_score <= 0.2"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"
    
#     SR_like="ld_vqqfatjet_xwqq > 0.5 and tr_vqqfatjet_xwqq > 0.3"
#     dnn_up = "abcdnet_score > 0.8"
#     dnn_dn = "abcdnet_score <= 0.3"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "vbs_dnn_score > 0.9"
#     vbs_dn = "vbs_dnn_score <= 0.1"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

    # --------------------
    
    # --- Double DisCo ---
    # bf_rounded_SR ="abcdnet_score1 > 0.3 and abcdnet_score2 > 0.3 and hbbfatjet_xbb > 0.5 and ld_vqqfatjet_xwqq > 0.4 and tr_vqqfatjet_xwqq > 0.4"
#     SR_like="hbbfatjet_xbb > 0.5 and ld_vqqfatjet_xwqq > 0.4 and tr_vqqfatjet_xwqq > 0.4"
#     dnn_up = "abcdnet_score1 > 0.3"
#     dnn_dn = "abcdnet_score1 <= 0.3"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "abcdnet_score2 > 0.3"
#     vbs_dn = "abcdnet_score2 <= 0.3"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

#     SR_like="hbbfatjet_xbb > 0.7 and ld_vqqfatjet_xwqq > 0.7 and tr_vqqfatjet_xwqq > 0.6"
#     dnn_up = "abcdnet_score1 > 0.9"
#     dnn_dn = "abcdnet_score1 <= 0.4"
#     dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
#     vbs_up = "abcdnet_score2 > 0.8"
#     vbs_dn = "abcdnet_score2 <= 0.3"
#     vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"
    # --------------------

else:
    # --- BDT ---
    # bf_rounded_SR = "bdt > 0.8 and hbbfatjet_xbb > 0.58 and ld_vqqfatjet_xwqq > 0.5 and tr_vqqfatjet_xwqq > 0.66 and abs_deta_jj > 4 and M_jj > 600"
    # vbsvvh.df["bdt_presel"] = vbsvvh.df.eval(
    #     "ST > 1300"
    #     + " and hbbfatjet_xbb > 0.5"
    #     + " and hbbfatjet_mass < 150"
    #     + " and ld_vqqfatjet_xwqq > 0.5 and tr_vqqfatjet_xwqq > 0.5"
    #     + " and ld_vqqfatjet_mass < 120 and tr_vqqfatjet_mass < 120"
    # )
    # SR_like="bdt_presel and hbbfatjet_xbb > 0.58 and ld_vqqfatjet_xwqq > 0.5 and tr_vqqfatjet_xwqq > 0.66 and M_jj > 600"
    # dnn_up = "bdt > 0.8"
    # dnn_dn = "bdt <= 0.1"
    # dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
    # vbs_up = "abs_deta_jj > 4"
    # vbs_dn = "abs_deta_jj <= 2"
    # vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"

    # bf_rounded_SR = "bdt > 0.9 and hbbfatjet_xbb > 0.5 and ld_vqqfatjet_xwqq > 0.82 and tr_vqqfatjet_xwqq > 0.66 and abs_deta_jj > 4 and M_jj > 600"
    SR_like="hbbfatjet_xbb > 0.5 and ld_vqqfatjet_xwqq > 0.82 and tr_vqqfatjet_xwqq > 0.66"
    dnn_up = "bdt > 0.9"
    dnn_dn = "bdt <= 0.3"
    dnn_md = f"not ({dnn_up}) and not ({dnn_dn})"
    vbs_up = "abs_deta_jj > 4 and M_jj > 600"
    vbs_dn = "abs_deta_jj <= 2 and M_jj <= 200"
    vbs_md = f"not ({vbs_up}) and not ({vbs_dn})"
    # -----------
    

region_A  = f"{dnn_up} and {vbs_up}"
region_B1 = f"{dnn_up} and {vbs_md}"
region_B2 = f"{dnn_up} and {vbs_dn}"
region_C1 = f"{dnn_md} and {vbs_up}"
region_C2 = f"{dnn_dn} and {vbs_up}"
region_D1 = f"{dnn_md} and {vbs_md}"
region_D2 = f"{dnn_md} and {vbs_dn}"
region_D3 = f"{dnn_dn} and {vbs_md}"
region_D4 = f"{dnn_dn} and {vbs_dn}"

# region_A  = f"abcdnet_score > 0.97 and abs_deta_jj > 4"

# region_B1 = f"abcdnet_score > 0.97 and abs_deta_jj <= 4 and abs_deta_jj > 2"
# region_B2 = f"abcdnet_score > 0.97 and abs_deta_jj <= 2"

# region_C1 = f"abcdnet_score < 0.97 and abcdnet_score > 0.5 and abs_deta_jj > 4"
# region_C2 = f"abcdnet_score < 0.97 and abcdnet_score <= 0.5 and abs_deta_jj > 4"

# region_D1 = f"abcdnet_score < 0.97 and abcdnet_score > 0.5 and abs_deta_jj <= 4 and abs_deta_jj > 2"
# region_D2 = f"abcdnet_score < 0.97 and abcdnet_score > 0.5 and abs_deta_jj <= 2"
# region_D3 = f"abcdnet_score < 0.97 and abcdnet_score <= 0.5 and abs_deta_jj <= 4 and abs_deta_jj > 2"
# region_D4 = f"abcdnet_score < 0.97 and abcdnet_score <= 0.5 and abs_deta_jj <= 2"

vbsvvh.df["region_A"]  = vbsvvh.df.eval(f"{SR_like} and {region_A}")
vbsvvh.df["region_B1"] = vbsvvh.df.eval(f"{SR_like} and {region_B1}")
vbsvvh.df["region_B2"] = vbsvvh.df.eval(f"{SR_like} and {region_B2}")
vbsvvh.df["region_C1"] = vbsvvh.df.eval(f"{SR_like} and {region_C1}")
vbsvvh.df["region_C2"] = vbsvvh.df.eval(f"{SR_like} and {region_C2}")
vbsvvh.df["region_D1"] = vbsvvh.df.eval(f"{SR_like} and {region_D1}")
vbsvvh.df["region_D2"] = vbsvvh.df.eval(f"{SR_like} and {region_D2}")
vbsvvh.df["region_D3"] = vbsvvh.df.eval(f"{SR_like} and {region_D3}")
vbsvvh.df["region_D4"] = vbsvvh.df.eval(f"{SR_like} and {region_D4}")

vbsvvh.df["region_B"] = vbsvvh.df.eval("region_B1 or region_B2")
vbsvvh.df["region_C"] = vbsvvh.df.eval("region_C1 or region_C2")
vbsvvh.df["region_D"] = vbsvvh.df.eval("region_D1 or region_D2 or region_D3 or region_D4")

In [5]:
def get_cutflow_row(selection, headers=True, signal_separate=False):
    samples = {"QCD": 0, "TTHad": 0, "TT1L": 0, "TTW": 0, "TTH": 0, "SingleTop": 0, "Bosons": 0, "TotalBkg": 0}
    for sample in samples.keys():
        if sample != "TotalBkg":
            count = vbsvvh.sample_count(sample, selection=selection)
            samples[sample] = count
            samples["TotalBkg"] += count
    
    if not signal_separate:
        samples["TotalSig"] = vbsvvh.sig_count(selection=selection)

    if headers:
        print(",".join(samples.keys()))
    print(",".join([f"{value}" for value in samples.values()]))
    if signal_separate:
        print("---")
        print("Signal")
        print(vbsvvh.sig_count(selection=selection))
    
get_cutflow_row("hbbfatjet_xbb > 0.8")
get_cutflow_row("hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.6", headers=False)
get_cutflow_row("hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.6 and ld_vqqfatjet_xwqq > 0.4", headers=False)
get_cutflow_row("region_A or region_B or region_C or region_D", headers=False)
get_cutflow_row("region_A or region_C", headers=False)
get_cutflow_row("region_A", headers=False)

QCD,TTHad,TT1L,TTW,TTH,SingleTop,Bosons,TotalBkg,TotalSig
6222.060365233193,979.3700947346899,129.06788570528118,16.875606552501743,6.953309346812703,106.50749927986402,198.98468595641805,7659.8194468087595,10.10344029792958
2988.2736787345766,550.6796532547116,68.08244023935066,11.17770570623231,4.377469607309415,76.84611707595138,119.98580099748854,3819.42286561562,9.274256740836718
2988.2736787345766,550.6796532547116,68.08244023935066,11.17770570623231,4.377469607309415,76.84611707595138,119.98580099748854,3819.42286561562,9.274256740836718
2175.2391833259526,439.40811287502527,53.254999261777,10.391777815995273,3.4918067901140932,72.66216604552724,103.03645480371303,2857.4845009181045,8.977466055604747
15.003729476544297,7.781183308731203,0.8315614607738749,0.12193975634581482,0.044503924382483295,1.7487997890848301,2.1386598715441476,27.670377587406655,6.523570567346845
0.0,0.2965152171906393,0.045675604177482745,0.1121572372117131,0.004493331889903749,0.18392503866011833,0.40875

In [6]:
def get_abcd(A, B, C, D):
    pred_A_count = 0
    pred_A_error = 0
    for region_i, region in enumerate([A, B, C, D]):
        sig_count, bkg_count = vbsvvh.get_event_counts(selection=region)
        sig_error, bkg_error = vbsvvh.get_event_errors(selection=region)
        csv_line = f"{bkg_count},{bkg_error},{sig_count},{sig_error}"
        if sig_count < bkg_error*1.2:
            data_count = vbsvvh.data_count(selection=region)
            data_error = vbsvvh.data_error(selection=region)
            csv_line += f",{data_count},{data_error}"
        else:
            csv_line += f",—,—"
        print(csv_line)
        # Do extrapolation
        if region_i == 1:
            pred_A_count += bkg_count
        elif region_i == 2:
            pred_A_count *= bkg_count
        elif region_i == 3 and bkg_count > 0:
            pred_A_count /= bkg_count
        # Calculate error
        if region_i > 0 and bkg_count > 0:
            pred_A_error += (bkg_error/bkg_count)**2
    
    pred_A_error = np.sqrt(pred_A_error)*pred_A_count
    print()
    print(f"pred_A_count = {pred_A_count}")
    print(f"pred_A_error = {pred_A_error}")
    print()
        
get_abcd("region_A", "region_B", "region_C", "region_D")

get_abcd("region_B1", "region_B2", "region_D1", "region_D2")

get_abcd("region_D1", "region_D2", "region_D3", "region_D4")

get_abcd("region_C1", "region_D1", "region_C2", "region_D3")

print("---\n")

get_abcd("region_A", "region_B", "region_C1", "region_D1 or region_D2")

get_abcd("region_C1", "region_D1 or region_D2", "region_C2", "region_D3 or region_D4")

1.0515225310137617,0.2936742982891579,5.749356339982519,0.07258844920455661,—,—
115.43100804625023,6.262771808459874,2.0280439565589887,0.04322835088646124,155,12.449899597988733
26.61885505639289,3.1381959163551074,0.774214227364327,0.02530615727566865,47,6.855654600401044
2714.3831152844473,73.36502685097675,0.42585153169891166,0.018542371949489447,2980,54.589376255824725

pred_A_count = 1.131985111053296
pred_A_error = 0.15006007218145315

35.43820695024639,3.3596055734706285,1.9361923421647995,0.0423695399731622,46,6.782329983125268
79.99280109600384,5.285391292567169,0.09185161439418926,0.008573937416707996,109,10.44030650891055
157.3739359947245,14.489522174036527,0.3173981411150598,0.0163864035860166,174,13.19090595827292
395.44630743993247,25.51036491021451,0.03342061670308879,0.004857243498236538,452,21.2602916254693

pred_A_count = 31.834364673220392
pred_A_error = 4.151214913824767

157.3739359947245,14.489522174036527,0.3173981411150598,0.0163864035860166,174,13.19090595827

In [22]:
def get_abcd_comp(A, B, C, D):
    samples = vbsvvh.bkg_df().name.unique()
    print(",,".join(samples))
    for region_i, region in enumerate([A, B, C, D]):
        csv_line = []
        for sample in samples:
            bkg_count = vbsvvh.sample_count(sample, selection=region)
            bkg_error = vbsvvh.sample_error(sample, selection=region)
            csv_line.append(f"{bkg_count},{bkg_error}")
#             csv_line.append(str(bkg_count))
        print(",".join(csv_line))
        
get_abcd_comp("region_A", "region_B", "region_C", "region_D")

get_abcd_comp("region_B1", "region_B2", "region_D1", "region_D2")

get_abcd_comp("region_D1", "region_D2", "region_D3", "region_D4")

get_abcd_comp("region_C1", "region_D1", "region_C2", "region_D3")

print("---\n")

get_abcd_comp("region_A", "region_B", "region_C1", "region_D1 or region_D2")

get_abcd_comp("region_C1", "region_D1 or region_D2", "region_C2", "region_D3 or region_D4")

Bosons,,QCD,,SingleTop,,TT1L,,TTH,,TTHad,,TTW
0.0,0.0,0.2873225325009612,0.2873225325009612,0.1750439807378428,0.1522113276600778,0.0,0.0,0.0,0.0,0.4907536252585979,0.18606918630427793,0.0,0.0
12.341553192472114,1.0042994230981517,151.8550670333085,8.160936511422022,8.745200487932053,1.1400682483963802,2.0147265039486877,0.30440744322874647,0.1256991698888313,0.013383447544194746,14.467759300588469,1.0067148698539916,0.3389400769439469,0.23469566014068832
0.08802401909255668,0.08669360991630513,2.5407435907401483,1.1997222104098215,1.9389965187372222,0.50999767144163,0.0,0.0,0.003938332176416913,0.0018741774949669646,51.54117399196649,1.8750274863431837,0.0014512014986380268,0.0010274784618027931
214.49448438483284,4.420162230702723,6010.91758102931,124.9927556050232,107.72625685143063,3.8579248936017185,101.01570114392142,2.1947053077992202,5.95751075105157,0.0963259024623072,813.0625460497465,7.570375004326675,18.514935652554847,1.4221069964374624
Bosons,,QCD,,SingleTop,,TT1L,,TTH,,T

In [8]:
# For ABCDNet_100DisCo_loosePresel_longRun
# bf_rounded_SR = "abcdnet_score > 0.99 and hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.7 and abs_deta_jj > 3.5"
# SR_like = ("hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.7 and tr_vqqfatjet_xwqq > 0.7") # optimal
# VBS_cut = "abs(deta_jj) > 3.5"
# BDT_cut = "(abcdnet_score > 0.99)"

# For ABCDNet_100DisCo_loosePresel_longRun_smallBatches
# bf_rounded_SR = "abcdnet_score > 0.99 and hbbfatjet_xbb > 0.75 and ld_vqqfatjet_xwqq > 0.55 and tr_vqqfatjet_xwqq > 0.75 and abs_deta_jj > 4"
# SR_like = ("hbbfatjet_xbb > 0.75 and ld_vqqfatjet_xwqq > 0.55 and tr_vqqfatjet_xwqq > 0.75") # optimal
# VBS_cut = "abs(deta_jj) > 4"
# BDT_cut = "(abcdnet_score > 0.99)"

# For ABCDNet_100DisCo_loosePresel_longRun_smallBatches
# bf_rounded_SR = "abcdnet_score > 0.98 and hbbfatjet_xbb > 0.75 and ld_vqqfatjet_xwqq > 0.75 and tr_vqqfatjet_xwqq > 0.7 and abs_deta_jj > 3.5"
# SR_like = ("hbbfatjet_xbb > 0.75 and ld_vqqfatjet_xwqq > 0.75 and tr_vqqfatjet_xwqq > 0.7") # optimal
# VBS_cut = "abs(deta_jj) > 3.5"
# BDT_cut = "(abcdnet_score > 0.98)"

# For ABCDNet_100DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm
# bf_rounded_SR = "abcdnet_score > 0.96 and hbbfatjet_xbb > 0.55 and ld_vqqfatjet_xwqq > 0.75 and tr_vqqfatjet_xwqq > 0.7 and abs_deta_jj > 4"
# SR_like = ("hbbfatjet_xbb > 0.55 and ld_vqqfatjet_xwqq > 0.75 and tr_vqqfatjet_xwqq > 0.7") # optimal
# VBS_cut = "abs_deta_jj > 4"
# BDT_cut = "(abcdnet_score > 0.96)"

# For ABCDNet_100DoubleDisCo_mediumPresel_longRun_qcdNorm_allFeatNorm
# bf_rounded_SR = "abcdnet_score1 > 0.9 and abcdnet_score2 > 0.8 and hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.6 and tr_vqqfatjet_xwqq > 0.75"
# SR_like = ("hbbfatjet_xbb > 0.8 and ld_vqqfatjet_xwqq > 0.6 and tr_vqqfatjet_xwqq > 0.75") # optimal
# SR_like="obj_sel"
# VBS_cut = "abcdnet_score1 > 0.9"
# BDT_cut = "abcdnet_score2 > 0.8"

# For ABCDNet_20DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm_dCorr
# bf_rounded_SR = "abcdnet_score > 0.96 and hbbfatjet_xbb > 0.62 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.68 and abs_deta_jj > 4"
# SR_like="hbbfatjet_xbb > 0.62 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.68"
# VBS_cut = "abs_deta_jj > 4"
# BDT_cut = "abcdnet_score > 0.96"

# For ABCDNet_30DisCo_mediumPresel_longRun_qcdNorm_allFeatNorm_dCorr
# bf_rounded_SR = "abcdnet_score > 0.97 and hbbfatjet_xbb > 0.62 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.68 and abs_deta_jj > 4"
SR_like="hbbfatjet_xbb > 0.62 and ld_vqqfatjet_xwqq > 0.8 and tr_vqqfatjet_xwqq > 0.68"
VBS_cut = "abs_deta_jj > 4"
BDT_cut = "abcdnet_score > 0.97"


# SR_like = (
#     "hbbfatjet_xbb > 0.6 and ld_vqqfatjet_xwqq > 0.6 and tr_vqqfatjet_xwqq > 0.65"
#     + " and ST > 1300"
#     + " and hbbfatjet_mass < 150"
#     + " and ld_vqqfatjet_mass < 120 and tr_vqqfatjet_mass < 120"
#     + " and M_jj > 600"
# )
# SR_like = f"({SR_like})"
# VBS_cut = "(M_jj > 700 and abs(deta_jj) > 4)"
# VBS_cut = "(M_jj > 600 and abs(deta_jj) > 4)"


# BDT_cut = "(bdt > 0.75)"
# regionA = f"{SR_like} and abs(deta_jj) > 4 and hbbjet_msoftdrop >= 150"
# regionB = f"{SR_like} and abs(deta_jj) <= 4 and hbbjet_msoftdrop >= 150"
# regionC = f"{SR_like} and abs(deta_jj) <= 4 and hbbjet_msoftdrop < 150"
# regionD = f"{SR_like} and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150"
vbsvvh.df["SR_like"] = vbsvvh.df.eval(SR_like)
vbsvvh.df["VBS_cut"] = vbsvvh.df.eval(VBS_cut)
vbsvvh.df["BDT_cut"] = vbsvvh.df.eval(BDT_cut)

# AN_numbers = {
#     "PredBkg": 0,
#     "PredBkgStatErr": 0,
#     "PredBkgSystErr": 0,
#     "ExpSig": round(vbswh.sig_count(selection=regionD)),
#     "ExpSigStatErr": round(vbswh.sig_error(selection=regionD), 1),
#     "ExpSigSystErr": 0,
#     "ExpBkg": round(vbswh.bkg_count(selection=regionD)),
#     "BkgEstABMC": 0,
#     "BkgEstABMCErr": 0,
#     "BkgEstABData": 0,
#     "BkgEstABDataErr": 0,
#     "PredBkgMC": 0,
#     "BkgEstMethodSystErr": 0,
#     "BkgEstBkgCompSystErr": 0,
#     "BkgEstTotalSystErr": 0,
#     "BkgEstStatErr": 0,
#     "BkgEstWJetsUpABMC": 0,
#     "BkgEstWJetsUpABMCErr": 0,
#     "BkgEstWJetsDownABMC": 0,
#     "BkgEstWJetsDownABMCErr": 0,
#     "BkgEstWJetsCompSyst": 0,
#     "BkgEstBosonsUpABMC": 0,
#     "BkgEstBosonsUpABMCErr": 0,
#     "BkgEstBosonsDownABMC": 0,
#     "BkgEstBosonsDownABMCErr": 0,
#     "BkgEstBosonsCompSyst": 0,
#     "SRTwoPredBkg": 0,
#     "SRTwoPredBkgStatErr": 0,
#     "SRTwoPredBkgSystErr": 0,
#     "SRTwoBkgEstStatErr": 0,
#     "SRTwoBkgEstSystErr": 0,
#     "SRTwoExpSig": round(vbswh.sig_count(selection=f"{regionD} and ST > 1500")),
#     "SRTwoExpSigStatErr": round(vbswh.sig_error(selection=f"{regionD} and ST > 1500"), 1),
#     "SRTwoExpSigSystErr": 0,
#     "LambdaWZeqNegOneExcl": 0
# }

In [9]:
pred, stat, syst = vbsvvh.ABCD( 
    "VBS_cut",
    "BDT_cut",
    "SR_like",
    h_dir="left", v_dir="up", 
    show_data=True
)
# AN_numbers["PredBkg"] = round(pred)
# AN_numbers["PredBkgStatErr"] = round(pred*stat, 1)
# A_bkg_wgt = vbsvvh.bkg_count(selection=regionA)
# A_bkg_err = vbsvvh.bkg_error(selection=regionA)
# B_bkg_wgt = vbsvvh.bkg_count(selection=regionB)
# B_bkg_err = vbsvvh.bkg_error(selection=regionB)
# AN_numbers["PredBkgMC"] = round(A_bkg_wgt/B_bkg_wgt*vbswh.bkg_count(selection=regionC), 1)
# AN_numbers["BkgEstABMC"] = A_bkg_wgt/B_bkg_wgt
# AN_numbers["BkgEstABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)
# AN_numbers["BkgEstMethodSystErr"] = syst*100
# AN_numbers["BkgEstStatErr"] = stat*100
# A_data     = vbsvvh.data_count(selection=regionA)
# A_data_err = vbsvvh.data_error(selection=regionA)
# B_data     = vbsvvh.data_count(selection=regionB)
# B_data_err = vbsvvh.data_error(selection=regionB)
# AN_numbers["BkgEstABData"] = A_data/B_data
# AN_numbers["BkgEstABDataErr"] = round(np.sqrt((B_data_err/B_data)**2 + (A_data_err/A_data)**2)*100, 1)

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
SR_like and VBS_cut and (not (BDT_cut)),A,256.6342960975481,26.47785643634475,2.988844777625301,0.04823516208514082,239,15.459624833740307
SR_like and (not (VBS_cut)) and (not (BDT_cut)),B,830.8502844531575,37.031177397464816,0.6494473989546797,0.022275752499616722,975,31.22498999199199
SR_like and (not (VBS_cut)) and BDT_cut,C,3.035872723537908,0.8421334904119976,0.42635882455545715,0.018336565905725098,1,1.0
SR_like and VBS_cut and BDT_cut,D,1.0075970636452793,0.367153172418339,3.307633924484733,0.050991391901517254,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.3088815168023414,0.11238893913282222
BtoA_data,0.24512820512820513,0.072178538666636


In [6]:
table = """cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,172.83836599410466,3.2465338312517833,12.166519202705857,1.516710781384997,142,11.916375287812984
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,241.74523550858504,5.814278818312787,0.9356567222556252,0.42450324891645086,201,14.177446878757825
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,180.98316859862283,4.400911486540372,16.700083259792436,1.796365544551971,170,13.038404810405298
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,116.32589003200601,3.833989382853367,397.43691975034,8.664565650077654,BLINDED,BLINDED
"""

print("Region & Total bkg. (MC) &    Total sig.   &   Total data   \\\\")
print("\\hline")
for line in table.splitlines()[1:]:
    cut, region, bkg_wgt, bkg_err, sig_wgt, sig_err, data, data_err = line.split(",")
    bkg_wgt, bkg_err = (float(bkg_wgt), float(bkg_err))
    sig_wgt, sig_err = (float(sig_wgt), float(sig_err))
    if region == "D":
        print(f"{region:^6} & ${bkg_wgt:.1f} \pm {bkg_err:.1f}$ & ${sig_wgt:.1f} \pm {sig_err:.1f}$ & {'--':^14} \\\\")
    else:
        data, data_err = (int(data), float(data_err))
        print(f"{region:^6} & ${bkg_wgt:.1f} \pm {bkg_err:.1f}$ & ${sig_wgt:>5.1f} \pm {sig_err:.1f}$ & ${data} \pm {data_err:.1f}$ \\\\")

Region & Total bkg. (MC) &    Total sig.   &   Total data   \\
\hline
  A    & $172.8 \pm 3.2$ & $ 12.2 \pm 1.5$ & $142 \pm 11.9$ \\
  B    & $241.7 \pm 5.8$ & $  0.9 \pm 0.4$ & $201 \pm 14.2$ \\
  C    & $181.0 \pm 4.4$ & $ 16.7 \pm 1.8$ & $170 \pm 13.0$ \\
  D    & $116.3 \pm 3.8$ & $397.4 \pm 8.7$ &       --       \\


In [7]:
vbswh.df.loc[vbswh.df.name == "WJets", "event_weight"] *= 2
vbswh.ABCD( 
    "abs(deta_jj) > 4",
    "hbbjet_msoftdrop < 150",
    "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9",
    h_dir="left", v_dir="up", 
    show_data=True
)
A_bkg_wgt = vbswh.bkg_count(selection=regionA)
A_bkg_err = vbswh.bkg_error(selection=regionA)
B_bkg_wgt = vbswh.bkg_count(selection=regionB)
B_bkg_err = vbswh.bkg_error(selection=regionB)
AN_numbers["BkgEstWJetsUpABMC"] = A_bkg_wgt/B_bkg_wgt
AN_numbers["BkgEstWJetsUpABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)

vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()
print("")

vbswh.df.loc[vbswh.df.name == "WJets", "event_weight"] *= 0.5
vbswh.ABCD( 
    "abs(deta_jj) > 4",
    "hbbjet_msoftdrop < 150",
    "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9",
    h_dir="left", v_dir="up", 
    show_data=True
)
A_bkg_wgt = vbswh.bkg_count(selection=regionA)
A_bkg_err = vbswh.bkg_error(selection=regionA)
B_bkg_wgt = vbswh.bkg_count(selection=regionB)
B_bkg_err = vbswh.bkg_error(selection=regionB)
AN_numbers["BkgEstWJetsDownABMC"] = A_bkg_wgt/B_bkg_wgt
AN_numbers["BkgEstWJetsDownABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)
AN_numbers["BkgEstWJetsCompSyst"] = 100*max(
    abs(1 - AN_numbers["BkgEstWJetsUpABMC"]/AN_numbers["BkgEstABMC"]),
    abs(1 - AN_numbers["BkgEstWJetsDownABMC"]/AN_numbers["BkgEstABMC"])
)

vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,184.11863373459033,3.4788901220731163,12.166519202705857,1.516710781384997,142,11.916375287812984
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,272.278367293264,5.964772315609042,0.9356567222556252,0.42450324891645086,201,14.177446878757825
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,223.80855861934674,4.721721880289837,16.700083259792436,1.796365544551971,170,13.038404810405298
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,137.54055752125072,4.412891202237653,397.43691975034,8.664565650077654,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.676214697351556,0.02892968041324986

In [9]:
vbswh.df.loc[vbswh.df.name == "Bosons", "event_weight"] *= 2
vbswh.ABCD( 
    "abs(deta_jj) > 4",
    "hbbjet_msoftdrop < 150",
    "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9",
    h_dir="left", v_dir="up", 
    show_data=True
)
A_bkg_wgt = vbswh.bkg_count(selection=regionA)
A_bkg_err = vbswh.bkg_error(selection=regionA)
B_bkg_wgt = vbswh.bkg_count(selection=regionB)
B_bkg_err = vbswh.bkg_error(selection=regionB)
AN_numbers["BkgEstBosonsUpABMC"] = A_bkg_wgt/B_bkg_wgt
AN_numbers["BkgEstBosonsUpABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)

vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()
print("")

vbswh.df.loc[vbswh.df.name == "Bosons", "event_weight"] *= 0.5
vbswh.ABCD( 
    "abs(deta_jj) > 4",
    "hbbjet_msoftdrop < 150",
    "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9",
    h_dir="left", v_dir="up", 
    show_data=True
)
A_bkg_wgt = vbswh.bkg_count(selection=regionA)
A_bkg_err = vbswh.bkg_error(selection=regionA)
B_bkg_wgt = vbswh.bkg_count(selection=regionB)
B_bkg_err = vbswh.bkg_error(selection=regionB)
AN_numbers["BkgEstBosonsDownABMC"] = A_bkg_wgt/B_bkg_wgt
AN_numbers["BkgEstBosonsDownABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)
AN_numbers["BkgEstBosonsCompSyst"] = 100*max(
    abs(1 - AN_numbers["BkgEstBosonsUpABMC"]/AN_numbers["BkgEstABMC"]),
    abs(1 - AN_numbers["BkgEstBosonsDownABMC"]/AN_numbers["BkgEstABMC"])
)

vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,173.83525942152826,3.4534255278310586,12.166519202705857,1.516710781384997,142,11.916375287812984
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,249.66308515171525,9.969348396019603,0.9356567222556252,0.42450324891645086,201,14.177446878757825
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,202.49923020258785,7.162464311778483,16.700083259792436,1.796365544551971,170,13.038404810405298
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,122.30640806548851,5.967146841301019,397.43691975034,8.664565650077654,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.6962793851397456,0.04460002834900

In [14]:
AN_numbers["BkgEstBosonsCompSyst"]

2.612927996483183

In [11]:
AN_numbers["BkgEstBkgCompSystErr"] = np.sqrt(
    AN_numbers["BkgEstWJetsCompSyst"]**2 + AN_numbers["BkgEstBosonsCompSyst"]**2
)
AN_numbers["BkgEstTotalSystErr"] = np.sqrt(
    AN_numbers["BkgEstMethodSystErr"]**2 + AN_numbers["BkgEstBkgCompSystErr"]**2
)

In [12]:
AN_numbers["BkgEstBkgCompSystErr"]

6.0163569634985645

In [10]:
# SR2 numbers for posterity
AN_numbers["SRTwoPredBkg"] = (
    AN_numbers["PredBkg"]*vbswh.bkg_count(selection=f"{regionD} and ST > 1500")/AN_numbers["PredBkgMC"]
)
AN_numbers["SRTwoBkgEstSystErr"] = np.sqrt(
    (vbswh.data_error(selection=f"{regionB} and ST > 1500")/vbswh.data_count(selection=f"{regionB} and ST > 1500"))**2
    + (AN_numbers["BkgEstTotalSystErr"]/100)**2
)
AN_numbers["SRTwoPredBkgSystErr"] = round(AN_numbers["SRTwoBkgEstSystErr"]*AN_numbers["SRTwoPredBkg"], 1)
AN_numbers["SRTwoBkgEstSystErr"] = round(AN_numbers["SRTwoBkgEstSystErr"]*100, 1)

AN_numbers["SRTwoBkgEstStatErr"] = round(AN_numbers["BkgEstStatErr"], 1)
AN_numbers["SRTwoPredBkgStatErr"] = round(AN_numbers["SRTwoPredBkg"]*(AN_numbers["BkgEstStatErr"]/100), 1)
AN_numbers["SRTwoPredBkg"] = round(AN_numbers["SRTwoPredBkg"])

AN_numbers["SRTwoExpSig"] = round(vbswh.sig_count(selection=f"{regionD} and ST > 1500"))
AN_numbers["SRTwoExpSigStatErr"] = round(vbswh.sig_error(selection=f"{regionD} and ST > 1500"), 1)

In [11]:
AN_numbers["PredBkgSystErr"] = round(pred*AN_numbers["BkgEstTotalSystErr"]/100, 1)

In [12]:
AN_numbers["BkgEstABMC"] = round(AN_numbers["BkgEstABMC"], 2)
AN_numbers["BkgEstABData"] = round(AN_numbers["BkgEstABData"], 2)

AN_numbers["BkgEstWJetsUpABMC"] = round(AN_numbers["BkgEstWJetsUpABMC"], 2)
AN_numbers["BkgEstWJetsDownABMC"] = round(AN_numbers["BkgEstWJetsDownABMC"], 2)
AN_numbers["BkgEstWJetsCompSyst"] = round(AN_numbers["BkgEstWJetsCompSyst"], 1)

AN_numbers["BkgEstBosonsUpABMC"] = round(AN_numbers["BkgEstBosonsUpABMC"], 2)
AN_numbers["BkgEstBosonsDownABMC"] = round(AN_numbers["BkgEstBosonsDownABMC"], 2)
AN_numbers["BkgEstBosonsCompSyst"] = round(AN_numbers["BkgEstBosonsCompSyst"], 1)

AN_numbers["BkgEstBkgCompSystErr"] = round(AN_numbers["BkgEstBkgCompSystErr"], 1)
AN_numbers["BkgEstMethodSystErr"] = round(AN_numbers["BkgEstMethodSystErr"], 1)
AN_numbers["BkgEstTotalSystErr"] = round(AN_numbers["BkgEstTotalSystErr"], 1)
AN_numbers["BkgEstStatErr"] = round(AN_numbers["BkgEstStatErr"], 1)

In [13]:
with open("AN_numbers.json", "w") as f_out:
    json.dump(AN_numbers, f_out)

AN_numbers # must run vbswh-sys.ipynb to fill completely

{'PredBkg': 120,
 'PredBkgStatErr': 16.1,
 'PredBkgSystErr': 15.3,
 'ExpSig': 397,
 'ExpSigStatErr': 8.7,
 'ExpSigSystErr': 0,
 'ExpBkg': 116,
 'BkgEstABMC': 0.71,
 'BkgEstABMCErr': 3.1,
 'BkgEstABData': 0.71,
 'BkgEstABDataErr': 11.0,
 'PredBkgMC': 129.4,
 'BkgEstMethodSystErr': 11.2,
 'BkgEstBkgCompSystErr': 6.0,
 'BkgEstTotalSystErr': 12.7,
 'BkgEstStatErr': 13.4,
 'BkgEstWJetsUpABMC': 0.68,
 'BkgEstWJetsUpABMCErr': 2.9,
 'BkgEstWJetsDownABMC': 0.74,
 'BkgEstWJetsDownABMCErr': 3.2,
 'BkgEstWJetsCompSyst': 5.4,
 'BkgEstBosonsUpABMC': 0.7,
 'BkgEstBosonsUpABMCErr': 4.5,
 'BkgEstBosonsDownABMC': 0.72,
 'BkgEstBosonsDownABMCErr': 2.6,
 'BkgEstBosonsCompSyst': 2.6,
 'SRTwoPredBkg': 5,
 'SRTwoPredBkgStatErr': 0.7,
 'SRTwoPredBkgSystErr': 1.9,
 'SRTwoBkgEstStatErr': 13.4,
 'SRTwoBkgEstSystErr': 35.7,
 'SRTwoExpSig': 106,
 'SRTwoExpSigStatErr': 4.5,
 'SRTwoExpSigSystErr': 0,
 'LambdaWZeqNegOneExcl': 0}