In [1]:
import glob
import json
import numpy as np
from utils.analysis import Extrapolation

In [2]:
# Production tag selecting which output directory of babies to read.
TAG = "kscans"  # alternative production tag: "pku"


def split_babies(paths):
    """Partition baby ROOT file paths into (data, signal, background) lists.

    Classification looks only at the file name (the text after the last "/"),
    so a directory or tag that happens to contain "data" or "Lambda" cannot
    misclassify every file. Paths are processed in sorted order so that the
    returned lists are identical from one kernel session to the next (a
    set difference would give an arbitrary, hash-dependent order).

    Parameters
    ----------
    paths : iterable of str
        Candidate file paths using "/" as separator.

    Returns
    -------
    (data, sig, bkg) : tuple of lists of str
        Files whose name contains "data", files whose name contains
        "VBSWH_mkW", and every remaining file. Files whose name contains
        "Lambda" are dropped entirely.
    """
    data, sig, bkg = [], [], []
    for path in sorted(paths):
        fname = path.rsplit("/", 1)[-1]
        if "Lambda" in fname:
            continue
        is_data = "data" in fname
        is_sig = "VBSWH_mkW" in fname
        if is_data:
            data.append(path)
        if is_sig:
            sig.append(path)
        if not (is_data or is_sig):
            bkg.append(path)
    return data, sig, bkg


# Sorted, Lambda-free list of every baby found for this tag.
babies = sorted(
    baby for baby in glob.glob(f"../analysis/studies/vbswh/output_{TAG}/Run2/*.root")
    if "Lambda" not in baby.rsplit("/", 1)[-1]
)
data_babies, sig_babies, bkg_babies = split_babies(babies)
print(data_babies)
print(sig_babies)
bkg_babies

['../analysis/studies/vbswh/output_kscans/Run2/data.root']
['../analysis/studies/vbswh/output_kscans/Run2/VBSWH_mkW.root']


['../analysis/studies/vbswh/output_kscans/Run2/VBSWH_SM.root',
 '../analysis/studies/vbswh/output_kscans/Run2/WJets.root',
 '../analysis/studies/vbswh/output_kscans/Run2/EWKWLep.root',
 '../analysis/studies/vbswh/output_kscans/Run2/TTbar2L.root',
 '../analysis/studies/vbswh/output_kscans/Run2/TTbar1L.root',
 '../analysis/studies/vbswh/output_kscans/Run2/Bosons.root',
 '../analysis/studies/vbswh/output_kscans/Run2/VH.root',
 '../analysis/studies/vbswh/output_kscans/Run2/SingleTop.root',
 '../analysis/studies/vbswh/output_kscans/Run2/TTX.root']

In [3]:
# Per-event scale-factor columns handed to Extrapolation as `weight_columns`.
EVENT_WEIGHT_COLUMNS = [
    "xsec_sf", "lep_id_sf", "ewkfix_sf",
    "elec_reco_sf", "muon_iso_sf",
    "btag_sf", "pu_sf", "prefire_sf", "trig_sf",
    "xbb_sf" # applied only because Xbb > 0.9 applied everywhere for ABCD
]

# Load signal, background and data babies into one analysis object.
vbswh = Extrapolation(
    sig_root_files=sig_babies,
    bkg_root_files=bkg_babies,
    data_root_files=data_babies,
    ttree_name="tree",
    weight_columns=EVENT_WEIGHT_COLUMNS,
    reweight_column="reweights",
    plots_dir=f"/home/users/jguiang/public_html/onelep_plots/{TAG}/val"
)

# Boolean preselection flags, from loosest to tightest; each entry maps the
# new column name to the expression evaluated on the event dataframe.
PRESELECTIONS = {
    "presel_noVBS_noBVeto": "hbbjet_score > 0.3",
    "presel_noVBS": "passes_bveto and hbbjet_score > 0.3",
    "presel_noDetaJJ": "passes_bveto and M_jj > 500 and hbbjet_score > 0.3",
    "presel": "passes_bveto and M_jj > 500 and abs(deta_jj) > 3 and hbbjet_score > 0.3",
}
for flag_name, flag_expr in PRESELECTIONS.items():
    vbswh.df[flag_name] = vbswh.df.eval(flag_expr)

# Snapshot of the nominal event weights, used later to undo temporary
# rescaling of individual samples.
ORIG_EVENT_WEIGHT = vbswh.df["event_weight"].values.copy()

Loading sig babies: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.64it/s]
Loading bkg babies: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:38<00:00,  4.33s/it]
Loading data babies: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.15s/it]


In [4]:
# Cuts common to all four ABCD regions.
SRlike  = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"

# The two ABCD axes: |deta_jj| around 4 and the Hbb-jet soft-drop mass around 150.
deta_hi, deta_lo = "abs(deta_jj) > 4", "abs(deta_jj) <= 4"
msd_lo, msd_hi = "hbbjet_msoftdrop < 150", "hbbjet_msoftdrop >= 150"

regionA = " and ".join([SRlike, deta_hi, msd_hi])
regionB = " and ".join([SRlike, deta_lo, msd_hi])
regionC = " and ".join([SRlike, deta_lo, msd_lo])
regionD = " and ".join([SRlike, deta_hi, msd_lo])

# Every number exported by this notebook, in export order. All start at 0 and
# are filled in below or by later cells.
AN_NUMBER_KEYS = [
    "PredBkg", "PredBkgStatErr", "PredBkgSystErr",
    "ExpSig", "ExpSigStatErr", "ExpSigSystErr",
    "ExpBkg",
    "BkgEstABMC", "BkgEstABMCErr",
    "BkgEstABData", "BkgEstABDataErr",
    "PredBkgMC",
    "BkgEstMethodSystErr", "BkgEstBkgCompSystErr",
    "BkgEstTotalSystErr", "BkgEstStatErr",
    "BkgEstWJetsUpABMC", "BkgEstWJetsUpABMCErr",
    "BkgEstWJetsDownABMC", "BkgEstWJetsDownABMCErr",
    "BkgEstWJetsCompSyst",
    "BkgEstBosonsUpABMC", "BkgEstBosonsUpABMCErr",
    "BkgEstBosonsDownABMC", "BkgEstBosonsDownABMCErr",
    "BkgEstBosonsCompSyst",
    "SRTwoPredBkg", "SRTwoPredBkgStatErr", "SRTwoPredBkgSystErr",
    "SRTwoExpSig", "SRTwoExpSigStatErr", "SRTwoExpSigSystErr",
]
AN_numbers = dict.fromkeys(AN_NUMBER_KEYS, 0)

# Expected signal and background yields in region D, taken from simulation.
AN_numbers["ExpSig"] = round(vbswh.sig_count(selection=regionD))
AN_numbers["ExpSigStatErr"] = round(vbswh.sig_error(selection=regionD), 1)
AN_numbers["ExpBkg"] = round(vbswh.bkg_count(selection=regionD))

In [5]:
def ab_ratio_with_pct_err(a_val, a_err, b_val, b_err):
    """Return (A/B, relative error on A/B in percent rounded to 0.1).

    The relative error adds the relative errors of A and B in quadrature.
    """
    pct_err = round(np.sqrt((b_err/b_val)**2 + (a_err/a_val)**2)*100, 1)
    return a_val/b_val, pct_err


# Nominal ABCD estimate. `stat` and `syst` are used as fractions of `pred`
# below (multiplied by `pred` for absolute errors, by 100 for percentages).
pred, stat, syst = vbswh.ABCD(
    "abs(deta_jj) > 4",
    "hbbjet_msoftdrop < 150",
    "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9",
    h_dir="left", v_dir="up",
    show_data=True
)
AN_numbers.update({
    "PredBkg": round(pred),
    "PredBkgStatErr": round(pred*stat, 1),
    "PredBkgSystErr": round(pred*syst, 1),
    "BkgEstMethodSystErr": syst*100,
    "BkgEstStatErr": stat*100,
})

# A/B transfer factor from simulated background, and the resulting
# MC-only prediction (A/B times the region C yield).
A_bkg_wgt = vbswh.bkg_count(selection=regionA)
A_bkg_err = vbswh.bkg_error(selection=regionA)
B_bkg_wgt = vbswh.bkg_count(selection=regionB)
B_bkg_err = vbswh.bkg_error(selection=regionB)
ab_mc, ab_mc_err = ab_ratio_with_pct_err(A_bkg_wgt, A_bkg_err, B_bkg_wgt, B_bkg_err)
AN_numbers["PredBkgMC"] = round(ab_mc*vbswh.bkg_count(selection=regionC), 1)
AN_numbers["BkgEstABMC"] = ab_mc
AN_numbers["BkgEstABMCErr"] = ab_mc_err

# Same transfer factor measured in data.
A_data     = vbswh.data_count(selection=regionA)
A_data_err = vbswh.data_error(selection=regionA)
B_data     = vbswh.data_count(selection=regionB)
B_data_err = vbswh.data_error(selection=regionB)
ab_data, ab_data_err = ab_ratio_with_pct_err(A_data, A_data_err, B_data, B_data_err)
AN_numbers["BkgEstABData"] = ab_data
AN_numbers["BkgEstABDataErr"] = ab_data_err

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,186.50176049483372,3.4065824779873086,12.568182456706838,1.5489432782437886,148,12.165525060596439
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,241.6673538568511,5.854033430391243,0.9358281716332852,0.4245276027409975,199,14.106735979665885
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,183.41844058171293,4.531383787449446,16.34544715690842,1.7948482186077253,172,13.114877048604
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,136.55738310058857,4.235047777507667,413.3360243909275,8.90573577705255,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.7717292282899987,0.03033832680150509

In [6]:
# Background-composition systematic for WJets: scale the WJets event weights
# up (x2) and down (x0.5), recompute the MC A/B ratio for each variation, and
# take the larger relative shift from the nominal ratio as the systematic.
#
# The event weights live in the shared dataframe, so each variation is wrapped
# in try/finally: the nominal weights are restored even if ABCD() or a yield
# query raises, and later cells never silently run on scaled weights.
for variation, scale in (("Up", 2), ("Down", 0.5)):
    vbswh.df.loc[vbswh.df.name == "WJets", "event_weight"] *= scale
    try:
        vbswh.ABCD(
            "abs(deta_jj) > 4",
            "hbbjet_msoftdrop < 150",
            "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9",
            h_dir="left", v_dir="up",
            show_data=True
        )
        A_bkg_wgt = vbswh.bkg_count(selection=regionA)
        A_bkg_err = vbswh.bkg_error(selection=regionA)
        B_bkg_wgt = vbswh.bkg_count(selection=regionB)
        B_bkg_err = vbswh.bkg_error(selection=regionB)
        AN_numbers[f"BkgEstWJets{variation}ABMC"] = A_bkg_wgt/B_bkg_wgt
        # Relative error on A/B in percent (quadrature sum of relative errors).
        AN_numbers[f"BkgEstWJets{variation}ABMCErr"] = round(
            np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1
        )
    finally:
        vbswh.df["event_weight"] = ORIG_EVENT_WEIGHT.copy()
    if variation == "Up":
        print("")  # blank line between the two printed tables

# Largest relative deviation (in percent) of the varied ratios from nominal.
AN_numbers["BkgEstWJetsCompSyst"] = 100*max(
    abs(1 - AN_numbers["BkgEstWJetsUpABMC"]/AN_numbers["BkgEstABMC"]),
    abs(1 - AN_numbers["BkgEstWJetsDownABMC"]/AN_numbers["BkgEstABMC"])
)

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,198.82469600720674,3.64613876487425,12.568182456706838,1.5489432782437886,148,12.165525060596439
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,272.47373312441835,6.0061801157638675,0.9358281716332852,0.4245276027409975,199,14.106735979665885
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,226.5390184849875,4.84509125434271,16.34544715690842,1.7948482186077253,172,13.114877048604
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,160.6099222951183,4.896435052340458,413.3360243909275,8.90573577705255,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.7297022495611288,0.02867402488778965
Bt

In [7]:
# Background-composition systematic for Bosons: scale the Bosons event weights
# up (x2) and down (x0.5), recompute the MC A/B ratio for each variation, and
# take the larger relative shift from the nominal ratio as the systematic.
#
# The event weights live in the shared dataframe, so each variation is wrapped
# in try/finally: the nominal weights are restored even if ABCD() or a yield
# query raises, and later cells never silently run on scaled weights.
for variation, scale in (("Up", 2), ("Down", 0.5)):
    vbswh.df.loc[vbswh.df.name == "Bosons", "event_weight"] *= scale
    try:
        vbswh.ABCD(
            "abs(deta_jj) > 4",
            "hbbjet_msoftdrop < 150",
            "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9",
            h_dir="left", v_dir="up",
            show_data=True
        )
        A_bkg_wgt = vbswh.bkg_count(selection=regionA)
        A_bkg_err = vbswh.bkg_error(selection=regionA)
        B_bkg_wgt = vbswh.bkg_count(selection=regionB)
        B_bkg_err = vbswh.bkg_error(selection=regionB)
        AN_numbers[f"BkgEstBosons{variation}ABMC"] = A_bkg_wgt/B_bkg_wgt
        # Relative error on A/B in percent (quadrature sum of relative errors).
        AN_numbers[f"BkgEstBosons{variation}ABMCErr"] = round(
            np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1
        )
    finally:
        vbswh.df["event_weight"] = ORIG_EVENT_WEIGHT.copy()
    if variation == "Up":
        print("")  # blank line between the two printed tables

# Largest relative deviation (in percent) of the varied ratios from nominal.
AN_numbers["BkgEstBosonsCompSyst"] = 100*max(
    abs(1 - AN_numbers["BkgEstBosonsUpABMC"]/AN_numbers["BkgEstABMC"]),
    abs(1 - AN_numbers["BkgEstBosonsDownABMC"]/AN_numbers["BkgEstABMC"])
)

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,187.57767068077456,3.6149805281362957,12.568182456706838,1.5489432782437886,148,12.165525060596439
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,249.66143723028875,10.053235201993042,0.9358281716332852,0.4245276027409975,199,14.106735979665885
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,204.89057389037592,7.431089786991426,16.34544715690842,1.7948482186077253,172,13.114877048604
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,144.45612227695597,6.741645631137145,413.3360243909275,8.90573577705255,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.7513281697074913,0.044641639913415

In [8]:
# Composition systematic: WJets and Bosons contributions added in quadrature.
wjets_comp_syst = AN_numbers["BkgEstWJetsCompSyst"]
bosons_comp_syst = AN_numbers["BkgEstBosonsCompSyst"]
AN_numbers["BkgEstBkgCompSystErr"] = np.sqrt(wjets_comp_syst**2 + bosons_comp_syst**2)

# Total systematic: method and composition systematics added in quadrature.
method_syst = AN_numbers["BkgEstMethodSystErr"]
bkg_comp_syst = AN_numbers["BkgEstBkgCompSystErr"]
AN_numbers["BkgEstTotalSystErr"] = np.sqrt(method_syst**2 + bkg_comp_syst**2)

In [9]:
# Number of decimals kept for each exported quantity: A/B ratios get two,
# percentage uncertainties get one. Rounding happens only here, after every
# derived quantity has been computed from the unrounded values.
ROUNDING_DIGITS = {
    "BkgEstABMC": 2,
    "BkgEstABData": 2,
    "BkgEstWJetsUpABMC": 2,
    "BkgEstWJetsDownABMC": 2,
    "BkgEstWJetsCompSyst": 1,
    "BkgEstBosonsUpABMC": 2,
    "BkgEstBosonsDownABMC": 2,
    "BkgEstBosonsCompSyst": 1,
    "BkgEstBkgCompSystErr": 1,
    "BkgEstMethodSystErr": 1,
    "BkgEstTotalSystErr": 1,
    "BkgEstStatErr": 1,
}
for number_name, n_digits in ROUNDING_DIGITS.items():
    AN_numbers[number_name] = round(AN_numbers[number_name], n_digits)

In [10]:
# Persist the numbers gathered so far as JSON in the working directory.
serialized_numbers = json.dumps(AN_numbers)
with open("AN_numbers.json", "w") as json_file:
    json_file.write(serialized_numbers)

# Show the dictionary; the entries still at 0 are completed in vbswh-sys.ipynb.
AN_numbers

{'PredBkg': 128,
 'PredBkgStatErr': 17.0,
 'PredBkgSystErr': 4.7,
 'ExpSig': 413,
 'ExpSigStatErr': 8.9,
 'ExpSigSystErr': 0,
 'ExpBkg': 137,
 'BkgEstABMC': 0.77,
 'BkgEstABMCErr': 3.0,
 'BkgEstABData': 0.74,
 'BkgEstABDataErr': 10.9,
 'PredBkgMC': 141.5,
 'BkgEstMethodSystErr': 3.7,
 'BkgEstBkgCompSystErr': 6.1,
 'BkgEstTotalSystErr': 7.1,
 'BkgEstStatErr': 13.3,
 'BkgEstWJetsUpABMC': 0.73,
 'BkgEstWJetsUpABMCErr': 2.9,
 'BkgEstWJetsDownABMC': 0.8,
 'BkgEstWJetsDownABMCErr': 3.2,
 'BkgEstWJetsCompSyst': 5.4,
 'BkgEstBosonsUpABMC': 0.75,
 'BkgEstBosonsUpABMCErr': 4.5,
 'BkgEstBosonsDownABMC': 0.78,
 'BkgEstBosonsDownABMCErr': 2.5,
 'BkgEstBosonsCompSyst': 2.6,
 'SRTwoPredBkg': 0,
 'SRTwoPredBkgStatErr': 0,
 'SRTwoPredBkgSystErr': 0,
 'SRTwoExpSig': 0,
 'SRTwoExpSigStatErr': 0,
 'SRTwoExpSigSystErr': 0}