In [25]:
import glob
import json
import numpy as np
from utils.analysis import Extrapolation

In [26]:
TAG = "kscans"
babies = glob.glob(f"../analysis/studies/vbswh/output_{TAG}/Run2/*.root")
babies = [baby for baby in babies if "Lambda" not in baby and "VBSWH_SM" not in baby]
data_babies = [baby for baby in babies if "data" in baby]
sig_babies = [baby for baby in babies if "VBSWH_mkW" in baby]
bkg_babies = list(set(babies) - set(data_babies + sig_babies))
print(data_babies)
print(sig_babies)
bkg_babies

['../analysis/studies/vbswh/output_kscans/Run2/data.root']
['../analysis/studies/vbswh/output_kscans/Run2/VBSWH_mkW.root']


['../analysis/studies/vbswh/output_kscans/Run2/TTbar2L.root',
 '../analysis/studies/vbswh/output_kscans/Run2/EWKWLep.root',
 '../analysis/studies/vbswh/output_kscans/Run2/VH.root',
 '../analysis/studies/vbswh/output_kscans/Run2/WJets.root',
 '../analysis/studies/vbswh/output_kscans/Run2/TTbar1L.root',
 '../analysis/studies/vbswh/output_kscans/Run2/SingleTop.root',
 '../analysis/studies/vbswh/output_kscans/Run2/TTX.root',
 '../analysis/studies/vbswh/output_kscans/Run2/Bosons.root']

In [27]:
# bkg_babies = [b for b in bkg_babies if "TTbar1L" in b or "TTbar2L" in b]
# bkg_babies = [b for b in bkg_babies if not ("TTbar1L" in b or "TTbar2L" in b)]
# bkg_babies = [b for b in bkg_babies if "TTbar1L" in b]
# bkg_babies = [b for b in bkg_babies if "TTbar2L" in b]
bkg_babies

['../analysis/studies/vbswh/output_kscans/Run2/TTbar2L.root',
 '../analysis/studies/vbswh/output_kscans/Run2/EWKWLep.root',
 '../analysis/studies/vbswh/output_kscans/Run2/VH.root',
 '../analysis/studies/vbswh/output_kscans/Run2/WJets.root',
 '../analysis/studies/vbswh/output_kscans/Run2/TTbar1L.root',
 '../analysis/studies/vbswh/output_kscans/Run2/SingleTop.root',
 '../analysis/studies/vbswh/output_kscans/Run2/TTX.root',
 '../analysis/studies/vbswh/output_kscans/Run2/Bosons.root']

In [28]:
vbswh = Extrapolation(
    sig_root_files=sig_babies,
    bkg_root_files=bkg_babies,
    data_root_files=data_babies,
    ttree_name="tree",
    weight_columns=[
        "xsec_sf", "lep_id_sf", "ewkfix_sf", 
        "elec_reco_sf", "muon_iso_sf", 
        "btag_sf", "pu_sf", "prefire_sf", "trig_sf", "puid_sf",
        "xbb_sf" # applied only because Xbb > 0.9 applied everywhere for ABCD
    ],
    reweight_column="reweights",
    plots_dir=f"/home/users/jguiang/public_html/onelep_plots/{TAG}/val"
)
vbswh.df["presel_noVBS_noBVeto"] = vbswh.df.eval(
    "hbbjet_score > 0.3"
)
vbswh.df["presel_noVBS"] = vbswh.df.eval(
    "passes_bveto and hbbjet_score > 0.3"
)
vbswh.df["presel_noDetaJJ"] = vbswh.df.eval(
    "passes_bveto and M_jj > 500 and hbbjet_score > 0.3"
)
vbswh.df["presel"] = vbswh.df.eval(
    "passes_bveto and M_jj > 500 and abs(deta_jj) > 3 and hbbjet_score > 0.3"
)

ORIG_EVENT_WEIGHT = vbswh.df.event_weight.values.copy()

Loading sig babies: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.58it/s]
Loading bkg babies: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:41<00:00,  5.13s/it]
Loading data babies: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it]


In [29]:
def get_year_str(year, doAPV=True):
    if doAPV and year == -2016:
        return "2016preVFP"
    elif doAPV and year == 2016:
        return "2016postVFP"
    else:
        return str(year)

In [30]:
doAPV = False
years = [2016, 2017, 2018] if not doAPV else [-2016, 2016, 2017, 2018]
for year in years:
    print()
    print(f"Running {year}")
    
    year_str = get_year_str(year, doAPV=doAPV)
    year_check = f"abs(year) == {year}" if not doAPV else f"year == {year}"
    SRlike  = f"{year_check} and presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"
    regionA = f"{SRlike} and abs(deta_jj) > 4 and hbbjet_msoftdrop >= 150"
    regionB = f"{SRlike} and abs(deta_jj) <= 4 and hbbjet_msoftdrop >= 150"
    regionC = f"{SRlike} and abs(deta_jj) <= 4 and hbbjet_msoftdrop < 150"
    regionD = f"{SRlike} and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150"
    AN_numbers = {
        "PredBkg": 0,
        "PredBkgStatErr": 0,
        "PredBkgSystErr": 0,
        "ExpSig": round(vbswh.sig_count(selection=regionD)),
        "ExpSigStatErr": round(vbswh.sig_error(selection=regionD), 1),
        "ExpSigSystErr": 0,
        "ExpBkg": round(vbswh.bkg_count(selection=regionD)),
        "BkgEstABMC": 0,
        "BkgEstABMCErr": 0,
        "BkgEstABData": 0,
        "BkgEstABDataErr": 0,
        "PredBkgMC": 0,
        "BkgEstMethodSystErr": 0,
        "BkgEstBkgCompSystErr": 0,
        "BkgEstTotalSystErr": 0,
        "BkgEstStatErr": 0,
        "BkgEstWJetsUpABMC": 0,
        "BkgEstWJetsUpABMCErr": 0,
        "BkgEstWJetsDownABMC": 0,
        "BkgEstWJetsDownABMCErr": 0,
        "BkgEstWJetsCompSyst": 0,
        "BkgEstBosonsUpABMC": 0,
        "BkgEstBosonsUpABMCErr": 0,
        "BkgEstBosonsDownABMC": 0,
        "BkgEstBosonsDownABMCErr": 0,
        "BkgEstBosonsCompSyst": 0,
    }


    # Regular ABCD
    pred, stat, syst = vbswh.ABCD( 
        "abs(deta_jj) > 4",
        "hbbjet_msoftdrop < 150",
        SRlike,
        h_dir="left", v_dir="up", 
        show_data=True
    )
    AN_numbers["PredBkg"] = round(pred)
    AN_numbers["PredBkgStatErr"] = round(pred*stat, 1)
    A_bkg_wgt = vbswh.bkg_count(selection=regionA)
    A_bkg_err = vbswh.bkg_error(selection=regionA)
    B_bkg_wgt = vbswh.bkg_count(selection=regionB)
    B_bkg_err = vbswh.bkg_error(selection=regionB)
    AN_numbers["PredBkgMC"] = round(A_bkg_wgt/B_bkg_wgt*vbswh.bkg_count(selection=regionC), 1)
    AN_numbers["BkgEstABMC"] = A_bkg_wgt/B_bkg_wgt
    AN_numbers["BkgEstABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)
    AN_numbers["BkgEstMethodSystErr"] = syst*100
    AN_numbers["BkgEstStatErr"] = stat*100
    A_data     = vbswh.data_count(selection=regionA)
    A_data_err = vbswh.data_error(selection=regionA)
    B_data     = vbswh.data_count(selection=regionB)
    B_data_err = vbswh.data_error(selection=regionB)
    AN_numbers["BkgEstABData"] = A_data/B_data
    AN_numbers["BkgEstABDataErr"] = round(np.sqrt((B_data_err/B_data)**2 + (A_data_err/A_data)**2)*100, 1)


    # WJets up by factor of 2
    vbswh.df.loc[vbswh.df.name == "WJets", "event_weight"] *= 2
    vbswh.ABCD( 
        "abs(deta_jj) > 4",
        "hbbjet_msoftdrop < 150",
        SRlike,
        h_dir="left", v_dir="up", 
        show_data=True
    )
    A_bkg_wgt = vbswh.bkg_count(selection=regionA)
    A_bkg_err = vbswh.bkg_error(selection=regionA)
    B_bkg_wgt = vbswh.bkg_count(selection=regionB)
    B_bkg_err = vbswh.bkg_error(selection=regionB)
    AN_numbers["BkgEstWJetsUpABMC"] = A_bkg_wgt/B_bkg_wgt
    AN_numbers["BkgEstWJetsUpABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)
    # Reset
    vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()
    print("")
    # WJets down by factor of 2
    vbswh.df.loc[vbswh.df.name == "WJets", "event_weight"] *= 0.5
    vbswh.ABCD( 
        "abs(deta_jj) > 4",
        "hbbjet_msoftdrop < 150",
        SRlike,
        h_dir="left", v_dir="up", 
        show_data=True
    )
    A_bkg_wgt = vbswh.bkg_count(selection=regionA)
    A_bkg_err = vbswh.bkg_error(selection=regionA)
    B_bkg_wgt = vbswh.bkg_count(selection=regionB)
    B_bkg_err = vbswh.bkg_error(selection=regionB)
    AN_numbers["BkgEstWJetsDownABMC"] = A_bkg_wgt/B_bkg_wgt
    AN_numbers["BkgEstWJetsDownABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)
    AN_numbers["BkgEstWJetsCompSyst"] = 100*max(
        abs(1 - AN_numbers["BkgEstWJetsUpABMC"]/AN_numbers["BkgEstABMC"]),
        abs(1 - AN_numbers["BkgEstWJetsDownABMC"]/AN_numbers["BkgEstABMC"])
    )
    # Reset
    vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()


    # Bosons up by factor of 2
    vbswh.df.loc[vbswh.df.name == "Bosons", "event_weight"] *= 2
    vbswh.ABCD( 
        "abs(deta_jj) > 4",
        "hbbjet_msoftdrop < 150",
        SRlike,
        h_dir="left", v_dir="up", 
        show_data=True
    )
    A_bkg_wgt = vbswh.bkg_count(selection=regionA)
    A_bkg_err = vbswh.bkg_error(selection=regionA)
    B_bkg_wgt = vbswh.bkg_count(selection=regionB)
    B_bkg_err = vbswh.bkg_error(selection=regionB)
    AN_numbers["BkgEstBosonsUpABMC"] = A_bkg_wgt/B_bkg_wgt
    AN_numbers["BkgEstBosonsUpABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)
    # Reset
    vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()
    print("")
    # Bosons down by factor of 2
    vbswh.df.loc[vbswh.df.name == "Bosons", "event_weight"] *= 0.5
    vbswh.ABCD( 
        "abs(deta_jj) > 4",
        "hbbjet_msoftdrop < 150",
        SRlike,
        h_dir="left", v_dir="up", 
        show_data=True
    )
    A_bkg_wgt = vbswh.bkg_count(selection=regionA)
    A_bkg_err = vbswh.bkg_error(selection=regionA)
    B_bkg_wgt = vbswh.bkg_count(selection=regionB)
    B_bkg_err = vbswh.bkg_error(selection=regionB)
    AN_numbers["BkgEstBosonsDownABMC"] = A_bkg_wgt/B_bkg_wgt
    AN_numbers["BkgEstBosonsDownABMCErr"] = round(np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1)
    AN_numbers["BkgEstBosonsCompSyst"] = 100*max(
        abs(1 - AN_numbers["BkgEstBosonsUpABMC"]/AN_numbers["BkgEstABMC"]),
        abs(1 - AN_numbers["BkgEstBosonsDownABMC"]/AN_numbers["BkgEstABMC"])
    )
    # Reset
    vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()



    AN_numbers["BkgEstBkgCompSystErr"] = np.sqrt(
        AN_numbers["BkgEstWJetsCompSyst"]**2 + AN_numbers["BkgEstBosonsCompSyst"]**2
    )
    AN_numbers["BkgEstTotalSystErr"] = np.sqrt(
        AN_numbers["BkgEstMethodSystErr"]**2 + AN_numbers["BkgEstBkgCompSystErr"]**2
    )



    AN_numbers["PredBkgSystErr"] = round(pred*AN_numbers["BkgEstTotalSystErr"]/100, 1)

    AN_numbers["BkgEstABMC"] = round(AN_numbers["BkgEstABMC"], 2)
    AN_numbers["BkgEstABData"] = round(AN_numbers["BkgEstABData"], 2)

    AN_numbers["BkgEstWJetsUpABMC"] = round(AN_numbers["BkgEstWJetsUpABMC"], 2)
    AN_numbers["BkgEstWJetsDownABMC"] = round(AN_numbers["BkgEstWJetsDownABMC"], 2)
    AN_numbers["BkgEstWJetsCompSyst"] = round(AN_numbers["BkgEstWJetsCompSyst"], 1)

    AN_numbers["BkgEstBosonsUpABMC"] = round(AN_numbers["BkgEstBosonsUpABMC"], 2)
    AN_numbers["BkgEstBosonsDownABMC"] = round(AN_numbers["BkgEstBosonsDownABMC"], 2)
    AN_numbers["BkgEstBosonsCompSyst"] = round(AN_numbers["BkgEstBosonsCompSyst"], 1)

    AN_numbers["BkgEstBkgCompSystErr"] = round(AN_numbers["BkgEstBkgCompSystErr"], 1)
    AN_numbers["BkgEstMethodSystErr"] = round(AN_numbers["BkgEstMethodSystErr"], 1)
    AN_numbers["BkgEstTotalSystErr"] = round(AN_numbers["BkgEstTotalSystErr"], 1)
    AN_numbers["BkgEstStatErr"] = round(AN_numbers["BkgEstStatErr"], 1)

    with open(f"AN_numbers_{year_str}.json", "w") as f_out:
        json.dump(AN_numbers, f_out)


Running 2016
cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
abs(year) == 2016 and presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,50.36340024374087,1.7672062626493572,3.1844039861107967,0.4960965252427423,35,5.916079783099616
abs(year) == 2016 and presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,67.09937422847625,1.8406817310544439,0.2573658148364778,0.14917896399110292,56,7.483314773547883
abs(year) == 2016 and presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,50.43730662978969,2.2224010575256066,3.4967653807246455,0.5094745585219765,30,5.477225575051661
abs(year) == 2016 and presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,28.019911621142178,1.6377723396661092,101.0801522893

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
abs(year) == 2017 and presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,45.01065814565198,1.5458847653959344,2.8010747006026815,0.6746008451255433,43,6.557438524302
abs(year) == 2017 and presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,68.24347208829732,5.2290073976825875,0.1708467062990074,0.1708467062990074,68,8.246211251235321
abs(year) == 2017 and presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,54.93277208044476,3.565863674904661,4.895636724283886,0.894858244514049,57,7.54983443527075
abs(year) == 2017 and presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,35.136976283392734,3.613289726013546,112.25551278778653,4.334289506126883,

In [7]:
for year in years:
    year_str = get_year_str(year, doAPV=doAPV)
    with open(f"AN_numbers_{year_str}.json", "r") as f_in:
        nums = json.load(f_in)
        print(f"{year_str}: {nums['PredBkg']} +- {nums['PredBkgSystErr']} +- {nums['PredBkgStatErr']}")
        print(f"{year_str}: {nums['BkgEstTotalSystErr']}% = sqrt({nums['BkgEstMethodSystErr']}%^2 + {nums['BkgEstBkgCompSystErr']}%^2)")
        print(f"{year_str}: {nums['BkgEstStatErr']}%")
        print()
        
# BkgEstBkgCompSystErr
# BkgEstTotalSystErr
# BkgEstStatErr

2016preVFP: 16 +- 7.5 +- 6.6
2016preVFP: 46.3% = sqrt(45.7%^2 + 7.6%^2)
2016preVFP: 40.9%

2016postVFP: 6 +- 1.4 +- 2.4
2016postVFP: 24.4% = sqrt(24.0%^2 + 4.1%^2)
2016postVFP: 40.7%

2017: 36 +- 2.3 +- 8.5
2017: 6.3% = sqrt(1.5%^2 + 6.1%^2)
2017: 23.6%

2018: 69 +- 5.6 +- 13.9
2018: 8.1% = sqrt(4.9%^2 + 6.4%^2)
2018: 20.2%



In [8]:
for year_i, year in enumerate(years):
    year_str = get_year_str(year, doAPV=doAPV)
    with open(f"AN_numbers_{year_str}.json", "r") as f_in:
        nums = json.load(f_in)
        for key in ["PredBkg", "PredBkgStatErr", "PredBkgSystErr"]:
            value = nums[key]
            if year_i == 0:
                key += "YearOne"
            elif year_i == 1:
                key += "YearTwo"
            elif year_i == 2:
                key += "YearThree"
            elif year_i == 3:
                key += "YearFour"
            print(f"\\newcommand{{\\{key}}}{{{value}}}")

\newcommand{\PredBkgYearOne}{16}
\newcommand{\PredBkgStatErrYearOne}{6.6}
\newcommand{\PredBkgSystErrYearOne}{7.5}
\newcommand{\PredBkgYearTwo}{6}
\newcommand{\PredBkgStatErrYearTwo}{2.4}
\newcommand{\PredBkgSystErrYearTwo}{1.4}
\newcommand{\PredBkgYearThree}{36}
\newcommand{\PredBkgStatErrYearThree}{8.5}
\newcommand{\PredBkgSystErrYearThree}{2.3}
\newcommand{\PredBkgYearFour}{69}
\newcommand{\PredBkgStatErrYearFour}{13.9}
\newcommand{\PredBkgSystErrYearFour}{5.6}
