In [1]:
import glob
import json
import numpy as np
from utils.analysis import Extrapolation

import pandas as pd
import warnings
warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

In [2]:
TAG = "main"
# TAG = "max_detajj" # DEBUG

# Sort the glob result and filter with list comprehensions (instead of a set
# difference) so the file lists come out in the same order on every run; set
# iteration order over strings is not stable between interpreter sessions.
babies = sorted(glob.glob(f"../analysis/studies/vbswh/output_{TAG}/Run2/*.root"))
babies = [baby for baby in babies if "Lambda" not in baby and "VBSWH_SM" not in baby]
data_babies = [baby for baby in babies if "data" in baby]
sig_babies = [baby for baby in babies if "VBSWH_mkW" in baby]
# Everything that is neither data nor signal is treated as background.
bkg_babies = [
    baby for baby in babies
    if baby not in data_babies and baby not in sig_babies
]
print(data_babies)
print(sig_babies)
bkg_babies

['../analysis/studies/vbswh/output_main/Run2/data.root']
['../analysis/studies/vbswh/output_main/Run2/VBSWH_mkW.root']


['../analysis/studies/vbswh/output_main/Run2/TTbar2L.root',
 '../analysis/studies/vbswh/output_main/Run2/VH.root',
 '../analysis/studies/vbswh/output_main/Run2/Bosons.root',
 '../analysis/studies/vbswh/output_main/Run2/SingleTop.root',
 '../analysis/studies/vbswh/output_main/Run2/TTbar1L.root',
 '../analysis/studies/vbswh/output_main/Run2/TTX.root',
 '../analysis/studies/vbswh/output_main/Run2/WJets.root',
 '../analysis/studies/vbswh/output_main/Run2/EWKWLep.root']

In [3]:
# Load signal, background and data babies into one Extrapolation object.
# NOTE(review): the weight columns are presumably multiplied into
# `event_weight` by Extrapolation -- confirm in utils.analysis.
vbswh = Extrapolation(
    ttree_name="tree",
    sig_root_files=sig_babies,
    bkg_root_files=bkg_babies,
    data_root_files=data_babies,
    reweight_column="reweights",
    weight_columns=[
        "xsec_sf",
        "lep_id_sf",
        "ewkfix_sf",
        "elec_reco_sf",
        "muon_iso_sf",
        "btag_sf",
        "pu_sf",
        "prefire_sf",
        "trig_sf",
        "puid_sf",
        "xbb_sf",  # applied only because Xbb > 0.9 applied everywhere for ABCD
    ],
    plots_dir=f"/home/users/jguiang/public_html/onelep_plots/{TAG}/val",
)

# Boolean preselection columns, from loosest to tightest; each one is evaluated
# from the raw branches only, so they do not depend on one another.
for _presel_column, _presel_cut in [
    ("presel_noVBS_noBVeto", "hbbjet_score > 0.3"),
    ("presel_noVBS", "passes_bveto and hbbjet_score > 0.3"),
    ("presel_noDetaJJ", "passes_bveto and M_jj > 500 and hbbjet_score > 0.3"),
    ("presel", "passes_bveto and M_jj > 500 and abs(deta_jj) > 3 and hbbjet_score > 0.3"),
]:
    vbswh.df[_presel_column] = vbswh.df.eval(_presel_cut)

# Snapshot of the nominal weights so later cells can restore them after
# temporarily modifying `event_weight`.
ORIG_EVENT_WEIGHT = vbswh.df["event_weight"].values.copy()

Loading sig babies: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.37s/it]
Loading bkg babies: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [01:01<00:00,  7.69s/it]
Loading data babies: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.57s/it]


In [4]:
# Nominal ABCD regions: |deta_jj| is split at 4 and the Hbb-jet soft-drop mass
# at 150 on top of the signal-region-like selection.
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),   # A
        ("<= 4", ">= 150"),  # B
        ("<= 4", "< 150"),   # C
        ("> 4", "< 150"),    # D
    ]
]

# Every quantity starts at 0; the entries that can already be computed from MC
# in region D are filled in right below, the rest by later cells.
AN_numbers = dict.fromkeys(
    [
        "PredBkg",
        "PredBkgStatErr",
        "PredBkgSystErr",
        "ExpSig",
        "ExpSigStatErr",
        "ExpSigSystErr",
        "ExpBkg",
        "BkgEstABMC",
        "BkgEstABMCErr",
        "BkgEstABData",
        "BkgEstABDataErr",
        "PredBkgMC",
        "BkgEstMethodSystErr",
        "BkgEstBkgCompSystErr",
        "BkgEstTotalSystErr",
        "BkgEstStatErr",
        "BkgEstWJetsUpABMC",
        "BkgEstWJetsUpABMCErr",
        "BkgEstWJetsDownABMC",
        "BkgEstWJetsDownABMCErr",
        "BkgEstWJetsCompSyst",
        "BkgEstBosonsUpABMC",
        "BkgEstBosonsUpABMCErr",
        "BkgEstBosonsDownABMC",
        "BkgEstBosonsDownABMCErr",
        "BkgEstBosonsCompSyst",
        "PNetSidebandPredData",
        "PNetSidebandPredDataStatErr",
        "PNetSidebandData",
        "PNetSidebandDataStatErr",
        "SRTwoPredBkg",
        "SRTwoPredBkgStatErr",
        "SRTwoPredBkgSystErr",
        "SRTwoBkgEstStatErr",
        "SRTwoBkgEstSystErr",
        "SRTwoExpSig",
        "SRTwoExpSigStatErr",
        "SRTwoExpSigSystErr",
        "LambdaWZeqNegOneExcl",
    ],
    0,
)
AN_numbers["ExpSig"] = round(vbswh.sig_count(selection=regionD))
AN_numbers["ExpSigStatErr"] = round(vbswh.sig_error(selection=regionD), 1)
AN_numbers["ExpBkg"] = round(vbswh.bkg_count(selection=regionD))
# "SRTwo" entries: region D with the additional ST > 1500 requirement.
AN_numbers["SRTwoExpSig"] = round(vbswh.sig_count(selection=f"{regionD} and ST > 1500"))
AN_numbers["SRTwoExpSigStatErr"] = round(vbswh.sig_error(selection=f"{regionD} and ST > 1500"), 1)

In [5]:
def ABCD(A, B, C, D, sample=None, blind=True):
    """Print an ABCD yield table and return the data-driven prediction for region D.

    Parameters
    ----------
    A, B, C, D : str
        Selection strings, passed as ``selection=`` to the ``vbswh`` counting methods.
    sample : str or None
        If None, the background columns use the total MC background. Otherwise
        they use only this sample (via ``vbswh.sample_count`` / ``sample_error``)
        and the remaining MC background (total minus sample) is subtracted from
        the data counts.
    blind : bool
        If True, the data count and error of region D are replaced by 0. The D
        data yield only appears in the printed table; it never enters the
        prediction.

    Returns
    -------
    tuple
        ``(D_data_pred, D_data_pred_err, syst * D_data_pred)`` where ``syst`` is
        the relative MC non-closure ``|1 - (A/B * C) / D|``.
    """
    selections = {"A": A, "B": B, "C": C, "D": D}

    sig_count = {reg: vbswh.sig_count(selection=cut) for reg, cut in selections.items()}
    sig_error = {reg: vbswh.sig_error(selection=cut) for reg, cut in selections.items()}
    data_count = {reg: vbswh.data_count(selection=cut) for reg, cut in selections.items()}

    if sample is None:
        bkg_count = {reg: vbswh.bkg_count(selection=cut) for reg, cut in selections.items()}
        bkg_error = {reg: vbswh.bkg_error(selection=cut) for reg, cut in selections.items()}
    else:
        bkg_count = {reg: vbswh.sample_count(sample, selection=cut) for reg, cut in selections.items()}
        bkg_error = {reg: vbswh.sample_error(sample, selection=cut) for reg, cut in selections.items()}
        # Remove every other MC background from data so that data is compared
        # to this one sample only.
        for reg, cut in selections.items():
            data_count[reg] -= (vbswh.bkg_count(selection=cut) - bkg_count[reg])

    # NOTE(review): when `sample` is given this is the square root of the
    # MC-subtracted count, not of the raw data count, and it becomes NaN if the
    # subtraction goes negative -- confirm this is the intended stat. error.
    data_error = {reg: np.sqrt(count) for reg, count in data_count.items()}

    if blind:
        data_count["D"] = 0
        data_error["D"] = 0

    print("region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err")
    for reg in selections:
        print(
            f"{reg},{bkg_count[reg]:.2f},{bkg_error[reg]:.2f},"
            f"{sig_count[reg]:.2f},{sig_error[reg]:.2f},"
            f"{data_count[reg]:.2f},{data_error[reg]:.2f}"
        )

    # Transfer factors A/B
    mc_tf = bkg_count["A"]/bkg_count["B"]
    data_tf = data_count["A"]/data_count["B"]

    # Uncorrected MC prediction; its relative non-closure is the systematic
    D_bkg_pred = mc_tf*bkg_count["C"]
    syst = abs(1 - D_bkg_pred/bkg_count["D"])
    print(syst)

    # Transfer factor correction: scales the MC transfer factor to D/C, so the
    # corrected MC prediction below equals the MC yield in D by construction.
    # The same factor is applied to the data transfer factor.
    mc_tf_fix = (bkg_count["D"]/bkg_count["C"])/mc_tf
    print(f"Calculated MC TF fix: {mc_tf_fix}")

    mc_tf *= mc_tf_fix
    data_tf *= mc_tf_fix

    # Corrected predictions; relative errors of A, B and C added in quadrature
    D_bkg_pred = mc_tf*bkg_count["C"]
    D_bkg_pred_err = D_bkg_pred*np.sqrt(
        sum((bkg_error[reg]/bkg_count[reg])**2 for reg in "ABC")
    )

    D_data_pred = data_tf*data_count["C"]
    D_data_pred_err = D_data_pred*np.sqrt(
        sum((data_error[reg]/data_count[reg])**2 for reg in "ABC")
    )

    print(f"D_pred,{D_bkg_pred:.2f},{D_bkg_pred_err:.2f},,,{D_data_pred:.2f},{D_data_pred_err:.2f}")

    return D_data_pred, D_data_pred_err, syst*D_data_pred

In [6]:
# Doing ABCD only in 50 < MSD < 150
# (the upper bound is part of SRlike below; the soft-drop-mass axis is split at 100)

sample = None

SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and hbbjet_msoftdrop < 150"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", "< 100"),    # A
        ("<= 4", "< 100"),   # B
        ("<= 4", ">= 100"),  # C
        ("> 4", ">= 100"),   # D
    ]
]

ABCD(A=regionA, B=regionB, C=regionC, D=regionD, sample=sample)
print()

# Reset the shared region strings to the nominal ABCD definition
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,52.07,3.02,40.85,0.97,56.00,7.48
B,90.46,3.46,1.27,0.17,82.00,9.06
C,90.64,2.72,10.36,0.49,88.00,9.38
D,64.33,2.36,325.46,2.75,0.00,0.00
0.18886736534936854
Calculated MC TF fix: 1.232843997739922
D_pred,64.33,4.87,,,74.09,15.08



In [7]:
sample = None

# Regular ABCD (region D stays blinded, which is ABCD's default)
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]
ABCD(A=regionA, B=regionB, C=regionC, D=regionD, sample=sample)
print()

# Doing ABCD for MSD > 150: the nominal A and B regions are each split at
# MSD = 200 into an upper (A1, B1) and a lower (A2, B2) half, and A2 plays the
# role of region D. No signal-region data is involved, so blinding is off.
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 200"),                              # A1
        ("<= 4", ">= 200"),                             # B1
        ("<= 4", ">= 150 and hbbjet_msoftdrop < 200"),  # B2
        ("> 4", ">= 150 and hbbjet_msoftdrop < 200"),   # A2
    ]
]
ABCD(A=regionA, B=regionB, C=regionC, D=regionD, sample=sample, blind=False)
print()

# Same split at 200, with the soft-drop mass capped at 250
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and hbbjet_msoftdrop < 250"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 200"),                              # A1
        ("<= 4", ">= 200"),                             # B1
        ("<= 4", ">= 150 and hbbjet_msoftdrop < 200"),  # B2
        ("> 4", ">= 150 and hbbjet_msoftdrop < 200"),   # A2
    ]
]
ABCD(A=regionA, B=regionB, C=regionC, D=regionD, sample=sample, blind=False)
print()

# Still capped at 250, but split at 180 instead of 200
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 180"),                              # A1
        ("<= 4", ">= 180"),                             # B1
        ("<= 4", ">= 150 and hbbjet_msoftdrop < 180"),  # B2
        ("> 4", ">= 150 and hbbjet_msoftdrop < 180"),   # A2
    ]
]
ABCD(A=regionA, B=regionB, C=regionC, D=regionD, sample=sample, blind=False)
print()

# Reset the shared region strings to the nominal ABCD definition
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,172.97,3.25,6.92,0.40,142.00,11.92
B,241.93,5.83,0.27,0.08,201.00,14.18
C,181.10,4.40,11.62,0.52,170.00,13.04
D,116.41,3.84,366.30,2.92,0.00,0.00
0.1122552957372065
Calculated MC TF fix: 0.8990741638476054
D_pred,116.41,4.54,,,107.98,14.45

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,41.76,1.71,0.29,0.08,28.00,5.29
B,96.05,4.41,0.03,0.02,70.00,8.37
C,145.88,3.81,0.24,0.08,131.00,11.45
D,131.20,2.76,6.63,0.39,114.00,10.68
0.5165590905399182
Calculated MC TF fix: 2.0685051273729065
D_pred,131.20,8.77,,,108.39,26.02

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,26.70,1.23,0.18,0.06,22.00,4.69
B,54.62,4.16,0.02,0.02,40.00,6.32
C,145.88,3.81,0.24,0.08,131.00,11.45
D,131.20,2.76,6.63,0.39,114.00,10.68
0.4565744410524356
Calculated MC TF fix: 1.8401784449312053
D_pred,131.20,12.18,,,132.58,37.05

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,86.17,2.20,0

In [8]:
# Repeat the ABCD checks for a single background sample; ABCD subtracts the
# other MC backgrounds from data when `sample` is given.
sample = "TTbar1L"

# Nominal regions
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]
ABCD(A=regionA, B=regionB, C=regionC, D=regionD, sample=sample)
print()

# Nominal regions with the soft-drop mass capped at 200
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and hbbjet_msoftdrop < 200"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]
ABCD(A=regionA, B=regionB, C=regionC, D=regionD, sample=sample)
print()

# Nominal regions with the 150-200 soft-drop-mass window removed
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (hbbjet_msoftdrop > 200 or hbbjet_msoftdrop < 150)"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]
ABCD(A=regionA, B=regionB, C=regionC, D=regionD, sample=sample)
print()


# ABCD inside MSD >= 150 only: A and B are split at 200 and A2 acts as region D
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 200"),                              # A1
        ("<= 4", ">= 200"),                             # B1
        ("<= 4", ">= 150 and hbbjet_msoftdrop < 200"),  # B2
        ("> 4", ">= 150 and hbbjet_msoftdrop < 200"),   # A2
    ]
]
ABCD(A=regionA, B=regionB, C=regionC, D=regionD, sample=sample, blind=False)

# Reset the shared region strings to the nominal ABCD definition
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,136.74,2.63,6.92,0.40,105.78,10.28
B,169.11,2.84,0.27,0.08,128.17,11.32
C,59.39,1.69,11.62,0.52,48.29,6.95
D,49.95,1.58,366.30,2.92,0.00,0.00
0.03846793363152079
Calculated MC TF fix: 1.0400069170618582
D_pred,49.95,1.91,,,41.45,8.08

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,109.73,2.36,6.63,0.39,92.53,9.62
B,109.48,2.31,0.24,0.08,94.60,9.73
C,59.39,1.69,11.62,0.52,48.29,6.95
D,49.95,1.58,366.30,2.92,0.00,0.00
0.19185248778038555
Calculated MC TF fix: 0.8390300060222412
D_pred,49.95,2.07,,,39.63,8.13

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,26.92,1.15,0.29,0.08,13.26,3.64
B,59.41,1.65,0.03,0.02,33.64,5.80
C,59.39,1.69,11.62,0.52,48.29,6.95
D,49.95,1.58,366.30,2.92,0.00,0.00
0.4612460615584768
Calculated MC TF fix: 1.8561349229162822
D_pred,49.95,2.91,,,35.33,12.53

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,27.01,1.15,0.29,0.08,13.25,3.

In [9]:
# Run the ABCD method of the Extrapolation object with the base selection
# restricted to data, signal and the TTbar1L background (last term of the cut).
# The table this call prints shows how the regions are built from the first
# two arguments (cut1, cut2) and the third (base):
#   A = base and cut1 and not cut2
#   B = base and not cut1 and not cut2
#   C = base and not cut1 and cut2
# NOTE(review): the meaning of h_dir / v_dir and of the three returned values
# (unpacked here as prediction, stat. and syst. error) is defined in
# utils.analysis -- confirm there.
pred, stat, syst = vbswh.ABCD( 
    "abs(deta_jj) > 4",
    "hbbjet_msoftdrop < 150",
# Alternative base selections kept for quick switching:
#     SRlike,
    "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and ((is_data or is_signal) or (name == 'TTbar1L'))",
#     "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (hbbjet_msoftdrop > 200 or hbbjet_msoftdrop < 150)",
    h_dir="left", v_dir="up", 
    show_data=True
)

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and ((is_data or is_signal) or (name == 'TTbar1L')) and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,136.7437246819876,2.625319769465052,6.922700925844113,0.39741721099955374,142,11.916375287812984
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and ((is_data or is_signal) or (name == 'TTbar1L')) and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,169.1066296395771,2.840797400309562,0.2728229361420638,0.0843215664975972,201,14.177446878757825
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and ((is_data or is_signal) or (name == 'TTbar1L')) and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,59.389843911136694,1.688793572473669,11.623110701742963,0.5219428213060152,170,13.038404810405298
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and ((is_data or is_signal) or (name == 'TTba

# Calculations for review

In [10]:
def get_closure_data(regionA, regionB, regionC, regionD):
    """Closure test in data: predict the region-D yield as (A / B) * C.

    Each argument is a selection string passed as ``selection=`` to
    ``vbswh.data_count`` / ``vbswh.data_error``. The A, B and C counts are
    printed so they can be compared by eye with the table from ``vbswh.ABCD``.

    Returns
    -------
    tuple
        ``(D_count, D_error, D_pred, D_pred_err)``: the observed yield in D with
        its error, and the prediction with the relative errors of A, B and C
        added in quadrature.
    """
    regions = (regionA, regionB, regionC, regionD)
    A_count, B_count, C_count, D_count = [
        vbswh.data_count(selection=region) for region in regions
    ]
    A_error, B_error, C_error, D_error = [
        vbswh.data_error(selection=region) for region in regions
    ]

    # Transfer factor from the low-|deta_jj| to the high-|deta_jj| side
    tf = A_count/B_count

    # Prediction
    D_pred = tf*C_count
    D_pred_err = D_pred*np.sqrt(
        (A_error/A_count)**2 + (B_error/B_count)**2 + (C_error/C_count)**2
    )

    print("For cross check with vbswh.ABCD:")
    print("A", A_count)
    print("B", B_count)
    print("C", C_count)

    return D_count, D_error, D_pred, D_pred_err

In [11]:
# ABCD restricted to hbbjet_msoftdrop >= 150 (part of the base selection), with
# the soft-drop-mass axis split at 200 instead of 150. Per the table printed by
# this call, with cut1 / cut2 the first two arguments and base the third:
#   A = base and cut1 and not cut2,   B = base and not cut1 and not cut2,
#   C = base and not cut1 and cut2,   D = base and cut1 and cut2
# NOTE(review): h_dir / v_dir semantics live in utils.analysis -- confirm there.
pred, stat, syst = vbswh.ABCD( 
    "abs(deta_jj) > 4",
    "hbbjet_msoftdrop < 200",
    "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and hbbjet_msoftdrop >= 150",
    h_dir="left", v_dir="up", 
    show_data=True
)

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and hbbjet_msoftdrop >= 150 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 200)),A,41.764499330043776,1.714211180298156,0.28888284342762005,0.08420035903428198,28,5.291502622129181
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and hbbjet_msoftdrop >= 150 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 200)),B,96.0539085941475,4.406398084194562,0.02902564212143425,0.01692882925349821,70,8.366600265340756
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and hbbjet_msoftdrop >= 150 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 200,C,145.88028965313845,3.8104878863266167,0.24379729402062955,0.08260472938467028,131,11.445523142259598
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and hbbjet_msoftdrop >= 150 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 200,D,131.20351087028484,2.760690516111737

In [12]:
# Test closure in data in AB: the nominal A and B regions are each split at
# MSD = 200 into an upper (A1, B1) and a lower (A2, B2) half, and A2 is
# predicted from the other three.
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 200"),                              # A1
        ("<= 4", ">= 200"),                             # B1
        ("<= 4", ">= 150 and hbbjet_msoftdrop < 200"),  # B2
        ("> 4", ">= 150 and hbbjet_msoftdrop < 200"),   # A2
    ]
]
print(get_closure_data(regionA, regionB, regionC, regionD))

# Reset the shared region strings to the nominal ABCD definition
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]

For cross check with vbswh.ABCD:
A 28
B 70
C 131
(114, 10.677078252031311, 52.400000000000006, 12.57966613229461)


In [13]:
# The Xbb scale factor was added to the weights only because the nominal
# regions require hbbjet_score > 0.9; this sideband (0.3 < score <= 0.5) does
# not, so it is divided out for the duration of this calculation.
vbswh.df.event_weight /= vbswh.df.xbb_sf
try:
    pred, stat, syst = vbswh.ABCD(
        "abs(deta_jj) > 4",
        "hbbjet_msoftdrop < 150",
        "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3",
        h_dir="left", v_dir="up",
        show_data=True
    )
finally:
    # Always put the nominal weights back, even if the call above raises;
    # otherwise every later cell would silently run without the Xbb scale
    # factor and a re-run of this cell would divide it out a second time.
    vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,80.13847261270763,2.6816135654763578,0.08432259475889595,0.039495855428519695,87,9.327379053088816
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,119.61053554308243,2.6484914913282003,0.03545809722221798,0.03545809722221798,119,10.908712114635714
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,164.07787245080965,7.259507711411256,0.06196954418901307,0.03641677545529165,188,13.711309200802088
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,96.88083927555962,4.0487005970492955,5.46316584

In [14]:
# NOTE(review): this table is pasted by hand from the output of the sideband
# vbswh.ABCD cell (with the region-D data replaced by BLINDED); it has to be
# refreshed manually whenever that cell's numbers change.
table = """
cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,80.13847261270763,2.6816135654763578,0.08432259475889595,0.039495855428519695,87,9.327379053088816
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,119.61053554308242,2.6484914913282003,0.03545809722221798,0.03545809722221798,119,10.908712114635714
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,164.07787245080965,7.259507711411256,0.06196954418901307,0.03641677545529165,188,13.711309200802088
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,96.88083927555962,4.048700597049295,5.463165849701192,0.3450759994857886,BLINDED,BLINDED
"""


def format_yield_row(line):
    """Format one CSV row of ``table`` as a row of the LaTeX yield table.

    ``line`` must have the eight comma-separated fields named in the header of
    ``table``. For region "D" the data column is rendered as "--" and the data
    fields are not parsed, so they may hold a placeholder such as BLINDED.
    Returns the row as a string ending in a LaTeX line break.
    """
    _cut, region, bkg_wgt, bkg_err, sig_wgt, sig_err, data, data_err = line.split(",")
    bkg_wgt, bkg_err = float(bkg_wgt), float(bkg_err)
    sig_wgt, sig_err = float(sig_wgt), float(sig_err)
    # "\\pm" rather than "\pm": "\p" is not a valid escape sequence in a
    # non-raw string literal and triggers a warning on current Python versions.
    mc_cells = (
        f"{region:^6} & ${bkg_wgt:>5.1f} \\pm {bkg_err:<4.1f}$"
        f" & ${sig_wgt:>4.1f} \\pm {sig_err:<4.1f}$"
    )
    if region == "D":
        return f"{mc_cells} & {'--':^14} \\\\"
    data, data_err = int(data), float(data_err)
    return f"{mc_cells} & ${data:>3} \\pm {data_err:<4.1f}$ \\\\"


print("Region & Total bkg. (MC) &    Total sig.   &   Total data    \\\\")
print("\\hline")
# splitlines()[2:] skips the empty first line of the literal and the CSV header
for line in table.splitlines()[2:]:
    print(format_yield_row(line))

Region & Total bkg. (MC) &    Total sig.   &   Total data    \\
\hline
  A    & $ 80.1 \pm 2.7 $ & $ 0.1 \pm 0.0 $ & $ 87 \pm 9.3 $ \\
  B    & $119.6 \pm 2.6 $ & $ 0.0 \pm 0.0 $ & $119 \pm 10.9$ \\
  C    & $164.1 \pm 7.3 $ & $ 0.1 \pm 0.0 $ & $188 \pm 13.7$ \\
  D    & $ 96.9 \pm 4.0 $ & $ 5.5 \pm 0.3 $ &       --       \\


In [15]:
# Test closure in data in PNet sideband
# (0.3 < hbbjet_score <= 0.5, nominal |deta_jj| / soft-drop-mass splits)
# SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.9 and hbbjet_score > 0.7"
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score <= 0.5 and hbbjet_score > 0.3"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]
D, D_err, D_pred, D_pred_err = get_closure_data(regionA, regionB, regionC, regionD)
print(D, D_err, D_pred, D_pred_err)

# Record observed and predicted sideband yields
AN_numbers.update({
    "PNetSidebandPredData": round(D_pred, 1),
    "PNetSidebandPredDataStatErr": round(D_pred_err, 1),
    "PNetSidebandData": D,
    "PNetSidebandDataStatErr": round(D_err, 1),
})

# Reset the shared region strings to the nominal ABCD definition
SRlike = "presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9"
regionA, regionB, regionC, regionD = [
    f"{SRlike} and abs(deta_jj) {deta_cut} and hbbjet_msoftdrop {msd_cut}"
    for deta_cut, msd_cut in [
        ("> 4", ">= 150"),
        ("<= 4", ">= 150"),
        ("<= 4", "< 150"),
        ("> 4", "< 150"),
    ]
]

For cross check with vbswh.ABCD:
A 87
B 119
C 188
118 10.862780491200215 137.44537815126048 21.8260331349876


In [16]:
def get_closure_numbers(regionA, regionB, regionC, region_D, sample=None):
    """Compute ABCD closure numbers from MC for the given four regions.

    The yields and their errors are read from the module-level ``vbswh``
    object: ``bkg_count``/``bkg_error`` when ``sample`` is None, otherwise
    ``sample_count``/``sample_error`` for the named sample.

    Parameters
    ----------
    regionA, regionB, regionC, region_D : str
        Selection strings passed as ``selection=`` to the ``vbswh`` methods.
    sample : str, optional
        Name of a single sample; None uses the total background.

    Returns
    -------
    tuple
        ``(tf, tf_err, D_pred, D_pred_err, D_count, D_error, syst)`` where
        ``tf = A/B``, ``D_pred = tf*C`` and ``syst = |1 - D_pred/D_count|``.
        The errors are propagated by adding relative errors in quadrature.
    """
    # NOTE: region D is taken from the ``region_D`` argument. Previously the body
    # read the global ``regionD`` instead, silently ignoring what the caller passed.
    regions = (regionA, regionB, regionC, region_D)
    if sample is None:
        A_count, B_count, C_count, D_count = (
            vbswh.bkg_count(selection=region) for region in regions
        )
        A_error, B_error, C_error, D_error = (
            vbswh.bkg_error(selection=region) for region in regions
        )
    else:
        A_count, B_count, C_count, D_count = (
            vbswh.sample_count(sample, selection=region) for region in regions
        )
        A_error, B_error, C_error, D_error = (
            vbswh.sample_error(sample, selection=region) for region in regions
        )

    # Transfer factor
    tf = A_count/B_count
    tf_err = tf*np.sqrt((A_error/A_count)**2 + (B_error/B_count)**2)

    # Prediction
    D_pred = tf*C_count
    D_pred_err = D_pred*np.sqrt((A_error/A_count)**2 + (B_error/B_count)**2 + (C_error/C_count)**2)

    # Systematic: relative non-closure between prediction and the MC yield in D
    syst = abs(1 - D_pred/D_count)

    return tf, tf_err, D_pred, D_pred_err, D_count, D_error, syst

# LaTeX labels for the table rows; samples not listed here are printed under their raw name
sample_tex = {
    "TTbar1L": "$t\\bar{t}+1\\ell$",
    "TTbar2L": "$t\\bar{t}+2\\ell$",
    "WJets": "W+jets",
    "SingleTop": "Single top"
}

# Print the MC closure table as LaTeX rows: total background first, then per sample
print("Sample           & Transfer factor & $D_{MC}^{pred}$   & $D_{MC}$          & $|1 - D_{MC}^{pred}/D_{MC}|$")
print("\\hline")
for sample in [None, "TTbar1L", "TTbar2L", "WJets", "SingleTop", "Bosons"]:
    tf, tf_err, D_pred, D_pred_err, D, D_err, syst = get_closure_numbers(
        regionA, regionB, regionC, regionD, sample=sample
    )
    # sample=None selects the total background; use a separate name for the row
    # label so the loop variable is not overwritten
    label = "Total bkg." if sample is None else sample
    # Backslashes are escaped explicitly ("\\pm"): a bare "\p" is an invalid escape
    # sequence and triggers a warning on recent Python versions
    print(
        f"{sample_tex.get(label, label):<16}"
        f" & ${tf:.2f} \\pm {tf_err:.2f}$"
        f" & ${D_pred:>6.1f} \\pm {D_pred_err:<4.1f}$"
        f" & ${D:>6.1f} \\pm {D_err:<4.1f}$"
        f" & ${syst*100:>5.1f}$"
    )

Sample           & Transfer factor & $D_{MC}^{pred}$   & $D_{MC}$          & $|1 - D_{MC}^{pred}/D_{MC}|$
\hline
Total bkg.       & $0.71 \pm 0.02$ & $ 129.5 \pm 5.1 $ & $ 116.4 \pm 3.8 $ & $ 11.2$
$t\bar{t}+1\ell$ & $0.81 \pm 0.02$ & $  48.0 \pm 1.8 $ & $  49.9 \pm 1.6 $ & $  3.8$
$t\bar{t}+2\ell$ & $0.33 \pm 0.03$ & $  12.7 \pm 1.3 $ & $  21.4 \pm 0.9 $ & $ 40.6$
W+jets           & $0.37 \pm 0.03$ & $  15.8 \pm 1.1 $ & $  21.2 \pm 1.3 $ & $ 25.5$
Single top       & $1.17 \pm 0.16$ & $  17.1 \pm 3.0 $ & $  13.1 \pm 1.4 $ & $ 30.6$
Bosons           & $0.13 \pm 0.11$ & $   2.7 \pm 2.5 $ & $   6.0 \pm 2.6 $ & $ 54.8$


# Main ABCD

In [17]:
# Nominal ABCD background prediction for region D, using the notebook's ABCD helper
D_pred, D_pred_stat_err, D_pred_syst_err = ABCD(regionA, regionB, regionC, regionD)
print(D_pred, D_pred_stat_err, D_pred_syst_err)

# Store the prediction (nearest integer) and its statistical error (one decimal)
AN_numbers.update({
    "PredBkg": round(D_pred),
    "PredBkgStatErr": round(D_pred_stat_err, 1),
})

region,bkg_count,bkg_error,sig_count,sig_error,data_count,data_err
A,172.97,3.25,6.92,0.40,142.00,11.92
B,241.93,5.83,0.27,0.08,201.00,14.18
C,181.10,4.40,11.62,0.52,170.00,13.04
D,116.41,3.84,366.30,2.92,0.00,0.00
0.1122552957372065
Calculated MC TF fix: 0.8990741638476054
D_pred,116.41,4.54,,,107.98,14.45
107.97835977751838 14.446411333162647 12.121142710043808


In [18]:
# ABCD estimate straight from the Extrapolation object (stored under the "NoCorrection" keys);
# stat and syst are used below as relative errors
pred, stat, syst = vbswh.ABCD(
    "abs(deta_jj) > 4",
    "hbbjet_msoftdrop < 150",
    SRlike,
    h_dir="left",
    v_dir="up",
    show_data=True,
)
AN_numbers["PredBkgNoCorrection"] = round(pred)
AN_numbers["PredBkgNoCorrectionStatErr"] = round(pred*stat, 1)

# MC yields in regions A and B, and the MC-only A/B transfer factor
A_bkg_wgt, A_bkg_err = vbswh.bkg_count(selection=regionA), vbswh.bkg_error(selection=regionA)
B_bkg_wgt, B_bkg_err = vbswh.bkg_count(selection=regionB), vbswh.bkg_error(selection=regionB)
C_bkg_wgt = vbswh.bkg_count(selection=regionC)
AN_numbers["PredBkgMC"] = round(A_bkg_wgt/B_bkg_wgt*C_bkg_wgt, 1)
AN_numbers["BkgEstABMC"] = A_bkg_wgt/B_bkg_wgt
AN_numbers["BkgEstABMCErr"] = round(
    np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100,
    1,
)

# Relative errors returned by vbswh.ABCD, expressed in percent
AN_numbers["BkgEstMethodSystErr"] = syst*100
AN_numbers["BkgEstStatErr"] = stat*100

# Same A/B ratio measured in data
A_data, A_data_err = vbswh.data_count(selection=regionA), vbswh.data_error(selection=regionA)
B_data, B_data_err = vbswh.data_count(selection=regionB), vbswh.data_error(selection=regionB)
AN_numbers["BkgEstABData"] = A_data/B_data
AN_numbers["BkgEstABDataErr"] = round(
    np.sqrt((B_data_err/B_data)**2 + (A_data_err/A_data)**2)*100,
    1,
)

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,172.96801020032862,3.2496049138946854,6.922700925844113,0.39741721099955374,142,11.916375287812984
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,241.93419824728596,5.825475260288674,0.2728229361420638,0.0843215664975972,201,14.177446878757825
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,181.09960266319064,4.4037606061757675,11.623110701742963,0.5219428213060152,170,13.038404810405298
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,116.4076556171012,3.8367571247557883,366.30367883174995,2.915976282353836,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.7149382412796988,0.030540955

In [29]:
# Region yields pasted from the printed output of the vbswh.ABCD call above.
# NOTE(review): this is a hard-coded copy; it must be re-pasted whenever the
# selection or inputs change, otherwise the LaTeX table goes stale.
table = """cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,172.96801020032862,3.2496049138946854,6.922700925844113,0.39741721099955374,142,11.916375287812984
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,241.93419824728596,5.825475260288674,0.2728229361420638,0.0843215664975972,201,14.177446878757825
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,181.09960266319064,4.4037606061757675,11.623110701742963,0.5219428213060152,170,13.038404810405298
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,116.4076556171012,3.8367571247557883,366.30367883174995,2.915976282353836,BLINDED,BLINDED
"""

# Convert the CSV rows (header skipped) into LaTeX table rows
print("Region & Total bkg. (MC) &    Total sig.   &   Total data   \\\\")
print("\\hline")
for line in table.splitlines()[1:]:
    cut, region, bkg_wgt, bkg_err, sig_wgt, sig_err, data, data_err = line.split(",")
    bkg_wgt, bkg_err = (float(bkg_wgt), float(bkg_err))
    sig_wgt, sig_err = (float(sig_wgt), float(sig_err))
    # Backslashes are written as "\\pm": a bare "\p" is an invalid escape sequence
    # and triggers a warning on recent Python versions (the output is unchanged)
    if region == "D":
        # Region D is blinded: its data columns are not numeric, so print a placeholder
        print(f"{region:^6} & ${bkg_wgt:.1f} \\pm {bkg_err:.1f}$ & ${sig_wgt:.1f} \\pm {sig_err:.1f}$ & {'--':^14} \\\\")
    else:
        data, data_err = (int(data), float(data_err))
        print(f"{region:^6} & ${bkg_wgt:.1f} \\pm {bkg_err:.1f}$ & ${sig_wgt:>5.1f} \\pm {sig_err:.1f}$ & ${data} \\pm {data_err:.1f}$ \\\\")

Region & Total bkg. (MC) &    Total sig.   &   Total data   \\
\hline
  A    & $173.0 \pm 3.2$ & $  6.9 \pm 0.4$ & $142 \pm 11.9$ \\
  B    & $241.9 \pm 5.8$ & $  0.3 \pm 0.1$ & $201 \pm 14.2$ \\
  C    & $181.1 \pm 4.4$ & $ 11.6 \pm 0.5$ & $170 \pm 13.0$ \\
  D    & $116.4 \pm 3.8$ & $366.3 \pm 2.9$ &       --       \\


# W+jets systematic

In [20]:
# W+jets composition systematic: scale the W+jets event weights by 2 ("Up") and by
# 0.5 ("Down"), recompute the MC A/B ratio for each, and take the larger relative
# shift with respect to the nominal AN_numbers["BkgEstABMC"] (in percent).
for variation, scale in [("Up", 2), ("Down", 0.5)]:
    try:
        vbswh.df.loc[vbswh.df.name == "WJets", "event_weight"] *= scale
        vbswh.ABCD(
            "abs(deta_jj) > 4",
            "hbbjet_msoftdrop < 150",
            SRlike,
            h_dir="left", v_dir="up",
            show_data=True
        )
        A_bkg_wgt = vbswh.bkg_count(selection=regionA)
        A_bkg_err = vbswh.bkg_error(selection=regionA)
        B_bkg_wgt = vbswh.bkg_count(selection=regionB)
        B_bkg_err = vbswh.bkg_error(selection=regionB)
        AN_numbers[f"BkgEstWJets{variation}ABMC"] = A_bkg_wgt/B_bkg_wgt
        AN_numbers[f"BkgEstWJets{variation}ABMCErr"] = round(
            np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1
        )
    finally:
        # Always restore the nominal weights, even if something above raised;
        # otherwise every later cell would silently run on scaled weights
        vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()
    if variation == "Up":
        # Blank line between the two printed ABCD tables
        print("")

AN_numbers["BkgEstWJetsCompSyst"] = 100*max(
    abs(1 - AN_numbers["BkgEstWJetsUpABMC"]/AN_numbers["BkgEstABMC"]),
    abs(1 - AN_numbers["BkgEstWJetsDownABMC"]/AN_numbers["BkgEstABMC"])
)

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,184.25640179685288,3.4823729135553605,6.922700925844113,0.39741721099955374,142,11.916375287812984
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,272.50360034858056,5.975932487186179,0.2728229361420638,0.0843215664975972,201,14.177446878757825
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,223.95092961983713,4.724667057374665,11.623110701742963,0.5219428213060152,170,13.038404810405298
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,137.63535367359418,4.415761139294962,366.30367883174995,2.915976282353836,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.6761613481845972,0.0289501019

# Bosons systematic

In [21]:
# Bosons composition systematic: scale the Bosons event weights by 2 ("Up") and by
# 0.5 ("Down"), recompute the MC A/B ratio for each, and take the larger relative
# shift with respect to the nominal AN_numbers["BkgEstABMC"] (in percent).
for variation, scale in [("Up", 2), ("Down", 0.5)]:
    try:
        vbswh.df.loc[vbswh.df.name == "Bosons", "event_weight"] *= scale
        vbswh.ABCD(
            "abs(deta_jj) > 4",
            "hbbjet_msoftdrop < 150",
            SRlike,
            h_dir="left", v_dir="up",
            show_data=True
        )
        A_bkg_wgt = vbswh.bkg_count(selection=regionA)
        A_bkg_err = vbswh.bkg_error(selection=regionA)
        B_bkg_wgt = vbswh.bkg_count(selection=regionB)
        B_bkg_err = vbswh.bkg_error(selection=regionB)
        AN_numbers[f"BkgEstBosons{variation}ABMC"] = A_bkg_wgt/B_bkg_wgt
        AN_numbers[f"BkgEstBosons{variation}ABMCErr"] = round(
            np.sqrt((B_bkg_err/B_bkg_wgt)**2 + (A_bkg_err/A_bkg_wgt)**2)*100, 1
        )
    finally:
        # Always restore the nominal weights, even if something above raised;
        # otherwise every later cell would silently run on scaled weights
        vbswh.df.event_weight = ORIG_EVENT_WEIGHT.copy()
    if variation == "Up":
        # Blank line between the two printed ABCD tables
        print("")

AN_numbers["BkgEstBosonsCompSyst"] = 100*max(
    abs(1 - AN_numbers["BkgEstBosonsUpABMC"]/AN_numbers["BkgEstABMC"]),
    abs(1 - AN_numbers["BkgEstBosonsDownABMC"]/AN_numbers["BkgEstABMC"])
)

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,173.96489523776356,3.4563270082954083,6.922700925844113,0.39741721099955374,142,11.916375287812984
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,249.86936238562845,9.9929460766949,0.2728229361420638,0.0843215664975972,201,14.177446878757825
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,202.62920229973548,7.167156595446391,11.623110701742963,0.5219428213060152,170,13.038404810405298
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,122.39083663442076,5.971976534453302,366.30367883174995,2.915976282353836,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.6962233928034762,0.044655910668

# AN numbers

In [22]:
# Display the Bosons composition systematic (stored in percent, still unrounded at this point)
AN_numbers["BkgEstBosonsCompSyst"]

2.6176874302770714

In [23]:
# Background-composition systematic: W+jets and Bosons variations added in quadrature
wjets_comp_syst = AN_numbers["BkgEstWJetsCompSyst"]
bosons_comp_syst = AN_numbers["BkgEstBosonsCompSyst"]
AN_numbers["BkgEstBkgCompSystErr"] = np.sqrt(wjets_comp_syst**2 + bosons_comp_syst**2)

# Total systematic: method systematic and composition systematic added in quadrature
method_syst = AN_numbers["BkgEstMethodSystErr"]
bkg_comp_syst = AN_numbers["BkgEstBkgCompSystErr"]
AN_numbers["BkgEstTotalSystErr"] = np.sqrt(method_syst**2 + bkg_comp_syst**2)

In [24]:
# Display the combined (W+jets and Bosons in quadrature) composition systematic
AN_numbers["BkgEstBkgCompSystErr"]

6.0224583291886615

In [25]:
# SR2 numbers for posterity (SR2 = the ABCD regions with the extra cut ST > 1500)
sr2_regionD = f"{regionD} and ST > 1500"
sr2_regionB = f"{regionB} and ST > 1500"

# Scale the nominal prediction by the MC yield in SR2 relative to the MC prediction
sr2_pred_bkg = AN_numbers["PredBkg"]*vbswh.bkg_count(selection=sr2_regionD)/AN_numbers["PredBkgMC"]
# Relative systematic: data statistics in region B (ST > 1500) and the total
# systematic (stored in percent), added in quadrature
sr2_rel_syst = np.sqrt(
    (vbswh.data_error(selection=sr2_regionB)/vbswh.data_count(selection=sr2_regionB))**2
    + (AN_numbers["BkgEstTotalSystErr"]/100)**2
)

AN_numbers["SRTwoPredBkg"] = round(sr2_pred_bkg)
AN_numbers["SRTwoBkgEstSystErr"] = round(sr2_rel_syst*100, 1)
AN_numbers["SRTwoPredBkgSystErr"] = round(sr2_rel_syst*sr2_pred_bkg, 1)

AN_numbers["SRTwoBkgEstStatErr"] = round(AN_numbers["BkgEstStatErr"], 1)
AN_numbers["SRTwoPredBkgStatErr"] = round(sr2_pred_bkg*(AN_numbers["BkgEstStatErr"]/100), 1)

# Expected signal in SR2
AN_numbers["SRTwoExpSig"] = round(vbswh.sig_count(selection=sr2_regionD))
AN_numbers["SRTwoExpSigStatErr"] = round(vbswh.sig_error(selection=sr2_regionD), 1)

In [26]:
# Re-run the ABCD estimate from the Extrapolation object to get its prediction `pred`
# (this also prints the region table again).
pred, stat, syst = vbswh.ABCD( 
    "abs(deta_jj) > 4",
    "hbbjet_msoftdrop < 150",
    SRlike,
    h_dir="left", v_dir="up", 
    show_data=True
)

# Absolute systematic on that prediction: BkgEstTotalSystErr is stored in percent
AN_numbers["PredBkgNoCorrectionSystErr"] = round(pred*AN_numbers["BkgEstTotalSystErr"]/100, 1)

# Same for the prediction returned by the notebook's own ABCD helper
D_pred, D_pred_stat_err, D_pred_syst_err = ABCD(regionA, regionB, regionC, regionD)
AN_numbers["PredBkgSystErr"] = round(D_pred*AN_numbers["BkgEstTotalSystErr"]/100, 1)

cut,region,bkg_wgt,bkg_err,sig_wgt,sig_err,data,data_err
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and (not (hbbjet_msoftdrop < 150)),A,172.96801020032862,3.2496049138946854,6.922700925844113,0.39741721099955374,142,11.916375287812984
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and (not (hbbjet_msoftdrop < 150)),B,241.93419824728596,5.825475260288674,0.2728229361420638,0.0843215664975972,201,14.177446878757825
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and (not (abs(deta_jj) > 4)) and hbbjet_msoftdrop < 150,C,181.09960266319064,4.4037606061757675,11.623110701742963,0.5219428213060152,170,13.038404810405298
presel_noDetaJJ and M_jj > 600 and ST > 900 and hbbjet_score > 0.9 and abs(deta_jj) > 4 and hbbjet_msoftdrop < 150,D,116.4076556171012,3.8367571247557883,366.30367883174995,2.915976282353836,BLINDED,BLINDED

name,extp,rel_err
BtoA_MC,0.7149382412796988,0.030540955

In [27]:
# Final rounding of the stored numbers: key -> number of decimals.
# Done last so that the derived quantities above are computed from unrounded values.
an_number_decimals = {
    "BkgEstABMC": 2,
    "BkgEstABData": 2,
    "BkgEstWJetsUpABMC": 2,
    "BkgEstWJetsDownABMC": 2,
    "BkgEstWJetsCompSyst": 1,
    "BkgEstBosonsUpABMC": 2,
    "BkgEstBosonsDownABMC": 2,
    "BkgEstBosonsCompSyst": 1,
    "BkgEstBkgCompSystErr": 1,
    "BkgEstMethodSystErr": 1,
    "BkgEstTotalSystErr": 1,
    "BkgEstStatErr": 1,
}
for an_key, n_decimals in an_number_decimals.items():
    AN_numbers[an_key] = round(AN_numbers[an_key], n_decimals)

In [28]:
# Write the collected numbers to AN_numbers.json in the working directory
with open("AN_numbers.json", mode="w") as json_file:
    json.dump(AN_numbers, json_file)

AN_numbers # must run vbswh-sys.ipynb to fill completely

{'PredBkg': 108,
 'PredBkgStatErr': 14.4,
 'PredBkgSystErr': 13.8,
 'ExpSig': 366,
 'ExpSigStatErr': 2.9,
 'ExpSigSystErr': 0,
 'ExpBkg': 116,
 'BkgEstABMC': 0.71,
 'BkgEstABMCErr': 3.1,
 'BkgEstABData': 0.71,
 'BkgEstABDataErr': 11.0,
 'PredBkgMC': 129.5,
 'BkgEstMethodSystErr': 11.2,
 'BkgEstBkgCompSystErr': 6.0,
 'BkgEstTotalSystErr': 12.7,
 'BkgEstStatErr': 13.4,
 'BkgEstWJetsUpABMC': 0.68,
 'BkgEstWJetsUpABMCErr': 2.9,
 'BkgEstWJetsDownABMC': 0.74,
 'BkgEstWJetsDownABMCErr': 3.2,
 'BkgEstWJetsCompSyst': 5.4,
 'BkgEstBosonsUpABMC': 0.7,
 'BkgEstBosonsUpABMCErr': 4.5,
 'BkgEstBosonsDownABMC': 0.72,
 'BkgEstBosonsDownABMCErr': 2.6,
 'BkgEstBosonsCompSyst': 2.6,
 'PNetSidebandPredData': 137.4,
 'PNetSidebandPredDataStatErr': 21.8,
 'PNetSidebandData': 118,
 'PNetSidebandDataStatErr': 10.9,
 'SRTwoPredBkg': 5,
 'SRTwoPredBkgStatErr': 0.6,
 'SRTwoPredBkgSystErr': 1.7,
 'SRTwoBkgEstStatErr': 13.4,
 'SRTwoBkgEstSystErr': 35.7,
 'SRTwoExpSig': 99,
 'SRTwoExpSigStatErr': 1.5,
 'SRTwoExpSigS