# ABCD Yields

In [72]:
import uproot
import numpy as np

In [92]:
def get_abcd(file, tree="Events", BDT_CUT=0.87, DNN_CUT=0.92):
    """Print and return the weighted yields of the four ABCD regions.

    Events are split by two scores read from ``tree``:

    * A: ``VBSBDTscore >  BDT_CUT`` and ``abcdnet_score >  DNN_CUT``
    * B: ``VBSBDTscore >  BDT_CUT`` and ``abcdnet_score <= DNN_CUT``
    * C: ``VBSBDTscore <= BDT_CUT`` and ``abcdnet_score >  DNN_CUT``
    * D: ``VBSBDTscore <= BDT_CUT`` and ``abcdnet_score <= DNN_CUT``

    Each yield is the sum of the ``weight`` branch in that region and its
    uncertainty is ``sqrt(sum(weight**2))``.  The ABCD prediction for region A,
    ``B * C / D``, is printed together with its propagated uncertainty.

    Parameters
    ----------
    file : str or path-like
        ROOT file to open with ``uproot``.
    tree : str
        Name of the tree inside ``file``.
    BDT_CUT, DNN_CUT : float
        Thresholds on ``VBSBDTscore`` and ``abcdnet_score``.

    Returns
    -------
    tuple
        ``(a, b, c, d)`` summed weights.  When the file name contains
        ``"data_MVA"`` the value of ``a`` is replaced by ``np.array(1)``.
    """
    # Only these three branches are used below, so do not load the whole tree.
    branches = ["VBSBDTscore", "abcdnet_score", "weight"]
    with uproot.open(file) as f:
        df = f.get(tree).arrays(branches, library="pd")

    bdt = df["VBSBDTscore"]
    dnn = df["abcdnet_score"]
    weight = df["weight"]

    # The failing side uses "<=" so that an event sitting exactly on a cut is
    # still assigned to a region; with strict "<" it would be dropped from all
    # four.  NaN scores fail every comparison and stay excluded.
    bdt_pass, bdt_fail = bdt > BDT_CUT, bdt <= BDT_CUT
    dnn_pass, dnn_fail = dnn > DNN_CUT, dnn <= DNN_CUT

    a = weight[bdt_pass & dnn_pass]
    b = weight[bdt_pass & dnn_fail]
    c = weight[bdt_fail & dnn_pass]
    d = weight[bdt_fail & dnn_fail]

    # Uncertainty of a sum of weights: sqrt of the sum of squared weights.
    err_a = np.sqrt((a**2).sum())
    err_b = np.sqrt((b**2).sum())
    err_c = np.sqrt((c**2).sum())
    err_d = np.sqrt((d**2).sum())

    a = a.sum()
    b = b.sum()
    c = c.sum()
    d = d.sum()

    # NOTE(review): presumably this hides the observed region-A data yield
    # (blinding) behind a placeholder of 1 -- confirm.  err_a is left as computed.
    if "data_MVA" in str(file):
        a = np.array(1)

    print(f"Actual A: {a} \\pm {err_a}")
    print(f"Region B: {b} \\pm {err_b}")
    print(f"Region C: {c} \\pm {err_c}")
    print(f"Region D: {d} \\pm {err_d}")

    if d == 0:
        # B*C/D is undefined for an empty region D; report NaN instead of
        # dividing by zero.
        pred_a = float("nan")
        err_pred_a = float("nan")
    else:
        pred_a = b * c / d
        # First-order error propagation of B*C/D in B, C and D.
        err_pred_a = np.sqrt(
            (err_b * c / d) ** 2
            + (err_c * b / d) ** 2
            + ((b * c / d**2) * err_d) ** 2
        )

    print(f"Predicted A: {pred_a} \\pm {err_pred_a}")

    return a, b, c, d

In [93]:
# Scan the BDT threshold and record s / sqrt(b) at each point, where s and b
# are the region-A yields returned by get_abcd for signal and background.
SIG_FILE = "/data/userdata/aaarora/output/run2/ABCDNet_simpleDisco_VBSVVH1lep_30/output/sig_MVA_abcdnet.root"
BKG_FILE = "/data/userdata/aaarora/output/run2/ABCDNet_simpleDisco_VBSVVH1lep_30/output/bkg_MVA_abcdnet.root"

eff = []
# 35 evenly spaced thresholds from 0.60 to 0.94.  linspace fixes the number of
# points explicitly; arange with a float step derives it from a rounded
# division and can gain or lose the last point.
for i in np.linspace(0.6, 0.94, 35):
    sig = get_abcd(SIG_FILE, BDT_CUT=i)
    bkg = get_abcd(BKG_FILE, BDT_CUT=i)
    # A weighted background yield can be zero or negative, which would turn
    # s / sqrt(b) into inf or NaN and corrupt a later max(eff).  Score such
    # points as 0 so they are never picked as the best cut.
    eff.append(sig[0] / np.sqrt(bkg[0]) if bkg[0] > 0 else 0.0)

Actual A: 3.9501641791173387 \pm 0.025818247003779615
Region B: 1.2928193045972833 \pm 0.01626424949582608
Region C: 0.5609043196039143 \pm 0.010399718538740487
Region D: 0.41094796309130005 \pm 0.009892173808998098
Predicted A: 1.764573614043779 \pm 0.058029463470910696
Actual A: 1.0783499872909932 \pm 0.30597592369947224
Region B: 50.381960953086605 \pm 3.351756913355154
Region C: 18.497581858014414 \pm 1.8058072131257692
Region D: 1012.3542569371178 \pm 11.309423243452446
Predicted A: 0.9205714704224264 \pm 0.10923840189092844
Actual A: 3.9383099180049164 \pm 0.025783943005860947
Region B: 1.2824014226005946 \pm 0.016191575463950825
Region C: 0.5727585807163363 \pm 0.01048447934347299
Region D: 0.4213658450879889 \pm 0.010010684208638916
Predicted A: 1.743156041904509 \pm 0.05672427302787375
Actual A: 1.0767389300568773 \pm 0.3059730409915346
Region B: 47.97993852631948 \pm 3.3029356992848626
Region C: 18.499192915248532 \pm 1.8058077015693543
Region D: 1014.7562793638848 \pm 11.323

In [94]:
# Locate the first scan point with the largest figure of merit and convert its
# position back to a threshold (scan starts at 0.6 and advances in 0.01 steps).
best_eff = max(eff)
idx = eff.index(best_eff)
cut = idx * 0.01 + 0.6
print(cut)

0.87


# DATA

In [95]:
# ABCD yields for the data file at get_abcd's default thresholds.  The file
# name contains "data_MVA", so get_abcd reports region A as 1.
data = get_abcd(
    file="/data/userdata/aaarora/output/run2/ABCDNet_simpleDisco_VBSVVH1lep_30/output/data_MVA_abcdnet.root",
)

Actual A: 1 \pm 0.0
Region B: 38.0 \pm 6.164414002968976
Region C: 12.0 \pm 3.4641016151377544
Region D: 1053.0 \pm 32.449961479175904
Predicted A: 0.43304843304843305 \pm 0.144016303675641


In [96]:
# data_cr = get_abcd("/data/userdata/aaarora/output/run2/ABCDNet_simpleDisco_VBSVVH1lep_30/output/data_cr_MVA_abcdnet.root")

# SIG

In [97]:
# ABCD yields for the signal file at get_abcd's default thresholds.
sig = get_abcd(
    file="/data/userdata/aaarora/output/run2/ABCDNet_simpleDisco_VBSVVH1lep_30/output/sig_MVA_abcdnet.root",
)

Actual A: 3.1390326064886187 \pm 0.022912392547066018
Region B: 0.9283383774164965 \pm 0.013733794608388826
Region C: 1.3720358922326334 \pm 0.01580374296811838
Region D: 0.775428890272087 \pm 0.013181949779319328
Predicted A: 1.642592363956816 \pm 0.04157157728905591


# BKG

In [89]:
# ABCD yields for the background file at get_abcd's default thresholds.
bkg = get_abcd(
    file="/data/userdata/aaarora/output/run2/ABCDNet_simpleDisco_VBSVVH1lep_30/output/bkg_MVA_abcdnet.root",
)

Actual A: 0.4421398312035284 \pm 0.167357635175318
Region B: 28.60203044989712 \pm 1.6819302955046662
Region C: 19.13379201410188 \pm 1.823883872073364
Region D: 1034.1341874403072 \pm 11.675120512732624
Predicted A: 0.5292014406408257 \pm 0.059571831541921326
