In [1]:
import uproot3 as uproot
from uproot3_methods import TLorentzVectorArray
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob

In [2]:
YEAR = 2018
# SAMPLE = "vbshwwc2v"
SAMPLE = "bosons"

NOMINAL_SAMPLE = f"../outputs/minintuphadd/v2.6/miniNtupV5myULSF/{YEAR}/{SAMPLE}.root"
JEC_UP_SAMPLE = f"../outputs/minintuphadd/v2.6/miniNtupV5myULSF_jecUp/{YEAR}/{SAMPLE}.root"
JEC_DN_SAMPLE = f"../outputs/minintuphadd/v2.6/miniNtupV5myULSF_jecDn/{YEAR}/{SAMPLE}.root"

RUN2_NOMINAL_SAMPLE = f"../outputs/minintuphadd/v2.6/miniNtupV5myULSF/Run2/{SAMPLE}.root"

In [3]:
def get_systs(df, wgt_col_pattern, sr_col_pattern, nom_wgt):
    rows = []
    for SR in df.columns[df.columns.str.contains(sr_col_pattern)]:
        for wgt_col in df.columns[df.columns.str.contains(wgt_col_pattern)]:
            in_SR = df[SR]
            n_pass_wgt = np.sum(df[in_SR][nom_wgt]*df[in_SR][wgt_col])
            n_pass_nom = np.sum(df[in_SR][nom_wgt])
            rows.append({
                "region":SR.split("is_")[-1],
                "wgt_name": wgt_col,
                "n_pass_wgt": n_pass_wgt,
                "n_pass_nom": n_pass_nom,
                "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100
            })
            
    return pd.DataFrame(data=rows)

def make_syst_table(df, wgt_col_pattern, sr_col_pattern, nominal_wgt="", debug=False):
    # Compute all systematics
    systs = get_systs(df, wgt_col_pattern, sr_col_pattern, nominal_wgt)
    if debug:
        display(systs)
    # Designate how columns should be renamed
    renames = {
        "delta_percent": wgt_col_pattern.replace("_", ""), 
        "region": "systematics"
    }
    # Return only one row: max systematic for each SR
    return (systs.groupby("region", as_index=False)[["delta_percent"]]
                 .agg(func=lambda x: np.max(np.abs(x)))
                 .round(1)
                 .rename(columns=renames)
                 .set_index("systematics")
                 .transpose())

def append_systs(syst_table, new_syst):
    if np.any(np.isin(new_syst.index, syst_table.index)):
        print("WARNING: this systematic is already in the table. No action taken.")
        return syst_table
    else:
        return syst_table.append(new_syst)

In [4]:
with uproot.open(NOMINAL_SAMPLE) as f:
    df = f.get("variable").pandas.df(
        branches=["is_ps*", "is_bdt_*", "is_cut_*", "LHE_*", "pu_rewgt*",
                  "lepsf*", "btagsf*", "trigsf*", "wgt", "genrewgt"]
    )
# Compute nominal event weight
df["evt_wgt_nominal"] = df.wgt*df.genrewgt*df.lepsf*df.btagsf*df.trigsf*df.pu_rewgt
# Alter up/down variations of scale factors s.t. we can apply them as follows:
# (event weight up) = (nominal event weight)*(sf up/down)
for sf in ["lepsf", "btagsf", "trigsf", "pu_rewgt"]:
    df[f"{sf}_up"] /= df[sf]
    df[f"{sf}_dn"] /= df[sf]

df[df.columns[df.columns.str.contains("is_")]] = df[df.columns[df.columns.str.contains("is_")]].astype(bool)

df

Unnamed: 0_level_0,is_ps_el,is_ps_mu,is_ps_tau,is_ps_neg,is_ps_lgt,is_ps,is_bdt_sr_el,is_bdt_sr_mu,is_bdt_sr_tau,is_bdt_sr_neg,...,lepsf_dn,btagsf,btagsf_up,btagsf_dn,trigsf,trigsf_up,trigsf_dn,wgt,genrewgt,evt_wgt_nominal
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,True,False,False,False,True,True,False,False,False,False,...,0.969751,1.358486,1.113965,0.891226,1.00,1.01,0.99,0.001675,1.0,0.002104
1,True,False,False,False,True,True,False,False,False,False,...,0.970062,0.829254,1.057662,0.943365,1.00,1.01,0.99,0.001675,1.0,0.000977
2,True,False,False,False,True,True,False,False,False,False,...,0.970345,0.906120,1.027302,0.973054,1.00,1.01,0.99,0.001675,1.0,0.001343
3,False,True,False,False,True,True,False,False,False,False,...,0.970476,0.892789,1.088909,0.912792,1.00,1.01,0.99,0.001675,1.0,0.001305
4,False,True,False,False,True,True,False,False,False,False,...,0.970139,1.347205,1.089564,0.912143,0.98,1.01,0.99,0.001675,1.0,0.002261
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
300,False,False,False,True,False,True,False,False,False,False,...,0.953559,0.855898,1.022367,0.977881,1.00,1.01,0.99,0.000765,1.0,0.000573
301,True,False,False,False,True,True,False,False,False,False,...,0.969688,1.294906,1.251570,0.773581,1.00,1.01,0.99,0.000765,1.0,0.000849
302,False,False,False,True,False,True,False,False,False,False,...,0.970969,1.287248,1.083123,0.918473,0.98,1.01,0.99,0.000765,1.0,0.000916
303,False,True,False,False,True,True,False,False,False,False,...,0.967493,1.869489,1.321055,0.713698,1.00,1.01,0.99,0.000765,1.0,0.001219


In [5]:
for SR in df.columns[df.columns.str.contains("is_cut_sr_")]:
    print(f"{SR}: {df[df[SR]].evt_wgt_nominal.sum()}")

print("")
    
for SR in df.columns[df.columns.str.contains("is_bdt_sr_")]:
    print(f"{SR}: {df[df[SR]].evt_wgt_nominal.sum()}")

is_cut_sr_el: 0.0011716659646481276
is_cut_sr_mu: 0.001986859133467078
is_cut_sr_tau: 0.0
is_cut_sr_neg: 0.0028035766445100307
is_cut_sr_lgt: 0.009015710093080997

is_bdt_sr_el: 0.007203652989119291
is_bdt_sr_mu: 0.008498722687363625
is_bdt_sr_tau: 0.0
is_bdt_sr_neg: 0.0028035766445100307


In [6]:
cut_based_table = pd.DataFrame()
bdt_based_table = pd.DataFrame()

# b-tagging scale factor variations

In [7]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="btagsf_", sr_col_pattern="is_cut_sr_"
)
display(systs)
cut_based_table = append_systs(cut_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,cut_sr_el,cut_sr_lgt,cut_sr_mu,cut_sr_neg,cut_sr_tau
btagsf,3.5,13.8,18.6,4.4,


In [8]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="btagsf_", sr_col_pattern="is_bdt_sr_"
)
display(systs)
bdt_based_table = append_systs(bdt_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,bdt_sr_el,bdt_sr_mu,bdt_sr_neg,bdt_sr_tau
btagsf,9.6,9.2,4.4,


# Jet energy scale variations

In [9]:
if JEC_UP_SAMPLE and JEC_DN_SAMPLE:
    with uproot.open(JEC_UP_SAMPLE) as f:
        df_up = f.get("variable").pandas.df(
            branches=["is_ps*", "is_bdt_*", "is_cut_*", "LHE_*", "pu_rewgt*",
                      "lepsf*", "btagsf*", "trigsf*", "wgt", "genrewgt"]
        )
    # Compute nominal event weight
    df_up["evt_wgt_nominal"] = df_up.wgt*df_up.genrewgt*df_up.lepsf*df_up.btagsf*df_up.trigsf*df_up.pu_rewgt
    df_up[df_up.columns[df_up.columns.str.contains("is_")]] = df_up[df_up.columns[df_up.columns.str.contains("is_")]].astype(bool)

    with uproot.open(JEC_DN_SAMPLE) as f:
        df_dn = f.get("variable").pandas.df(
            branches=["is_ps*", "is_bdt_*", "is_cut_*", "LHE_*", "pu_rewgt*",
                      "lepsf*", "btagsf*", "trigsf*", "wgt", "genrewgt"]
        )
    # Compute nominal event weight
    df_dn["evt_wgt_nominal"] = df_dn.wgt*df_dn.genrewgt*df_dn.lepsf*df_dn.btagsf*df_dn.trigsf*df_dn.pu_rewgt
    df_dn[df_dn.columns[df_dn.columns.str.contains("is_")]] = df_dn[df_dn.columns[df_dn.columns.str.contains("is_")]].astype(bool)

In [10]:
if JEC_UP_SAMPLE and JEC_DN_SAMPLE:
    # Compute all systematics
    rows = []
    for SR in df.columns[df.columns.str.contains("is_cut_sr_")]:
        n_pass_jec_up = df_up[df_up[SR]].evt_wgt_nominal.sum()
        n_pass_jec_dn = df_dn[df_dn[SR]].evt_wgt_nominal.sum()
        n_pass_nom = df[df[SR]].evt_wgt_nominal.sum()
        rows.append({
            "region":SR.split("is_")[-1],
            "wgt_name": "jec_up",
            "n_pass_wgt": n_pass_jec_up,
            "n_pass_nom": n_pass_nom,
            "delta_percent": (1 - n_pass_jec_up/(n_pass_nom))*100
        })
        rows.append({
            "region":SR.split("is_")[-1],
            "wgt_name": "jec_dn",
            "n_pass_wgt": n_pass_jec_dn,
            "n_pass_nom": n_pass_nom,
            "delta_percent": (1 - n_pass_jec_dn/(n_pass_nom))*100
        })

    systs = pd.DataFrame(data=rows)
    # Designate how columns should be renamed
    renames = {
        "delta_percent": "jec", 
        "region": "systematics"
    }
    # Return only one row: max systematic for each SR
    row = (systs.groupby("region", as_index=False)[["delta_percent"]]
                .agg(func=lambda x: np.max(np.abs(x)))
                .round(1)
                .rename(columns=renames)
                .set_index("systematics")
                .transpose())
    display(row)

    cut_based_table = append_systs(cut_based_table, row)

  "delta_percent": (1 - n_pass_jec_up/(n_pass_nom))*100
  "delta_percent": (1 - n_pass_jec_dn/(n_pass_nom))*100


systematics,cut_sr_el,cut_sr_lgt,cut_sr_mu,cut_sr_neg,cut_sr_tau
jec,100.0,56.1,0.1,0.0,


In [11]:
if JEC_UP_SAMPLE and JEC_DN_SAMPLE:
    # Compute all systematics
    rows = []
    for SR in df.columns[df.columns.str.contains("is_bdt_sr_")]:
        n_pass_jec_up = df_up[df_up[SR]].evt_wgt_nominal.sum()
        n_pass_jec_dn = df_dn[df_dn[SR]].evt_wgt_nominal.sum()
        n_pass_nom = df[df[SR]].evt_wgt_nominal.sum()
        rows.append({
            "region":SR.split("is_")[-1],
            "wgt_name": "jec_up",
            "n_pass_wgt": n_pass_jec_up,
            "n_pass_nom": n_pass_nom,
            "delta_percent": (1 - n_pass_jec_up/(n_pass_nom))*100
        })
        rows.append({
            "region":SR.split("is_")[-1],
            "wgt_name": "jec_dn",
            "n_pass_wgt": n_pass_jec_dn,
            "n_pass_nom": n_pass_nom,
            "delta_percent": (1 - n_pass_jec_dn/(n_pass_nom))*100
        })

    systs = pd.DataFrame(data=rows)
    # Designate how columns should be renamed
    renames = {
        "delta_percent": "jec", 
        "region": "systematics"
    }
    # Return only one row: max systematic for each SR
    row = (systs.groupby("region", as_index=False)[["delta_percent"]]
                .agg(func=lambda x: np.max(np.abs(x)))
                .round(1)
                .rename(columns=renames)
                .set_index("systematics")
                .transpose())
    display(row)

    bdt_based_table = append_systs(bdt_based_table, row)

  "delta_percent": (1 - n_pass_jec_up/(n_pass_nom))*100
  "delta_percent": (1 - n_pass_jec_dn/(n_pass_nom))*100


systematics,bdt_sr_el,bdt_sr_mu,bdt_sr_neg,bdt_sr_tau
jec,16.3,18.9,44.7,


# PU reweighting

In [12]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="pu_rewgt_", sr_col_pattern="is_cut_sr_"
)
display(systs)
cut_based_table = append_systs(cut_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,cut_sr_el,cut_sr_lgt,cut_sr_mu,cut_sr_neg,cut_sr_tau
purewgt,17.3,1.1,8.3,8.3,


In [13]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="pu_rewgt_", sr_col_pattern="is_bdt_sr_"
)
display(systs)
bdt_based_table = append_systs(bdt_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,bdt_sr_el,bdt_sr_mu,bdt_sr_neg,bdt_sr_tau
purewgt,13.2,6.9,8.3,


# LHE scale weight variations

In [14]:
# LHE_scale_wgt_sums = {}
# with open("LHE_scale_wgt_sums.txt", "r") as f_in:
#     for line in f_in.readlines():
#         name, value = line.split()
#         LHE_scale_wgt_sums[name] = float(value)
        
# NOMINAL_N_TOTAL = LHE_scale_wgt_sums["LHE_muF1p0_muR0p5"] # mu_R does not effect our signal sample

# print(LHE_scale_wgt_sums)

In [15]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="LHE_mu", sr_col_pattern="is_cut_sr_"
)
display(systs)
cut_based_table = append_systs(cut_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,cut_sr_el,cut_sr_lgt,cut_sr_mu,cut_sr_neg,cut_sr_tau
LHEmu,11.4,9.3,9.2,6.3,


In [16]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="LHE_mu", sr_col_pattern="is_bdt_sr_"
)
display(systs)
bdt_based_table = append_systs(bdt_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,bdt_sr_el,bdt_sr_mu,bdt_sr_neg,bdt_sr_tau
LHEmu,10.0,17.0,6.3,


# PDF variations

In [17]:
# LHE_pdf_wgt_sums = {}
# with open("LHE_pdf_wgt_sums.txt", "r") as f_in:
#     print(line)
#     for line in f_in.readlines():
#         name, value = line.split()
#         LHE_pdf_wgt_sums[name] = float(value)

# print(LHE_pdf_wgt_sums)

In [18]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="LHE_pdf_wgt", sr_col_pattern="is_cut_sr_"
)
display(systs)
cut_based_table = append_systs(cut_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,cut_sr_el,cut_sr_lgt,cut_sr_mu,cut_sr_neg,cut_sr_tau
LHEpdfwgt,0.3,0.4,1.1,2.7,


In [19]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="LHE_pdf_wgt", sr_col_pattern="is_bdt_sr_"
)
display(systs)
bdt_based_table = append_systs(bdt_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,bdt_sr_el,bdt_sr_mu,bdt_sr_neg,bdt_sr_tau
LHEpdfwgt,0.3,0.5,2.7,


# Trigger scale factor variations

In [20]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="trigsf", sr_col_pattern="is_cut_sr_"
)
display(systs)
cut_based_table = append_systs(cut_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,cut_sr_el,cut_sr_lgt,cut_sr_mu,cut_sr_neg,cut_sr_tau
trigsf,1.0,1.0,1.0,2.0,


In [21]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="trigsf", sr_col_pattern="is_bdt_sr_"
)
display(systs)
bdt_based_table = append_systs(bdt_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,bdt_sr_el,bdt_sr_mu,bdt_sr_neg,bdt_sr_tau
trigsf,1.0,1.5,2.0,


# Lepton ID scale factor variations

In [22]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="lepsf", sr_col_pattern="is_cut_sr_"
)
display(systs)
cut_based_table = append_systs(cut_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,cut_sr_el,cut_sr_lgt,cut_sr_mu,cut_sr_neg,cut_sr_tau
lepsf,20.1,12.2,11.7,3.1,


In [23]:
systs = make_syst_table(
    df, nominal_wgt="evt_wgt_nominal", wgt_col_pattern="lepsf", sr_col_pattern="is_bdt_sr_"
)
display(systs)
bdt_based_table = append_systs(bdt_based_table, systs)

  "delta_percent": (1 - n_pass_wgt/(n_pass_nom))*100


systematics,bdt_sr_el,bdt_sr_mu,bdt_sr_neg,bdt_sr_tau
lepsf,13.1,4.7,3.1,


# Statistical uncertainty

In [24]:
with uproot.open(RUN2_NOMINAL_SAMPLE) as f:
    run2_df = f.get("variable").pandas.df(
        branches=["is_bdt_*", "is_cut_*", "pu_rewgt", "lepsf", "btagsf", "trigsf", "wgt", "genrewgt"]
    )
# Compute nominal event weight
run2_df["evt_wgt_nominal"] = run2_df.wgt*run2_df.genrewgt*run2_df.lepsf*run2_df.btagsf*run2_df.trigsf*run2_df.pu_rewgt

run2_df[run2_df.columns[run2_df.columns.str.contains("is_")]] = run2_df[run2_df.columns[run2_df.columns.str.contains("is_")]].astype(bool)

run2_df

Unnamed: 0_level_0,is_bdt_sr_el,is_bdt_sr_mu,is_bdt_sr_tau,is_bdt_sr_neg,is_bdt_cr_el,is_bdt_cr_mu,is_bdt_cr_tau,is_bdt_cr_neg,is_bdt_sr2_el,is_bdt_sr2_mu,...,is_cut_cr2,is_cut_sm_sr2,is_cut_c3_sr2,pu_rewgt,lepsf,btagsf,trigsf,wgt,genrewgt,evt_wgt_nominal
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,False,False,False,False,False,False,False,False,False,False,...,True,True,True,0.921167,0.974846,0.968175,0.99,0.000489,1.0,0.000421
1,False,False,False,False,False,False,False,False,False,False,...,True,True,True,0.998717,0.919098,0.453545,1.00,0.000489,1.0,0.000204
2,False,False,False,False,True,False,False,False,False,False,...,True,True,True,1.051490,0.880283,0.875462,1.00,0.000489,1.0,0.000396
3,False,False,False,False,False,False,False,False,False,False,...,True,True,True,1.075390,0.974846,0.957826,0.99,0.000489,1.0,0.000486
4,False,False,False,False,False,False,False,False,False,False,...,True,True,True,1.217420,0.975401,0.910487,0.99,0.000489,1.0,0.000524
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
904,False,False,False,False,False,False,False,False,False,False,...,True,True,True,1.081840,0.808522,0.855898,1.00,0.000765,1.0,0.000573
905,False,False,False,False,False,False,False,False,False,False,...,True,True,True,0.990842,0.865011,1.294906,1.00,0.000765,1.0,0.000849
906,False,False,False,False,False,False,False,True,False,False,...,True,True,True,0.980164,0.968090,1.287248,0.98,0.000765,1.0,0.000916
907,False,False,False,False,False,False,False,False,False,False,...,True,True,True,1.109820,0.767616,1.869489,1.00,0.000765,1.0,0.001219


In [25]:
for SR in run2_df.columns[run2_df.columns.str.contains("is_cut_sr_")]:
    count = run2_df[run2_df[SR]].evt_wgt_nominal.sum()
    error = np.sqrt(np.sum(run2_df[run2_df[SR]].evt_wgt_nominal**2))
    print(f"{SR}: {count} +- {error}")

print("")
    
for SR in run2_df.columns[run2_df.columns.str.contains("is_bdt_sr_")]:
    count = run2_df[run2_df[SR]].evt_wgt_nominal.sum()
    error = np.sqrt(np.sum(run2_df[run2_df[SR]].evt_wgt_nominal**2))
    print(f"{SR}: {count} +- {error}")

is_cut_sr_el: 0.0036519570276141167 +- 0.002743108430877328
is_cut_sr_mu: 0.0032217875123023987 +- 0.002339371247217059
is_cut_sr_tau: 0.0 +- 0.0
is_cut_sr_neg: 0.004015718586742878 +- 0.0029347927775233984
is_cut_sr_lgt: 0.0115872947499156 +- 0.005079356487840414

is_bdt_sr_el: 0.0086275115609169 +- 0.004392529372125864
is_bdt_sr_mu: 0.014018497429788113 +- 0.004982937127351761
is_bdt_sr_tau: 0.0 +- 0.0
is_bdt_sr_neg: 0.004403216764330864 +- 0.0029602639842778444


In [26]:
def get_stats(df, sr_col_pattern, nom_wgt):
    rows = []
    for SR in df.columns[df.columns.str.contains(sr_col_pattern)]:
        in_SR = df[SR]
        count = np.sum(df[in_SR][nom_wgt])
        error = np.sqrt(np.sum(df[in_SR][nom_wgt]**2))
        rows.append({
            "region":SR.split("is_")[-1],
            "count": count,
            "error": error,
            "delta_percent": (error/count)*100
        })
            
    return pd.DataFrame(data=rows)

def make_stat_table(df, sr_col_pattern, nominal_wgt="", debug=False):
    # Compute all systematics
    stats = get_stats(df, sr_col_pattern, nominal_wgt)
    if debug:
        display(stats)
    # Designate how columns should be renamed
    renames = {
        "delta_percent": "statunc", 
        "region": "systematics"
    }
    # Return only one row: max systematic for each SR
    return (stats.groupby("region", as_index=False)[["delta_percent"]]
                 .agg(func=lambda x: np.max(np.abs(x)))
                 .round(1)
                 .rename(columns=renames)
                 .set_index("systematics")
                 .transpose())

In [27]:
stats = make_stat_table(run2_df, "is_cut_sr_", nominal_wgt="evt_wgt_nominal", debug=True)
display(stats)
cut_based_table = append_systs(cut_based_table, stats)

  "delta_percent": (error/count)*100


Unnamed: 0,region,count,error,delta_percent
0,cut_sr_el,0.003652,0.002743,75.11338
1,cut_sr_mu,0.003222,0.002339,72.610974
2,cut_sr_tau,0.0,0.0,
3,cut_sr_neg,0.004016,0.002935,73.082632
4,cut_sr_lgt,0.011587,0.005079,43.835568


systematics,cut_sr_el,cut_sr_lgt,cut_sr_mu,cut_sr_neg,cut_sr_tau
statunc,75.1,43.8,72.6,73.1,


In [28]:
stats = make_stat_table(run2_df, "is_bdt_sr_", nominal_wgt="evt_wgt_nominal", debug=True)
display(stats)
bdt_based_table = append_systs(bdt_based_table, stats)

  "delta_percent": (error/count)*100


Unnamed: 0,region,count,error,delta_percent
0,bdt_sr_el,0.008628,0.004393,50.913054
1,bdt_sr_mu,0.014018,0.004983,35.545444
2,bdt_sr_tau,0.0,0.0,
3,bdt_sr_neg,0.004403,0.00296,67.229575


systematics,bdt_sr_el,bdt_sr_mu,bdt_sr_neg,bdt_sr_tau
statunc,50.9,35.5,67.2,


# Final tables

In [29]:
cut_based_table = cut_based_table[["cut_sr_el", "cut_sr_mu", "cut_sr_tau", "cut_sr_neg", "cut_sr_lgt"]]
display(cut_based_table)
print(cut_based_table.to_latex())

systematics,cut_sr_el,cut_sr_mu,cut_sr_tau,cut_sr_neg,cut_sr_lgt
btagsf,3.5,18.6,,4.4,13.8
jec,100.0,0.1,,0.0,56.1
purewgt,17.3,8.3,,8.3,1.1
LHEmu,11.4,9.2,,6.3,9.3
LHEpdfwgt,0.3,1.1,,2.7,0.4
trigsf,1.0,1.0,,2.0,1.0
lepsf,20.1,11.7,,3.1,12.2
statunc,75.1,72.6,,73.1,43.8


\begin{tabular}{lrrrrr}
\toprule
systematics &  cut\_sr\_el &  cut\_sr\_mu &  cut\_sr\_tau &  cut\_sr\_neg &  cut\_sr\_lgt \\
\midrule
btagsf    &        3.5 &       18.6 &         NaN &         4.4 &        13.8 \\
jec       &      100.0 &        0.1 &         NaN &         0.0 &        56.1 \\
purewgt   &       17.3 &        8.3 &         NaN &         8.3 &         1.1 \\
LHEmu     &       11.4 &        9.2 &         NaN &         6.3 &         9.3 \\
LHEpdfwgt &        0.3 &        1.1 &         NaN &         2.7 &         0.4 \\
trigsf    &        1.0 &        1.0 &         NaN &         2.0 &         1.0 \\
lepsf     &       20.1 &       11.7 &         NaN &         3.1 &        12.2 \\
statunc   &       75.1 &       72.6 &         NaN &        73.1 &        43.8 \\
\bottomrule
\end{tabular}



In [30]:
aggs = cut_based_table.transpose().agg(["min", "max"]).transpose()
aggs["min_to_max"] = aggs["min"].astype(str) + "%-" + aggs["max"].astype(str) + "%"
print(aggs[["min_to_max"]].to_latex())

\begin{tabular}{ll}
\toprule
{} &   min\_to\_max \\
\midrule
btagsf    &   3.5\%-18.6\% \\
jec       &  0.0\%-100.0\% \\
purewgt   &   1.1\%-17.3\% \\
LHEmu     &   6.3\%-11.4\% \\
LHEpdfwgt &    0.3\%-2.7\% \\
trigsf    &    1.0\%-2.0\% \\
lepsf     &   3.1\%-20.1\% \\
statunc   &  43.8\%-75.1\% \\
\bottomrule
\end{tabular}



In [31]:
bdt_based_table = bdt_based_table[["bdt_sr_el", "bdt_sr_mu", "bdt_sr_tau", "bdt_sr_neg"]]
display(bdt_based_table)
print(bdt_based_table.to_latex())

systematics,bdt_sr_el,bdt_sr_mu,bdt_sr_tau,bdt_sr_neg
btagsf,9.6,9.2,,4.4
jec,16.3,18.9,,44.7
purewgt,13.2,6.9,,8.3
LHEmu,10.0,17.0,,6.3
LHEpdfwgt,0.3,0.5,,2.7
trigsf,1.0,1.5,,2.0
lepsf,13.1,4.7,,3.1
statunc,50.9,35.5,,67.2


\begin{tabular}{lrrrr}
\toprule
systematics &  bdt\_sr\_el &  bdt\_sr\_mu &  bdt\_sr\_tau &  bdt\_sr\_neg \\
\midrule
btagsf    &        9.6 &        9.2 &         NaN &         4.4 \\
jec       &       16.3 &       18.9 &         NaN &        44.7 \\
purewgt   &       13.2 &        6.9 &         NaN &         8.3 \\
LHEmu     &       10.0 &       17.0 &         NaN &         6.3 \\
LHEpdfwgt &        0.3 &        0.5 &         NaN &         2.7 \\
trigsf    &        1.0 &        1.5 &         NaN &         2.0 \\
lepsf     &       13.1 &        4.7 &         NaN &         3.1 \\
statunc   &       50.9 &       35.5 &         NaN &        67.2 \\
\bottomrule
\end{tabular}



In [32]:
aggs = bdt_based_table.transpose().agg(["min", "max"]).transpose()
aggs["min_to_max"] = aggs["min"].astype(str) + "%-" + aggs["max"].astype(str) + "%"
print(aggs[["min_to_max"]].to_latex())

\begin{tabular}{ll}
\toprule
{} &   min\_to\_max \\
\midrule
btagsf    &    4.4\%-9.6\% \\
jec       &  16.3\%-44.7\% \\
purewgt   &   6.9\%-13.2\% \\
LHEmu     &   6.3\%-17.0\% \\
LHEpdfwgt &    0.3\%-2.7\% \\
trigsf    &    1.0\%-2.0\% \\
lepsf     &   3.1\%-13.1\% \\
statunc   &  35.5\%-67.2\% \\
\bottomrule
\end{tabular}

