In [1]:
import uproot # uproot 3
import yahist
import json
import glob
import numpy as np
from utils import make_table_1D

In [2]:
def _clip_and_merge_tail(hist, clip_to):
    """Clip `hist` at `clip_to` and fold all clipped-off bins into the last kept bin.

    Parameters
    ----------
    hist : histogram object exposing `.counts`, `.restrict(high=...)` and a
        writable `._counts` attribute (here: `yahist.Hist1D`)
    clip_to : float
        Upper bound handed to `hist.restrict(high=clip_to)`

    Returns
    -------
    The restricted histogram whose last bin additionally holds the summed
    content of every bin of `hist` that was clipped off.

    Raises
    ------
    ValueError
        If no bin survives the clipping (nothing to merge the tail into).
    """
    clipped = hist.restrict(high=clip_to)
    n_kept = len(clipped.counts)
    if n_kept == 0:
        raise ValueError(
            f"clipping at {clip_to} leaves no bins; cannot merge the tail into a last bin"
        )
    # Copy first: a plain slice would be a view, and the in-place merge below
    # would then also modify the counts of the input histogram
    merged_counts = np.array(hist.counts[:n_kept])
    # Sum the *entire* tail, including the final bin
    merged_counts[-1] += hist.counts[n_kept:].sum()
    clipped._counts = merged_counts
    return clipped


def make_pu_func(jsons, years=[2016, 2017, 2018], func_name="pileUpReweight"):
    """Generate the C++ source of a per-year pileup reweighting lookup function.

    For every (data, MC) histogram pair the data histogram is divided by the MC
    histogram and the ratio is turned into a C++ lookup table with
    `make_table_1D`. If the MC histogram has an empty bin, both histograms are
    first clipped at the lower edge of the first empty MC bin and all bins
    beyond it are merged into the last remaining bin.

    Parameters
    ----------
    jsons : sequence of (data_json, mc_json) pairs
        Each element is passed to `yahist.Hist1D.from_json`; one pair per year.
    years : sequence of int
        Years used in the generated `if (year == ...)` branches, aligned
        with `jsons`. The default list is only read, never modified.
    func_name : str
        Name of the generated C++ function.

    Returns
    -------
    str or None
        The C++ function source, or None (after printing an error message)
        when `jsons` and `years` differ in length.
    """
    if len(jsons) != len(years):
        print("ERROR: check yo' inputs")
        return

    all_cpp = f"\nfloat {func_name}(int n_vtx, int year) {{\n"
    for year, (data_json, mc_json) in zip(years, jsons):
        # Grab histograms
        mc_hist = yahist.Hist1D.from_json(mc_json)
        data_hist = yahist.Hist1D.from_json(data_json)
        # Check MC histogram for zeros
        empty_mc_bins = np.flatnonzero(mc_hist.counts == 0)
        if empty_mc_bins.size > 0:
            # Lower bin edge of first MC bin with count == 0
            clip_to = mc_hist.edges[empty_mc_bins[0]]
            # Clip both histograms identically so they can still be divided
            mc_hist = _clip_and_merge_tail(mc_hist, clip_to)
            data_hist = _clip_and_merge_tail(data_hist, clip_to)
        # Divide data by MC hist
        sf_hist = data_hist.divide(mc_hist)
        # Generate C++ lookup function
        cpp = make_table_1D(
            sf_hist.counts,
            sf_hist.edges,
            x_name="n_vtx",
            x_type="int",
            overflow=True
        )
        # Indent C++ code
        cpp_lines = ["    " + line for line in cpp.split("\n")]
        # Keep only lines [2:-3] of the generated table code (presumably this
        # strips make_table_1D's own function wrapper -- verify against utils)
        # and wrap them in this year's branch
        all_cpp += "\n".join([f"    if (year == {year}) {{"] + cpp_lines[2:-3] + ["    }", ""])
    # Fallback for years without a table
    all_cpp += "    return 0.0;\n"
    all_cpp += "}\n"

    return all_cpp

def write_pu_tool(pu_funcs, writemode="w", name="pileUpReweight"):
    """Write generated C++ functions to `<name>.cc` and declare them in `<name>.h`.

    Parameters
    ----------
    pu_funcs : iterable of str
        C++ function definitions; each must start with a newline followed by
        the signature and " {" (the format produced by `make_pu_func`).
    writemode : str
        "w" creates both files from scratch (with `#include` line and include
        guard); "a" appends the definitions to the .cc file and inserts the
        declarations before the closing `#endif` of the existing header.
        Any other mode only touches the .cc file.
    name : str
        Base name (without extension) of the two output files.
    """
    def _write_declarations(header_stream):
        # One "<signature>;" line per function, then close the include guard
        for func_cpp in pu_funcs:
            signature = func_cpp.split(" {")[0].split("\n")[1]
            header_stream.write(f"{signature};\n")
        header_stream.write("\n#endif")

    # Source file: optional include line followed by the function bodies
    with open(f"{name}.cc", writemode) as source_stream:
        if writemode == "w":
            source_stream.write(f"#include \"{name}.h\"\n")
        for func_cpp in pu_funcs:
            source_stream.write(func_cpp)

    if writemode == "w":
        # Fresh header: include guard, declarations, #endif
        guard = f"{name.upper()}_H"
        with open(f"{name}.h", writemode) as header_stream:
            header_stream.write(f"#ifndef {guard}\n#define {guard}\n\n")
            _write_declarations(header_stream)
    elif writemode == "a":
        # Existing header: drop its last line (the old "#endif"), then rewrite
        # the file with the new declarations added before a fresh "#endif"
        with open(f"{name}.h", "r") as header_stream:
            kept_lines = header_stream.readlines()[:-1]
        with open(f"{name}.h", "w") as header_stream:
            header_stream.write("".join(kept_lines))
            _write_declarations(header_stream)

In [3]:
# Directory with the input ROOT files; the JSON dumps are written next to them
hist_dir = "data/pileup_scalefactors"


def _dump_normalized_hist(root_file, key, json_path):
    """Read histogram `key` from an open ROOT file, normalize it and dump it to `json_path`.

    Returns the normalized `yahist.Hist1D` that was written.
    """
    counts, bins = root_file.get(key).numpy()
    normalized = yahist.Hist1D.from_bincounts(counts, bins=bins).normalize()
    with open(json_path, "w") as json_file:
        json_file.write(normalized.to_json())
    return normalized


# MC pileup profiles: one "pu_mc" histogram per file
pu_mc_hists = [
    "mcPileupUL2016.root",
    "mcPileupUL2017.root",
    "mcPileupUL2018.root"
]
for root_name in pu_mc_hists:
    json_stem = f"{hist_dir}/{root_name.replace('.root', '')}"
    with uproot.open(f"{hist_dir}/{root_name}") as root_file:
        nominal_hist = _dump_normalized_hist(root_file, "pu_mc", f"{json_stem}.json")

# Data pileup profiles: nominal plus up/down variations per file
pu_data_hists = [
    "PileupHistogram-UL2016-100bins_withVar.root",
    "PileupHistogram-UL2017-100bins_withVar.root",
    "PileupHistogram-UL2018-100bins_withVar.root"
]
for root_name in pu_data_hists:
    json_stem = f"{hist_dir}/{root_name.replace('.root', '')}"
    with uproot.open(f"{hist_dir}/{root_name}") as root_file:
        nominal_hist = _dump_normalized_hist(root_file, "pileup", f"{json_stem}.json")
        up_hist = _dump_normalized_hist(root_file, "pileup_plus", f"{json_stem}_up.json")
        dn_hist = _dump_normalized_hist(root_file, "pileup_minus", f"{json_stem}_down.json")

In [4]:
# Years covered, in the order the generated C++ branches are emitted
ul_years = [2016, 2017, 2018]

# Get MC jsons. glob returns paths in arbitrary (filesystem-dependent) order,
# so sort them: the file names differ only by year, hence sorted == by year.
pu_mc_jsons = sorted(glob.glob(f"{hist_dir}/mcPileupUL*.json"))
# Get data nominal/up/down jsons, again sorted so each group is ordered by year
pu_data_jsons = sorted(glob.glob(f"{hist_dir}/PileupHistogram-UL*.json"))

# Classify the data jsons by file-name suffix rather than by list position:
# a sorted listing yields nominal, "_down", "_up" per year, so picking
# "up"/"down" positionally would silently swap the two variations.
ul_up_data = [path for path in pu_data_jsons if path.endswith("_up.json")]
ul_dn_data = [path for path in pu_data_jsons if path.endswith("_down.json")]
ul_nominal_data = [
    path for path in pu_data_jsons
    if not path.endswith(("_up.json", "_down.json"))
]

# Fail early (before any output file is touched) if a year or variation is missing
if not (len(pu_mc_jsons) == len(ul_nominal_data) == len(ul_up_data) == len(ul_dn_data) == len(ul_years)):
    raise RuntimeError(
        f"expected {len(ul_years)} MC and nominal/up/down data jsons in {hist_dir}, found "
        f"{len(pu_mc_jsons)} MC, {len(ul_nominal_data)} nominal, "
        f"{len(ul_up_data)} up, {len(ul_dn_data)} down"
    )

# Pair everything up as (data_json, mc_json), laid out year by year
# in the fixed order nominal, up, down
json_pairs = []
for mc_json, nominal_json, up_json, dn_json in zip(pu_mc_jsons, ul_nominal_data, ul_up_data, ul_dn_data):
    json_pairs += [(nominal_json, mc_json), (up_json, mc_json), (dn_json, mc_json)]

ul_nominal_jsons = json_pairs[0::3]
ul_up_jsons = json_pairs[1::3]
ul_dn_jsons = json_pairs[2::3]

ul_nominal_pu = make_pu_func(ul_nominal_jsons, func_name="pileUpReweightUL", years=ul_years)
ul_up_pu = make_pu_func(ul_up_jsons, func_name="pileUpReweightUpUL", years=ul_years)
ul_dn_pu = make_pu_func(ul_dn_jsons, func_name="pileUpReweightDownUL", years=ul_years)

write_pu_tool([ul_nominal_pu, ul_up_pu, ul_dn_pu], writemode="w")