In [2]:
import awkward as ak
import matplotlib.pyplot as plt
import os, sys
import subprocess
import json
import uproot
from coffea.nanoevents import NanoEventsFactory #, ScoutingNanoAODSchema
from coffea.lookup_tools.lookup_base import lookup_base
import numpy as np
from coffea import processor, util
from hist import Hist
import hist
from coffea.analysis_tools import Weights, PackedSelection
from collections import defaultdict
import mplhep
import pickle
plt.style.use(mplhep.style.CMS)

## MC

In [3]:
# Processor outputs to merge for each MC campaign; file names are relative to
# outfiles/<era>/fulllumi/.
# NOTE(review): the Run3Summer22 TTtoLNu2Q file name carries an extra "isoTrack"
# tag that the Run3Summer22EE one does not -- confirm this is intended.
run3summer22 = [
    "nminus1_QCD_jetId_btag0p1.coffea",
    "nminus1_TTtoLNu2Q_isoTrack_jetId_btag0p1.coffea"
]
run3summer22ee = [
    "nminus1_QCD_jetId_btag0p1.coffea",
    "nminus1_TTtoLNu2Q_jetId_btag0p1.coffea"
]
era = "Run3Summer22EE"

if "EE" in era:
    infiles = run3summer22ee
else:
    infiles = run3summer22

# Merged result: 'templates' is the sum of every file's 'cutflow' histogram and
# 'sumw' maps each dataset name to its accumulated sum of weights.
# (A plain dict: the original `defaultdict()` had no default factory, so it
# behaved exactly like one.)
outsum = {}

started = 0  # number of input files merged so far
for file in infiles:

    filename = f"outfiles/{era}/fulllumi/"+file

    print("Loading "+filename)

    if not os.path.isfile(filename):
        # Keep the best-effort behaviour (skip), but say so instead of
        # silently dropping the sample.
        print("WARNING: "+filename+" not found, skipping")
        continue

    # util.load returns a sequence here; element 0 is the accumulator dict
    # that holds the 'cutflow' and 'sumw' entries used below.
    out = util.load(filename)[0]

    if started == 0:
        outsum['templates'] = out['cutflow']
        outsum['sumw'] = out['sumw']
    else:
        outsum['templates'] += out['cutflow']
        # Accumulate rather than overwrite: the histograms are added above,
        # so a dataset present in several files must have its sum of weights
        # added as well or its normalisation would be inconsistent.
        for k, v in out['sumw'].items():
            outsum['sumw'][k] = outsum['sumw'].get(k, 0) + v
    started += 1
    del out

if started == 0:
    # Nothing was merged, so outsum has no 'templates'/'sumw' entries and the
    # steps that follow will fail with a KeyError.
    print(f"WARNING: no input files were loaded for era {era}")

# Integrated luminosity per era.
# NOTE(review): presumably in pb^-1 so that it combines with the cross
# sections below -- confirm the units.
lumis = {
    "Run3Summer22" : 3820.82926407,
    "Run3Summer22EE" : 3582.31000642,
}

# Extra per-era multiplicative factor applied on top of the luminosity.
# NOTE(review): the origin of 0.478... for Run3Summer22 is not visible here.
scale = {
    "Run3Summer22" : 0.47837295482,
    "Run3Summer22EE" : 1,
}

# Cross section per dataset (presumably in pb -- confirm).
xs = {
    'TTtoLNu2Q' : 762.1,
    'QCD_PT-120to170' : 4.418e+05,
    'QCD_PT-170to300' : 1.125e+05,
    'QCD_PT-300to470' : 7.574e+03,
    'QCD_PT-470to600' : 6.271e+02,
    'QCD_PT-600to800': 1.795e+02,
    'QCD_PT-800to1000': 3.101e+01,
    'QCD_PT-1000to1400': 8.992e+00,
    'QCD_PT-1400to1800': 8.071e-01,
    'QCD_PT-1800to2400' : 1.167e-01,
    'QCD_PT-2400to3200': 7.624e-03,
    # Was `2.313e-0` (= 2.313), i.e. larger than the three preceding bins:
    # a truncated exponent. QCD cross sections fall steeply with pT, so the
    # value must lie below the 2400to3200 bin.
    # NOTE(review): 2.313e-04 restores the expected ordering; confirm the
    # exact number against the cross-section database.
    'QCD_PT-3200': 2.313e-04,
}

# Fail with an explicit message (same exception type as the bare lookup would
# raise) when a loaded dataset has no cross section.
missing_xs = sorted(set(outsum['sumw']) - set(xs))
if missing_xs:
    raise KeyError(f"No cross section defined for dataset(s): {missing_xs}")

# Per-dataset weight: cross section * luminosity * era factor / sum of weights.
scale_lumi = {k: xs[k] * lumis[era] * scale[era] / w for k, w in outsum['sumw'].items()}

# Scale every slice of the 'dataset' axis in place, flow bins included.
# Indexing the view with [i] relies on 'dataset' being the histogram's first axis.
for i, name in enumerate(outsum["templates"].axes["dataset"]):
    outsum["templates"].view(flow=True)[i] *= scale_lumi[name]

def group(h: hist.Hist, oldname: str, newname: str, grouping: dict):
    """Merge categories of one axis of ``h`` into coarser groups.

    Parameters
    ----------
    h : hist.Hist
        Input histogram containing an axis named ``oldname``.
    oldname : str
        Name of the axis whose categories are merged.
    newname : str
        Name given to the replacement axis in the returned histogram.
    grouping : dict
        Maps each new category name to the list of ``oldname`` categories
        that are summed into it. Dict order fixes the new category order.

    Returns
    -------
    hist.Hist
        New histogram whose first axis is a ``StrCategory`` named ``newname``
        (one category per key of ``grouping``), followed by every axis of
        ``h`` except ``oldname``, using the same storage type as ``h``.
        ``h`` itself is not modified.
    """
    # Prefer the public accessor; fall back to the private attribute for
    # older boost-histogram releases that do not expose `storage_type`.
    storage_cls = getattr(h, "storage_type", None)
    if storage_cls is None:
        storage_cls = h._storage_type

    hnew = hist.Hist(
        hist.axis.StrCategory(grouping, name=newname),
        *(ax for ax in h.axes if ax.name != oldname),
        # Pass a storage instance rather than the class itself.
        storage=storage_cls(),
    )
    # The new category axis is axis 0, so view[i] is the full slice for the
    # i-th group: select that group's old categories, sum them, and copy the
    # result in with flow bins included.
    for i, indices in enumerate(grouping.values()):
        hnew.view(flow=True)[i] = h[{oldname: indices}][{oldname: sum}].view(flow=True)

    return hnew

# Datasets summed into each output process.
# NOTE(review): QCD_PT-120to170 and QCD_PT-170to300 have cross sections
# defined but are not part of the QCD group -- confirm this is deliberate.
grouping = {
    "TTtoLNu2Q": ["TTtoLNu2Q"],
    "QCD" : ["QCD_PT-470to600","QCD_PT-300to470","QCD_PT-600to800","QCD_PT-800to1000","QCD_PT-1000to1400","QCD_PT-1400to1800","QCD_PT-1800to2400","QCD_PT-2400to3200","QCD_PT-3200"]
}

# Collapse the per-dataset axis into a per-process axis.
output = group(outsum["templates"], "dataset", "process", grouping)

# The merged inputs are no longer needed once the grouped histogram exists.
del outsum

picklename = f"outfiles/{era}/fulllumi/mc.pkl"

# The context manager closes the file even if pickling raises.
with open(picklename, 'wb') as outfile:
    pickle.dump(output, outfile, protocol=-1)

Loading outfiles/Run3Summer22EE/fulllumi/nminus1_QCD_jetId_btag0p1.coffea
Loading outfiles/Run3Summer22EE/fulllumi/nminus1_TTtoLNu2Q_jetId_btag0p1.coffea


  return super().__getitem__(self._index_transform(index))


## Data

In [5]:
# Processor outputs to merge for each data-taking era; file names are relative
# to outfiles/<era>/fulllumi/.
run3summer22 = [
    "nminus1_Run2022C_jetId_btag0p1.coffea",
    "nminus1_Run2022D_jetId_btag0p1.coffea"
]
run3summer22ee = [
    "nminus1_Run3Summer22EE_HLT_Mu50_jetId_btag0p1.coffea",
]
era = "Run3Summer22EE"

if "EE" in era:
    infiles = run3summer22ee
else:
    infiles = run3summer22

# Merged result: 'templates' is the sum of every file's 'cutflow' histogram and
# 'sumw' maps each dataset name to its accumulated sum of weights.
# (A plain dict: the original `defaultdict()` had no default factory, so it
# behaved exactly like one.)
outsum = {}

started = 0  # number of input files merged so far
for file in infiles:

    filename = f"outfiles/{era}/fulllumi/"+file

    print("Loading "+filename)

    if not os.path.isfile(filename):
        # Keep the best-effort behaviour (skip), but say so instead of
        # silently dropping part of the dataset.
        print("WARNING: "+filename+" not found, skipping")
        continue

    # util.load returns a sequence here; element 0 is the accumulator dict
    # that holds the 'cutflow' and 'sumw' entries used below.
    out = util.load(filename)[0]

    if started == 0:
        outsum['templates'] = out['cutflow']
        outsum['sumw'] = out['sumw']
    else:
        outsum['templates'] += out['cutflow']
        # Accumulate rather than overwrite, so 'sumw' stays consistent with
        # the histograms, which are added above.
        for k, v in out['sumw'].items():
            outsum['sumw'][k] = outsum['sumw'].get(k, 0) + v
    started += 1
    del out

if started == 0:
    # Nothing was merged, so outsum has no 'templates' entry and the grouping
    # step that follows will fail with a KeyError.
    print(f"WARNING: no input files were loaded for era {era}")


# Per-era mapping from output process name to the datasets summed into it.
# NOTE(review): the only Run3Summer22EE input file is named
# "...Run3Summer22EE_HLT_Mu50..." while the dataset expected here is
# "Run2022E" -- confirm that matches the dataset label stored in the histogram.
grouping = {
    "Run3Summer22": {
        "Run3Summer22": ["Run2022C", "Run2022D"],
    },
    "Run3Summer22EE": {

        "Run3Summer22EE": ["Run2022E"],
    }
}

# Collapse the per-dataset axis into a single per-era process axis.
# `group` is defined earlier in the notebook (MC section).
output = group(outsum["templates"], "dataset", "process", grouping[era])

# The merged inputs are no longer needed once the grouped histogram exists.
del outsum

picklename = f"outfiles/{era}/fulllumi/data.pkl"

# The context manager closes the file even if pickling raises.
with open(picklename, 'wb') as outfile:
    pickle.dump(output, outfile, protocol=-1)

Loading outfiles/Run3Summer22EE/fulllumi/nminus1_Run3Summer22EE_HLT_Mu50_jetId_btag0p1.coffea
