In [1]:
import json
import dask
import dask_awkward as dak
import awkward as ak
import numpy as np
from coffea import dataset_tools
from coffea.nanoevents import NanoEventsFactory, PFNanoAODSchema
import fastjet
import time
import os
import warnings
from variable_functions import *
import scipy

Issue: coffea.nanoevents.methods.vector will be removed and replaced with scikit-hep vector. Nanoevents schemas internal to coffea will be migrated. Otherwise please consider using that package!.
  import coffea.nanoevents.methods.vector as vector


In [2]:
warnings.filterwarnings("ignore", "Found duplicate branch")
warnings.filterwarnings("ignore", "Missing cross-reference index for")
warnings.filterwarnings("ignore", "dcut")
warnings.filterwarnings("ignore", "Please ensure")
warnings.filterwarnings("ignore", "invalid value")

In [3]:
# os.listdir() returns entries in arbitrary order, so sort before slicing to make
# the choice of "first file" reproducible across runs and filesystems.
files = sorted(os.listdir("/cms/cephfs/data/store/user/cmoore24/samples/qcd/470to600"))[:1]

In [6]:
# Open the '/Events' tree of the first listed file in delayed mode with the
# PFNanoAOD schema, then keep only the first 100 events.
# NOTE(review): the metadata labels the dataset 'Hgg' while the path points at a
# qcd/470to600 sample — confirm the label is intended.
events = NanoEventsFactory.from_root(
    {f'/cms/cephfs/data/store/user/cmoore24/samples/qcd/470to600/{files[0]}': '/Events'},
    delayed=True,
    schemaclass=PFNanoAODSchema,
    metadata={'dataset':'Hgg'},
).events()[:100]

In [23]:
# Quick inspection: materialize the first 10 entries of SV.ndof.
events.SV.ndof[:10].compute()

In [5]:
# Load the trigger table; it is later indexed by a year key (triggers['2017']).
# The encoding is given explicitly so the read does not depend on the locale default.
with open('triggers.json', 'r', encoding='utf-8') as f:
    triggers = json.load(f)

In [6]:
def apply_selections(events, region, trigger, goodmuon, pdgid=None, is_wz=False):
    """Build the boolean FatJet selection mask.

    A jet passes when 450 <= pt <= 1000, |eta| <= 2.4, 40 <= msoftdrop <= 200,
    its ``btag_count`` is 0, and both ``region`` and ``trigger`` are true.

    Parameters
    ----------
    events : object exposing ``FatJet`` (and ``GenPart`` when gen matching is requested).
    region : boolean mask AND-ed into the selection.
    trigger : boolean mask AND-ed into the selection.
    goodmuon : muon mask; currently unused because the muon-overlap veto below
        is commented out. Kept so existing callers keep working.
    pdgid : optional absolute PDG id. When given, jets must additionally have a
        matching GenPart with that |pdgId| within the ``nearest`` threshold of 0.2.
    is_wz : when true, match against |pdgId| == 24 or pdgId == 23 instead of ``pdgid``.

    Returns
    -------
    The combined boolean mask.
    """
    fatjetSelect = (
        (events.FatJet.pt >= 450)
        & (events.FatJet.pt <= 1000)
        & (abs(events.FatJet.eta) <= 2.4)
        & (events.FatJet.msoftdrop >= 40)
        & (events.FatJet.msoftdrop <= 200)
        & (region)
        # Muon-overlap veto, currently disabled:
        # & (ak.fill_none(events.FatJet.delta_r(events.FatJet.nearest(events.Muon[goodmuon], axis=1)) > 0.8, True))
        & (trigger)
        & (events.FatJet.btag_count == 0)
    )

    # No gen-level matching requested: the kinematic/region mask is the answer.
    if pdgid is None and not is_wz:
        return fatjetSelect

    # is_wz takes precedence over pdgid, as in the original branch order.
    if is_wz:
        species = (abs(events.GenPart.pdgId) == 24) | (events.GenPart.pdgId == 23)
    else:
        species = abs(events.GenPart.pdgId) == pdgid

    genparts = events.GenPart[
        species & events.GenPart.hasFlags(["fromHardProcess", "isLastCopy"])
    ]
    # Jets with no gen particle inside the threshold come back as None.
    parents = events.FatJet.nearest(genparts, threshold=0.2)
    matched_jets = ~ak.is_none(parents, axis=1)
    return fatjetSelect & matched_jets

In [7]:
def ecf_reorg(ecf_dict, jet_array):
    """Split multi-angle ECF arrays into one entry per angle, regrouped per event.

    Keys of ``ecf_dict`` are read as ``'e<N>^<beta>'``: the character at index 1
    is taken as the N-point order. For each supported order the output holds
    ``'<v><key>'`` entries for v = 1..number of angle columns:

    * N = 2: the value is used as-is (no column selection) under ``'1<key>'``.
    * N = 3, 4, 5: columns 0..2, 0..5 and 0..9 respectively are extracted.

    Keys with any other order character are skipped. Every extracted array is
    regrouped with ``ak.unflatten`` using the per-entry counts of ``jet_array``.

    Parameters
    ----------
    ecf_dict : mapping of key -> array of ECF values (one row per jet).
    jet_array : jagged array whose ``ak.num`` gives the jets per event.

    Returns
    -------
    dict mapping ``'<v><key>'`` to the regrouped array.
    """
    # Angle columns stored for each multi-column N-point order: N*(N-1)/2.
    columns_per_npoint = {'3': 3, '4': 6, '5': 10}
    # Loop-invariant, so compute the per-event jet multiplicity once.
    jets_per_event = ak.num(jet_array)

    output_dict = {}
    for key, values in ecf_dict.items():
        npoint = key[1]
        if npoint == '2':
            output_dict[f'1{key}'] = ak.unflatten(values, counts=jets_per_event)
        elif npoint in columns_per_npoint:
            for column in range(columns_per_npoint[npoint]):
                output_dict[f'{column + 1}{key}'] = ak.unflatten(
                    values[:, column], counts=jets_per_event
                )
    return output_dict

In [8]:
def ecf_reorg2(ecf_dict):
    """Split multi-angle ECF arrays into one entry per angle, without regrouping.

    The character at index 1 of each key is taken as the N-point order.
    Order '2' values are passed through unchanged under ``'1<key>'``. Orders
    '3', '4' and '5' contribute columns 0..2, 0..5 and 0..9 under
    ``'<v><key>'`` with v counting from 1. Any other order is ignored.
    """
    angle_columns = {'3': 3, '4': 6, '5': 10}
    output_dict = {}
    for name in ecf_dict:
        order = name[1]
        if order == '2':
            output_dict[f'1{name}'] = ecf_dict[name]
            continue
        # Unsupported orders map to zero columns and therefore add nothing.
        for idx in range(angle_columns.get(order, 0)):
            output_dict[f'{idx + 1}{name}'] = ecf_dict[name][:, idx]
    return output_dict

In [9]:
# Overwrite PFCands.pt with pt scaled by the per-candidate puppiWeight.
# NOTE(review): this reads and rewrites the same field, so re-running the cell on
# the same `events` object would presumably apply the weight a second time.
events['PFCands', 'pt'] = (
        events.PFCands.pt
        * events.PFCands.puppiWeight
    )

In [10]:
# Per FatJet: True when the jet has at least one PF constituent.
cut_to_fix_softdrop = (ak.num(events.FatJet.constituents.pf, axis=2) > 0)
# Keep only events in which every FatJet passes that check (the variable name
# suggests this works around a softdrop issue).
events = events[ak.all(cut_to_fix_softdrop, axis=1)]

In [11]:
# Start from an all-False flag shaped like the leading FatJet pt of each event,
# then OR in every path listed under triggers['2017'] that exists in this file.
trigger = ak.zeros_like(ak.firsts(events.FatJet.pt), dtype='bool')
for t in triggers['2017']:
    if t not in events.HLT.fields:
        # Path not present in this file's HLT branches; skip it.
        continue
    trigger = trigger | events.HLT[t]
# Any missing (None) entries count as "not triggered".
trigger = ak.fill_none(trigger, False)

In [12]:
# Attach the per-event FatJet count to the FatJet collection as 'num_fatjets'.
events['FatJet', 'num_fatjets'] = ak.num(events.FatJet)

In [13]:
# Per-muon mask: pt > 10, |eta| < 2.4, relative isolation below 0.25, loose ID.
goodmuon = (
    (events.Muon.pt > 10)
    & (abs(events.Muon.eta) < 2.4)
    & (events.Muon.pfRelIso04_all < 0.25) # invert the isolation cut; > 0.25, check for QCD (maybe try > 1.0)
    & events.Muon.looseId
)

# Number of good muons per event, and the first good muon (None if there is none).
nmuons = ak.sum(goodmuon, axis=1)
leadingmuon = ak.firsts(events.Muon[goodmuon])

# Per-electron mask: pt > 10, |eta| < 2.5, cutBased >= 2.
goodelectron = (
    (events.Electron.pt > 10)
    & (abs(events.Electron.eta) < 2.5)
    & (events.Electron.cutBased >= 2) #events.Electron.LOOSE
)
nelectrons = ak.sum(goodelectron, axis=1)

# Count taus per event that pass the kinematic/isolation/ID terms and are more
# than 0.4 (metric_table distance) away from every good muon and good electron.
# NOTE(review): idDeepTau2017v2p1VSjet is used directly as a mask term; if it is
# an integer bitmask rather than a boolean, `&` acts bitwise here — confirm the
# intended working point.
ntaus = ak.sum(
    (
        (events.Tau.pt > 20)
        & (abs(events.Tau.eta) < 2.3)
        & (events.Tau.rawIso < 5)
        & (events.Tau.idDeepTau2017v2p1VSjet)
        & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4, axis=2)
        & ak.all(events.Tau.metric_table(events.Electron[goodelectron]) > 0.4, axis=2)
    ),
    axis=1,
)

# Event-level region flags: no leptons at all.
nolepton = ((nmuons == 0) & (nelectrons == 0) & (ntaus == 0))

# Event-level region flags: exactly one good muon and no electrons or taus.
onemuon = ((nmuons == 1) & (nelectrons == 0) & (ntaus == 0))

In [14]:
# Use the zero-lepton selection as the analysis region.
region = nolepton

In [15]:
# Per event, count Jets with pt > 20 and |eta| < 2.4 whose btagDeepFlavB exceeds
# 0.3040, and store that count on the FatJet collection as 'btag_count'.
events['FatJet', 'btag_count'] = ak.sum(events.Jet[(events.Jet.pt > 20) & (abs(events.Jet.eta) < 2.4)].btagDeepFlavB > 0.3040, axis=1)
# Store the per-event trigger decision on the FatJet collection as well.
events['FatJet', 'trigger_mask'] = trigger

In [16]:
# Build the FatJet selection mask; pdgid and is_wz are left at their defaults,
# so no gen-level matching is applied.
fatjetSelect = apply_selections(events, region, trigger, goodmuon)
# When True, a later cell keeps only the index-0 selected jet of each event.
do_li = True

In [17]:
# Store the FatJets that pass the selection as a new 'goodjets' collection.
events["goodjets"] = events.FatJet[fatjetSelect]
# Drop events that have no selected jet (ak.firsts yields None for them).
mask = ~ak.is_none(ak.firsts(events.goodjets))
events = events[mask]
# Keep only events with fewer than 3 selected jets.
events = events[ak.num(events.goodjets)  < 3]

In [18]:
# If requested, keep only the jet at local index 0 in each event's goodjets list.
if do_li:
    events['goodjets'] = events.goodjets[(ak.local_index(events.goodjets, axis=1) == 0)]

In [19]:
# Jet definition using the Cambridge algorithm with parameter 1.0.
jetdef = fastjet.JetDefinition(
    fastjet.cambridge_algorithm, 1.0
)
# Merge the jet and constituent axes so each event contributes a single list of
# PF candidates taken from its selected jets, then build the cluster sequence.
pf = ak.flatten(events.goodjets.constituents.pf, axis=1)
cluster = fastjet.ClusterSequence(pf, jetdef)

In [30]:
# Compute generalized ECFs with all angle variants in one call per (n, beta),
# for n = 2, 3 and beta = 0.5, 1.0, ..., 4.0.
ungroomed_ecf_classes = {}
for n in (2, 3):
    # Number of angle variants for an n-point correlator: n*(n-1)/2.
    n_angles = n * (n - 1) // 2
    for b in range(5, 45, 5):
        ecf_class = f'e{n}^{b/10}'
        ecf_result = cluster.exclusive_jets_energy_correlator(
            func='generalized',
            npoint=n,
            beta=b/10,
            normalized=True,
            all_angles=True,
        )
        # Group the returned values into rows of n_angles entries each.
        ungroomed_ecf_classes[ecf_class] = ak.unflatten(ecf_result, counts=n_angles)
# Split each class into per-angle fields and regroup them per event.
ungroomed_ecfs = ecf_reorg(ungroomed_ecf_classes, events.goodjets)
events["ungroomed_ecfs_all"] = ak.zip(ungroomed_ecfs, depth_limit=1)

In [21]:
# Control computation: one exclusive_jets_energy_correlator call per
# (n, angle v, beta), for n = 2, 3 and beta = 0.5, 1.0, ..., 4.0.
ungroomed_ecfs = {}
# Loop-invariant: jets per event, used to regroup every flat result.
goodjet_counts = dak.num(events.goodjets)
for n in range(2, 4):
    # C(n, 2) angle variants, computed with exact integer arithmetic instead of
    # the float-valued scipy.special.binom (which also needs scipy.special loaded).
    n_angles = n * (n - 1) // 2
    for v in range(1, n_angles + 1):
        for b in range(5, 45, 5):
            ecf_name = f'{v}e{n}^{b/10}'
            ungroomed_ecfs[ecf_name] = ak.unflatten(
                cluster.exclusive_jets_energy_correlator(
                    func='generalized', npoint=n, angles=v, beta=b/10, normalized=True),
                counts=goodjet_counts,
            )
events["ungroomed_ecfs_control"] = ak.zip(ungroomed_ecfs, depth_limit=1)

In [24]:
# Materialize the control ECFs (one correlator call per angle).
a = events.ungroomed_ecfs_control.compute()

In [31]:
# Materialize the ECFs obtained with all_angles=True.
# Note: this rebinds `b`, which the ECF cells also use as a loop variable.
b = events.ungroomed_ecfs_all.compute()

In [32]:
# Display the '1e3^1.0' field from the control computation.
a['1e3^1.0']

In [33]:
# Display the '1e3^1.0' field from the all_angles computation.
b['1e3^1.0']