In [1]:
import numpy as np
import awkward as ak
from coffea import processor
import json
import hist
from coffea.nanoevents import NanoEventsFactory, BaseSchema, PFNanoAODSchema
import coffea.nanoevents.methods.vector as vector
import warnings
import matplotlib.pyplot as plt
from lpcjobqueue import LPCCondorCluster
from distributed import Client
import fastjet

In [2]:
warnings.filterwarnings("ignore", "Found duplicate branch")
warnings.filterwarnings("ignore", "Missing cross-reference index for")
warnings.filterwarnings("ignore", "dcut")

In [3]:
# Start a Dask cluster backed by LPC HTCondor jobs and attach a client to it.
cluster = LPCCondorCluster(
    # 7340032000 == 7000 * 1024**2; presumably a per-worker memory request in
    # bytes -- TODO confirm against the lpcjobqueue / dask-jobqueue docs.
    memory='7340032000',
    # NOTE(review): user-specific absolute path; the "3DayLifetime" component
    # suggests short-lived scratch space -- confirm before relying on these logs.
    log_directory='/uscmst1b_scratch/lpc1/3DayLifetime/cjmoore/mylog',
    ship_env=True,
)
# Adaptive scaling between 0 and 75 workers.
cluster.adapt(maximum=75, minimum=0)
client = Client(cluster)

In [4]:
# Load the fileset description that is handed to the coffea runner.
# Presumably maps dataset names to their input files -- verify against the JSON.
# The encoding is given explicitly so decoding does not depend on the locale
# default of the machine running the notebook.
with open("qcd_hj_hbb.json", encoding="utf-8") as fileset_file:
    filesets = json.load(fileset_file)

In [5]:
class MyProcessor(processor.ProcessorABC):
    """Histogram the colour-ring observable of boosted, Higgs-matched fat jets.

    For every chunk of events, fat jets are kept when their ``pt`` exceeds
    ``PT_MIN`` and ``nearest`` finds a generator-level Higgs within
    ``HIGGS_MATCH_DR``.  The constituents of each selected jet are reclustered
    into up to three subjets, from which the colour ring is computed and
    filled into a one-dimensional histogram.
    """

    # Minimum fat-jet pt for the boosted selection
    # (presumably GeV, following NanoAOD conventions -- TODO confirm).
    PT_MIN = 300
    # Threshold handed to ``nearest`` when matching a fat jet to a gen-level Higgs.
    HIGGS_MATCH_DR = 0.4

    def __init__(self):
        """No per-instance configuration is needed."""
        pass

    @staticmethod
    def _color_ring(fatjets):
        """Compute the colour ring of each jet in ``fatjets``.

        Parameters
        ----------
        fatjets : awkward array
            Jagged (events x jets) collection exposing ``constituents.pf``.

        Returns
        -------
        awkward array
            One single-element list per jet (the ``keepdims=True`` of the
            arg-max is carried through) holding ``(a13**2 + a23**2) / a12**2``.
            ``leg1``/``leg2`` are the two subjets with the largest pair
            invariant mass, ``leg3`` is the remaining subjet, and ``aij`` is
            the opening angle between legs ``i`` and ``j``.  The value is
            missing when a jet yields fewer than three subjets.
        """
        jet_definition = fastjet.JetDefinition(fastjet.antikt_algorithm, 1.0)
        # Merge the event and jet axes: one list of PF candidates per jet.
        pf_candidates = ak.flatten(fatjets.constituents.pf, axis=1)
        sequence = fastjet.ClusterSequence(pf_candidates, jet_definition)
        # Recluster everything into a single jet, then undo up to three
        # clustering steps to obtain its subjets.
        subjets = sequence.exclusive_subjets_up_to(
            data=sequence.exclusive_jets(n_jets=1), nsub=3
        )
        vec = ak.zip(
            {
                "x": subjets.px,
                "y": subjets.py,
                "z": subjets.pz,
                "t": subjets.E,
            },
            with_name="LorentzVector",
            behavior=vector.behavior,
        )
        # Pad to exactly three slots so the pair indexing below is well defined.
        vec = ak.pad_none(vec, 3)
        # Named "norm3", so presumably the 3-momentum magnitude -- TODO confirm
        # that ``dot`` is the spatial dot product for LorentzVector in the
        # installed coffea version.
        vec["norm3"] = np.sqrt(vec.dot(vec))

        # With three entries the pairs come out as (0,1), (0,2), (1,2).
        first, second = ak.unzip(ak.combinations(vec, 2))
        best = ak.argmax((first + second).mass, axis=1, keepdims=True)
        leg1, leg2 = first[best], second[best]
        # Pair index 0/1/2 leaves out subjet 2/1/0 respectively.
        leg3 = vec[2 - best]

        a12 = np.arccos(leg1.dot(leg2) / (leg1.norm3 * leg2.norm3))
        a13 = np.arccos(leg1.dot(leg3) / (leg1.norm3 * leg3.norm3))
        a23 = np.arccos(leg2.dot(leg3) / (leg2.norm3 * leg3.norm3))
        return (a13**2 + a23**2) / (a12**2)

    def process(self, events):
        """Select boosted Higgs-matched fat jets and histogram their colour ring.

        Parameters
        ----------
        events : NanoEvents array
            One chunk of events; ``events.metadata['dataset']`` names the
            dataset the chunk belongs to.

        Returns
        -------
        dict
            ``{dataset: {"entries": <number of events in the chunk>,
            "Color_Ring": <hist.Hist>}}``.
        """
        dataset = events.metadata['dataset']
        fatjet = events.FatJet

        # Generator-level Higgs bosons (pdgId 25): last copy from the hard process.
        genhiggs = events.GenPart[
            (events.GenPart.pdgId == 25)
            & events.GenPart.hasFlags(["fromHardProcess", "isLastCopy"])
        ]
        # ``nearest`` yields a missing entry for jets with no Higgs inside the threshold.
        matched_higgs = fatjet.nearest(genhiggs, threshold=self.HIGGS_MATCH_DR)
        has_higgs_match = ~ak.is_none(matched_higgs, axis=1)
        boosted_fatjet = fatjet[(fatjet.pt > self.PT_MIN) & has_higgs_match]

        # The helper works per jet; restore the per-event jagged structure.
        ring_per_jet = ak.flatten(self._color_ring(boosted_fatjet))
        boosted_fatjet['color_ring'] = ak.unflatten(
            ring_per_jet, counts=ak.num(boosted_fatjet)
        )

        hcr = (
            hist.Hist.new
            .Reg(40, 0, 10, name='color_ring', label='Color_Ring')
            .Double()
        )

        # NOTE(review): jets without a computable colour ring are filled as 0
        # and therefore land in the first bin -- confirm this is intended
        # rather than dropping them.
        fill_cr = ak.fill_none(ak.flatten(boosted_fatjet.color_ring), 0)
        hcr.fill(color_ring=fill_cr)

        return {
            dataset: {
                "entries": len(events),
                "Color_Ring": hcr,
            }
        }

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged.

        The ``ProcessorABC`` contract is to return the accumulator after any
        final processing; returning ``None`` would lose the result for any
        caller that uses the return value.
        """
        return accumulator

In [6]:
# Build the processor once and hand that same instance to the runner
# (previously this instance was created but a second one was passed instead).
processor_instance = MyProcessor()
futures_run = processor.Runner(
    # For a local run without the Condor cluster, swap in:
    #   processor.FuturesExecutor(compression=None, workers=8)
    executor=processor.DaskExecutor(client=client),
    schema=PFNanoAODSchema,
)

# Process the "Events" tree of every file listed in `filesets`.
out = futures_run(
    filesets,
    "Events",
    processor_instance=processor_instance,
)
out

[########################################] | 100% Completed | 20min 19.7s

{'QCD_Pt_470to600_TuneCP5_13TeV_pythia8': {'entries': 20178000,
  'Color_Ring': Hist(Regular(40, 0, 10, name='color_ring', label='Color_Ring'), storage=Double())},
 'HJ': {'entries': 2392373,
  'Color_Ring': Hist(Regular(40, 0, 10, name='color_ring', label='Color_Ring'), storage=Double()) # Sum: 545812.0 (550403.0 with flow)},
 'Hbb': {'entries': 494000,
  'Color_Ring': Hist(Regular(40, 0, 10, name='color_ring', label='Color_Ring'), storage=Double()) # Sum: 90811.0 (91454.0 with flow)},
 'QCD_Pt_1400to1800_TuneCP5_13TeV_pythia8': {'entries': 10978000,
  'Color_Ring': Hist(Regular(40, 0, 10, name='color_ring', label='Color_Ring'), storage=Double())},
 'QCD_Pt_3200toInf_TuneCP5_13TeV_pythia8': {'entries': 643000,
  'Color_Ring': Hist(Regular(40, 0, 10, name='color_ring', label='Color_Ring'), storage=Double())},
 'QCD_Pt_2400to3200_TuneCP5_13TeV_pythia8': {'entries': 2427000,
  'Color_Ring': Hist(Regular(40, 0, 10, name='color_ring', label='Color_Ring'), storage=Double())},
 'QCD_Pt_1800t

In [None]:
# Inspect the type of `out`, the result returned by the runner call.
type(out)

In [7]:
import pickle

# Persist the runner result (`out`) so it can be reloaded without re-running
# the processing job.
with open('color_ring_output_w_higgs_parent_selection.pkl', mode='wb') as pickle_sink:
    pickle.dump(out, pickle_sink)

In [None]:
import awkward as ak