In [6]:
import correctionlib
import hist
import awkward as ak
import numpy as np

from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.lookup_tools.correctionlib_wrapper import correctionlib_wrapper
from coffea.lookup_tools.dense_lookup import dense_lookup

import sys
sys.path.append("../")

from boostedhiggs.corrections import btagWPs

In [7]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

Load a ROOT file

In [8]:
# List the contents of ../rootfiles/ to see which samples are available locally.
! ls ../rootfiles/

[34mHWW[m[m   [34mQCD[m[m   [34mTTbar[m[m [34mWJets[m[m


In [9]:
# Open one file of the HWW sample with the NanoAOD schema, reading at most the
# first 100k entries. The path is relative to this notebook and points into the
# same ../rootfiles/ directory listed in the previous cell, so it does not depend
# on the name of the directory the repository was checked out into.
events = NanoEventsFactory.from_root(
    "../rootfiles/HWW/file1.root",
    entry_stop=100_000,
    schemaclass=NanoAODSchema,
).events()



# Compute efficiencies

In [15]:
year = "2017"
wp = "T"

# Baseline jet selection: pt / |eta| acceptance plus the tight-ID and pileup-ID flags.
in_acceptance = (events.Jet.pt > 30) & (abs(events.Jet.eta) < 5.0)
passes_id = events.Jet.isTight & (events.Jet.puId > 0)
goodjets = events.Jet[in_acceptance & passes_id]

# 2017 only: reject EE noisy jets, i.e. keep a jet only if it has pt > 50
# or lies outside the 2.65 <= |eta| <= 3.139 region.
if year == "2017":
    good_abseta = abs(goodjets.eta)
    goodjets = goodjets[(goodjets.pt > 50) | (good_abseta < 2.65) | (good_abseta > 3.139)]

# The histogram is filled per jet, so drop the per-event nesting.
jets = ak.flatten(goodjets)

# DeepJet discriminator threshold for the selected year and working point.
btag_threshold = btagWPs["deepJet"][year][wp]

# Jet counts binned in (pt, |eta|, hadron flavour, pass/fail of the working point).
# Jets outside the pt or |eta| ranges end up in the under/overflow bins.
efficiencyinfo = (
    hist.Hist.new
    .Reg(20, 40, 300, name="pt")
    .Reg(4, 0, 2.5, name="abseta")
    .IntCat([0, 4, 5], name="flavor")
    .Bool(name="passWP")
    .Double()
)
efficiencyinfo.fill(
    pt=jets.pt,
    abseta=abs(jets.eta),
    flavor=jets.hadronFlavour,
    passWP=jets.btagDeepFlavB > btag_threshold,
)
efficiencyinfo

Hist(
  Regular(20, 40, 300, name='pt'),
  Regular(4, 0, 2.5, name='abseta'),
  IntCategory([0, 4, 5], name='flavor'),
  Boolean(name='passWP'),
  storage=Double()) # Sum: 48122.0 (63042.0 with flow)

In [16]:
# Per-bin tagging efficiency: jets passing the working point divided by all jets
# in the same (pt, |eta|, flavor) bin. Bins without any jet evaluate to 0/0 = NaN.
eff = efficiencyinfo[{"passWP": True}] / efficiencyinfo[{"passWP": sum}]

# Replace the NaN of empty bins by 0 in the lookup table so that a query landing
# in an unpopulated bin cannot propagate NaN into downstream weights.
# `eff` itself is left untouched, so empty bins remain visible there.
eff_values = np.where(np.isnan(eff.values()), 0.0, eff.values())

# NOTE: the IntCategory axis reports bin-index edges [0, 1, 2, 3], so the lookup
# is addressed by category index (0, 1, 2) and not by hadronFlavour (0, 4, 5).
efflookup = dense_lookup(eff_values, [ax.edges for ax in eff.axes])
efflookup

3 dimensional histogram with axes:
	1: [ 40.  53.  66.  79.  92. 105. 118. 131. 144. 157. 170. 183. 196. 209.
 222. 235. 248. 261. 274. 287. 300.]
	2: [0.    0.625 1.25  1.875 2.5  ]
	3: [0. 1. 2. 3.]

In [17]:
# Display the efficiency histogram (passWP=True counts divided by all counts).
eff

Hist(
  Regular(20, 40, 300, name='pt'),
  Regular(4, 0, 2.5, name='abseta'),
  IntCategory([0, 4, 5], name='flavor'),
  storage=Double()) # Sum: nan (nan with flow)

In [18]:
# Display the axes (bin edges) of the dense lookup built from `eff`.
efflookup

3 dimensional histogram with axes:
	1: [ 40.  53.  66.  79.  92. 105. 118. 131. 144. 157. 170. 183. 196. 209.
 222. 235. 248. 261. 274. 287. 300.]
	2: [0.    0.625 1.25  1.875 2.5  ]
	3: [0. 1. 2. 3.]

In [19]:
# Efficiency at pt = 42 and 60 GeV, |eta| = 0.2, for flavor index 2.
# The flavor axis is addressed by category index, so 2 is the third category
# of [0, 4, 5], i.e. hadronFlavour 5 (b quark).
efflookup(np.array([42,60]), 0.2, 2)

array([0.46511628, 0.72413793])