In [1]:
import coffea
coffea.__version__

'0.7.23'

In [2]:
import uproot
from glob import glob
from coffea.nanoevents import NanoEventsFactory
import awkward as ak
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import mplhep
from coffea.analysis_tools import PackedSelection


mplhep.style.use(mplhep.style.CMS)

# below, for base_directory, just give it the path to where the .root file is you are working with
#SWAN work area:
base_directory = "/eos/user/d/dgrove/datasets/"

# sort Monte Carlo files here
files = sorted(glob(f"{base_directory}WZ_MC/combined.root"))

file = uproot.open(files[0])
print("Example file info:")
dict(file)

Example file info:


{'tag;2': <TObjString 'untagged' at 0x7fa030d71cf0>,
 'tag;1': <TObjString 'untagged' at 0x7fa030d71dd0>,
 'Events;1': <TTree 'Events' (1639 branches) at 0x7fa0484de640>,
 'LuminosityBlocks;1': <TTree 'LuminosityBlocks' (2 branches) at 0x7fa030d7e970>,
 'Runs;1': <TTree 'Runs' (8 branches) at 0x7fa030d7e580>,
 'MetaData;1': <TTree 'MetaData' (1 branches) at 0x7fa030d7ee50>,
 'ParameterSets;1': <TTree 'ParameterSets' (1 branches) at 0x7fa027367160>}

### If root file loads succesfully, proceed loading events tree into events variable:
also print the length (number of events)

In [3]:
events = NanoEventsFactory.from_root(files[0]).events()
len(events)



1371000

## Custodial Cuts

below is where we define our cuts for electrons and muons, the cuts are loaded as individual cuts (first block) or all of them in one (second block). I recommend using the "e selection" and "mu selection" cuts, it is easier to implement and it will guarentee you check the event for at least one electron or muon that passes **all** the cuts for the electron or muon respectively.

In [4]:
selection = PackedSelection()

#first block
selection.add("e pt", ak.any(events.Electron.pt >= 5, axis=1))
selection.add("e eta", ak.any(np.abs(events.Electron.eta) < 2.4, axis=1))
selection.add("e SIP3D", ak.any(events.Electron.sip3d < 8, axis=1))
selection.add("e dxy", ak.any(np.abs(events.Electron.dxy) < 0.05, axis=1))
selection.add("e dz", ak.any(np.abs(events.Electron.dz) < 0.1, axis=1))
selection.add("e miniPFRelIso", ak.any(events.Electron.miniPFRelIso_all < (20 + 300/events.Electron.pt), axis =1))

selection.add("mu pt", ak.any(events.Muon.pt >= 3, axis=1))
selection.add("mu eta", ak.any(np.abs(events.Muon.eta) < 2.4, axis=1))
selection.add("mu SIP3D", ak.any(events.Muon.sip3d < 8, axis=1))
selection.add("mu dxy", ak.any(np.abs(events.Muon.dxy) < 0.05, axis=1))
selection.add("mu dz", ak.any(np.abs(events.Muon.dz) < 0.1, axis=1))
selection.add("mu miniPFRelIso", ak.any(events.Muon.miniPFRelIso_all < (20 + 300/events.Muon.pt), axis=1))

# individual cuts above, all in one selection cuts below:
# please use cuts below if doing preselection cut lest you run into some logical bugs down the road

#second block
selection.add("e selection", ak.any((events.Electron.pt >= 5) & (np.abs(events.Electron.eta) < 2.4) & (events.Electron.sip3d < 8) & (np.abs(events.Electron.dxy) < 0.05) & (np.abs(events.Electron.dz) < 0.1) & (events.Electron.miniPFRelIso_all < (20 + 300/events.Electron.pt)), axis=1))
selection.add("mu selection", ak.any((events.Muon.pt >= 5) & (np.abs(events.Muon.eta) < 2.4) & (events.Muon.sip3d < 8) & (np.abs(events.Muon.dxy) < 0.05) & (np.abs(events.Muon.dz) < 0.1) & (events.Muon.miniPFRelIso_all < (20 + 300/events.Muon.pt)), axis=1))


Using the selections (applying the cuts)

In [5]:
selected_events = events[selection.any("e selection", "mu selection")]

#test that they were applied correctly, i.e. that we reduced our events:
print(f"event count without any cuts: {len(events)}")
print(f"event count after cuts: {len(selected_events)}")

event count without any cuts: 1371000
event count after cuts: 601162


So, if you continue beyond this point in your code you want to work with the `selected_events` awkward array since that is the list of events after our cuts. Note this was done to the regular electron collection and muon collection, so if you want to work with the LowPtElectron collection you will need to write your own selection like I did above but swap `.Electron` for `.LowPtElectron`

**Second question to answer:** "What was the MVA full variable name"?

We can find it by printing out all the fields for our electron collection like so:

In [6]:
events.Electron.fields

['dEscaleDown',
 'dEscaleUp',
 'dEsigmaDown',
 'dEsigmaUp',
 'deltaEtaSC',
 'dr03EcalRecHitSumEt',
 'dr03HcalDepth1TowerSumEt',
 'dr03TkSumPt',
 'dr03TkSumPtHEEP',
 'dxy',
 'dxyErr',
 'dz',
 'dzErr',
 'eCorr',
 'eInvMinusPInv',
 'energyErr',
 'eta',
 'hoe',
 'ip3d',
 'jetPtRelv2',
 'jetRelIso',
 'mass',
 'miniPFRelIso_all',
 'miniPFRelIso_chg',
 'mvaFall17V2Iso',
 'mvaFall17V2noIso',
 'pfRelIso03_all',
 'pfRelIso03_chg',
 'phi',
 'pt',
 'r9',
 'scEtOverPt',
 'sieie',
 'sip3d',
 'mvaTTH',
 'charge',
 'cutBased',
 'jetIdx',
 'pdgId',
 'photonIdx',
 'tightCharge',
 'vidNestedWPBitmap',
 'vidNestedWPBitmapHEEP',
 'convVeto',
 'cutBased_HEEP',
 'isPFcand',
 'jetNDauCharged',
 'lostHits',
 'mvaFall17V2Iso_WP80',
 'mvaFall17V2Iso_WP90',
 'mvaFall17V2Iso_WPL',
 'mvaFall17V2noIso_WP80',
 'mvaFall17V2noIso_WP90',
 'mvaFall17V2noIso_WPL',
 'seedGain',
 'genPartIdx',
 'genPartFlav',
 'cleanmask',
 'genPartIdxG',
 'jetIdxG',
 'photonIdxG']

Please note: the above print out is going to be dependant on what type of events you have in your NanoAOD file, but for most you should have two variables `mvaFall17V2Iso` and `mvaFall17V2noIso`. My WZ has even more, they have names that seem related to the Z and W mass. You probably won't have those, so look for the ones I listed above. Those are the MVA variables in the NanoAOD file you can work with