In [None]:
import time

from coffea import hist
from coffea.analysis_objects import JaggedCandidateArray
import coffea.processor as processor
from functools import partial
import uproot

from awkward import JaggedArray
import numpy as np
import pickle
import sys
from coffea.lookup_tools import extractor, dense_lookup

import matplotlib.pyplot as plt
from matplotlib import ticker,colors

import numba

from utils.plotting import plotWithRatio

In [None]:
from utils.crossSections import *
from utils.efficiencies import getMuSF, getEleSF


In [None]:
# Load the pre-computed b-tagging efficiency lookup. TTGammaProcessor.process
# calls it as taggingEffLookup(datasetFull, hadFlav, pt, eta).
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# produced by this analysis.
with open('utils/taggingEfficienciesDenseLookup.pkl','rb') as _file:
    taggingEffLookup = pickle.load(_file)

In [None]:
# Muon scale-factor inputs for 2016: one dict per data-taking era.
# 'id' / 'iso' / 'trig' are (ROOT file, histogram name) pairs and 'scale' is
# the era luminosity divided by the total (35.882515396), which
# TTGammaProcessor.__init__ uses to weight the era histograms.
muSFFileList = [
    {'id'    : ("ScaleFactors/MuEGammaScaleFactors/mu2016/EfficienciesStudies_2016_legacy_rereco_rootfiles_Run%s_SF_ID.root" % era,
                "NUM_TightID_DEN_genTracks_eta_pt"),
     'iso'   : ("ScaleFactors/MuEGammaScaleFactors/mu2016/EfficienciesStudies_2016_legacy_rereco_rootfiles_Run%s_SF_ISO.root" % era,
                "NUM_TightRelIso_DEN_TightIDandIPCut_eta_pt"),
     'trig'  : ("ScaleFactors/MuEGammaScaleFactors/mu2016/EfficienciesStudies_2016_trigger_EfficienciesAndSF_Run%s.root" % trigEra,
                "IsoMu24_OR_IsoTkMu24_PtEtaBins/abseta_pt_ratio"),
     'scale' : eraLumi/35.882515396}
    for era, trigEra, eraLumi in (("BCDEF", "BtoF", 19.656062760),
                                  ("GH",    "GtoH", 16.226452636))
]



In [None]:
from utils.fileSet_2016 import fileSet_2016 as fileset


### Number of generated MC events per sample, used to normalise the MC.
### The values were read from the 'hEvents' event-count histograms stored in
### the skims with the (slow, therefore disabled) loop below:
# mcEventYields = {}
# for sample,fNameList in fileset.items():
#     _hist = uproot.open(fNameList[0])['hEvents'].values
#     for fName in fNameList[1:]:
#         _hist += uproot.open(fName)['hEvents'].values
#     mcEventYields[sample] = _hist[2] - _hist[0]
#
# print (mcEventYields)

mcEventYields = {
    # Drell-Yan
    'DYjetsM10to50_2016': 35114961.0,
    'DYjetsM50_2016': 146280395.0,
    # gamma + jets
    'GJets_HT40To100_2016': 9326139.0,
    'GJets_HT100To200_2016': 10104155.0,
    'GJets_HT200To400_2016': 20527506.0,
    'GJets_HT400To600_2016': 5060070.0,
    'GJets_HT600ToInf_2016': 5080857.0,
    # QCD, electron enriched
    'QCD_Pt20to30_Ele_2016': 9241500.0,
    'QCD_Pt30to50_Ele_2016': 11508842.0,
    'QCD_Pt50to80_Ele_2016': 45789059.0,
    'QCD_Pt80to120_Ele_2016': 77800204.0,
    'QCD_Pt120to170_Ele_2016': 75367655.0,
    'QCD_Pt170to300_Ele_2016': 11105095.0,
    'QCD_Pt300toInf_Ele_2016': 7090318.0,
    # QCD, muon enriched
    'QCD_Pt20to30_Mu_2016': 31878740.0,
    'QCD_Pt30to50_Mu_2016': 29936360.0,
    'QCD_Pt50to80_Mu_2016': 19662175.0,
    'QCD_Pt80to120_Mu_2016': 23686772.0,
    'QCD_Pt120to170_Mu_2016': 7897731.0,
    'QCD_Pt170to300_Mu_2016': 17350231.0,
    'QCD_Pt300to470_Mu_2016': 49005976.0,
    'QCD_Pt470to600_Mu_2016': 19489276.0,
    'QCD_Pt600to800_Mu_2016': 9981311.0,
    'QCD_Pt800to1000_Mu_2016': 19940747.0,
    'QCD_Pt1000toInf_Mu_2016': 13608903.0,
    # single top
    'ST_s_channel_2016': 6137801.0,
    'ST_tW_channel_2016': 4945734.0,
    'ST_tbarW_channel_2016': 4942374.0,
    'ST_tbar_channel_2016': 17780700.0,
    'ST_t_channel_2016': 31848000.0,
    # ttbar + gamma
    'TTGamma_Dilepton_2016': 5728644.0,
    'TTGamma_Hadronic_2016': 5635346.0,
    'TTGamma_SingleLept_2016': 10991612.0,
    # ttbar + W/Z
    'TTWtoLNu_2016': 2716249.0,
    'TTWtoQQ_2016': 430310.0,
    'TTZtoLL_2016': 6420825.0,
    # ttbar
    'TTbarPowheg_Dilepton_2016': 67339946.0,
    'TTbarPowheg_Hadronic_2016': 67963984.0,
    'TTbarPowheg_Semilept_2016': 106438920.0,
    # W + jets
    'W1jets_2016': 45283121.0,
    'W2jets_2016': 60438768.0,
    'W3jets_2016': 59300029.0,
    'W4jets_2016': 29941394.0,
    # V + gamma
    'WGamma_01J_5f_2016': 6103817.0,
    'ZGamma_01J_5f_lowMass_2016': 9696539.0,
    # dibosons
    'WW_2016': 7982180.0,
    'WZ_2016': 3997571.0,
    'ZZ_2016': 1988098.0,
}


In [None]:
@numba.jit(nopython=True)
def maxHistoryPDGID(idxList_contents, idxList_starts, idxList_stops, pdgID_contents, pdgID_starts, pdgID_stops, motherIdx_contents, motherIdx_starts, motherIdx_stops):
    """Largest |pdgID| found along the ancestry chain of one particle per entry.

    Each of the three jagged inputs is passed as its flat ``contents`` array
    plus per-entry ``starts``/``stops`` offsets. For entry ``i`` the walk
    begins at the first index stored in the entry's index list, records
    ``abs(pdgID)`` at that position, then follows ``motherIdx`` until an
    index of -1 or lower is reached.

    Returns one value per entry; entries with an empty index list (or whose
    first index is already negative) keep the initial value -1.
    """
    nEntries = len(idxList_starts)
    # same construction as before so the result dtype is unchanged
    maxPDGID_array = np.ones(nEntries,np.int32)*-1
    for entry in range(nEntries):
        first = idxList_starts[entry]
        if first != idxList_stops[entry]:
            # per-entry views of the pdgID and mother-index arrays
            entryPdgID = pdgID_contents[pdgID_starts[entry]:pdgID_stops[entry]]
            entryMother = motherIdx_contents[motherIdx_starts[entry]:motherIdx_stops[entry]]

            # only the first listed particle of the entry is followed
            current = idxList_contents[first]
            largest = -1
            while current>-1:
                largest = max(largest, abs(entryPdgID[current]))
                current = entryMother[current]
            maxPDGID_array[entry] = largest
    return maxPDGID_array

In [None]:
# Look at ProcessorABC to see the expected methods and what they are supposed to do
class TTGammaProcessor(processor.ProcessorABC):
    """Select single-lepton + jets (+ photon) events and fill ttgamma histograms.

    ``process`` builds muon, electron, jet and photon collections (plus
    generator particles for MC) from the flat branches of one chunk, applies
    the object and event selections, computes the MC event weight
    (cross section x luminosity / generated events, b-tagging and lepton scale
    factors) and fills the histograms declared in ``__init__``.

    The class relies on names defined at notebook level: ``muSFFileList``,
    ``taggingEffLookup``, ``maxHistoryPDGID``, ``crossSections`` and ``lumis``
    (presumably provided by ``from utils.crossSections import *``) and, as a
    fallback only, ``mcEventYields``.
    """

    def __init__(self, runNum = -1, eventNum = -1, mcEventYields = None):
        """Declare the output histograms and load all scale-factor lookups.

        Parameters
        ----------
        runNum, eventNum : int
            Stored on the instance; not used by ``process``.
        mcEventYields : dict or None
            Maps the full dataset name to the number of generated MC events.
            When ``None``, ``process`` falls back to the notebook-level
            ``mcEventYields`` dictionary.
        """
        dataset_axis = hist.Cat("dataset", "Dataset")
        lep_axis = hist.Bin("lepFlavor", r"ElectronOrMuon", 3, -1.5, 1.5)

        m3_axis = hist.Bin("M3", r"$M_3$ [GeV]", 200, 0., 1000)
        # label fixed: the original "\gamm" is not a valid LaTeX command
        mass_axis = hist.Bin("mass", r"$m_{\ell\gamma}$ [GeV]", 400, 0., 400)
        pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 200, 0., 1000)
        eta_axis = hist.Bin("eta", r"$\eta_{\gamma}$", 300, -1.5, 1.5)
        chIso_axis = hist.Bin("chIso", r"Charged Hadron Isolation", np.arange(-0.1,20.001,.05))

        ## Define axis to keep track of photon category
        phoCategory_axis = hist.Bin("category", r"Photon Category", [1,2,3,4,5])
        phoCategory_axis.identifiers()[0].label = "Genuine Photon"
        phoCategory_axis.identifiers()[1].label = "Misidentified Electron"
        phoCategory_axis.identifiers()[2].label = "Hadronic Photon"
        phoCategory_axis.identifiers()[3].label = "Hadronic Fake"

        ###
        self._accumulator = processor.dict_accumulator({
            ##photon histograms
            'photon_pt': hist.Hist("Counts", dataset_axis, pt_axis, phoCategory_axis, lep_axis),
            'photon_eta': hist.Hist("Counts", dataset_axis, eta_axis, phoCategory_axis, lep_axis),
            'photon_chIso': hist.Hist("Counts", dataset_axis, chIso_axis, phoCategory_axis, lep_axis),
            'photon_chIsoSideband': hist.Hist("Counts", dataset_axis, chIso_axis, phoCategory_axis, lep_axis),
            'photon_lepton_mass': hist.Hist("Counts", dataset_axis, mass_axis, phoCategory_axis, lep_axis),
            'photon_lepton_mass_3j0t': hist.Hist("Counts", dataset_axis, mass_axis, phoCategory_axis, lep_axis),
            'M3'      : hist.Hist("Counts", dataset_axis, m3_axis, phoCategory_axis, lep_axis),
            'M3Presel': hist.Hist("Counts", dataset_axis, m3_axis, lep_axis),
            'EventCount':processor.value_accumulator(int)
        })

        self.eventNum = eventNum
        self.runNum = runNum
        # previously accepted but silently dropped; process() now uses it
        self.mcEventYields = mcEventYields

        # b-tagging scale factors, evaluated by name in process()
        ext = extractor()
        ext.add_weight_sets(["btag2016 * ScaleFactors/Btag/DeepCSV_2016LegacySF_V1.btag.csv"])
        ext.finalize()
        self.evaluator = ext.make_evaluator()

        # electron ID and reconstruction scale factors (values and errors)
        ele_id_file = uproot.open('ScaleFactors/MuEGammaScaleFactors/ele2016/2016LegacyReReco_ElectronTight_Fall17V2.root')
        self.ele_id_sf = dense_lookup.dense_lookup(ele_id_file["EGamma_SF2D"].values, ele_id_file["EGamma_SF2D"].edges)
        self.ele_id_err = dense_lookup.dense_lookup(ele_id_file["EGamma_SF2D"].variances**0.5, ele_id_file["EGamma_SF2D"].edges)

        ele_reco_file = uproot.open('ScaleFactors/MuEGammaScaleFactors/ele2016/egammaEffi.txt_EGM2D_runBCDEF_passingRECO.root')
        self.ele_reco_sf = dense_lookup.dense_lookup(ele_reco_file["EGamma_SF2D"].values, ele_reco_file["EGamma_SF2D"].edges)
        self.ele_reco_err = dense_lookup.dense_lookup(ele_reco_file["EGamma_SF2D"].variances**.5, ele_reco_file["EGamma_SF2D"].edges)

        # muon ID / isolation / trigger scale factors: sum of the per-era
        # histograms in muSFFileList, each weighted by its 'scale' entry
        mu_id_vals = 0
        mu_id_err = 0
        mu_iso_vals = 0
        mu_iso_err = 0
        mu_trig_vals = 0
        mu_trig_err = 0

        for scaleFactors in muSFFileList:
            id_file = uproot.open(scaleFactors['id'][0])
            iso_file = uproot.open(scaleFactors['iso'][0])
            trig_file = uproot.open(scaleFactors['trig'][0])

            mu_id_vals += id_file[scaleFactors['id'][1]].values * scaleFactors['scale']
            mu_id_err += id_file[scaleFactors['id'][1]].variances**0.5 * scaleFactors['scale']
            mu_id_edges = id_file[scaleFactors['id'][1]].edges

            mu_iso_vals += iso_file[scaleFactors['iso'][1]].values * scaleFactors['scale']
            mu_iso_err += iso_file[scaleFactors['iso'][1]].variances**0.5 * scaleFactors['scale']
            mu_iso_edges = iso_file[scaleFactors['iso'][1]].edges

            mu_trig_vals += trig_file[scaleFactors['trig'][1]].values * scaleFactors['scale']
            mu_trig_err += trig_file[scaleFactors['trig'][1]].variances**0.5 * scaleFactors['scale']
            mu_trig_edges = trig_file[scaleFactors['trig'][1]].edges

        # the edges of the last era are used for the lookup binning
        self.mu_id_sf = dense_lookup.dense_lookup(mu_id_vals, mu_id_edges)
        self.mu_id_err = dense_lookup.dense_lookup(mu_id_err, mu_id_edges)
        self.mu_iso_sf = dense_lookup.dense_lookup(mu_iso_vals, mu_iso_edges)
        self.mu_iso_err = dense_lookup.dense_lookup(mu_iso_err, mu_iso_edges)
        self.mu_trig_sf = dense_lookup.dense_lookup(mu_trig_vals, mu_trig_edges)
        self.mu_trig_err = dense_lookup.dense_lookup(mu_trig_err, mu_trig_edges)

    @property
    def accumulator(self):
        """The dict_accumulator holding the (empty) output histograms."""
        return self._accumulator

    @staticmethod
    def _photonCategory(leadingPho, genpdgid, genmotherIdx):
        """Integer photon category per event for a leading-photon collection.

        0: no category assigned, 1: genuine photon, 2: misidentified electron,
        3: hadronic photon, 4: hadronic fake (these match the labels of the
        "category" histogram axis).
        """
        # reco photons really generated as electrons
        isMisIDele = (leadingPho.genFlav==13).any()
        matchedPho = (leadingPho.genFlav==1).any()

        # look through parentage to find if any hadrons in genPhoton parent history
        idx = leadingPho.genIdx
        maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                    genpdgid.content, genpdgid.starts, genpdgid.stops,
                                    genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)
        hadronicParent = maxParent>25

        isGenPho = matchedPho & ~hadronicParent
        isHadPho = matchedPho & hadronicParent
        isHadFake = ~(isMisIDele | isGenPho | isHadPho) & (leadingPho.counts==1)

        #define integer definition for the photon category axis
        return 1*isGenPho + 2*isMisIDele + 3*isHadPho + 4*isHadFake

    def process(self, df):
        """Apply the selection to one chunk and return the filled accumulator.

        Parameters
        ----------
        df : mapping of branch name to array for one chunk of the 'Events'
            tree; ``df['dataset']`` holds the dataset name. Datasets whose
            name contains 'Data' are treated as collision data (no generator
            information, unit weights).

        Returns
        -------
        A copy of ``self.accumulator`` filled with this chunk.
        """
        output = self.accumulator.identity()

        datasetFull = df['dataset']
        dataset=datasetFull.replace('_2016','')

        isData = 'Data' in dataset

        year=2016
        muTrigger = df['HLT_IsoMu24'] | df['HLT_IsoTkMu24']
        eleTrigger = df['HLT_Ele27_WPTight_Gsf']
        photonBitMapName = 'Photon_cutBased'

        filters = (df['Flag_goodVertices'] &
                   df['Flag_globalSuperTightHalo2016Filter'] &
                   df['Flag_HBHENoiseFilter'] &
                   df['Flag_HBHENoiseIsoFilter'] &
                   df['Flag_EcalDeadCellTriggerPrimitiveFilter'] &
                   df['Flag_BadPFMuonFilter']
                  )
        if year > 2016:
            filters = (filters &
                       df['Flag_ecalBadCalibFilterV2']
                      )

        muons = JaggedCandidateArray.candidatesfromcounts(
            df['nMuon'],
            pt=df['Muon_pt'],
            eta=df['Muon_eta'],
            phi=df['Muon_phi'],
            mass=df['Muon_mass'],
            charge=df['Muon_charge'],
            relIso=df['Muon_pfRelIso04_all'],
            tightId=df['Muon_tightId'],
            isPFcand=df['Muon_isPFcand'],
            isTracker=df['Muon_isTracker'],
            isGlobal=df['Muon_isGlobal'],
        )

        electrons = JaggedCandidateArray.candidatesfromcounts(
            df['nElectron'],
            pt=df['Electron_pt'],
            eta=df['Electron_eta'],
            phi=df['Electron_phi'],
            mass=df['Electron_mass'],
            charge=df['Electron_charge'],
            cutBased=df['Electron_cutBased'],
            d0=df['Electron_dxy'],
            dz=df['Electron_dz'],
        )

        # generator-level branches do not exist in data: use dummy arrays there
        jets = JaggedCandidateArray.candidatesfromcounts(
            df['nJet'],
            pt=df['Jet_pt'],
            eta=df['Jet_eta'],
            phi=df['Jet_phi'],
            mass=df['Jet_mass'],
            jetId=df['Jet_jetId'],
            btag=df['Jet_btagDeepB'],
            hadFlav=df['Jet_hadronFlavour'] if not isData else np.ones_like(df['Jet_jetId']),
            genIdx=df['Jet_genJetIdx'] if not isData else np.ones_like(df['Jet_jetId']),
        )

        photons = JaggedCandidateArray.candidatesfromcounts(
            df['nPhoton'],
            pt=df['Photon_pt'],
            eta=df['Photon_eta'],
            phi=df['Photon_phi'],
            mass=np.zeros_like(df['Photon_pt']),
            isEE=df['Photon_isScEtaEE'],
            isEB=df['Photon_isScEtaEB'],
            photonId=df[photonBitMapName],
            passEleVeto=df['Photon_electronVeto'],
            pixelSeed=df['Photon_pixelSeed'],
            sieie=df['Photon_sieie'],
            chIso=df['Photon_pfRelIso03_chg']*df['Photon_pt'],
            vidCuts=df['Photon_vidNestedWPBitmap'],
            genFlav=df['Photon_genPartFlav'] if not isData else np.ones_like(df['Photon_electronVeto']),
            genIdx=df['Photon_genPartIdx'] if not isData else np.ones_like(df['Photon_electronVeto']),
        )
        if not isData:
            genPart = JaggedCandidateArray.candidatesfromcounts(
                df['nGenPart'],
                pt=df['GenPart_pt'],
                eta=df['GenPart_eta'],
                phi=df['GenPart_phi'],
                mass=df['GenPart_mass'],
                pdgid=df['GenPart_pdgId'],
                motherIdx=df['GenPart_genPartIdxMother'],
                status=df['GenPart_status'],
                statusFlags=df['GenPart_statusFlags'],
            )

            genmotherIdx = genPart.motherIdx
            genpdgid = genPart.pdgid

        ## TTbar vs TTGamma Overlap Removal (work in progress, still buggy)
        if 'TTbar' in dataset:
            overlapPhoSelect = ((genPart.pt>=10) &
                                (abs(genPart.eta) < 5.) &
                                (genPart.pdgid==22) &
                                (genPart.status==1)
                               )

            OverlapPhotons = genPart[overlapPhoSelect]

            # NOTE: maxHistoryPDGID only follows the first selected photon of each event
            idx = OverlapPhotons.motherIdx
            maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                        genpdgid.content, genpdgid.starts, genpdgid.stops,
                                        genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)

            finalGen = genPart[((genPart.status==1)|(genPart.status==71)) & ~((abs(genPart.pdgid)==12) | (abs(genPart.pdgid)==14) | (abs(genPart.pdgid)==16))]

            genPairs = OverlapPhotons['p4'].cross(finalGen['p4'],nested=True)
            ##remove the case where the cross product is the gen photon with itself
            genPairs = genPairs[~(genPairs.i0==genPairs.i1)]

            dRPairs = genPairs.i0.delta_r(genPairs.i1)

            isOverlap = ((dRPairs.min()>0.1) & (maxParent<37)).any()
            passOverlapRemoval = ~isOverlap
        else:
            passOverlapRemoval = np.ones_like(df['event'])==1

        muonSelectTight = ((muons.pt>30) &
                           (abs(muons.eta)<2.4) &
                           (muons.tightId) &
                           (muons.relIso < 0.15)
                          )

        muonSelectLoose = ((muons.pt>15) &
                           (abs(muons.eta)<2.4) &
                           ((muons.isPFcand) & (muons.isTracker | muons.isGlobal)) &
                           (muons.relIso < 0.25) &
                           np.invert(muonSelectTight)
                          )

        eleEtaGap = (abs(electrons.eta) < 1.4442) | (abs(electrons.eta) > 1.566)
        elePassD0 = ((abs(electrons.eta) < 1.479) & (abs(electrons.d0) < 0.05) |
                     (abs(electrons.eta) > 1.479)  & (abs(electrons.d0) < 0.1)
                    )
        elePassDZ = ((abs(electrons.eta) < 1.479) & (abs(electrons.dz) < 0.1) |
                     (abs(electrons.eta) > 1.479)  & (abs(electrons.dz) < 0.2)
                    )

        electronSelectTight = ((electrons.pt>35) &
                               (abs(electrons.eta)<2.1) &
                               eleEtaGap &
                               (electrons.cutBased>=4) &
                               elePassD0 &
                               elePassDZ
                              )

        electronSelectLoose = ((electrons.pt>15) &
                               (abs(electrons.eta)<2.4) &
                               eleEtaGap &
                               (electrons.cutBased>=1) &
                               elePassD0 &
                               elePassDZ &
                               np.invert(electronSelectTight)
                              )

        tightMuon = muons[muonSelectTight]
        looseMuon = muons[muonSelectLoose]

        tightElectron = electrons[electronSelectTight]
        looseElectron = electrons[electronSelectLoose]

        oneMuon = (tightMuon.counts == 1)
        muVeto = (tightMuon.counts == 0)
        oneEle = (tightElectron.counts == 1)
        eleVeto = (tightElectron.counts == 0)
        looseMuonSel = (looseMuon.counts == 0)
        looseElectronSel = (looseElectron.counts == 0)

        #### Calculate deltaR between photon and tight leptons
        ####### make combination pairs
        phoMu = photons['p4'].cross(tightMuon['p4'],nested=True)

        ####### a photon passes if it is more than 0.4 away from every tight lepton, or if there are no tight leptons
        dRphomu = (phoMu.i0.delta_r(phoMu.i1)>0.4).all() | (tightMuon.counts==0)
        phoEle = photons['p4'].cross(tightElectron['p4'],nested=True)
        dRphoele = ((phoEle.i0.delta_r(phoEle.i1)).min()>0.4) | (tightElectron.counts==0)

        #photon selection (no ID requirement used here)
        photonSelect = ((photons.pt>20) &
                        (abs(photons.eta) < 1.4442) &
                        (photons.isEE | photons.isEB) &
                        (photons.passEleVeto) &
                        np.invert(photons.pixelSeed) &
                        dRphomu & dRphoele
                       )

        #split out the ID requirement, enabling Iso and SIEIE to be inverted for control regions
        photonID = photons.photonId >= 2

        #parse VID cuts (two bits per cut), define loose photons
        photon_MinPtCut = (photons.vidCuts>>0 & 3)>=2
        photon_PhoSCEtaMultiRangeCut = (photons.vidCuts>>2 & 3)>=2
        photon_PhoSingleTowerHadOverEmCut = (photons.vidCuts>>4 & 3)>=2
        photon_PhoFull5x5SigmaIEtaIEtaCut = (photons.vidCuts>>6 & 3)>=2
        photon_ChIsoCut = (photons.vidCuts>>8 & 3)>=2
        photon_NeuIsoCut = (photons.vidCuts>>10 & 3)>=2
        photon_PhoIsoCut = (photons.vidCuts>>12 & 3)>=2

        # ID without the charged-isolation and sigma-ieta-ieta requirements.
        # The sigma-ieta-ieta bit used to be included here, which made the
        # sideband below (sieie>0.012) require passing and failing that cut.
        photonID_NoChIsoSIEIE = (photon_MinPtCut &
                                 photon_PhoSCEtaMultiRangeCut &
                                 photon_PhoSingleTowerHadOverEmCut &
                                 photon_NeuIsoCut &
                                 photon_PhoIsoCut)

        tightPhotons = photons[photonSelect & photonID]
        loosePhotons = photons[photonSelect & photonID_NoChIsoSIEIE & photon_PhoFull5x5SigmaIEtaIEtaCut]
        loosePhotonsSideband = photons[photonSelect & photonID_NoChIsoSIEIE & (photons.sieie>0.012)]

        ##jet ID bit to test (bit 1 for 2016, bit 2 for later years)
        jetIDbit = 1
        if year>2016: jetIDbit=2

        ##check dR jet,lepton & jet,photon
        jetMu = jets['p4'].cross(tightMuon['p4'],nested=True)
        dRjetmu = ((jetMu.i0.delta_r(jetMu.i1)).min()>0.4) | (tightMuon.counts==0)

        jetEle = jets['p4'].cross(tightElectron['p4'],nested=True)
        dRjetele = ((jetEle.i0.delta_r(jetEle.i1)).min()>0.4) | (tightElectron.counts==0)

        jetPho = jets['p4'].cross(tightPhotons['p4'],nested=True)
        dRjetpho = ((jetPho.i0.delta_r(jetPho.i1)).min()>0.1) | (tightPhotons.counts==0)

        jetSelect = ((jets.pt > 30) &
                     (abs(jets.eta) < 2.4) &
                     ((jets.jetId >> jetIDbit & 1)==1) &
                     dRjetmu & dRjetele & dRjetpho
                    )

        tightJets = jets[jetSelect]

        bTagWP = 0.6321   #2016 DeepCSV working point

        btagged = tightJets.btag>bTagWP

        bJets = tightJets[btagged]

        ## Define M3, mass of the 3-jet combination with highest pT
        triJet = tightJets['p4'].choose(3)

        triJetPt = (triJet.i0 + triJet.i1 + triJet.i2).pt
        triJetMass = (triJet.i0 + triJet.i1 + triJet.i2).mass
        M3 = triJetMass[triJetPt.argmax()]

        # first object of each event; "[::1]" sliced the event axis and was a
        # no-op, "[:,:1]" is the per-event slice already used for photons
        leadingMuon = tightMuon[:,:1]
        leadingElectron = tightElectron[:,:1]

        leadingPhoton = tightPhotons[:,:1]
        leadingPhotonLoose = loosePhotons[:,:1]
        leadingPhotonSideband = loosePhotonsSideband[:,:1]

        egamma = leadingElectron['p4'].cross(leadingPhoton['p4'])
        mugamma = leadingMuon['p4'].cross(leadingPhoton['p4'])
        egammaMass = (egamma.i0 + egamma.i1).mass
        mugammaMass = (mugamma.i0 + mugamma.i1).mass

        if not isData:
            #### Photon categories, using the genFlav / genIdx branches
            phoCategory = self._photonCategory(leadingPhoton, genpdgid, genmotherIdx)
            phoCategoryLoose = self._photonCategory(leadingPhotonLoose, genpdgid, genmotherIdx)
            phoCategorySideband = self._photonCategory(leadingPhotonSideband, genpdgid, genmotherIdx)
        else:
            phoCategory = np.ones_like(df['event'])
            phoCategoryLoose = np.ones_like(df['event'])
            phoCategorySideband = np.ones_like(df['event'])

        # exactly one tight lepton of one flavour, none of the other, no loose leptons
        mu_noLoose = (muTrigger & filters & passOverlapRemoval &
                      oneMuon & eleVeto &
                      looseMuonSel & looseElectronSel)
        ele_noLoose = (eleTrigger & filters & passOverlapRemoval &
                       oneEle & muVeto &
                       looseMuonSel & looseElectronSel)

        lep_noLoose = mu_noLoose| ele_noLoose

        lep_jetSel = (lep_noLoose &
                      (tightJets.counts >= 4) & (bJets.counts >= 1)
                     )
        lep_zeropho = (lep_jetSel &
                       (tightPhotons.counts == 0)
                      )
        lep_phosel = (lep_jetSel &
                      (tightPhotons.counts == 1)
                     )
        lep_phoselLoose = (lep_jetSel &
                           (loosePhotons.counts == 1)
                          )
        lep_phoselSideband = (lep_jetSel &
                              (loosePhotonsSideband.counts == 1)
                             )

        lep_phosel_3j0t = (lep_noLoose &
                           (tightJets.counts >= 3) & (bJets.counts ==0) &
                           (tightPhotons.counts == 1)
                          )

        # -1: electron channel, +1: muon channel, 0: neither
        lepFlavor = -1*ele_noLoose + 1*mu_noLoose

        evtWeight = np.ones_like(df['event'],dtype=np.float64)
        if not isData:
            # prefer the yields handed to the constructor; fall back to the
            # notebook-level dictionary when none were given
            eventYields = self.mcEventYields
            if eventYields is None:
                eventYields = mcEventYields
            nMCevents = eventYields[datasetFull]
            xsec = crossSections[dataset]

            evtWeight *= xsec * lumis[year] / nMCevents

            #btag key name
            #name / working Point / type / systematic / jetType
            #  ... / 0-loose 1-medium 2-tight / comb,mujets,iterativefit / central,up,down / 0-b 1-c 2-udcsg
            bJetSF = self.evaluator['btag%iDeepCSV_1_comb_central_0'%year](tightJets.eta, tightJets.pt, tightJets.btag)
            bJetSF_c = self.evaluator['btag%iDeepCSV_1_comb_central_1'%year](tightJets.eta, tightJets.pt, tightJets.btag)
            bJetSF_udcsg = self.evaluator['btag%iDeepCSV_1_incl_central_2'%year](tightJets.eta, tightJets.pt, tightJets.btag)

            # overwrite the b-jet scale factor for charm and light-flavour jets
            bJetSF.content[(tightJets.hadFlav==4).content] = bJetSF_c[tightJets.hadFlav==4].content
            bJetSF.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg[tightJets.hadFlav==0].content

            ## mc efficiency lookup, data efficiency is eff* scale factor
            btagEfficiencies = taggingEffLookup(datasetFull,tightJets.hadFlav,tightJets.pt,tightJets.eta)
            btagEfficienciesData = btagEfficiencies*bJetSF

            ##probability is the product of all efficiencies of tagged jets, times product of 1-eff for all untagged jets
            ## https://twiki.cern.ch/twiki/bin/view/CMS/BTagSFMethods#1a_Event_reweighting_using_scale
            pMC   = btagEfficiencies[btagged].prod()     * (1.-btagEfficiencies[np.invert(btagged)]).prod()
            pData = btagEfficienciesData[btagged].prod() * (1.-btagEfficienciesData[np.invert(btagged)]).prod()
            btagWeight = pData/pMC
            btagWeight[pData==0]=0

            evtWeight *= btagWeight

            eleID = self.ele_id_sf(tightElectron.eta, tightElectron.pt)
            eleIDerr = self.ele_id_err(tightElectron.eta, tightElectron.pt)
            eleRECO = self.ele_reco_sf(tightElectron.eta, tightElectron.pt)
            eleRECOerr = self.ele_reco_err(tightElectron.eta, tightElectron.pt)

            eleSF = (eleID*eleRECO).prod()
            # up/down variations are computed but not yet propagated to the histograms
            eleSFup = ((eleID + eleIDerr) * (eleRECO + eleRECOerr)).prod()
            eleSFdo = ((eleID - eleIDerr) * (eleRECO - eleRECOerr)).prod()

            evtWeight *= eleSF

            muID = self.mu_id_sf(tightMuon.eta, tightMuon.pt)
            muIDerr = self.mu_id_err(tightMuon.eta, tightMuon.pt)
            muIso = self.mu_iso_sf(tightMuon.eta, tightMuon.pt)
            muIsoerr = self.mu_iso_err(tightMuon.eta, tightMuon.pt)
            # trigger scale factors are binned in |eta|; these two lines used
            # to read the isolation lookups by mistake
            muTrig = self.mu_trig_sf(abs(tightMuon.eta), tightMuon.pt)
            muTrigerr = self.mu_trig_err(abs(tightMuon.eta), tightMuon.pt)

            muSF = (muID*muIso*muTrig).prod()
            # up/down variations are computed but not yet propagated to the histograms
            muSF_up = ((muID + muIDerr) * (muIso + muIsoerr) * (muTrig + muTrigerr)).prod()
            muSF_down = ((muID - muIDerr) * (muIso - muIsoerr) * (muTrig - muTrigerr)).prod()

            evtWeight *= muSF

        output['photon_pt'].fill(dataset=dataset,
                                 pt=tightPhotons.p4.pt[:,:1][lep_phosel].flatten(),
                                 category=phoCategory[lep_phosel].flatten(),
                                 lepFlavor=lepFlavor[lep_phosel],
                                 weight=evtWeight[lep_phosel].flatten())

        output['photon_eta'].fill(dataset=dataset,
                                  eta=tightPhotons.eta[:,:1][lep_phosel].flatten(),
                                  category=phoCategory[lep_phosel].flatten(),
                                  lepFlavor=lepFlavor[lep_phosel],
                                  weight=evtWeight[lep_phosel].flatten())

        output['photon_chIsoSideband'].fill(dataset=dataset,
                                            chIso=loosePhotonsSideband.chIso[:,:1][lep_phoselSideband].flatten(),
                                            category=phoCategorySideband[lep_phoselSideband].flatten(),
                                            lepFlavor=lepFlavor[lep_phoselSideband],
                                            weight=evtWeight[lep_phoselSideband].flatten())

        output['photon_chIso'].fill(dataset=dataset,
                                    chIso=loosePhotons.chIso[:,:1][lep_phoselLoose].flatten(),
                                    category=phoCategoryLoose[lep_phoselLoose].flatten(),
                                    lepFlavor=lepFlavor[lep_phoselLoose],
                                    weight=evtWeight[lep_phoselLoose].flatten())

        # lepton-photon mass: electron and muon channels are filled separately
        output['photon_lepton_mass'].fill(dataset=dataset,
                                          mass=egammaMass[lep_phosel & ele_noLoose].flatten(),
                                          category=phoCategory[lep_phosel & ele_noLoose].flatten(),
                                          lepFlavor=lepFlavor[lep_phosel & ele_noLoose],
                                          weight=evtWeight[lep_phosel & ele_noLoose].flatten())
        output['photon_lepton_mass'].fill(dataset=dataset,
                                          mass=mugammaMass[lep_phosel & mu_noLoose].flatten(),
                                          category=phoCategory[lep_phosel & mu_noLoose].flatten(),
                                          lepFlavor=lepFlavor[lep_phosel & mu_noLoose],
                                          weight=evtWeight[lep_phosel & mu_noLoose].flatten())

        output['photon_lepton_mass_3j0t'].fill(dataset=dataset,
                                               mass=egammaMass[lep_phosel_3j0t & ele_noLoose].flatten(),
                                               category=phoCategory[lep_phosel_3j0t & ele_noLoose].flatten(),
                                               lepFlavor=lepFlavor[lep_phosel_3j0t & ele_noLoose],
                                               weight=evtWeight[lep_phosel_3j0t & ele_noLoose].flatten())
        output['photon_lepton_mass_3j0t'].fill(dataset=dataset,
                                               mass=mugammaMass[lep_phosel_3j0t & mu_noLoose].flatten(),
                                               category=phoCategory[lep_phosel_3j0t & mu_noLoose].flatten(),
                                               lepFlavor=lepFlavor[lep_phosel_3j0t & mu_noLoose],
                                               weight=evtWeight[lep_phosel_3j0t & mu_noLoose].flatten())

        # events are selected on the tight photon, so they are categorised
        # with the tight-photon category (this used phoCategoryLoose before)
        output['M3'].fill(dataset=dataset,
                          M3=M3[lep_phosel].flatten(),
                          category=phoCategory[lep_phosel].flatten(),
                          lepFlavor=lepFlavor[lep_phosel],
                          weight=evtWeight[lep_phosel].flatten())

        output['M3Presel'].fill(dataset=dataset,
                          M3=M3[lep_zeropho].flatten(),
                          lepFlavor=lepFlavor[lep_zeropho],
                          weight=evtWeight[lep_zeropho].flatten())

        # add to the accumulator instead of replacing it with a plain int
        output['EventCount'] += len(df['event'])

        return output

    def postprocess(self, accumulator):
        """No post-processing: return the merged accumulator unchanged."""
        return accumulator


In [None]:
# Run the processor over all MC samples in `fileset` with 4 local workers and
# report the wall-clock time and event rate.
# For a quick test, restrict the samples first, e.g.:
#filesetTemp = {k:fileset[k] for k in fileset if 'TTGamma' in k}
tstart = time.time()
output = processor.run_uproot_job(fileset,
                                  treename='Events',
                                  processor_instance=TTGammaProcessor(mcEventYields=mcEventYields),
                                  executor=processor.futures_executor,
                                  executor_args={'workers': 4, 'flatten': True},
#                                   chunksize=25000,
#                                   maxchunks=0
                                 )

elapsed = time.time() - tstart
print("Total time: %.1f seconds"%elapsed)
# 'EventCount' is the number of events processed, summed over all chunks
print("Total rate: %.1f events / second"%(output['EventCount'].value/elapsed))

In [None]:
from utils.fileSet_2016 import fileSet_Data_2016

# Same job as for MC, run over the 2016 data file set. The processor treats
# datasets whose name contains 'Data' as collision data.
tstart = time.time()
outputData = processor.run_uproot_job(fileSet_Data_2016,
                                  treename='Events',
                                  processor_instance=TTGammaProcessor(mcEventYields=mcEventYields),
                                  executor=processor.futures_executor,
                                  executor_args={'workers': 4, 'flatten': True},
#                                   chunksize=5000,
#                                   maxchunks=0                                                                                              
                                 )

elapsed = time.time() - tstart
print("Total time: %.1f seconds"%elapsed)
# 'EventCount' is the number of events processed, summed over all chunks
print("Total rate: %.1f events / second"%(outputData['EventCount'].value/elapsed))

In [None]:
# Persist the filled accumulators (MC and data) so that plotting can be done
# without re-running the jobs above.
with open('histogramOutputMC.pkl','wb') as _file:
    pickle.dump(output,_file)

with open('histogramOutputData.pkl','wb') as _file:
    pickle.dump(outputData,_file)
    