# Running the TTGammaProcessor

This cell will copy the test files from their location on eos to your local area. This only needs to be done once!

In [None]:
# # If you have not already done so, you can copy the files needed to test the code from here.
# # ONLY NEEDS TO BE DONE ONCE; CAN BE COMMENTED OUT ONCE YOU HAVE THE FILES
# !xrdcp root://cmseos.fnal.gov//store/user/cmsdas/2021/long_exercises/TTGamma/TestFiles/TTGamma_1l.root .
# !xrdcp root://cmseos.fnal.gov//store/user/cmsdas/2021/long_exercises/TTGamma/TestFiles/TTbar_1l.root .
# !xrdcp root://cmseos.fnal.gov//store/user/cmsdas/2021/long_exercises/TTGamma/TestFiles/WGamma.root .
# !xrdcp root://cmseos.fnal.gov//store/user/cmsdas/2021/long_exercises/TTGamma/TestFiles/ZGamma.root .
# !xrdcp root://cmseos.fnal.gov//store/user/cmsdas/2021/long_exercises/TTGamma/TestFiles/W4Jets.root .
# !xrdcp root://cmseos.fnal.gov//store/user/cmsdas/2021/long_exercises/TTGamma/TestFiles/ZJets.root .

In [None]:
%load_ext autoreload
from coffea import util, processor
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea import hist
import matplotlib.pylab as plt

List of samples to be run on (fileset variable) and a dictionary containing the number of events processed for each sample

In [None]:
# Shell escape: list the .root files present in the current working directory
!ls *.root

In [None]:
# (dataset name, local ROOT file) pairs, in the order they should appear in the fileset
sample_files = (
    ('TTGamma_SingleLept', 'TTGamma_1l.root'),
    ('TTbarPowheg_Semilept', 'TTbar_1l.root'),
    ('W4jets', 'W4Jets.root'),
    ('WGamma_01J_5f', 'WGamma.root'),
    ('ZGamma_01J_5f_lowMass', 'ZGamma.root'),
    ('DYjetsM50', 'ZJets.root'),
)

# Each dataset name maps to a one-element list holding its file
fileset = {dataset: [root_file] for dataset, root_file in sample_files}

Run the TTGammaProcessor on the list of files included in fileset.

You can specify the chunksize and the maximum number of chunks to process from each sample (selecting a small chunksize and a single chunk forces coffea to process only a subset of the events, which is useful for quicker debugging).

In [None]:
#autoreload forces the kernel to reload the processor to include any new changes
%autoreload 2
from ttgamma import TTGammaProcessor
import awkward as ak

import time
tstart = time.time()

#Run Coffea code using uproot
output2 = processor.run_uproot_job(
    fileset,
    "Events",
    TTGammaProcessor(isMC=True),
    processor.iterative_executor,
    executor_args={'schema': NanoAODSchema,'workers': 4},
    chunksize=50000,
#     maxchunks=-1,
)

elapsed = time.time() - tstart
print("Total time: %.1f seconds"%elapsed)
print("Total rate: %.1f events / second"%(output['EventCount'].value/elapsed))

In [None]:
# Display the `output` object so its contents can be inspected
output

In [None]:
# 'photon_chIso' histogram for the TTGamma_SingleLept dataset, summed over the
# category and lepFlavor axes, with one curve per entry of the systematic axis
plt.yscale('log')
chiso_signal = (
    output['photon_chIso']
    .sum('category')
    .sum('lepFlavor')
    .integrate('dataset', 'TTGamma_SingleLept')
)
hist.plot1d(chiso_signal, overlay='systematic')

In [None]:
# 'M3' histogram for the TTGamma_SingleLept dataset, summed over the category
# and lepFlavor axes, with one curve per entry of the systematic axis
m3_signal = (
    output['M3']
    .sum('category')
    .sum('lepFlavor')
    .integrate('dataset', 'TTGamma_SingleLept')
)
hist.plot1d(m3_signal, overlay='systematic')

In [None]:
# 'photon_eta' histogram for the TTGamma_SingleLept dataset, summed over the
# category and lepFlavor axes, with one curve per entry of the systematic axis
photon_eta_signal = (
    output['photon_eta']
    .sum('category')
    .sum('lepFlavor')
    .integrate('dataset', 'TTGamma_SingleLept')
)
hist.plot1d(photon_eta_signal, overlay='systematic')

In [None]:
# 'photon_lepton_mass_3j0t' histogram at the nominal systematic, summed over
# the category and lepFlavor axes, with one curve per dataset
mass_3j0t_nominal = (
    output['photon_lepton_mass_3j0t']
    .sum('category')
    .sum('lepFlavor')
    .integrate('systematic', 'nominal')
)
hist.plot1d(mass_3j0t_nominal, overlay='dataset')

In [None]:
# 'M3' histogram at the nominal systematic, summed over the category and
# lepFlavor axes, with one curve per dataset
m3_nominal = (
    output['M3']
    .sum('category')
    .sum('lepFlavor')
    .integrate('systematic', 'nominal')
)
hist.plot1d(m3_nominal, overlay='dataset')

In [None]:
import numpy as np

import matplotlib.pyplot as plt
from cycler import cycler
from coffea import hist, util

from ttgamma.utils.plotting import plotWithRatio, RebinHist, SetRangeHist

# Legend label -> dataset names merged under that label.
# Raw strings keep the LaTeX backslashes literal; the resulting keys are the same
# text as before, without relying on invalid escape sequences such as '\o' or '\g'.
grouping = {
    r'$t\overline{t}+\gamma$': ['TTGamma_Dilepton', 'TTGamma_SingleLept', 'TTGamma_Hadronic'],
    r'$t\overline{t}$': ['TTbarPowheg_Dilepton', 'TTbarPowheg_Semilept', 'TTbarPowheg_Hadronic'],
    'Single top': ['ST_s_channel', 'ST_tW_channel', 'ST_tbarW_channel', 'ST_tbar_channel', 'ST_t_channel'],
    'W+jets': ['W1jets', 'W2jets', 'W3jets', 'W4jets'],
    'Z+jets': ['DYjetsM10to50', 'DYjetsM50'],
    r'W+$\gamma$': ['WGamma_01J_5f'],
    r'Z+$\gamma$': ['ZGamma_01J_5f_lowMass'],
    'TTV': ['TTWtoLNu', 'TTWtoQQ', 'TTZtoLL'],
    'GJets': ['GJets_HT40To100', 'GJets_HT100To200', 'GJets_HT200To400', 'GJets_HT400To600', 'GJets_HT600ToInf'],
    'QCD': [
        'QCD_Pt20to30_Ele', 'QCD_Pt30to50_Ele', 'QCD_Pt50to80_Ele', 'QCD_Pt80to120_Ele',
        'QCD_Pt120to170_Ele', 'QCD_Pt170to300_Ele', 'QCD_Pt300toInf_Ele',
        'QCD_Pt20to30_Mu', 'QCD_Pt30to50_Mu', 'QCD_Pt50to80_Mu', 'QCD_Pt80to120_Mu',
        'QCD_Pt120to170_Mu', 'QCD_Pt170to300_Mu', 'QCD_Pt300to470_Mu', 'QCD_Pt470to600_Mu',
        'QCD_Pt600to800_Mu', 'QCD_Pt800to1000_Mu', 'QCD_Pt1000toInf_Mu',
    ],
}

# Legend label -> slice of the 'category' axis merged under that label
groupCategory = {
    r"Genuine $\gamma$": slice(1, 2),
    "MisID e": slice(2, 3),
    "NonPrompt": slice(3, 5),
}

# Variable-width bin edges used for both the grouped histogram and hData
photon_pt_edges = np.array([20,25,30,35,40,45,50,60,70,80,90,100,120,140,160,180,200,250,300,400,500])

#Get photon pt distribution from coffea output
h = output['photon_pt']

#sum over lepton flavors (get both electron and muon)
h = h.sum('lepFlavor')

#integrate over systematics, selecting only "nominal"
h = h.integrate('systematic','nominal')

#group the datasets into the sample types
h = h.group('dataset',hist.Cat(r'dataset',r'Samples',sorting='placement'),grouping)

#group the photon category axis into the category types listed above
h = h.group('category',hist.Cat(r'category',r'Category',sorting='placement'),groupCategory)

#rebin the pt axis
h = h.rebin("pt",hist.Bin("pt",h.axis("pt").label,photon_pt_edges))

# Histogram passed as the second argument of plotWithRatio: the same photon_pt
# output summed over every axis except pt, then rebinned to the same edges.
# NOTE(review): this sums over all entries of the 'systematic' axis, whereas h
# above keeps only 'nominal' — confirm that this is intended.
hData = output['photon_pt'].sum('lepFlavor').sum('systematic').sum('dataset').sum('category')
hData = hData.rebin("pt",hist.Bin("pt",h.axis("pt").label,photon_pt_edges))

# First plot is overlaid by sample group, second by photon category
plotWithRatio(h.sum('category'), hData, overlay='dataset', invertStack=True, binwnorm=1., xRange=[20,500], yRange=[5e-2,None], logY=True,leg='right')
plotWithRatio(h.sum('dataset'), hData, overlay='category', invertStack=True, binwnorm=1., xRange=[20,500], yRange=[5e-2,None], logY=True,leg='right')

# Accessing Arrays Interactively

Below is an example of loading a NanoAOD file interactively. This can be very useful for developing the code, and debugging any issues. Use this area to build your intuition for working with Coffea and awkward arrays!

In [None]:
import awkward as ak
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema

# Path of the NanoAOD test file to open, relative to the working directory
fname = "./TTGamma_1l.root"

# Create the factory for this file with the NanoAOD schema, then ask it for the events
factory = NanoEventsFactory.from_root(fname, schemaclass=NanoAODSchema)
events = factory.events()

Once you have opened the file, you can explore its contents using the 'fields' syntax

In [None]:
# List the field names available directly on `events`
events.fields

In [None]:
# List the field names available on the GenPart collection
events.GenPart.fields

There is also a docstring for each of these variables in NanoAOD, which you can access using '?':

In [None]:
# The trailing '?' is IPython help syntax: it shows the docstring for this field
events.Jet.rawFactor?

In [None]:
from coffea import hist, util

In [None]:
# Load the object stored in 'puLookup.coffea' using coffea's util.load.
# NOTE(review): the path is absolute (it starts at the filesystem root) — confirm
# that this is the intended location of the file.
puLookup = util.load('/ScaleFactors/puLookup.coffea')