This script loads coffea output files and transforms the coffea histograms to a set of 1D ROOT histograms for use with the ROOT template fitting tool, `TFractionFitter`.

In [1]:
from coffea import hist, util
import numpy as np
import uproot

from ttgamma.utils.plotting import RebinHist, SetRangeHist

Since we ran our condor jobs on some of the datasets separately, here we merge the per-sample MC outputs into a single MC output. The data were processed into a single output, which we simply load.

In [2]:
nJets = 4

# One coffea output per MC sample group, listed in the order they are merged.
mcOutputFiles = [
    f'outputMCOther_ttgamma_condorFull_{nJets}jet.coffea',
    f'outputMCSingletop_ttgamma_condorFull_{nJets}jet.coffea',
    f'outputMCTTbar1l_ttgamma_condorFull_{nJets}jet.coffea',
    f'outputMCTTbar2l_ttgamma_condorFull_{nJets}jet.coffea',
    f'outputMCTTGamma_ttgamma_condorFull_{nJets}jet.coffea',
    f'outputMCWJets_ttgamma_condorFull_{nJets}jet.coffea',
    f'outputMCZJets_ttgamma_condorFull_{nJets}jet.coffea',
]

# Start from the first output and add each remaining output into it.
outputMC = util.load(mcOutputFiles[0])
for mcOutputFile in mcOutputFiles[1:]:
    outputMC.add(util.load(mcOutputFile))

# The data output is loaded from a single file, so there is nothing to merge.
outputData = util.load(f'outputData_ttgamma_condorFull_{nJets}jet.coffea')

# Show the merged MC output so we can see which histograms are available.
outputMC

{'photon_pt': <Hist (dataset,pt,category,lepFlavor,systematic) instance at 0x7f281bebd710>,
 'photon_eta': <Hist (dataset,eta,category,lepFlavor,systematic) instance at 0x7f281bd816d8>,
 'photon_chIso': <Hist (dataset,chIso,category,lepFlavor,systematic) instance at 0x7f281bbd5d30>,
 'photon_lepton_mass': <Hist (dataset,mass,category,lepFlavor,systematic) instance at 0x7f281bab2438>,
 'photon_lepton_mass_3j0t': <Hist (dataset,mass,category,lepFlavor,systematic) instance at 0x7f281b985a90>,
 'M3': <Hist (dataset,M3,category,lepFlavor,systematic) instance at 0x7f281b7e2080>,
 'M3Presel': <Hist (dataset,M3,lepFlavor,systematic) instance at 0x7f281b69e6a0>,
 'EventCount': value_accumulator(int, 152664130)}

If we look at the output above, we can see which histograms the processor produced.  Each histogram is multidimensional and needs to be reduced to a simple one-dimensional ROOT `TH1` before fitting.  We'll need to merge different datasets into the various signal and background categories, which differ from fit to fit. We'll also handle merging the lepton flavor categories and making the different templates for systematic variations.  Below we enumerate the MC datasets that were processed in making the `M3` histogram.  You can switch out the axis name for any of the other axes in the histogram (each name listed in `<Hist (dataset,M3,category,lepFlavor,systematic) instance at ...>`) to see what bins are filled along each axis.

In [3]:
# List the identifiers found along the 'dataset' axis of the M3 histogram.
outputMC['M3'].identifiers('dataset')

[<StringBin (DYjetsM10to50) instance at 0x7f281cf62e48>,
 <StringBin (DYjetsM50) instance at 0x7f2850da8ba8>,
 <StringBin (GJets_HT100To200) instance at 0x7f281bec21d0>,
 <StringBin (GJets_HT200To400) instance at 0x7f281bec2128>,
 <StringBin (GJets_HT400To600) instance at 0x7f281bec2160>,
 <StringBin (GJets_HT40To100) instance at 0x7f281bec2208>,
 <StringBin (GJets_HT600ToInf) instance at 0x7f281bec2198>,
 <StringBin (QCD_Pt1000toInf_Mu) instance at 0x7f281bebdcf8>,
 <StringBin (QCD_Pt120to170_Ele) instance at 0x7f281bec2048>,
 <StringBin (QCD_Pt120to170_Mu) instance at 0x7f281bebde48>,
 <StringBin (QCD_Pt170to300_Ele) instance at 0x7f281bebdf60>,
 <StringBin (QCD_Pt170to300_Mu) instance at 0x7f281bebddd8>,
 <StringBin (QCD_Pt20to30_Ele) instance at 0x7f281bec20f0>,
 <StringBin (QCD_Pt20to30_Mu) instance at 0x7f281bebdeb8>,
 <StringBin (QCD_Pt300to470_Mu) instance at 0x7f281bebde10>,
 <StringBin (QCD_Pt300toInf_Ele) instance at 0x7f281bebdf98>,
 <StringBin (QCD_Pt30to50_Ele) instance a

Below, we first sum up all lepton flavors and photon gen categories for MC, then we group the M3 distributions into $t\bar{t}$ and non-$t\bar{t}$ categories, then we rebin the `M3` variable to be slightly coarser (merging every 5 bins), leaving a 3D histogram with `dataset,M3,systematic` axes.  For data, we only have to rebin the `M3` axis, and sum the rest (since each only has one entry).  Then we open a new ROOT output file and loop through the dataset and systematic axes, saving a 1D projection histogram for each.

In [4]:
# Sum over the lepton flavor and gen-matching photon category axes,
# leaving only the dataset, M3 and systematic axes.
h = outputMC['M3'].sum('lepFlavor', 'category')

# Map every processed MC dataset onto one of the two M3 fit templates.
# Each name must be followed by a comma: adjacent string literals are silently
# concatenated by Python, which would yield a single dataset name that does not
# exist and drop both samples from the template.
groupingTop = {
    'TopPair': [
        'TTGamma_Dilepton', 'TTGamma_SingleLept', 'TTGamma_Hadronic',
        'TTbarPowheg_Dilepton', 'TTbarPowheg_Semilept', 'TTbarPowheg_Hadronic',
    ],
    'NonTop': [
        'W1jets', 'W2jets', 'W3jets', 'W4jets',
        'DYjetsM10to50', 'DYjetsM50',
        'ST_s_channel', 'ST_tW_channel', 'ST_tbarW_channel', 'ST_tbar_channel', 'ST_t_channel',
        'WGamma_01J_5f',
        'ZGamma_01J_5f_lowMass',
        'TTWtoLNu', 'TTWtoQQ', 'TTZtoLL',
        'GJets_HT40To100', 'GJets_HT100To200', 'GJets_HT200To400', 'GJets_HT400To600', 'GJets_HT600ToInf',
        'QCD_Pt20to30_Ele', 'QCD_Pt30to50_Ele', 'QCD_Pt50to80_Ele', 'QCD_Pt80to120_Ele',
        'QCD_Pt120to170_Ele', 'QCD_Pt170to300_Ele', 'QCD_Pt300toInf_Ele',
        'QCD_Pt20to30_Mu', 'QCD_Pt30to50_Mu', 'QCD_Pt50to80_Mu', 'QCD_Pt80to120_Mu',
        'QCD_Pt120to170_Mu', 'QCD_Pt170to300_Mu', 'QCD_Pt300to470_Mu', 'QCD_Pt470to600_Mu',
        'QCD_Pt600to800_Mu', 'QCD_Pt800to1000_Mu', 'QCD_Pt1000toInf_Mu',
    ],
}

# Replace the per-sample dataset axis with the grouped TopPair / NonTop axis.
h = h.group('dataset', hist.Cat('dataset', 'Samples', sorting='placement'), groupingTop)

# M3 settings shared by the MC templates and the data histogram.
m3RebinFactor = 5
m3Window = slice(50., 550.)

# MC: rebin M3, then restrict its range (first index is dataset, second is M3).
h = RebinHist(h, "M3", m3RebinFactor)[:, m3Window]

# Data: sum every axis except M3, then apply the same rebinning and range.
hData = outputData['M3'].sum('lepFlavor', 'dataset', 'category', 'systematic')
hData = RebinHist(hData, "M3", m3RebinFactor)[m3Window]

!mkdir -p RootFiles
outputFile = uproot.recreate("RootFiles/M3_Output.root")

outputFile['dataObs'] = hist.export1d(hData)

datasets = h.axis('dataset').identifiers()
systematics = h.axis('systematic').identifiers()
for _dataset in datasets:
    for _systematic in systematics:
        outputFile[f'{_dataset}_{_systematic}'] = hist.export1d(h.integrate('dataset',_dataset).integrate('systematic',_systematic))

outputFile.close()



Below we get the photon charged hadron isolation histogram, and sum all lepton flavors and datasets (since we don't care here what dataset the photon came from) and then group them into isolated and nonprompt categories based on the gen-matching `category` axis.  Lastly, we rebin the `chIso` axis to be slightly coarser to help guard against low statistics when fitting, leaving a 3D histogram with `category,chIso,systematic` axes.  We are going to fit the data to the sum of the categories, so we simply sum all axes except `chIso` for the data and rebin it to match the MC binning.  Then, as before, we save a 1D projection for each category and systematic combination.

In [5]:
# MC: merge all lepton flavors and all datasets.
h = outputMC['photon_chIso'].sum('lepFlavor', 'dataset')

# Merge ranges of the gen-matching category axis into the two fit templates.
groupingPho = dict(Isolated=slice(1, 3), NonPrompt=slice(3, 5))
categoryAxis = hist.Cat('category', 'Samples', sorting='placement')
h = h.group('category', categoryAxis, groupingPho)

# Coarser chIso bin edges, applied to both MC and data.
chIso_newbins = np.array([0, 1.141, 2.5, 5, 10, 15, 20])
mcChIsoAxis = hist.Bin("chIso", h.axis("chIso").label, chIso_newbins)
h = h.rebin("chIso", mcChIsoAxis)

# Data: sum every axis except chIso, then rebin to match the MC binning.
hData = outputData['photon_chIso'].sum('lepFlavor', 'dataset')
for _axisName in ('category', 'systematic'):
    hData = hData.sum(_axisName)
dataChIsoAxis = hist.Bin("chIso", hData.axis("chIso").label, chIso_newbins)
hData = hData.rebin("chIso", dataChIsoAxis)

# One 1D template is written per (photon category, systematic) combination.
categories = h.axis('category').identifiers()
systematics = h.axis('systematic').identifiers()

# Open the output in a `with` block so the file is closed even if one of the
# exports below raises part-way through.
with uproot.recreate("RootFiles/Isolation_Output.root") as outputFile:
    outputFile['dataObs'] = hist.export1d(hData)

    for _category in categories:
        for _systematic in systematics:
            # Integrate out the category and systematic axes to get a 1D chIso histogram.
            hTemplate = h.integrate('category', _category).integrate('systematic', _systematic)
            outputFile[f'{_category}_{_systematic}'] = hist.export1d(hTemplate)

For the $e\gamma$ mass histograms, we regroup the gen-matching photon category into just three categories: genuine, mis-ID electrons, and non-prompt photons, then regroup the different datasets into signal and background samples, then rebin the mass to be slightly coarser and restrict the range.  Then we create 1D projections for each flavor, sample, and systematic combination.

In [6]:
h = outputMC['photon_lepton_mass_3j0t']

# Merge ranges of the gen-matching category axis into three photon categories.
groupingPho = dict(
    Genuine=slice(1, 2),
    MisIDele=slice(2, 3),
    NonPrompt=slice(3, 5),
)
categoryAxis = hist.Cat('category', 'Samples', sorting='placement')
h = h.group('category', categoryAxis, groupingPho)

# Regroup the MC datasets into the WGamma, ZGamma and Other samples.
# NOTE(review): unlike the M3 grouping, "Other" lists no GJets_* or QCD_* samples,
# so those datasets are left out of the grouped histogram -- confirm this is intended.
groupingDataset = {
    'WGamma': ['WGamma_01J_5f'],
    "ZGamma": ['ZGamma_01J_5f_lowMass'],
    "Other": [
        'TTGamma_Dilepton', 'TTGamma_SingleLept', 'TTGamma_Hadronic',
        'TTbarPowheg_Dilepton', 'TTbarPowheg_Semilept', 'TTbarPowheg_Hadronic',
        'W1jets', 'W2jets', 'W3jets', 'W4jets',
        'DYjetsM50', 'DYjetsM10to50',
        'ST_s_channel', 'ST_tW_channel', 'ST_tbarW_channel', 'ST_tbar_channel', 'ST_t_channel',
        'TTWtoLNu', 'TTWtoQQ', 'TTZtoLL',
    ],
}
datasetAxis = hist.Cat('dataset', 'Samples', sorting='placement')
h = h.group('dataset', datasetAxis, groupingDataset)

# Rebin the mass axis, then restrict its range; mass is the 3rd axis.
h = RebinHist(h, "mass", 5)
h = h[:, :, 40.:200.]

# Data: sum the dataset, category and systematic axes one at a time,
# then apply the same rebinning and mass range.
hData = outputData['photon_lepton_mass_3j0t']
for _axisName in ('dataset', 'category', 'systematic'):
    hData = hData.sum(_axisName)
hData = SetRangeHist(RebinHist(hData, "mass", 5), "mass", 40, 200)


systematics = h.axis('systematic').identifiers()

# Write one ROOT file per lepton flavor.
for _lepton in ['electron', 'muon']:
    # Build the per-flavor projections before opening the file, so a failure here
    # does not leave behind a freshly truncated output file.
    hDataLepton = hData.integrate("lepFlavor", _lepton)
    # Mis-ID electron template: summed over all dataset groups.
    hMisID = h.integrate("category", "MisIDele").sum("dataset").integrate("lepFlavor", _lepton)
    # Genuine and non-prompt photons combined, kept separate per dataset group.
    hOther = h.integrate("category", ["Genuine", "NonPrompt"]).integrate("lepFlavor", _lepton)
    datasets = hOther.axis('dataset').identifiers()

    # The `with` block closes the file even if one of the exports raises.
    with uproot.recreate(f"RootFiles/MisID_Output_{_lepton}.root") as outputFile:
        outputFile["dataObs"] = hist.export1d(hDataLepton)

        for _systematic in systematics:
            outputFile[f'MisIDele_{_systematic}'] = hist.export1d(hMisID.integrate('systematic', _systematic))
            for _dataset in datasets:
                hTemplate = hOther.integrate("dataset", _dataset).integrate('systematic', _systematic)
                outputFile[f'{_dataset}_{_systematic}'] = hist.export1d(hTemplate)