In [8]:
import ROOT
from ROOT import RDataFrame, TCanvas, THStack
ROOT.EnableImplicitMT()
%jsroot on

In [9]:
import json


# process into "fileset" summarizing all info
def construct_fileset(n_files_max_per_sample = 1):
    with open ('ntuples.json') as f:
        file_info = json.load(f)
    fileset = {}
    for process in file_info.keys():
        if process == "data":
            continue  # skip data

        for variation in file_info[process].keys():
            if variation != 'nominal':
                continue
            file_list = file_info[process][variation]["files"]
            if n_files_max_per_sample != -1:
                file_list = file_list[:n_files_max_per_sample]  # use partial set of samples

            file_paths = [f["path"] for f in file_list]
            fileset.update({process: file_paths})
    return fileset
 

In [10]:
fileset = construct_fileset(1)
fileset

{'ttbar': ['https://xrootd-local.unl.edu:1094//store/user/AGC/datasets/RunIIFall15MiniAODv2/TT_TuneCUETP8M1_13TeV-powheg-pythia8/MINIAODSIM//PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext3-v1/00000/00DF0A73-17C2-E511-B086-E41D2D08DE30.root'],
 'single_top_s_chan': ['https://xrootd-local.unl.edu:1094//store/user/AGC/datasets/RunIIFall15MiniAODv2/ST_s-channel_4f_InclusiveDecays_13TeV-amcatnlo-pythia8/MINIAODSIM/PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/00000/0EB5E88C-FE0D-E611-915D-003048FFD76C.root'],
 'single_top_t_chan': ['https://xrootd-local.unl.edu:1094//store/user/AGC/datasets/RunIIFall15MiniAODv2/ST_t-channel_antitop_4f_inclusiveDecays_13TeV-powhegV2-madspin-pythia8_TuneCUETP8M1/MINIAODSIM/PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/00000/00004F9A-E3D2-E511-ABEC-0CC47A78A478.root'],
 'single_top_tW': ['https://xrootd-local.unl.edu:1094//store/user/AGC/datasets/RunIIFall15MiniAODv2/ST_tW_antitop_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/MINIAODSIM/PU25nsData2015v1

In [11]:
xsec_info = {
    "ttbar": 396.87 + 332.97, # nonallhad + allhad, keep same x-sec for all
    "single_top_s_chan": 2.0268 + 1.2676,
    "single_top_t_chan": (36.993 + 22.175)/0.252,  # scale from lepton filter to inclusive
    "single_top_tW": 37.936 + 37.906,
    "wjets": 61457 * 0.252,  # e/mu+nu final states
    "data": None
}
def get_histo(process, nbins=25, bin_low=50, bin_high=550):
    

    
    d = RDataFrame('events', fileset[process])

    x_sec = xsec_info[process]
    nevts_total = d.Count().GetValue()
    lumi = 3378 # /pb
    xsec_weight = x_sec * lumi / nevts_total
    
    
    d = d.Define('electron_pt_mask', 'electron_pt>25').Define('muon_pt_mask', 'muon_pt>25').Define('jet_pt_mask', 'jet_pt>25')\
         .Filter('Sum(electron_pt_mask) + Sum(muon_pt_mask) == 1')\
         .Filter('Sum(jet_pt_mask) >= 4')\
         .Filter('Sum(jet_btag[jet_pt_mask]>=0.5)>=2')            


    jet_p4 = d.Define("jet_p4", 
        "ROOT::VecOps::Construct<ROOT::Math::PxPyPzMVector>(jet_px[jet_pt_mask], jet_py[jet_pt_mask], jet_pz[jet_pt_mask], jet_mass[jet_pt_mask])"
    )

    trijet = jet_p4.Define('trijet', 
        'ROOT::VecOps::Combinations(jet_pt[jet_pt_mask],3)'
    ).Define('ntrijet', 'trijet[0].size()')

    trijet_p4 = trijet.Define('trijet_p4', 
                          'ROOT::VecOps::RVec<ROOT::Math::PxPyPzMVector> trijet_p4(ntrijet);'              +\
                          'for (int i = 0; i < ntrijet; ++i) {'                                            +\
                              'int j1 = trijet[0][i]; int j2 = trijet[1][i]; int j3 = trijet[2][i];'       +\
                              'trijet_p4[i] = jet_p4[j1] + jet_p4[j2] + jet_p4[j3];'                       +\
                          '}'                                                                              +\
                          'return trijet_p4;'                                                                                                                          
                         )

    #TODO  implement references
    trijet_pt = trijet_p4.Define('trijet_pt', 
            'return (ROOT::VecOps::Map(trijet_p4, [](ROOT::Math::PxPyPzMVector v) { return v.Pt(); }))'
                                )

    trijet_pt_btag = trijet_pt.Define('trijet_btag', 
                                      'ROOT::VecOps::RVec<bool> btag(ntrijet);'                                   +\
                                      'for (int i = 0; i < ntrijet; ++i) {'                                       +\
                                       'int j1 = trijet[0][i]; int j2 = trijet[1][i]; int j3 = trijet[2][i];'     +\
                                       'btag[i]=std::max({jet_btag[j1], jet_btag[j2], jet_btag[j3]})>0.5;'        +\
                                      '}'                                                                         +\
                                      'return btag;'
                                )

    trijet_mass=trijet_pt_btag.Define('trijet_mass',
                                      'double mass;'+\
                                      'double Pt = 0;'+\
                                      'double indx = 0;'+\
                                      'for (int i = 0; i < ntrijet; ++i) {'               +\
                                      '    if ((Pt < trijet_pt[i]) && (trijet_btag[i])) {'+\
                                      '        Pt = trijet_pt[i];'+\
                                      '        indx=i;'+\
                                      '    }'                                            +\
                                      '}'                                                +\
                                      'mass = trijet_p4[indx].M();'             +\
                                      'return mass;'
                                     )

    
    mass = trijet_mass.Define('weights', str(xsec_weight)).Histo1D(('h_'+process, process, nbins, bin_low, bin_high), 'trijet_mass', 'weights')
#     mass.Scale(xsec_weight)
    print(process +' histogram has been created')
    
    return mass
    

In [12]:
import json
with open('ntuples.json') as f:
    processes = list(json.load(f).keys())
processes.remove('data')
processes

['ttbar', 'single_top_s_chan', 'single_top_t_chan', 'single_top_tW', 'wjets']

In [13]:
ttbar = get_histo(processes[0])
ttbar.SetFillColor(ROOT.kOrange)
# st.Add(ttbar.GetPtr())
# ttbar.Draw('hist')

top_s_chan = get_histo(processes[1])
top_s_chan.SetFillColor(ROOT.kBlack)
# st.Add(top_s_chan.GetPtr())
# top_s_chan.Draw('same&hist')

top_tW = get_histo(processes[3])
top_tW.SetFillColor(ROOT.kBlue)
# st.Add(top_tW.GetPtr())
# top_tW.Draw('same&hist')

top_t_chan = get_histo(processes[2])
top_t_chan.SetFillColor(ROOT.kViolet)
# st.Add(top_t_chan.GetPtr())
# top_s_chan.Draw('same&hist')

wjets = get_histo(processes[4])
wjets.SetFillColor(ROOT.kRed)
# st.Add(wjets.GetPtr())
# wjets.Draw('same&hist')

# c.Draw()


ttbar histogram has been created
single_top_s_chan histogram has been created
single_top_tW histogram has been created
single_top_t_chan histogram has been created
wjets histogram has been created


In [14]:
c = TCanvas('c', 'c', 600, 500)
st = THStack()
st.Add(top_t_chan.GetPtr())
st.Add(wjets.GetPtr())
st.Add(top_tW.GetPtr())
st.Add(top_s_chan.GetPtr())
st.Add(ttbar.GetPtr())
st.Draw('stack hist')
st.SetTitle('RDF t#bar{t}-analysis')
c.BuildLegend(0.7, 0.6, 0.9, 0.9)
c.Draw()
c.SaveAs('rdf_analysis.pdf')

Info in <TCanvas::Print>: pdf file rdf_analysis.pdf has been created


In [None]:
tterrorbar = ttbar
c1 = TCanvas()
tterrorbar.Draw('hist')
tterrorbar.Draw('same&E0')
tterrorbar.GetXaxis().SetTitle("#m_{bjj} [Gev]")
tterrorbar.SetTitle("Jet energy variations");
tterrorbar.SetFillColor(ROOT.kWhite)
c1.Draw()
c1.SaveAs('rdf_jetvar.png')

In [None]:

c = TCanvas('c', 'c', 600, 500)
st = THStack()
st.Add(top_t_chan.GetPtr())
st.Add(wjets.GetPtr())
st.Add(top_tW.GetPtr())
st.Add(top_s_chan.GetPtr())
st.Add(ttbar.GetPtr())
st.Draw('stack hist')
st.SetTitle('RDF t#bar{t}-analysis')
c.BuildLegend(0.7, 0.6, 0.9, 0.9)
c.Draw()
c.SaveAs('rdf_analysis.pdf')

In [21]:
import numpy as np
def get_values(histo, nbins):
    return np.array([histo.GetBinContent(i+1) for i in range(nbins)])

In [22]:
ttbar_values = get_values(ttbar, nbins=25)
s_chan_values = get_values(top_s_chan, nbins=25)
t_chan_values = get_values(top_t_chan, nbins=25)
tW_values = get_values(top_tW, nbins=25)
wjets_values = get_values(wjets, nbins=25)
rdf_values = np.array([ttbar_values, s_chan_values, t_chan_values,tW_values, wjets_values])


In [None]:
coffea_values = numpy.load('tt-s-t-tW-wjets.npy')

In [None]:
def compare(number, rdf_values, coffea_values, precision):
    rdf_values = rdf_values[number][1:-1]
    coffea_values = coffea_values[number]
    rdf_values = numpy.round(rdf_values, precision)
    coffea_values = numpy.round(coffea_values, precision)
    mask = rdf_values == coffea_values

    return not (False in mask)
for i in range(5):
    print(compare(i, rdf_values, coffea_values, precision=5), end=' ')

In [15]:
h2 = ROOT.TH2F("h2", "h2 title", 25, 50, 550, len(processes), 0, len(processes));


In [16]:
for i in range(0, len(processes)):
    h2.GetYaxis().SetBinLabel(i+1,processes[i])


In [23]:
processes

['ttbar', 'single_top_s_chan', 'single_top_t_chan', 'single_top_tW', 'wjets']

In [24]:
for y in range(len(processes)):
    vals = rdf_values[y]
    for x in range(25):
        h2.SetBinContent(x+1, y+1, vals[x])

In [25]:
ttbar_pr = h2.ProjectionX('ttbar')


In [31]:
c = TCanvas()
ttbar_pr.Draw()
ttbar.Draw('same')
c.Draw()

In [None]:
projections = [h2.ProjectionX(process) for process in processes]


In [None]:

projections = numpy.array([get_values(projection, 1, 25) for projection in projections])

In [None]:
for num in range(5):
    print(compare(num, rdf_values, projections, 3))

In [None]:
print(projections[0])

In [None]:
print(rdf_values[0][1:-1])

In [28]:
print(np.around(get_values(ttbar_pr, 25), 8))

[ 2840.74289274  9569.91792679 21442.68762207 38584.13357925
 60176.55635071 72792.68106842 74137.85597229 45559.1844635
 38354.85467148 29948.80326271 25275.47347641 19747.06269264
 16488.58906555 14823.32815742 12005.34440422 10848.45878983
  9009.39957905 10224.21515894  8175.81281948  5023.47009134
  4415.05417633  6413.97860384  4594.96604681  4412.15584469
  5577.69049311]


In [30]:
print(np.around(get_values(ttbar, 25), 8))

[ 2417.52081508  8137.02128002 18750.52729743 31840.51805223
 49470.7308256  66393.37653114 63327.25257055 39328.93618674
 31545.69844064 25118.63090787 21993.54302497 17158.50139481
 12795.17114321 11380.03700756  8785.62442552  9257.33580408
  7665.30990146  7370.49028987  5778.46438726  4009.54671769
  3419.9074945   4009.54671769  3596.79926146  3655.76318377
  3655.76318377]


In [35]:
np.save(arr=rdf_values, file='rdf_values')

In [None]:
clone