In [1]:
import ROOT

OBJ: TStyle	ildStyle	ILD Style : 0 at: 0x4fc3220
Welcome to JupyROOT 6.28/10


In [2]:
# Switch canvas rendering in this notebook to JSROOT
%jsroot on

In [3]:
# Enable ROOT's implicit multi-threading with 6 threads (hard-coded; adjust to the machine)
ROOT.EnableImplicitMT(6)
# Histograms created from here on store the sum of squared weights by default
ROOT.TH1.SetDefaultSumw2()

In [4]:
# Turn the statistics box on for the ILD style.
# NOTE(review): `ildStyle` is not created in this notebook; the "OBJ: TStyle ildStyle" line
# printed on `import ROOT` suggests it comes from a rootlogon script -- TODO confirm.
ROOT.ildStyle.SetOptStat(1)

In [5]:
%%cpp
// Pull the RVec helpers into the interpreter's global scope.
// NOTE(review): presumably for the unqualified Take/ArgMax/Range calls used in the
// Define expressions further down -- TODO confirm whether RDataFrame needs this.
using namespace ROOT::VecOps;

In [6]:
# Input samples; the "eL/eR" and "pL/pR" tags in the file names are parsed later
# to derive the per-sample beam polarisation, cross section and event count.
input_files = (
    "data/truejet/test/sw_sl/eLpL.0_truejet.edm4hep.root",
    "data/truejet/test/sw_sl/eLpR.0-6_truejet.edm4hep.root",
    "data/truejet/test/sw_sl/eRpR.0_truejet.edm4hep.root",
    "data/truejet/test/sw_sl/eRpL.0_truejet.edm4hep.root",
)
df = ROOT.RDataFrame("events", input_files)
# Single-sample alternatives for quick checks:
# df = ROOT.RDataFrame("events", "data/truejet/test/sw_sl/eLpL_truejet.edm4hep.root")
# df = ROOT.RDataFrame("events", "data/truejet/test/sw_sl/eLpR_truejet.edm4hep.root")

In [7]:
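# df = df.Range(100)  # NOTE: Range() is not supported while ROOT.EnableImplicitMT() is active; disable MT before enabling this line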

In [8]:
%%cpp
// Per-sample metadata helpers for RDataFrame::DefinePerSample.
// Each one inspects the sample id string and keys on the polarisation tag
// ("eL"/"eR", "pL"/"pR") it contains. The `slot` argument is unused.
// Debug variant returning the raw id:
//auto bPol_e = [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) { return id.AsString(); };

// Electron beam helicity: -1 for "eL" samples, +1 otherwise.
auto bPol_e = [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
    if (id.Contains("eL")) return -1;
    return 1;
};

// Positron beam helicity: -1 for "pL" samples, +1 otherwise.
auto bPol_p = [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
    if (id.Contains("pL")) return -1;
    return 1;
};

// Cross section of the sample (stored downstream as "xsec_fb").
// Values were extracted manually via dumpevent :( -- loading them from json is
// not possible yet because the ROOT version in the stack is too old.
auto xsec = [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
    if (id.Contains("eLpL")) return 190.531;
    if (id.Contains("eLpR")) return 10264.;
    if (id.Contains("eRpR")) return 190.637;
    if (id.Contains("eRpL")) return 86.6962;
    return 0.; // should not happen...
};

// Number of generated events of the sample (manually extracted as well, same reason).
// NOTE: the 0 fallback would make the downstream weight 0/0; it should never be hit.
auto nevents = [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
    if (id.Contains("eLpL")) return 100000;
    if (id.Contains("eLpR")) return 700000;
    if (id.Contains("eRpR")) return 100000;
    if (id.Contains("eRpL")) return 100000;
    return 0; // should not happen...
};

In [9]:
# Attach the per-sample metadata columns; each one is evaluated by the
# C++ helper of the given name declared in the %%cpp cell above.
for column, helper in (
    ("beamPol_e", "bPol_e"),
    ("beamPol_p", "bPol_p"),
    ("xsec_fb", "xsec"),
    ("n_events", "nevents"),
):
    df = df.DefinePerSample(column, f"{helper}(rdfslot_, rdfsampleinfo_)")

In [10]:
# Start from the InitialColourNeutrals (ICN): they have energy and momentum set
# (and mass -- hopefully consistent with the former).


def _icn_lvec_expr(idx_column):
    """Return the C++ expression building the PxPyPzE four-vector of the ICN at `idx_column`."""
    icn = "InitialColourNeutrals"
    return (
        "ROOT::Math::PxPyPzEVector("
        f"{icn}.momentum.x[{idx_column}], "
        f"{icn}.momentum.y[{idx_column}], "
        f"{icn}.momentum.z[{idx_column}], "
        f"{icn}.energy[{idx_column}])"
    )


df = (
    df
    # Rely on the relevant particle ID being the first one of each ICN:
    # per ICN, the index of its first entry in the ICN particleIDs collection.
    .Define("pid_idx", "InitialColourNeutrals.particleIDs_begin")
    # Masks selecting the qq and the lnu ICN (the two W's): look up the PID type at
    # pid_idx and compare with 1 (quarks) / 2 (leptons). One entry per ICN, so the
    # masks can index any InitialColourNeutrals.<field>.
    .Define("qq_ICN_mask", "Take(InitialColourNeutrals_particleIDs.type, pid_idx) == 1")
    .Define("lnu_ICN_mask", "Take(InitialColourNeutrals_particleIDs.type, pid_idx) == 2")
    # Invariant masses of the qqbar and lnu systems (still RVecs, because mask-indexed).
    .Define("m_qq", "InitialColourNeutrals.mass[qq_ICN_mask]")
    .Define("m_lnu", "InitialColourNeutrals.mass[lnu_ICN_mask]")
    # There is only one lnu ICN, so a plain index is more convenient than the mask:
    # RVec[mask] -> RVec, but RVec[idx] -> single element.
    .Define("lnu_ICN_idx", "ArgMax(lnu_ICN_mask)")
    .Define("qq_ICN_idx", "ArgMax(qq_ICN_mask)")
    .Define("lnu_ICN_lvec", _icn_lvec_expr("lnu_ICN_idx"))
    .Define("qq_ICN_lvec", _icn_lvec_expr("qq_ICN_idx"))
    # e, nu, q, qbar (+ gluon jets) could be identified via the true jets belonging to
    # each ICN. The separate quarks are only needed for quark-charge studies, so for
    # now only the lnu ICN is resolved: indices into the TrueJets collection of the
    # true jets of the lnu ICN (expected to be two: l and nu).
    .Define("lnu_TJ_idx", "Range(InitialColourNeutrals.particles_begin[lnu_ICN_idx], InitialColourNeutrals.particles_end[lnu_ICN_idx])")
    # TrueJets and TrueJets_particleIDs are assumed to have the same length and order.
    # Compare the PDG of each lnu true jet with e / nu_e. The masks have one entry per
    # element of lnu_TJ_idx (not per TrueJet).
    # FIXME: need to change hardcoded comparison for other l than e
    .Define("l_TJ_idxs_mask", "abs(Take(TrueJets_particleIDs.PDG, lnu_TJ_idx)) == 11")
    .Define("nu_TJ_idxs_mask", "abs(Take(TrueJets_particleIDs.PDG, lnu_TJ_idx)) == 12")
    # Exactly one l and one nu are expected, so convert mask -> single TrueJets index:
    # the only 1 in the mask is found with ArgMax and mapped back through lnu_TJ_idx.
    .Define("l_TJ_idx", "lnu_TJ_idx[ArgMax(l_TJ_idxs_mask)]")
    .Define("nu_TJ_idx", "lnu_TJ_idx[ArgMax(nu_TJ_idxs_mask)]")
)


In [11]:
# FIXME: debug stuff
# df.Range(17).Display(("lnu_ICN_idx", "lnu_TJ_idx", "l_TJ_idxs_mask", "nu_TJ_idxs_mask", "l_TJ_idx", "nu_TJ_idx"), 17, 500).Print()
# foo = df.Range(1).Take["RVec<std::size_t>"]("lnu_TJ_idx").GetValue()

In [12]:
# FIXME: debug stuff
# print(foo)
# print(foo[0])

In [13]:
# None of the kinematics of the true jets themselves are set (maybe just because this is
# a generator-level sample), so the l and nu four-vectors are taken from the MCParticles
# reached through the TrueJetMCParticleLink (rec = TrueJets, sim = MCParticles).


def _define_status1_mc_particle(frame, prefix, abs_pdg):
    """Define the `<prefix>_MC_*` columns for the true jet `<prefix>_TJ_idx`.

    Of all MCParticles linked to that true jet (the particle in various stages and
    possibly FSR gammas) the one with generatorStatus 1 and |PDG| == `abs_pdg` is
    chosen, i.e. the one also used in the detector simulation.
    TODO: this is a choice! Figure out if it is the one we want to make.
    XXX: it should also be possible to use the Initial/FinalElementonLink instead.

    NOTE: ArgMax returns 0 if no entry passes the mask, so the first linked MCParticle
    is then picked silently. This presumably explains why a wrong PDG code (11 instead
    of 12 for the nu) still "gave a result".

    Returns the frame with the new columns attached.
    """
    lvec_expr = (
        "ROOT::Math::PxPyPzMVector("
        f"MCParticles.momentum.x[{prefix}_MC_idx], "
        f"MCParticles.momentum.y[{prefix}_MC_idx], "
        f"MCParticles.momentum.z[{prefix}_MC_idx], "
        f"MCParticles.mass[{prefix}_MC_idx])"
    )
    return (
        frame
        # links whose rec side is our true jet
        .Define(f"{prefix}_TJ2MC_mask", f"_TrueJetMCParticleLink_rec.index == {prefix}_TJ_idx")
        # MCParticle indices on the sim side of those links
        .Define(f"{prefix}_MC_idxs", f"_TrueJetMCParticleLink_sim.index[{prefix}_TJ2MC_mask]")
        # keep only the status-1 particle of the requested species
        .Define(
            f"{prefix}_MC_idxs_mask",
            f"Take(MCParticles.generatorStatus, {prefix}_MC_idxs) == 1"
            f" && abs(Take(MCParticles.PDG, {prefix}_MC_idxs)) == {abs_pdg}",
        )
        # mask -> single index; probably not the most efficient way, but no manual loop :)
        .Define(f"{prefix}_MC_idx", f"{prefix}_MC_idxs[ArgMax({prefix}_MC_idxs_mask)]")
        .Define(f"{prefix}_MC_lvec", lvec_expr)
        .Define(f"{prefix}_MC_pdg", f"MCParticles.PDG[{prefix}_MC_idx]")
    )


df = _define_status1_mc_particle(df, "l", 11)
df = _define_status1_mc_particle(df, "nu", 12)
# FIXME: for debugging
# df = df.Define("multi_qq", "Sum(qq_mask) > 1")

In [14]:

# Scalar kinematics derived from the l / nu four-vectors
df = (
    df
    .Define("l_MC_lvec_e", "l_MC_lvec.energy()")
    .Define("l_MC_lvec_theta", "l_MC_lvec.theta()")
    .Define("l_MC_lvec_cosTheta", "cos(l_MC_lvec_theta)")
    .Define("nu_MC_lvec_e", "nu_MC_lvec.energy()")
)

In [15]:
# Event weight calculation.
#
# Target beam polarisations as signed fractions in [-1, +1]. The sign follows the
# beamPol_e / beamPol_p columns (-1 for the "eL"/"pL" samples, +1 otherwise), so a
# negative target enhances the "L" samples. 0.0 means unpolarised beams.
ePol_target = 0.0
pPol_target = 0.0

lumi_target = 500. # fb^{-1}

# polweight = (1 + epol*epol_target) * (1 + ppol*ppol_target) / 4
# weight    = polweight * xsection * lumi_target / n_events
#
# Assumes one entry per process-polarisation combination in the per-sample tables:
# n_events must be the *sum* over all files belonging to the same combination
# (that is why eLpR, which merges several files, has a larger n_events).
# FIXME: with several separately listed files per polarisation, the event counts
# (and the cross sections) would have to be added up beforehand, e.g. via
# df.Filter(f"beamPol_e == {ePol} && beamPol_p == {pPol}").Count().
#
# The polarisation targets used to be ignored (hard-coded factor 1/4); they now enter
# the weight. For the unpolarised targets set above the result is unchanged.
df = df.Define(
    "weight",
    f"(1. + beamPol_e * {ePol_target}) * (1. + beamPol_p * {pPol_target}) / 4."
    f" * xsec_fb * {lumi_target} / (double) n_events",
)

In [16]:
# Invariant-mass histograms of the qq and lnu systems (unweighted, all samples)
h_m_qq = df.Histo1D("m_qq")
h_m_lnu = df.Histo1D("m_lnu")

# Same for the eLpL sample only. beamPol_e / beamPol_p are -1 for the "eL" / "pL"
# samples, so eLpL corresponds to (-1, -1); the previous filter (beamPol_p == 1)
# actually selected eLpR.
h_m_lnu_eLpL = df.Filter("beamPol_e == -1 && beamPol_p == -1").Histo1D("m_lnu")

In [17]:
# Weighted lepton / neutrino energy spectra, all with the same binning:
# 300 bins from 0 to 150 GeV
energy_binning = (300, 0., 150.)

h_l_e = df.Histo1D(
    ("", ";E_{e} [GeV]", *energy_binning),
    "l_MC_lvec_e", "weight",
)
h_nu_e = df.Histo1D(
    ("", ";E_{#nu} [GeV]", *energy_binning),
    "nu_MC_lvec_e", "weight",
)
h_2d_lnu_e = df.Histo2D(
    ("", ";E_{e} [GeV];E_{#nu} [GeV]", *energy_binning, *energy_binning),
    "l_MC_lvec_e", "nu_MC_lvec_e", "weight",
)

# Optional index / angle diagnostics:
# h_l_idx = df.Histo1D("l_MC_idx")
# h_nu_idx = df.Histo1D("nu_MC_idx")
# h_2d_lnu_idx = df.Histo2D(("", ";e MC idx;#nu MC idx", 50, 0., 50., 50, 0., 50.), "l_MC_idx", "nu_MC_idx")
# h_2d_lnu_TJ_idx = df.Histo2D(("", ";e TJ idx;#nu TJ idx", 50, 0., 50., 50, 0., 50.), "l_TJ_idx", "nu_TJ_idx")

# h_l_theta = df.Histo1D("l_MC_lvec_theta")
# h_l_cosTheta = df.Histo1D("l_MC_lvec_cosTheta")

# Sanity checks of the per-sample bookkeeping columns (unweighted)
h_n_events = df.Histo1D("n_events")
h_weight = df.Histo1D("weight")
h_xsec_fb = df.Histo1D("xsec_fb")

In [18]:
def _draw_on_new_canvas(hist, *draw_args):
    """Create a fresh TCanvas, draw `hist` on it with `draw_args` and show the canvas.

    The canvas is returned; the caller keeps a reference to it under its own name.
    """
    canvas = ROOT.TCanvas()
    hist.Draw(*draw_args)
    canvas.Draw()
    return canvas


# Invariant masses
c_m_qq = _draw_on_new_canvas(h_m_qq)
c_m_lnu = _draw_on_new_canvas(h_m_lnu)
c_m_lnu_eLpL = _draw_on_new_canvas(h_m_lnu_eLpL)

# Lepton / neutrino energies
c_l_e = _draw_on_new_canvas(h_l_e)
c_nu_e = _draw_on_new_canvas(h_nu_e)
c_2d_lnu_e = _draw_on_new_canvas(h_2d_lnu_e, "colz0")

# Optional diagnostics (need the corresponding histograms to be booked first):
# c_l_idx = _draw_on_new_canvas(h_l_idx)
# c_nu_idx = _draw_on_new_canvas(h_nu_idx)
# c_2d_lnu_idx = _draw_on_new_canvas(h_2d_lnu_idx, "colz0")
# c_2d_lnu_TJ_idx = _draw_on_new_canvas(h_2d_lnu_TJ_idx, "colz0")
# c_l_theta = _draw_on_new_canvas(h_l_theta)
# c_l_cosTheta = _draw_on_new_canvas(h_l_cosTheta)

# Per-sample bookkeeping columns
c_n_events = _draw_on_new_canvas(h_n_events, "hist")
c_weight = _draw_on_new_canvas(h_weight, "hist")
c_xsec_fb = _draw_on_new_canvas(h_xsec_fb, "hist")

In [19]:
# FIXME: for debug
# multi_qq_evts = df.Filter("l_MC_idx > 20").Take["ULong64_t"]("rdfentry_").GetValue()
# print(multi_qq_evts)

In [20]:
# Write the reduced ("nano") ntuple.
# WARNING: l_MC_lvec + nu_MC_lvec != lnu_ICN_lvec, because the *final* elementon was
# used for l/nu_MC, not the initial one!
nano_columns = (
    "beamPol_e", "beamPol_p", "xsec_fb", "weight",
    "l_MC_lvec", "nu_MC_lvec", "l_MC_pdg", "nu_MC_pdg",
    "lnu_ICN_lvec", "qq_ICN_lvec",
)
df.Snapshot("events", "data/truejet/test/sw_sl/nano.root", nano_columns)

<cppyy.gbl.ROOT.RDF.RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager,void> > object at 0x11907190>