In [1]:
import os

import awkward as ak
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import uproot

# File locations

In [2]:
# Root directory holding the input samples. Set the ATLAS_DATA_DIR environment
# variable to point the notebook at another location; when it is unset the
# original hard-coded directory is used, so existing setups keep working.
datadir = os.environ.get('ATLAS_DATA_DIR', '/home/abelo/data')
# ROOT file that every later cell reads from.
filepath = datadir + '/atlas/mc_361106.Zee.1largeRjet1lep.root'

# Tree listing

In [3]:
# Open the ROOT file, pick its 'mini' tree and list the tree's branches.
# The file handle is kept in its own variable; later cells read from `tree`.
root_file = uproot.open(filepath)
tree = root_file['mini']
tree.show()

name                 | typename                 | interpretation                
---------------------+--------------------------+-------------------------------
runNumber            | int32_t                  | AsDtype('>i4')
eventNumber          | int32_t                  | AsDtype('>i4')
channelNumber        | int32_t                  | AsDtype('>i4')
mcWeight             | float                    | AsDtype('>f4')
scaleFactor_PILEUP   | float                    | AsDtype('>f4')
scaleFactor_ELE      | float                    | AsDtype('>f4')
scaleFactor_MUON     | float                    | AsDtype('>f4')
scaleFactor_PHOTON   | float                    | AsDtype('>f4')
scaleFactor_TAU      | float                    | AsDtype('>f4')
scaleFactor_BTAG     | float                    | AsDtype('>f4')
scaleFactor_LepTR... | float                    | AsDtype('>f4')
scaleFactor_Photo... | float                    | AsDtype('>f4')
trigE                | bool                     | AsDtype(

# Picking out branches with floats

In [4]:
# int32 branch: read it once, then show its element type and its depth
run_number = tree['runNumber'].array()
print(run_number.type.content)
print(run_number.ndim)

int32
1


In [5]:
# std::vector<float> branch: read it once, then peel the type one level at a
# time (per-event list type, then the type of the list items) and show depth
lep_pt_syst = tree['lep_pt_syst'].array()
per_event_type = lep_pt_syst.type.content
print(per_event_type)
print(per_event_type.content)
print(lep_pt_syst.ndim)

var * float32
float32
2


In [22]:
# Split the float32 branches into flat ones (one value per entry) and
# vector ones (a variable-length list per entry).
float_vector_branches = []
float_branches = []

for branch in tree.keys():
    branch_arr = tree[branch].array()

    # Anything deeper than one dimension counts as a vector branch; flatten it
    # one level before looking at the type string.
    is_vector = branch_arr.ndim > 1
    if is_vector:
        branch_arr = ak.flatten(branch_arr)

    if 'float32' not in branch_arr.typestr:
        continue

    target = float_vector_branches if is_vector else float_branches
    target.append(branch)

for title, names in (
    ('float branches:', float_branches),
    ('float vector branches:', float_vector_branches),
):
    print(title)
    for branch in names:
        print(f'\t{branch}')
        

float branches:
	mcWeight
	scaleFactor_PILEUP
	scaleFactor_ELE
	scaleFactor_MUON
	scaleFactor_PHOTON
	scaleFactor_TAU
	scaleFactor_BTAG
	scaleFactor_LepTRIGGER
	scaleFactor_PhotonTRIGGER
	met_et
	met_phi
	ditau_m
	met_et_syst
	XSection
	SumWeights
float vector branches:
	lep_pt
	lep_eta
	lep_phi
	lep_E
	lep_z0
	lep_ptcone30
	lep_etcone20
	lep_trackd0pvunbiased
	lep_tracksigd0pvunbiased
	jet_pt
	jet_eta
	jet_phi
	jet_E
	jet_jvt
	jet_MV2c10
	photon_pt
	photon_eta
	photon_phi
	photon_E
	photon_ptcone30
	photon_etcone20
	tau_pt
	tau_eta
	tau_phi
	tau_E
	tau_BDTid
	lep_pt_syst
	jet_pt_syst
	photon_pt_syst
	tau_pt_syst
	largeRjet_pt
	largeRjet_eta
	largeRjet_phi
	largeRjet_E
	largeRjet_m
	largeRjet_truthMatched
	largeRjet_D2
	largeRjet_tau32
	largeRjet_pt_syst


In [None]:
# Collect every branch whose (flattened) values are float32 or float64,
# printing each branch's element type and type string along the way.
float_branches = []
for branch in tree.keys():
    branch_arr = tree[branch].array()
    if branch_arr.ndim > 1:
        # Nested branch: drop one level of nesting before inspecting the type
        branch_arr = ak.flatten(branch_arr)

    typestr = branch_arr.typestr
    print(f'{branch}: {branch_arr.type.content}, {typestr}')

    if any(kind in typestr for kind in ('float32', 'float64')):
        float_branches.append(branch)

print(f'\nFound {len(float_branches)} float branches')

runNumber: int32, 53653 * int32
eventNumber: int32, 53653 * int32
channelNumber: int32, 53653 * int32
mcWeight: float32, 53653 * float32
scaleFactor_PILEUP: float32, 53653 * float32
scaleFactor_ELE: float32, 53653 * float32
scaleFactor_MUON: float32, 53653 * float32
scaleFactor_PHOTON: float32, 53653 * float32
scaleFactor_TAU: float32, 53653 * float32
scaleFactor_BTAG: float32, 53653 * float32
scaleFactor_LepTRIGGER: float32, 53653 * float32
scaleFactor_PhotonTRIGGER: float32, 53653 * float32
trigE: bool, 53653 * bool
trigM: bool, 53653 * bool
trigP: bool, 53653 * bool
lep_n: uint32, 53653 * uint32
lep_truthMatched: bool, 92664 * bool
lep_trigMatched: bool, 92664 * bool
lep_pt: float32, 92664 * float32
lep_eta: float32, 92664 * float32
lep_phi: float32, 92664 * float32
lep_E: float32, 92664 * float32
lep_z0: float32, 92664 * float32
lep_charge: int32, 92664 * int32
lep_type: uint32, 92664 * uint32
lep_isTightID: bool, 92664 * bool
lep_ptcone30: float32, 92664 * float32
lep_etcone20: fl

# Group float branches

In [7]:
# One bucket per physics object; `other` catches whatever matches no rule.
scale_factors, leps, largeRjets, jets = set(), set(), set(), set()
mets, photons, taus, other = set(), set(), set(), set()

# Ordered (substring, bucket) rules: the first substring found in a branch
# name decides its bucket. 'largeRjet' is tested before 'jet' because every
# name containing the former also contains the latter.
rules = (
    ('scaleFactor', scale_factors),
    ('lep', leps),
    ('largeRjet', largeRjets),
    ('jet', jets),
    ('met', mets),
    ('photon', photons),
    ('tau', taus),
)

for branch in float_branches:
    bucket = next((group for key, group in rules if key in branch), other)
    bucket.add(branch)

# Turn every bucket into a sorted list
scale_factors, leps, largeRjets, jets, mets, photons, taus, other = (
    sorted(group)
    for group in (scale_factors, leps, largeRjets, jets, mets, photons, taus, other)
)

print(f'\nScale factors: {scale_factors}')
print(f'\nLeptons: {leps}')
print(f'\nLarge R jets: {largeRjets}')
print(f'\nJets: {jets}')
print(f'\nMET: {mets}')
print(f'\nPhotons: {photons}')
print(f'\nTaus: {taus}')
print(f'\nOther: {other}')


Scale factors: ['scaleFactor_BTAG', 'scaleFactor_ELE', 'scaleFactor_LepTRIGGER', 'scaleFactor_MUON', 'scaleFactor_PHOTON', 'scaleFactor_PILEUP', 'scaleFactor_PhotonTRIGGER', 'scaleFactor_TAU']

Leptons: ['lep_E', 'lep_eta', 'lep_etcone20', 'lep_phi', 'lep_pt', 'lep_pt_syst', 'lep_ptcone30', 'lep_trackd0pvunbiased', 'lep_tracksigd0pvunbiased', 'lep_z0']

Large R jets: ['largeRjet_D2', 'largeRjet_E', 'largeRjet_eta', 'largeRjet_m', 'largeRjet_phi', 'largeRjet_pt', 'largeRjet_pt_syst', 'largeRjet_tau32', 'largeRjet_truthMatched']

Jets: ['jet_E', 'jet_MV2c10', 'jet_eta', 'jet_jvt', 'jet_phi', 'jet_pt', 'jet_pt_syst']

MET: ['met_et', 'met_et_syst', 'met_phi']

Photons: ['photon_E', 'photon_eta', 'photon_etcone20', 'photon_phi', 'photon_pt', 'photon_pt_syst', 'photon_ptcone30']

Taus: ['ditau_m', 'tau_BDTid', 'tau_E', 'tau_eta', 'tau_phi', 'tau_pt', 'tau_pt_syst']

Other: ['SumWeights', 'XSection', 'mcWeight']


# Branch values

In [8]:
def print_unique_values(branches):
    """Print the unique values of each named branch of the notebook's `tree`.

    Parameters
    ----------
    branches : iterable of str
        Branch names to look up in the module-level `tree`.

    Each branch is read in full; arrays with more than one dimension are
    flattened by one level before `np.unique` is applied. One line of the
    form ``<branch>: <unique values>`` is printed per branch. Nothing is
    returned.
    """
    for name in branches:
        values = tree[name].array()
        flat = ak.flatten(values) if values.ndim > 1 else values
        print(f'{name}: {np.unique(flat)}')

## Scale factors

In [9]:
# Print the unique values of every branch grouped under scale_factors
print_unique_values(scale_factors)

scaleFactor_BTAG: [0.836, 0.839, 0.841, 0.842, 0.843, 0.843, ..., 1.68, 1.68, 1.68, 1.72, 1.75]
scaleFactor_ELE: [0.712, 0.717, 0.717, 0.725, 0.725, 0.73, ..., 1.08, 1.09, 1.09, 1.1, 1.1, 1.11]
scaleFactor_LepTRIGGER: [0, 0.704, 0.739, 0.83, 0.832, 0.84, ..., 1.04, 1.04, 1.05, 1.06, 1.07, 1.21]
scaleFactor_MUON: [0.825, 0.839, 0.965, 0.972, 0.977, 0.983, ..., 1.01, 1.01, 1.01, 1.02, 1.03]
scaleFactor_PHOTON: [1]
scaleFactor_PILEUP: [0, 0.00134, 0.00183, 0.00221, 0.00279, ..., 1.26, 1.32, 1.55, 1.75, 3.35]
scaleFactor_PhotonTRIGGER: [1]
scaleFactor_TAU: [1]


## Leptons

In [10]:
# Print the unique values of every branch grouped under leps
print_unique_values(leps)

lep_E: [7.05e+03, 7.06e+03, 7.07e+03, 7.09e+03, ..., 2.89e+06, 3.19e+06, 3.48e+06]
lep_eta: [-2.55, -2.53, -2.47, -2.47, -2.47, -2.47, ..., 2.53, 2.58, 2.64, 2.66, 2.68]
lep_etcone20: [-1.53e+04, -1.09e+04, -8.77e+03, -7.59e+03, ..., 2.91e+04, 3.19e+04, 4.32e+04]
lep_phi: [-3.14, -3.14, -3.14, -3.14, -3.14, -3.14, ..., 3.14, 3.14, 3.14, 3.14, 3.14]
lep_pt: [7e+03, 7.01e+03, 7.01e+03, 7.02e+03, ..., 1.28e+06, 1.47e+06, 2.02e+06]
lep_pt_syst: [4.92, 8.15, 10.4, 11.2, 11.2, ..., 3.62e+05, 4e+05, 4.03e+05, 6.73e+05]
lep_ptcone30: [0, 1e+03, 1e+03, 1e+03, 1e+03, ..., 1.12e+06, 1.27e+06, 1.32e+06, 2.03e+06]
lep_trackd0pvunbiased: [-6.98, -6.61, -6.22, -5.66, -5.26, -4.94, ..., 5.8, 6.3, 6.39, 6.68, 6.84]
lep_tracksigd0pvunbiased: [3.01e-05, 7.88e-05, 9.72e-05, 0.000135, 0.000153, ..., 205, 222, 250, 268, 371]
lep_z0: [-190, -187, -187, -186, -186, -180, -176, ..., 204, 206, 231, 232, 249, 253]


## Large-R jets

In [11]:
# Print the unique values of every branch grouped under largeRjets
print_unique_values(largeRjets)

largeRjet_D2: [0, 2.96e-05, 5.24e-05, 6.8e-05, 8.08e-05, ..., 300, 305, 316, 356, 445]
largeRjet_E: [2.5e+05, 2.5e+05, 2.51e+05, 2.51e+05, ..., 3.96e+06, 4.38e+06, 5.02e+06]
largeRjet_eta: [-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, ..., 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
largeRjet_m: [-0.0271, -0.0221, -0.0191, -0.0175, ..., 4e+05, 4.17e+05, 4.19e+05, 5.26e+05]
largeRjet_phi: [-3.14, -3.14, -3.14, -3.14, -3.14, -3.14, ..., 3.14, 3.14, 3.14, 3.14, 3.14]
largeRjet_pt: [2.5e+05, 2.5e+05, 2.5e+05, 2.5e+05, ..., 1.9e+06, 2.08e+06, 2.4e+06, 2.51e+06]
largeRjet_pt_syst: [0, 2.5, 2.91, 3.53, 3.69, ..., 1.42e+05, 1.65e+05, 1.84e+05, 3.74e+05]
largeRjet_tau32: [-1, 6.9e-05, 8.39e-05, 0.000161, 0.000212, ..., 0.907, 0.908, 0.909, 0.915]
largeRjet_truthMatched: [0, 1]


## Jets

In [12]:
# Print the unique values of every branch grouped under jets
print_unique_values(jets)

jet_E: [2.02e+04, 2.03e+04, 2.03e+04, 2.03e+04, ..., 4.09e+06, 4.19e+06, 5.28e+06]
jet_MV2c10: [-0.999, -0.999, -0.999, -0.999, -0.999, -0.999, -0.999, ..., 1, 1, 1, 1, 1, 1]
jet_eta: [-2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, ..., 2.5, 2.5, 2.5, 2.5, 2.5, 2.5]
jet_jvt: [-0.1, 0, 7.56e-06, 2.17e-05, 2.32e-05, 2.36e-05, ..., 1, 1, 1, 1, 1, 1]
jet_phi: [-3.14, -3.14, -3.14, -3.14, -3.14, -3.14, ..., 3.14, 3.14, 3.14, 3.14, 3.14]
jet_pt: [2e+04, 2e+04, 2e+04, 2e+04, 2e+04, ..., 2.01e+06, 2.18e+06, 2.4e+06, 2.48e+06]
jet_pt_syst: [0, 131, 132, 132, 133, ..., 1.34e+05, 1.53e+05, 1.7e+05, 2.25e+05, 2.84e+05]


## MET

In [13]:
# Print the unique values of every branch grouped under mets
print_unique_values(mets)

met_et: [81.8, 224, 234, 281, 300, ..., 5.34e+05, 8.42e+05, 8.71e+05, 1.99e+06]
met_et_syst: [0, 0.0332, 0.153, 0.172, 0.383, ..., 7.5e+04, 7.62e+04, 7.65e+04, 9.12e+04]
met_phi: [-3.14, -3.14, -3.14, -3.14, -3.14, -3.14, ..., 3.14, 3.14, 3.14, 3.14, 3.14]


## Photons

In [14]:
# Print the unique values of every branch grouped under photons
print_unique_values(photons)

photon_E: []
photon_eta: []
photon_etcone20: []
photon_phi: []
photon_pt: []
photon_pt_syst: []
photon_ptcone30: []


## Taus

In [15]:
# Print the unique values of every branch grouped under taus
# (the substring match on 'tau' also places ditau_m in this group)
print_unique_values(taus)

ditau_m: [0]
tau_BDTid: []
tau_E: []
tau_eta: []
tau_phi: []
tau_pt: []
tau_pt_syst: []


## Other

In [16]:
# Print the unique values of the branches that matched no named group
print_unique_values(other)

SumWeights: [1.5e+11]
XSection: [1.95e+03]
mcWeight: [-1.94e+03, -1.94e+03, -1.94e+03, -1.94e+03, ..., 1.94e+03, 1.94e+03, 1.94e+03]


# Final selection of branches

In [None]:
# Float branches retained after the exploration above, listed by object group.
# Left out relative to the grouped float branches: every photon_* and tau_*
# branch and ditau_m, scaleFactor_PHOTON / scaleFactor_PhotonTRIGGER /
# scaleFactor_TAU, SumWeights, XSection and largeRjet_truthMatched.
# NOTE(review): presumably dropped because their printed unique values were
# empty, constant or binary in this sample - confirm before reusing elsewhere.
#
# Every entry needs its own trailing comma: Python silently concatenates
# adjacent string literals, which would merge two branch names into one
# non-existent name.
final_branches = [
    'jet_E',
    'jet_eta',
    'jet_jvt',
    'jet_MV2c10',
    'jet_phi',
    'jet_pt_syst',
    'jet_pt',
    'largeRjet_D2',
    'largeRjet_E',
    'largeRjet_eta',
    'largeRjet_m',
    'largeRjet_phi',
    'largeRjet_pt_syst',
    'largeRjet_pt',
    'largeRjet_tau32',
    'lep_E',
    'lep_eta',
    'lep_etcone20',
    'lep_phi',
    'lep_pt_syst',
    'lep_pt',
    'lep_ptcone30',
    'lep_trackd0pvunbiased',
    'lep_tracksigd0pvunbiased',
    'lep_z0',
    'mcWeight',
    'met_et_syst',
    'met_et',
    'met_phi',
    'scaleFactor_BTAG',
    'scaleFactor_ELE',
    'scaleFactor_LepTRIGGER',
    'scaleFactor_MUON',
    'scaleFactor_PILEUP',
]