In [None]:
import os

import uproot

import numpy as np
import awkward as ak

import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# File locations

In [None]:
# Base data directory. Defaults to the original location, but can be redirected
# on another machine by setting the DATA_DIR environment variable before the
# kernel starts, so the notebook does not need editing to run elsewhere.
datadir = os.environ.get('DATA_DIR', '/home/abyrnes/data')
# ROOT file read by the rest of the notebook, relative to the data directory.
filepath = datadir + '/atlas/mc_361106.Zee.1largeRjet1lep.root'

# Tree listing

In [None]:
# Open the ROOT file, then look up the object stored under the key 'mini'.
root_file = uproot.open(filepath)
tree = root_file['mini']
# Print a listing of what the tree contains.
tree.show()

# Picking out branches with floats

In [None]:
# int32 branch: read it once and reuse the array for both printouts
run_number_arr = tree['runNumber'].array()
print(run_number_arr.type.content)
print(run_number_arr.ndim)

In [None]:
# std::vector<float> branch: read it once and reuse the array for all printouts
lep_pt_syst_arr = tree['lep_pt_syst'].array()
print(lep_pt_syst_arr.type.content)
# One level further down in the type description
print(lep_pt_syst_arr.type.content.content)
print(lep_pt_syst_arr.ndim)

In [None]:
# Collect the names of all branches whose type string mentions a float dtype.
float_branches = []
for name in tree.keys():
    values = tree[name].array()

    # Arrays with more than one dimension are flattened by one level before
    # their type is printed and inspected.
    flat = ak.flatten(values) if values.ndim > 1 else values
    type_text = flat.typestr

    print(f'{name}: {flat.type.content}, {type_text}')

    if any(dtype in type_text for dtype in ('float32', 'float64')):
        float_branches.append(name)

print(f'\nFound {len(float_branches)} float branches')

# Group float branches

In [None]:
# Substring -> group rules, checked in insertion order; the first match wins.
# The order matters: 'largeRjet' has to be tested before 'jet' and 'tau',
# otherwise a name such as 'largeRjet_tau32' would land in the wrong group.
grouped = {
    'scaleFactor': set(),
    'lep': set(),
    'largeRjet': set(),
    'jet': set(),
    'met': set(),
    'photon': set(),
    'tau': set(),
}
ungrouped = set()  # branches matching none of the substrings above

for name in float_branches:
    bucket = next(
        (members for key, members in grouped.items() if key in name),
        ungrouped,
    )
    bucket.add(name)

# Convert every group into an alphabetically sorted list
scale_factors = sorted(grouped['scaleFactor'])
leps = sorted(grouped['lep'])
largeRjets = sorted(grouped['largeRjet'])
jets = sorted(grouped['jet'])
mets = sorted(grouped['met'])
photons = sorted(grouped['photon'])
taus = sorted(grouped['tau'])
other = sorted(ungrouped)

print(f'\nScale factors: {scale_factors}')
print(f'\nLeptons: {leps}')
print(f'\nLarge R jets: {largeRjets}')
print(f'\nJets: {jets}')
print(f'\nMET: {mets}')
print(f'\nPhotons: {photons}')
print(f'\nTaus: {taus}')
print(f'\nOther: {other}')

# Branch values

In [None]:
def print_unique_values(branches):
    """Print the distinct values stored in each of the given branches.

    Every branch is read in full from the notebook-level ``tree``. Arrays with
    more than one dimension are flattened completely before deduplication.

    Parameters
    ----------
    branches : iterable of str
        Names of branches in ``tree`` to inspect.

    Returns
    -------
    None
        One line per branch is written to stdout.
    """
    for branch in branches:
        branch_arr = tree[branch].array()
        if branch_arr.ndim > 1:
            # axis=None collapses every nesting level, so branches nested more
            # than one level deep also reach np.unique as a flat 1-D array
            # (the default axis would only remove a single level).
            branch_arr = ak.flatten(branch_arr, axis=None)
        unique_values = np.unique(branch_arr)
        print(f'Unique values in {branch}: {unique_values}')

## Scale factors

In [None]:
# Distinct values of every float branch whose name contains 'scaleFactor'
print_unique_values(scale_factors)

## Leptons

In [None]:
# Distinct values of every float branch assigned to the 'lep' group
print_unique_values(leps)

## Large R Jets

In [None]:
# Distinct values of every float branch assigned to the 'largeRjet' group
print_unique_values(largeRjets)

## Jets

In [None]:
# Distinct values of every float branch assigned to the 'jet' group
# (large-R jet branches are grouped separately)
print_unique_values(jets)

## MET

In [None]:
# Distinct values of every float branch assigned to the 'met' group
print_unique_values(mets)

## Photons

In [None]:
# Distinct values of every float branch assigned to the 'photon' group
print_unique_values(photons)

## Taus

In [None]:
# Distinct values of every float branch assigned to the 'tau' group
print_unique_values(taus)

## Other

In [None]:
# Distinct values of the float branches that matched none of the named groups
print_unique_values(other)

# Final selection of branches

In [None]:
# Selected branches, organised by name prefix. The groups are concatenated
# below in this order, so the resulting list order is fixed.
selected_scale_factors = [
    'scaleFactor_BTAG',
    'scaleFactor_ELE',
    'scaleFactor_LepTRIGGER',
    'scaleFactor_MUON',
    'scaleFactor_PILEUP',
]
selected_leps = [
    'lep_E',
    'lep_eta',
    'lep_etcone20',
    'lep_phi',
    'lep_pt',
    'lep_pt_syst',
    'lep_ptcone30',
    'lep_trackd0pvunbiased',
    'lep_tracksigd0pvunbiased',
    'lep_z0',
]
selected_largeRjets = [
    'largeRjet_D2',
    'largeRjet_E',
    'largeRjet_eta',
    'largeRjet_m',
    'largeRjet_phi',
    'largeRjet_pt',
    'largeRjet_pt_syst',
    'largeRjet_tau32',
]
selected_jets = [
    'jet_E',
    'jet_MV2c10',
    'jet_eta',
    'jet_jvt',
    'jet_phi',
    'jet_pt',
    'jet_pt_syst',
]
selected_mets = [
    'met_et',
    'met_et_syst',
    'met_phi',
]

# Full selection: the grouped branches followed by 'mcWeight'.
final_branches = [
    *selected_scale_factors,
    *selected_leps,
    *selected_largeRjets,
    *selected_jets,
    *selected_mets,
    'mcWeight',
]