In [1]:
import glob
import pandas as pd
import numpy as np
import awkward as ak

import utils.tools as tools
import utils.plotting as plotting

from collections import OrderedDict, defaultdict
import uproot

import mplhep as cms
import matplotlib.pyplot as plt

def getArrays(inputFiles, branches, nFiles=1, fname="data.parquet"):
    """Read the selected branches of the 'Events' tree and tidy the field names.

    Parameters
    ----------
    inputFiles : iterable
        ROOT file paths; each one is paired with the tree name 'Events'.
    branches :
        Forwarded unchanged to ``uproot.iterate`` as ``filter_name``.
    nFiles : int
        Only the first ``nFiles`` entries of ``inputFiles`` are read.
    fname : str
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    The concatenation of all batches yielded by ``uproot.iterate``, after
    ``formatBranches`` has renamed its fields.
    """
    # one {path: tree name} mapping per input file, truncated to nFiles
    tree_specs = []
    for path in inputFiles:
        tree_specs.append({path: 'Events'})
    tree_specs = tree_specs[:nFiles]

    # read every batch, then stitch the batches into a single array
    batches = list(uproot.iterate(tree_specs, filter_name=branches))
    merged = ak.concatenate(batches)

    return formatBranches(merged)


def getL1Types(useEmu=False, useMP=False):
    """Return the branch-name prefixes for L1 objects and for L1 sums.

    Parameters
    ----------
    useEmu : bool
        If true the object prefix is 'L1Emul', otherwise 'L1'.
    useMP : bool
        If true 'MP' is appended to the object prefix to form the sum prefix.

    Returns
    -------
    tuple of (str, str)
        ``(l1Type, l1SumType)``.
    """
    if useEmu:
        l1Type = 'L1Emul'
    else:
        l1Type = 'L1'

    # the sum prefix is the object prefix, optionally tagged with 'MP'
    l1SumType = l1Type
    if useMP:
        l1SumType = l1SumType + 'MP'

    return l1Type, l1SumType


def getBranches(inputs=None, useEmu=False, useMP=False):
    """Build the list of branch names to read.

    The list is made of the sum branches (prefixed with the L1 sum type),
    the PUPPI MET, muon and PUPPI jet branches, and - for every entry of
    ``inputs`` - the object branches prefixed with ``<l1Type><input>_``.

    NOTE(review): ``branches`` is not imported in the visible notebook cell;
    presumably it is a project module (e.g. ``utils.branches``) - confirm and
    import it before calling this function.

    Parameters
    ----------
    inputs : iterable of str, optional
        L1 object collection names (e.g. ``['Jet']``). ``None`` (the default)
        means no object collections.
    useEmu, useMP : bool
        Forwarded to ``getL1Types`` to select the name prefixes.

    Returns
    -------
    list of str
        A new list; the lists held by ``branches`` are not modified.
    """
    # None instead of a mutable [] default, so no list is shared between calls
    if inputs is None:
        inputs = ()

    l1Type, l1SumType = getL1Types(useEmu, useMP)

    all_branches = [l1SumType + var for var in branches.sumBranches]
    all_branches += branches.puppiMETBranches
    all_branches += branches.muonBranches
    all_branches += branches.puppiJetBranches

    # loop variable renamed so the builtin input() is not shadowed
    for l1Object in inputs:
        all_branches += [l1Type + l1Object + "_" + var for var in branches.objectBranches]

    return all_branches

def formatBranches(data):
    """Rename the fields of ``data`` in place and return it.

    * Fields starting with ``Jet_`` get every "Jet" replaced by "recoJet".
    * Any other field containing "L1" has the substrings "L1", "MP" and
      "Emul" removed (anywhere in the name, not only as a prefix).

    The renaming is done in two passes over a snapshot of the original field
    names. Stripping "L1" from e.g. ``L1Jet_pt`` produces ``Jet_pt``, so the
    reco ``Jet_*`` fields must be moved out of the way first; otherwise the
    result would depend on the order of the fields and the L1 jets could
    overwrite (and then be relabelled as) the reco jets.

    Parameters
    ----------
    data :
        Object supporting ``ak.fields(data)``, ``data[name]``,
        ``data[name] = value`` and ``del data[name]``.

    Returns
    -------
    The same ``data`` object with renamed fields.
    """
    # snapshot: the fields are modified while we loop
    original_fields = list(ak.fields(data))

    # pass 1: free the "Jet_" names by relabelling the reco jets
    for branch in original_fields:
        if branch.startswith("Jet_"):
            data[branch.replace("Jet", "recoJet")] = data[branch]
            del data[branch]

    # pass 2: remove the prefixes of the L1 branch names for tidiness
    for branch in original_fields:
        # reco jets were already handled (and deleted) in pass 1
        if branch.startswith("Jet_"):
            continue
        if "L1" in branch:
            data[branch.replace("L1", "").replace("MP", "").replace("Emul", "")] = data[branch]
            del data[branch]

    return data

In [2]:
# --- configurations to compare -------------------------------------------
# The lists below are parallel: entry i of each list describes configuration i.
l1Labels = ['Default', 'Default_noPUM', 'BaselineZS', 'ConservativeZS']
branchTypes = ['unp', 'emu', 'emu', 'emu'] # unp or emu
rootDir = "/eos/home-d/ddharmen/JEC/CMSSW_14_1_4_patch1/src/JETMET/perf_job1/code/L1T_fixRateEff/"

sigPaths  = ["zmu_base/", "zmu_pumOff/", "zmu_base/", "zmu_con/"]
bkgPaths  = ["zb_base/", "zb_pumOff/", "zb_base/", "zb_con/"]

# number of configurations, derived from the labels so it cannot go stale
nComp = len(l1Labels)

# zip() over these lists would silently drop configurations if one were shorter
assert all(len(perConfig) == nComp for perConfig in (branchTypes, sigPaths, bkgPaths)), \
    "l1Labels, branchTypes, sigPaths and bkgPaths must each have one entry per configuration"


inputFormat = 'nano'     # nanoAOD
#inputFormat = 'hdf5'     # pandas dataframes

sigName = "zmu"
bkgName = "zb"

writeDir = "./data/"

# --- input files ----------------------------------------------------------
# fileName = "/eos/home-d/ddharmen/JEC/CMSSW_14_1_4_patch1/src/JETMET/zerobias_perf_raw_test/default/nano*.root"
fileName = "nano_10.root"
# fileName = "nano_99.root"

# one list of matching files per configuration (empty if nothing matches)
sigFiles = [glob.glob(rootDir + path + fileName) for path in sigPaths]
bkgFiles = [glob.glob(rootDir + path + fileName) for path in bkgPaths]

print(bkgFiles)

# --- output files ---------------------------------------------------------
# writeDir already ends with "/", so these paths contain a double slash
awkSigFiles = [writeDir + "/" + sigName + label + ".parq" for label in l1Labels]
awkBkgFiles = [writeDir + "/" + bkgName + label + ".parq" for label in l1Labels]

sig_hdf5s = [writeDir + "/" + sigName + label + ".hdf5" for label in l1Labels]
bkg_hdf5s = [writeDir + "/" + bkgName + label + ".hdf5" for label in l1Labels]

# L1 thresholds (GeV)
l1JetThresholds = [30, 120, 180]
l1METThresholds = [50, 90]

# arrays containing our signal and background data
# for the different sets of input files
sigs = []
bkgs = []

sig_dfs = []
bkg_dfs = []

[['/eos/home-d/ddharmen/JEC/CMSSW_14_1_4_patch1/src/JETMET/perf_job1/code/L1T_fixRateEff/zb_base/nano_10.root'], ['/eos/home-d/ddharmen/JEC/CMSSW_14_1_4_patch1/src/JETMET/perf_job1/code/L1T_fixRateEff/zb_pumOff/nano_10.root'], ['/eos/home-d/ddharmen/JEC/CMSSW_14_1_4_patch1/src/JETMET/perf_job1/code/L1T_fixRateEff/zb_base/nano_10.root'], ['/eos/home-d/ddharmen/JEC/CMSSW_14_1_4_patch1/src/JETMET/perf_job1/code/L1T_fixRateEff/zb_con/nano_10.root']]


In [3]:
# Start from an empty list in this cell (rather than appending to the list
# created in the configuration cell) so that re-running the cell does not
# load every sample a second time.
sigs = []
# NOTE: zip() stops at the shortest list, so all three must have one entry per configuration.
for sigFile, awkSigFile, branchType in zip(sigFiles, awkSigFiles, branchTypes):
    # emulated branches are read when the configuration is tagged 'emu'
    sigBranches = tools.getBranches(['Jet'], branchType == 'emu', False)
    sigs.append(tools.getArrays(sigFile, sigBranches, len(sigFile), awkSigFile))

# Background loading is currently disabled:
# bkgs = []
# for bkgFile, awkBkgFile, branchType in zip(bkgFiles, awkBkgFiles, branchTypes):
#     bkgs.append(tools.getArrays(bkgFile, tools.getBranches(['Jet'], branchType=='emu', False), len(bkgFile), awkBkgFile))

Branches selected:  ['L1EtSum_pt', 'L1EtSum_etSumType', 'L1EtSum_bx', 'PuppiMET_pt', 'PuppiMET_phi', 'Muon_pt', 'Muon_phi', 'Muon_isPFcand', 'Jet_pt', 'Jet_eta', 'Jet_phi', 'L1Jet_pt', 'L1Jet_eta', 'L1Jet_phi', 'L1Jet_bx']
Branches selected:  ['L1EmulEtSum_pt', 'L1EmulEtSum_etSumType', 'L1EmulEtSum_bx', 'PuppiMET_pt', 'PuppiMET_phi', 'Muon_pt', 'Muon_phi', 'Muon_isPFcand', 'Jet_pt', 'Jet_eta', 'Jet_phi', 'L1EmulJet_pt', 'L1EmulJet_eta', 'L1EmulJet_phi', 'L1EmulJet_bx']
Branches selected:  ['L1EmulEtSum_pt', 'L1EmulEtSum_etSumType', 'L1EmulEtSum_bx', 'PuppiMET_pt', 'PuppiMET_phi', 'Muon_pt', 'Muon_phi', 'Muon_isPFcand', 'Jet_pt', 'Jet_eta', 'Jet_phi', 'L1EmulJet_pt', 'L1EmulJet_eta', 'L1EmulJet_phi', 'L1EmulJet_bx']
Branches selected:  ['L1EmulEtSum_pt', 'L1EmulEtSum_etSumType', 'L1EmulEtSum_bx', 'PuppiMET_pt', 'PuppiMET_phi', 'Muon_pt', 'Muon_phi', 'Muon_isPFcand', 'Jet_pt', 'Jet_eta', 'Jet_phi', 'L1EmulJet_pt', 'L1EmulJet_eta', 'L1EmulJet_phi', 'L1EmulJet_bx']


In [4]:
# Quick look at the first loaded signal sample.
first_sig = sigs[0]
print(first_sig)

[{Muon_isPFcand: [True, True], Muon_phi: [1.88, ...], Muon_pt: [...], ...}, ...]


In [5]:
# Print the field names of every loaded signal sample, one line per sample.
for sig_sample in sigs:
    sample_fields = sig_sample.fields
    print(sample_fields)

['Muon_isPFcand', 'Muon_phi', 'Muon_pt', 'PuppiMET_phi', 'PuppiMET_pt', 'recoJet_eta', 'recoJet_phi', 'recoJet_pt', 'EtSum_bx', 'EtSum_etSumType', 'EtSum_pt', 'Jet_bx', 'Jet_eta', 'Jet_phi', 'Jet_pt']
['Muon_isPFcand', 'Muon_phi', 'Muon_pt', 'PuppiMET_phi', 'PuppiMET_pt', 'recoJet_eta', 'recoJet_phi', 'recoJet_pt', 'EtSum_bx', 'EtSum_etSumType', 'EtSum_pt', 'Jet_bx', 'Jet_eta', 'Jet_phi', 'Jet_pt']
['Muon_isPFcand', 'Muon_phi', 'Muon_pt', 'PuppiMET_phi', 'PuppiMET_pt', 'recoJet_eta', 'recoJet_phi', 'recoJet_pt', 'EtSum_bx', 'EtSum_etSumType', 'EtSum_pt', 'Jet_bx', 'Jet_eta', 'Jet_phi', 'Jet_pt']
['Muon_isPFcand', 'Muon_phi', 'Muon_pt', 'PuppiMET_phi', 'PuppiMET_pt', 'recoJet_eta', 'recoJet_phi', 'recoJet_pt', 'EtSum_bx', 'EtSum_etSumType', 'EtSum_pt', 'Jet_bx', 'Jet_eta', 'Jet_phi', 'Jet_pt']
