In [1]:
from coffea.nanoevents import NanoAODSchema
from coffea.dataset_tools import apply_to_fileset, max_chunks, max_files, preprocess

import dask
import numpy as np

from template_processor import TestProcessor

from dask.distributed import Client

Issue: coffea.nanoevents.methods.vector will be removed and replaced with scikit-hep vector. Nanoevents schemas internal to coffea will be migrated. Otherwise please consider using that package!.
  from coffea.nanoevents.methods import vector


In [2]:
import gzip
import json
import os

# Directory holding the preprocessed filesets. Each preprocessed file points to a sample on DAS
# and defines how its ROOT files are sliced by event (chunks). These files are generated once,
# in advance, and stored until an analysis needs them.
base_dir = "../tools/preprocessing/preprocessed"

# Sample to analyse; swap in the commented line to run on the SlepSnu sample instead.
sample = "2023_ttbar_100000_preprocessed_available.json.gz"
#sample = "2023_SlepSnu_MN1_220_100000_preprocessed_available.json.gz"

file_path = os.path.join(base_dir, sample)

# The fileset is stored as gzip-compressed JSON: open it in text mode and parse it directly.
with gzip.open(file_path, mode="rt") as file:
    preprocessed_available = json.load(file)


In [3]:
# Optional: connect to an already-running Dask scheduler at this address. Left disabled here,
# so no distributed Client is created by this cell.
#client = Client("tls://localhost:8786")
#client

In [4]:
### SWITCH HERE ###

# True  -> run on a truncated fileset, limited by num_files and num_chunks below.
# False -> run on the complete preprocessed fileset.
reduced_computation = True

# Number of ROOT files from DAS to run over.
num_files = 2

# Number of chunks (groups of events) per ROOT file to run over. The chunk size is set during
# preprocessing; my default is 1 chunk = 100,000 events.
num_chunks = 5

###################

In [5]:
# Pick the fileset to process. The branch only decides WHAT is processed; the processing call
# itself is identical in both modes, so it is written once below instead of once per branch.
if reduced_computation:
    # Keep at most `num_files` files per dataset, then at most `num_chunks` chunks per file.
    test_preprocessed_files = max_files(preprocessed_available, num_files)
    test_preprocessed = max_chunks(test_preprocessed_files, num_chunks)
    fileset = test_preprocessed
else:
    fileset = preprocessed_available

# Build the lazy task graph. Because "allow_read_errors_with_report" is set, the call returns a
# (task graph, report) pair, and files raising OSError/KeyError while being read are reported
# rather than aborting the run.
to_compute, reports = apply_to_fileset(
    data_manipulation=TestProcessor(),
    fileset=fileset,
    schemaclass=NanoAODSchema,
    uproot_options={"allow_read_errors_with_report": (OSError, KeyError)},
)

# Execute the graph: `computed` holds the processor output, `rep` the read report.
computed, rep = dask.compute(to_compute, reports)


In [6]:
# Show the dataset names that key the computed output.
computed.keys()

dict_keys(['/TTto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23NanoAODv12-130X_mcRun3_2023_realistic_v14-v2/NANOAODSIM'])

In [7]:
# Take the first key of `computed` (in the recorded run it is the only one) so that later cells
# do not have to spell out the long dataset name.
sample_name = next(iter(computed))
sample_name 

'/TTto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23NanoAODv12-130X_mcRun3_2023_realistic_v14-v2/NANOAODSIM'

In [8]:
# Pull out the output stored for this dataset.
results = computed[sample_name]

In [9]:
# `results` is shorthand for this dataset's output, so the very long sample name does not have
# to be repeated; work with the `results` dictionary from here on.
results

{'total_entries': 651000,
 'ele_pt': <Array [] type='0 * var * float32[parameters={"__doc__": "pt"}]'>,
 'fields': ['BeamSpot',
  'SV',
  'Muon',
  'FatJet',
  'run',
  'LHEWeight',
  'L1simulation',
  'Generator',
  'GenPart',
  'genTtbarId',
  'DeepMETResponseTune',
  'genWeight',
  'HLTriggerFinalPath',
  'CaloMET',
  'SubGenJetAK8',
  'HLT',
  'HTXS',
  'LHEPdfWeight',
  'GenJet',
  'GenProton',
  'HLTriggerFirstPath',
  'TkMET',
  'SoftActivityJetHT2',
  'luminosityBlock',
  'PSWeight',
  'MET',
  'Electron',
  'L1Reco',
  'Jet',
  'OtherPV',
  'SoftActivityJetHT10',
  'GenVtx',
  'PuppiMET',
  'Photon',
  'Tau',
  'TrigObj',
  'LHEPart',
  'SubJet',
  'GenMET',
  'SoftActivityJetHT5',
  'ChsMET',
  'SoftActivityJetNjets2',
  'DeepMETResolutionTune',
  'SoftActivityJetNjets10',
  'LHEReweightingWeight',
  'LHE',
  'GenDressedLepton',
  'bunchCrossing',
  'GenJetAK8',
  'Flag',
  'LowPtElectron',
  'SoftActivityJetNjets5',
  'GenIsolatedPhoton',
  'GenVisTau',
  'CorrT1METJet',
  '

In [10]:
# Entry count stored by the processor under 'total_entries'
# (presumably the number of events read — TODO confirm in TestProcessor).
results['total_entries']

651000

In [11]:
# List stored by the processor under 'fields'
# (presumably the top-level branch/collection names of the events — TODO confirm in TestProcessor).
results['fields']

['BeamSpot',
 'SV',
 'Muon',
 'FatJet',
 'run',
 'LHEWeight',
 'L1simulation',
 'Generator',
 'GenPart',
 'genTtbarId',
 'DeepMETResponseTune',
 'genWeight',
 'HLTriggerFinalPath',
 'CaloMET',
 'SubGenJetAK8',
 'HLT',
 'HTXS',
 'LHEPdfWeight',
 'GenJet',
 'GenProton',
 'HLTriggerFirstPath',
 'TkMET',
 'SoftActivityJetHT2',
 'luminosityBlock',
 'PSWeight',
 'MET',
 'Electron',
 'L1Reco',
 'Jet',
 'OtherPV',
 'SoftActivityJetHT10',
 'GenVtx',
 'PuppiMET',
 'Photon',
 'Tau',
 'TrigObj',
 'LHEPart',
 'SubJet',
 'GenMET',
 'SoftActivityJetHT5',
 'ChsMET',
 'SoftActivityJetNjets2',
 'DeepMETResolutionTune',
 'SoftActivityJetNjets10',
 'LHEReweightingWeight',
 'LHE',
 'GenDressedLepton',
 'bunchCrossing',
 'GenJetAK8',
 'Flag',
 'LowPtElectron',
 'SoftActivityJetNjets5',
 'GenIsolatedPhoton',
 'GenVisTau',
 'CorrT1METJet',
 'RawPuppiMET',
 'PV',
 'L1',
 'event',
 'IsoTrack',
 'RawMET',
 'boostedTau',
 'Pileup',
 'FsrPhoton',
 'SoftActivityJetHT',
 'LHEScaleWeight',
 'Rho',
 'SoftActivityJet']

In [12]:
# NOTE(review): in the recorded run this prints an empty array (its type starts with '0 * var'),
# even though 'total_entries' is 651000 — TODO check how TestProcessor fills 'ele_pt'.
print(results['ele_pt'])

[]
