In [None]:
from tqdm import tqdm
import json
import time

import numpy as np

from coffea.util import save, load
from coffea import hist
from coffea.processor import run_funcx_job
from coffea.processor.funcx.executor import funcx_executor

import funcx
# Point funcx's file logger at a log file.
# NOTE(review): this is a user-specific absolute AFS path; adjust it before running elsewhere.
funcx.set_file_logger('/afs/crc.nd.edu/user/a/awoodard/funcx.log')

# Identifiers handed to run_funcx_job below as the list of targets.
# NOTE(review): presumably funcX endpoint UUIDs for the two sites -- confirm.
ndt3_uuid = '81404f4b-9b35-4b92-9881-a02fe5e52693'
wisconsin_uuid = 'af21d0db-27f2-4906-beba-6baffac18393'

# Value forwarded to run_funcx_job as its `chunksize` keyword argument.
chunksize = 750_000

In [None]:
# Parse the sample-file JSON, then keep only its 'Hbb_2017' entry.
with open('metadata/samplefiles.json') as samplefile:
    catalogue = json.load(samplefile)
datasets = catalogue['Hbb_2017']

In [None]:
# Submit the processing job and measure how long it takes end to end.
# perf_counter() is a monotonic clock, so the measured interval cannot be
# distorted by system clock adjustments (NTP, manual changes) the way a
# time.time() difference can during a long-running job.
start = time.perf_counter()
treenames = ['otree', 'Events']  # process mixed skims and full derived trees
stageout_path = 'root://deepthought.crc.nd.edu://store/user/awoodard/funcx'

final_accumulator = run_funcx_job(
    [ndt3_uuid, wisconsin_uuid],
    datasets,
    treenames,
    'boostedHbbProcessor.coffea', # the hbb analysis processor
    funcx_executor,
    stageout_path,
    executor_args={'local_path': '/hadoop/store/user/awoodard/funcx'},
    chunksize=chunksize
)
# Elapsed time of the job in seconds; used by the summary cell below.
dt = time.perf_counter() - start

In [None]:
# Summarize the finished job: event throughput and histogram occupancy.
num_events = sum(funcx_executor.counts.values())

# Only the histogram entries of the accumulator contribute to the bin counts.
# NOTE(review): `_sumw` is a private attribute of hist.Hist; this relies on it
# mapping to numpy arrays of summed weights -- confirm against the coffea version in use.
histograms = [h for h in final_accumulator.values() if isinstance(h, hist.Hist)]
nbins = sum(sum(arr.size for arr in h._sumw.values()) for h in histograms)
# A bin counts as filled when its summed weight is strictly positive.
nfilled = sum(sum(np.sum(arr > 0) for arr in h._sumw.values()) for h in histograms)

print('processed: {:,d} events'.format(num_events))
print('total time: {:.2f} min'.format(dt / 60))
# Per-event rates are undefined when nothing was processed (or no time elapsed);
# skip them instead of raising ZeroDivisionError.
if num_events > 0 and dt > 0:
    print('{:.2f} μs/event'.format(dt / num_events * 1e6))
    print('{:.2f} Mevent/s'.format(num_events / dt / 1e6))
else:
    print('no events processed; per-event rates unavailable')

# nbins is reported in millions, hence the "M" suffix.
print("filled {:.1f}M bins".format(nbins / 1e6))
if nbins > 0:
    print("nonzero bins: {:.1f}%".format(100 * nfilled / nbins))
else:
    print("nonzero bins: n/a (no histograms in accumulator)")


In [None]:
# Persist the job's accumulator to 'hists.coffea' (relative to the working
# directory) via coffea.util.save; the matching `load` is imported above.
save(final_accumulator, 'hists.coffea')