In [None]:
import parsl
import os
from parsl.app.app import python_app, bash_app
from parsl.configs.local_threads import config

from parsl.providers import LocalProvider,CondorProvider
from parsl.channels import LocalChannel,SSHChannel
from parsl.config import Config
from parsl.executors import HighThroughputExecutor

from parsl.addresses import address_by_hostname

# File name of the grid proxy, suffixed with the numeric id of the current user.
x509_proxy = 'x509up_u{}'.format(os.getuid())

# Shell snippet handed to the CondorProvider as ``worker_init``. It is
# assembled line by line; the leading and trailing empty entries reproduce the
# newlines that open and close the script.
wrk_init = '\n'.join([
    '',
    'export XRD_RUNFORKHANDLER=1',
    # Source the LCG_95apython3 view from cvmfs.
    'source /cvmfs/sft.cern.ch/lcg/views/LCG_95apython3/x86_64-centos7-gcc7-opt/setup.sh',
    # Put the .local tree found in the job's working directory on the paths.
    'export PATH=`pwd`/.local/bin:$PATH',
    'export PYTHONPATH=`pwd`/.local/lib/python3.6/site-packages:$PYTHONPATH',
    '',
    # Point X509_USER_PROXY at the proxy file in the working directory.
    'export X509_USER_PROXY=`pwd`/' + x509_proxy,
    # Directory that condor_cfg lists under transfer_output_files.
    'mkdir -p ./coffea_parsl_condor',
    '',
])

# Per-core memory figure (2048; "two GB" per the name, so presumably MB --
# confirm against the HTCondor RequestMemory unit) and cores per job.
twoGB = 2048
nproc = 8

# Extra submit-description lines handed to the CondorProvider as
# ``scheduler_options``: memory scales with the core count.
condor_cfg = '\n'.join([
    '',
    'transfer_output_files = coffea_parsl_condor',
    'RequestMemory = {:d}'.format(twoGB * nproc),
    'RequestCpus = {:d}'.format(nproc),
    '',
])

# Input files passed to the CondorProvider as ``transfer_input_files``:
# the user's ~/.local tree and the grid proxy file, both taken from $HOME.
xfer_files = ['/'.join((os.environ['HOME'], leaf)) for leaf in ('.local', x509_proxy)]

#envs={'PYTHONPATH':'/afs/hep.wisc.edu/home/lgray/.local/lib/python3.6/site-packages:%s'%os.environ['PYTHONPATH'],
#      'X509_USER_PROXY':'./%s'%x509_proxy,
#      'PATH':'/afs/hep.wisc.edu/home/lgray/.local/bin:%s'%os.environ['PATH']}

# HTCondor provider: single-node blocks, starting with and capped at 64
# blocks. Each block carries the resource request in condor_cfg, runs wrk_init
# before the workers start and ships xfer_files along with the job.
condor_provider = CondorProvider(
    channel=LocalChannel(),
    nodes_per_block=1,
    init_blocks=64,
    max_blocks=64,
    scheduler_options=condor_cfg,
    worker_init=wrk_init,
    transfer_input_files=xfer_files,
)

# High-throughput executor: up to nproc single-core workers, matching the
# RequestCpus value written into condor_cfg.
# NOTE(review): the label equals the directory created in wrk_init and listed
# under transfer_output_files -- presumably that is where worker logs land
# given worker_logdir_root='./'; confirm.
htex_executor = HighThroughputExecutor(
    label="coffea_parsl_condor",
    address=address_by_hostname(),
    provider=condor_provider,
    cores_per_worker=1,
    max_workers=nproc,
    prefetch_capacity=0,
    worker_logdir_root='./',
)

condor_htex = Config(
    executors=[htex_executor],
    strategy=None,
    app_cache=True,
    retries=10,
)

#parsl.set_stream_logger() # <-- log everything to stdout

# Load the configuration; the returned object is passed to run_parsl_job
# as ``data_flow`` further down.
dfk = parsl.load(condor_htex)

# Chunk size forwarded to run_parsl_job.
chunksize = 500000


In [None]:
@python_app
def hello(test=None):
    """Smoke-test app: print a greeting and hand it back with ``test``.

    Args:
        test: Arbitrary value echoed back unchanged.

    Returns:
        A ``(greeting, test)`` tuple.
    """
    greeting = 'Hello World!'
    print(greeting)
    return greeting, test

# Submit one call and block on its future to check the setup end to end.
print(hello('what').result())


In [None]:
from tqdm import tqdm
import json

# Start from an empty mapping so ``datasets`` exists even if the read fails.
datasets = {}

# Keep only the 'Hbb_2017' entry of the sample-file catalogue.
with open('metadata/samplefiles.json') as samples_file:
    datasets = json.load(samples_file)['Hbb_2017']


In [None]:
# Get the hbb analysis worker: the processor object is read back from the
# 'boostedHbbProcessor.coffea' file with coffea.util.load.
from coffea.util import load

# Passed to run_parsl_job as the processor in the submission cell.
processor_instance = load('boostedHbbProcessor.coffea')

In [None]:
import time
from coffea.processor import run_parsl_job
from coffea.processor.parsl.parsl_executor import parsl_executor

# deal with mixed skims and full derived trees
treenames = ['otree', 'Events']

# Time the whole job: ``dt`` is the elapsed wall-clock time in seconds.
tic = time.time()
final_accumulator = run_parsl_job(
    datasets,
    treenames,
    processor_instance,
    parsl_executor,
    # NOTE(review): config is None while the already-loaded dfk is supplied
    # through data_flow -- presumably run_parsl_job then skips loading its
    # own config; confirm against the coffea version in use.
    executor_args={'config': None},
    data_flow=dfk,
    chunksize=chunksize,
)
dt = time.time() - tic


In [None]:
# Total number of events, summed over the per-key counters kept by the executor.
nevt = sum(parsl_executor.counts.values())
print('processed:',nevt,'events')
# dt is in seconds (time.time() difference), so this figure is in minutes.
print('total time: ',dt/60)

# Rates divide by nevt and by dt; skip them instead of raising
# ZeroDivisionError when nothing was processed or no time was measured.
if nevt > 0 and dt > 0:
    print('μs/evt', dt/nevt*1e6)
    print('Mevt/s', nevt/dt/1e6)
else:
    print('no events processed or zero elapsed time; skipping rate figures')


# Last expression of the cell: display the raw counters.
parsl_executor.counts

In [None]:
from coffea import hist
import gzip
import pickle
import numexpr
import numpy as np

# Histogram occupancy summary over every hist.Hist value in the accumulator.
# NOTE(review): this reads the private Hist._sumw mapping; it is treated here
# as {key: array of summed weights} -- confirm for the coffea version in use.
nbins = sum(sum(arr.size for arr in h._sumw.values()) for h in final_accumulator.values() if isinstance(h, hist.Hist))
nfilled = sum(sum(np.sum(arr>0) for arr in h._sumw.values()) for h in final_accumulator.values() if isinstance(h, hist.Hist))
print("Processed %.1fM events" % (nevt/1e6, ))
print("Filled %.1fM bins" % (nbins/1e6, ))
# Guard the ratio: nbins is 0 when the accumulator holds no histograms.
if nbins > 0:
    print("Nonzero bins: %.1f%%" % (100*nfilled/nbins, ))
else:
    print("Nonzero bins: n/a (no histogram bins found)")

# Pickle is not very fast or memory efficient, will be replaced by something better soon
# The save uses gzip + pickle, the modules this cell imports (lz4f and cpkl
# are not imported anywhere in this notebook); the file name reflects that
# format.
# NOTE(review): plain pickle cannot serialise lambdas or closures; if the
# accumulator ever holds any, a cloudpickle-based writer is needed -- confirm.
with gzip.open("hists.pkl.gz", mode="wb", compresslevel=6) as fout:
    pickle.dump(final_accumulator, fout)

#dt = time.time() - tstart
#print("%.2f us*cpu/event overall" % (1e6*dt*nworkers/final_accumulators['nentries'], ))


In [None]:
# Tear down the parsl session: call cleanup() on the currently loaded
# DataFlowKernel first, then clear parsl's loaded state.
# NOTE(review): presumably clear() is what allows parsl.load() to be called
# again in the same kernel -- confirm against the parsl documentation.
parsl.dfk().cleanup()
parsl.clear()


In [None]:
# Remove every job owned by $USER from each of the three schedulers
# lpcschedd1..3.fnal.gov.
!for a in 1 2 3; do condor_rm $USER -name lpcschedd${a}.fnal.gov; done
# Second pass with -f (presumably shorthand for condor_rm's force-removal
# option -- confirm) for jobs the first pass left behind.
!for a in 1 2 3; do condor_rm -f $USER -name lpcschedd${a}.fnal.gov; done