# Multi-user exercise at 2024 IRIS-HEP retreat

***Important:*** add a new cell below that connects a client to your own Dask cluster (see instructions).
It should look similar to the following:
```python
from dask.distributed import Client

client = Client("tcp://dask-alheld-4027af07-8.af-jupyter:8786")
client
```

Your client address will be different (the one in the example above won't work for you!).

In [None]:
# your cell here to connect to your own client

We will stagger the data processing by waiting a random time of up to 5 minutes per user: this better captures the reality of multi-user environments, where not everyone launches at exactly the same time.

In [None]:
import time
import random

# draw a random delay between 0 and 300 seconds (5 minutes) for this user
time_to_wait = random.uniform(0, 300)
print(f"waiting for {time_to_wait:.0f} seconds before running the rest of the notebook")

# sleep in ten equal slices and report progress after each of them
n_steps = 10
for step in range(1, n_steps + 1):
    time.sleep(time_to_wait/10)
    print(f"{step/n_steps:.0%} of waiting time done")

In [None]:
import datetime
import glob
import json
import os
from collections import defaultdict
from pathlib import Path
import traceback
import time
import warnings
import copy
import pathlib

import awkward as ak
import dask
import dask_awkward as dak
import hist.dask
import coffea
import numpy as np
import uproot
from dask.distributed import Client
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.style.use("ggplot")

from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema
from coffea import dataset_tools

import utils
import warnings

warnings.filterwarnings("ignore")

import input_files.utils

from dask.distributed import LocalCluster, Client, progress, performance_report

# every run writes its tracked output into its own time-stamped folder below measurements/
# (creation fails if a folder with the same time stamp already exists)
run_label = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
MEASUREMENT_PATH = pathlib.Path("measurements") / run_label
MEASUREMENT_PATH.mkdir(parents=True)

In [None]:
# -------------------
# INPUT CONFIGURATION
# -------------------

# processes to run over: the short list is the default, the long one is kept for reference
PROCESSES_TO_USE = ["ttbar", "othertop"]  # 9 TB
# PROCESSES_TO_USE = ["db", "zjets", "wjets", "ttV", "othertop", "ttbar", "data15_13TeV", "data16_13TeV", "data17_13TeV", "data18_13TeV"]  # 190 TB

# all three limits are passed as None (presumably "no limit" -- confirm in input_files.utils)
fileset = input_files.utils.get_fileset(
    PROCESSES_TO_USE,
    max_files_per_container=None,
    max_containers_per_dsid=None,
    max_dsid_per_process=None,
)

# store the fileset next to the measurements, then report its size
utils.save_fileset(fileset, MEASUREMENT_PATH)
n_files = sum(len(entry["files"]) for entry in fileset.values())
print(f"total number of files (including duplicates): {n_files}")

## Dask distributing `uproot.open`

In [None]:
# flatten the per-process file lists into one list of files to run over
all_files = [fname for process_name in fileset for fname in fileset[process_name]["files"]]

# define work to be done
def uproot_open_materialize(fname):
    """Open one file with uproot and read a fixed list of branches from it.

    Every branch in ``BRANCH_LIST`` is read from the ``CollectionTree`` of the
    file via ``.array()``. The arrays are discarded right away; only
    bookkeeping information about the read is returned.

    Ordinary errors (anything deriving from ``Exception``) are caught and
    reported in the result instead of being raised, so that one bad file does
    not abort the processing of all others. ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not caught.

    Args:
        fname: name of the file to read, handed to ``uproot.open``.

    Returns:
        dict with the following keys:

        - ``fname``: the input file name
        - ``read``: ``num_requested_bytes`` reported by the file source
        - ``uncompressed``: sum of ``uncompressed_bytes`` over all branches read
        - ``num_entries``: number of entries in ``CollectionTree``
        - ``runtime``: seconds spent in this function (``time.perf_counter``)
        - ``time_finished``: ``datetime.datetime.now()`` at the end of the call
        - ``exception``: ``None`` on success, otherwise the formatted traceback;
          in that case ``read``, ``uncompressed`` and ``num_entries`` are 0
    """
    # ~15%, around 300 Mbps single core, ~130 Mbps with 100 workers
    BRANCH_LIST = [
        "InDetTrackParticlesAuxDyn.definingParametersCovMatrixOffDiag",
        'PrimaryVerticesAuxDyn.z',
        'PrimaryVerticesAuxDyn.x',
        'PrimaryVerticesAuxDyn.y',
        'AnalysisJetsAuxDyn.Timing',
        'AnalysisJetsAuxDyn.JetConstitScaleMomentum_phi',
        'AnalysisJetsAuxDyn.DetectorEta',
        'AnalysisJetsAuxDyn.ActiveArea4vec_eta',
        'AnalysisJetsAuxDyn.JetConstitScaleMomentum_eta',
        'AnalysisJetsAuxDyn.phi',
        'AnalysisJetsAuxDyn.m',
        'AnalysisJetsAuxDyn.JetConstitScaleMomentum_pt',
        'AnalysisJetsAuxDyn.ActiveArea4vec_phi',
        'AnalysisJetsAuxDyn.JetConstitScaleMomentum_m',
        'AnalysisJetsAuxDyn.ActiveArea4vec_m',
        'AnalysisJetsAuxDyn.pt',
        'AnalysisJetsAuxDyn.Width',
        'AnalysisJetsAuxDyn.EMFrac',
        'AnalysisJetsAuxDyn.ActiveArea4vec_pt',
        'AnalysisJetsAuxDyn.PSFrac',
        'AnalysisJetsAuxDyn.JVFCorr',
        'AnalysisJetsAuxDyn.DFCommonJets_QGTagger_TracksC1',
        'AnalysisJetsAuxDyn.eta',
        'AnalysisPhotonsAuxDyn.topoetcone40_CloseByCorr',
        'AnalysisPhotonsAuxDyn.topoetcone40',
        'AnalysisPhotonsAuxDyn.eta',
        'AnalysisJetsAuxDyn.DFCommonJets_fJvt',
        'AnalysisPhotonsAuxDyn.phi',
        'AnalysisPhotonsAuxDyn.topoetcone20_CloseByCorr',
        'AnalysisPhotonsAuxDyn.topoetcone40ptCorrection',
        'AnalysisPhotonsAuxDyn.topoetcone20ptCorrection',
        'AnalysisPhotonsAuxDyn.pt',
        'AnalysisJetsAuxDyn.DFCommonJets_QGTagger_NTracks',
        'AnalysisJetsAuxDyn.DFCommonJets_QGTagger_TracksWidth',
        'AnalysisJetsAuxDyn.GhostMuonSegmentCount',
        'AnalysisPhotonsAuxDyn.topoetcone20',
        'AnalysisPhotonsAuxDyn.f1',
        'AnalysisPhotonsAuxDyn.DFCommonPhotonsIsEMTightIsEMValue',
        'AnalysisPhotonsAuxDyn.ptcone20_CloseByCorr',
        'AnalysisPhotonsAuxDyn.OQ',
        'AnalysisPhotonsAuxDyn.ptcone20',
        'AnalysisTauJetsAuxDyn.RNNJetScore',
        'AnalysisTauJetsAuxDyn.JetDeepSetScore',
        'AnalysisTauJetsAuxDyn.etaTauEnergyScale',
        'AnalysisTauJetsAuxDyn.etaFinalCalib',
        'AnalysisTauJetsAuxDyn.RNNEleScoreSigTrans_v1'
    ]
    branch_names = frozenset(BRANCH_LIST)  # constant-time membership test for the filter

    def filter_name(name):
        """Return True for the branch names listed in BRANCH_LIST."""
        return name in branch_names

    size_uncompressed = 0
    t0 = time.perf_counter()
    try:
        # NOTE(review): filter_name is forwarded as an option to uproot.open --
        # confirm that uproot makes use of it there
        with uproot.open(fname, filter_name=filter_name) as f:
            tree = f["CollectionTree"]
            num_entries = tree.num_entries

            # alternative not in use: loop over tree.iterate(expressions=BRANCH_LIST)

            # branch loop approach: read every branch fully, keep only its size
            for name in BRANCH_LIST:
                branch = tree[name]
                branch.array()
                size_uncompressed += branch.uncompressed_bytes

            size_read = f.file.source.num_requested_bytes
        exception = None

    except Exception:
        # report the failure instead of raising; partial byte counts are reset
        # so that a failed file contributes nothing to the totals
        num_entries = 0
        size_read = 0
        size_uncompressed = 0
        exception = traceback.format_exc()

    t1 = time.perf_counter()
    time_finished = datetime.datetime.now()
    return {"fname": fname, "read": size_read, "uncompressed": size_uncompressed, "num_entries": num_entries,
            "runtime": t1-t0, "time_finished": time_finished, "exception": exception}

The following cell launches the computation.
Make sure the cells afterwards also finish: these will write out all the data that is being gathered!

In [None]:
# perform computation
print(f"running with {len(all_files)} files")

utils.start_tracking_workers(client, MEASUREMENT_PATH)  # track worker count in background
try:
    # the Dask performance report covers task creation as well as execution
    with performance_report(filename=MEASUREMENT_PATH/"dask-report-plain-uproot.html"):
        tasks = [dask.delayed(uproot_open_materialize)(f) for f in all_files]  # create tasks, one per file
        t0 = time.perf_counter()  # the wall clock time only covers the execution itself
        out = ak.Array(dask.compute(*tasks))  # perform computations, gather the per-file reports
        t1 = time.perf_counter()
finally:
    # stop the background tracking in any case, also if the computation fails or is interrupted
    utils.stop_tracking_workers()

print(f"wall clock time: {t1-t0:.2f}s")
utils.save_measurement(out, t0, t1, MEASUREMENT_PATH)

While waiting, check out the XCache output: https://grafana.mwt2.org/d/EKefjM-Sz/af-network-200gbps-challenge?orgId=1&viewPanel=205&from=now-30m&to=now

In [None]:
# read back what was written to disk: worker count history and per-file results
timestamps, nworkers, avg_num_workers = utils.get_timestamps_and_counts(MEASUREMENT_PATH)
out, t0, t1 = utils.load_measurement(MEASUREMENT_PATH)
wall_time = t1-t0  # seconds between start and end of the computation

# data volume, in units of 10^9 bytes
GB = 1000**3
read_GB = sum(out['read']) / GB
uncompressed_GB = sum(out['uncompressed']) / GB
print(f"total read (compressed): {read_GB:.2f} GB")
print(f"total read (uncompressed): {uncompressed_GB:.2f} GB")

# average data rate (bytes -> bits) and the factor missing to the 200 Gbps target
rate_Gbps = read_GB*8/wall_time
scale_to_target = 200/rate_Gbps
print(f"average data rate: {rate_Gbps:.2f} Gbps (need to scale by x{scale_to_target:.1f} to reach 200 Gbps)")

# event rate with respect to wall clock time
n_evts = sum(out["num_entries"])
print(f"total event rate (wall clock time): {n_evts / wall_time / 1000:.2f} kHz (processed {n_evts} events total)")

# time spent inside the function, summed over all tasks, compared to the wall clock time
total_runtime = sum(out["runtime"])
runtime_ratio = total_runtime / wall_time
print(f"total aggregated runtime in function: {total_runtime:.2f} s")
print(
    f"ratio total runtime / wall clock time: {runtime_ratio:.2f} "
    "(should match # cores without overhead / scheduling issues)"
)
print(f"time-averaged number of workers: {avg_num_workers:.1f}")
print(f'"efficiency" (ratio of two numbers above): {runtime_ratio / avg_num_workers:.1%}')
print(f"event rate (aggregated time spent in function): {n_evts / total_runtime / 1000:.2f} kHz")

In [None]:
# time window of every task: the end time is recorded, the start follows from the runtime
ends = out["time_finished"].to_numpy()
runtimes = np.asarray(
    [datetime.timedelta(seconds=seconds) for seconds in out["runtime"]],
    dtype=np.timedelta64,
)
starts = ends - runtimes

# instantaneous rate at every 10th timestamp (every 30 seconds according to the
# original note -- depends on the tracking interval used in utils)
times_for_rates = list(timestamps[::10])
instantaneous_rates = []
for timestamp in times_for_rates:
    # tasks that were running at the given timestamp
    running = out[np.logical_and(starts <= timestamp, timestamp <= ends)]
    # each running task contributes its average rate (bytes read -> Gbit, per second of runtime)
    per_task_Gbps = running['read']*8 / 1000**3 / running["runtime"]
    instantaneous_rates.append(sum(per_task_Gbps))

utils.plot_worker_count(timestamps, nworkers, avg_num_workers, times_for_rates, instantaneous_rates, MEASUREMENT_PATH)

In [None]:
# a file counts as failed if its "exception" entry is not None
n_failed = sum(1 for exc in out['exception'] if exc is not None)
print(f"{n_failed} files failed\n")

# use below to get full list with details
# for report in out:
#     if report["exception"] is not None:
#         print(f"{report['fname']} failed in {report['runtime']:.2f} s\n{report['exception']}\n")

In [None]:
# runtime distribution for all files
fig, ax = plt.subplots()

# 100 bin edges from zero to slightly above the longest runtime
longest_runtime = max(out["runtime"])
bins = np.linspace(0, longest_runtime*1.01, 100)
ax.hist(out["runtime"], bins=bins)

# start the x-axis at zero, keep the automatic upper limit
_, x_upper = ax.get_xlim()
ax.set_xlim([0, x_upper])
ax.set_xlabel("runtime [s]")
ax.set_ylabel("count")
ax.semilogy()  # logarithmic y-axis

fig.savefig(MEASUREMENT_PATH / "runtime_distribution.pdf")

In [None]:
# runtime vs number of events in file
fig, ax = plt.subplots()
ax.scatter(out["num_entries"], out["runtime"], marker="x")
ax.set_xlabel("number of events")
ax.set_ylabel("runtime [s]")

# remember the limits found for the scatter points before the reference lines are added
xlim = ax.get_xlim()
ylim = ax.get_ylim()
xvals = np.linspace(*xlim, 100)

# reference lines of constant event rate: (rate in kHz, line style, color)
reference_lines = [
    (5, "-", "C1"),
    (10, "--", "C2"),
    (20, ":", "C3"),
]
for rate_kHz, line_style, color in reference_lines:
    ax.plot(xvals, xvals/(rate_kHz*1_000), label=f"{rate_kHz} kHz", linestyle=line_style, c=color)

# both axes start at zero, the upper limits stay as found for the scatter points
ax.set_xlim([0, xlim[1]])
ax.set_ylim([0, ylim[1]])
ax.legend()

fig.savefig(MEASUREMENT_PATH / "runtime_vs_nevts.pdf")