In [1]:
# need to run instead of import to be able to serialize functions to workers
%run utils.py
help(setup_rucio_and_proxy)
help(get_signed_url)
help(get_signed_url_worker)

Help on function setup_rucio_and_proxy in module __main__:

setup_rucio_and_proxy(x509_data, rucio_account='nihartma', rucio_home='/srv/conda/envs/notebook', proxy_path='/tmp/x509')

Help on function get_signed_url in module __main__:

get_signed_url(client, scope, name, rse='GOOGLE_EU')

Help on function get_signed_url_worker in module __main__:

get_signed_url_worker(proxy_data, scope, name, rucio_account='nihartma', rucio_home='/srv/conda/envs/notebook')



Upload the x509 proxy certificate file (its path is printed by `echo $X509_USER_PROXY` on the machine where the proxy was set up).

In [2]:
from ipywidgets import FileUpload
# Widget for selecting the x509 proxy file; a later cell reads the uploaded
# bytes from `upload.data`, so a file must be chosen here before continuing.
upload = FileUpload()
display(upload)

FileUpload(value={}, description='Upload')

In [4]:
import os
import rucio.client
import uproot
import awkward as ak
from functools import partial
from dask_gateway import GatewayCluster
import dask
from dask import delayed

# `upload.data` holds the raw bytes of every uploaded file and stays empty until
# a file has been selected in the upload widget. Fail with an actionable message
# instead of a bare IndexError when this cell runs too early (e.g. "Run All").
if not upload.data:
    raise RuntimeError(
        "No x509 proxy file uploaded yet: select it in the upload widget above, then re-run this cell."
    )

# Take the most recently uploaded file as the proxy.
x509_data = upload.data[-1]

# Configure rucio and the proxy for the notebook process itself; the same bytes
# are passed to the workers later via get_signed_url_worker.
setup_rucio_and_proxy(x509_data, rucio_account="nihartma")

In [5]:
# NOTE(review): presumably relies on the account/proxy configured by
# setup_rucio_and_proxy in the previous cell — run that cell first.
rucio_client = rucio.client.Client()

In [6]:
# Dataset to process, identified by its rucio scope and dataset name.
DATASET_SCOPE = "data17_13TeV"
DATASET_NAME = "data17_13TeV.00338183.physics_Main.deriv.DAOD_PHYSLITE.r10258_p3399_p4309_tid22958105_00"

# Materialize the listing as a list so later cells can index and slice it.
files = list(rucio_client.list_files(DATASET_SCOPE, DATASET_NAME))

In [7]:
# Inspect one record; its "scope" and "name" entries are what get_signed_url needs below.
files[0]

{'scope': 'data17_13TeV',
 'name': 'DAOD_PHYSLITE.22958105._000001.pool.root.1',
 'bytes': 338979171,
 'adler32': 'a3972cfd',
 'guid': 'A8ED6B22B0069F4AA04BD8E7D09492A0',
 'events': 44235}

In [8]:
# Request a signed URL for the first file of the dataset.
first_file = files[0]
url = get_signed_url(rucio_client, first_file["scope"], first_file["name"])

In [9]:
# Open the remote file through the signed URL and select its "CollectionTree" object.
# No `with` block here: `tree` must stay usable because the next cell reads a branch from it.
tree = uproot.open(f"{url}:CollectionTree", http_handler=uproot.MultithreadedHTTPSource)

In [10]:
# Read the whole "AnalysisElectronsAuxDyn.pt" branch into memory as an array.
pt = tree["AnalysisElectronsAuxDyn.pt"].array()

In [11]:
# One variable-length list of float32 values per event; the 44235 entries match files[0]["events"].
pt

<Array [[], [], [], ... 3.45e+04], [], []] type='44235 * var * float32'>

In [12]:
# Start a dask-gateway cluster using the image named by the JUPYTER_IMAGE
# environment variable (raises KeyError if that variable is unset).
# NOTE(review): presumably this is the image of the running notebook server,
# so that workers and notebook share the same software environment — confirm.
cluster = GatewayCluster(image=os.environ["JUPYTER_IMAGE"])

In [13]:
# Show the cluster's status widget.
cluster

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

In [14]:
# Request 4 workers — one per file processed below (files[:4]).
cluster.scale(4)

In [15]:
# NOTE(review): `client` is not referenced again in this notebook; presumably
# creating it makes this cluster the default scheduler used by dask.compute
# below — confirm.
client = cluster.get_client()

Use `get_signed_url_worker` to obtain the signed URLs directly on the gateway cluster workers:

In [16]:
def test_task(url):
    """Count the electrons stored in one remote file.

    Opens the "CollectionTree" object of the ROOT file at ``url`` over HTTP,
    reads the "AnalysisElectronsAuxDyn.pt" branch and returns the total number
    of entries in it, summed over all events.

    Args:
        url: URL of the file to read (here: a signed URL).

    Returns:
        The total number of values in the branch, i.e. the electron count.
    """
    source = f"{url}:CollectionTree"
    with uproot.open(source, http_handler=uproot.MultithreadedHTTPSource) as collection_tree:
        electron_pt = collection_tree["AnalysisElectronsAuxDyn.pt"].array()
        # Per-event multiplicity first, then the grand total over all events.
        electrons_per_event = ak.num(electron_pt)
        return ak.sum(electrons_per_event)

In [17]:
# Worker-side URL signing with the rucio account bound once up front.
sign_url_on_worker = partial(get_signed_url_worker, rucio_account="nihartma")

# One lazy two-step chain per file: obtain the signed URL on the worker, then
# feed it into test_task. Nothing is executed until the tasks are computed.
tasks = [
    delayed(test_task)(
        delayed(sign_url_on_worker)(x509_data, entry["scope"], entry["name"])
    )
    for entry in files[:4]
]

In [18]:
# Execute all tasks; the result tuple holds one electron count per file, in the order of `tasks`.
dask.compute(*tasks)

(6595, 6436, 5736, 5550)

In [19]:
# Shut the gateway cluster down; no cell after this one can submit work to it.
cluster.shutdown()