# Expression Parsing

Part of predicate push-down is parsing the expressions and re-building the map.

In [1]:
from pathlib import Path
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from dask.distributed import Client
import dask
import dask_awkward
import awkward as ak
import hist.dask as hda
from typing import Dict, List
from dataclasses import dataclass

# Awkward-related dask options, one per line; judging by the option names they
# turn the awkward optimization on and make failures raise rather than pass silently.
dask.config.set(
    {
        "awkward.optimization.enabled": True,
        "awkward.raise-failed-meta": True,
        "awkward.optimization.on-fail": "raise",
    }
)

<dask.config.set at 0x2c702c1d2d0>

Information about each dask function we expect to encounter (currently just its argument count).

In [2]:
@dataclass
class func_info:
    """Static facts recorded about one named function."""

    # Argument count of the function (presumably positional inputs — TODO confirm).
    n_args: int


# Lookup table from function name to its func_info record.
function_info = {
    fn_name: func_info(n_args=arity)
    for fn_name, arity in (
        ('any', 1),
        ('getitem', 2),
        ('bitwise_and', 2),
        ('greater', 2),
    )
}

Read through the layers of the collection's high-level graph and extract the directed graph formed by the layers annotated for push-down.

In [4]:
@dataclass
class node_info:
    """One node of the extracted directed graph, i.e. one graph layer."""

    # Key of the layer in the high-level graph (e.g. 'getitem-<hash>').
    name: str
    # ``__name__`` of the function the layer applies (e.g. 'getitem', 'any').
    function_name: str
    # Names of the layers feeding this node. Non-tuple task arguments are kept
    # as-is, so literals can appear here too (the recorded output shows 30 and 'pt').
    input_nodes: List[str]

def layer_names(input_names) -> List[str]:
    """Reduce task arguments to bare names.

    A tuple entry contributes only its first element; any other entry is
    passed through unchanged. Order is preserved.
    """
    return [
        entry[0] if isinstance(entry, tuple) else entry
        for entry in input_names
    ]

def extract_pushdown(coll, debug: bool = True) -> List[node_info]:
    """Describe the push-down annotated layers of a collection's graph.

    The layers of ``coll.dask`` are visited in the order given by
    ``_toposort_layers()``; those whose annotations contain the key
    ``"pushdown"`` are kept. Each kept layer that is an
    ``AwkwardBlockwiseLayer`` but not an ``AwkwardInputLayer`` becomes a
    ``node_info``. Any other kept layer is reported on stdout and skipped.

    Args:
        coll: Object exposing a high-level graph as ``coll.dask``.
        debug: When true (the default, matching the historical behaviour),
            print a verbose dump of every converted layer.

    Returns:
        One ``node_info`` per converted layer, in the reverse of the order
        returned by ``_toposort_layers()``.
    """
    graph = coll.dask

    # Collect in topological order and flip once at the end, rather than
    # prepending to the list on every hit.
    pushdown_deps = []
    for key in graph._toposort_layers():
        annotations = graph.layers[key].annotations
        if annotations is not None and "pushdown" in annotations:
            pushdown_deps.append(key)
    pushdown_deps.reverse()

    result: List[node_info] = []
    for dep in pushdown_deps:
        layer = graph.layers[dep]

        # Not every layer type exposes a ``dsk`` mapping; look it up defensively
        # so such layers reach the "Not a good instance" report instead of
        # raising AttributeError before the type check.
        layer_dsk = getattr(layer, "dsk", None)
        fcn = list(layer_dsk.values())[0][0] if layer_dsk else None

        is_plain_blockwise = isinstance(
            layer, dask_awkward.layers.AwkwardBlockwiseLayer
        ) and not isinstance(layer, dask_awkward.layers.AwkwardInputLayer)

        if is_plain_blockwise and fcn is not None:
            # The task stored under (dep, 0): its first element is skipped and
            # the remaining elements are taken as the node's inputs.
            task_inputs = layer[(dep, 0)][1:]
            result.append(node_info(dep, fcn.fn.__name__, layer_names(task_inputs)))
            if debug:
                print(layer.dsk)
                print(dir(layer))
                print(list(layer.keys()))
                print(dep, fcn.fn)
                print(dir(fcn))
                print(fcn.arg_repackers[0])
                print("")
        else:
            print("Not a good instance", dep, fcn)

    return result

In [5]:

# Layers created inside this context carry the "pushdown" annotation, which is
# the key extract_pushdown looks for when it scans the graph.
with dask.annotate(pushdown="servicex"):
    events = NanoEventsFactory.from_root(
        # A dict literal keeps a single entry per key, so repeating the same
        # file name adds nothing; the file is listed once.
        {"nano_dy.root": "Events"},
        metadata={"dataset": "nano_dy"},
        schemaclass=NanoAODSchema,
    ).events()

    # Keep events having at least one muon with pt > 30 and |eta| < 2.4.
    mask_pt = events.Muon.pt > 30
    mask_eta = abs(events.Muon.eta) < 2.4
    events = events[ak.any(mask_pt & mask_eta, axis=1)]

# The histogram is built and filled outside the annotate context.
myhist = hda.Hist.new.Regular(50, 0, 2.5, name="abseta").Double()

myhist.fill(abseta=abs(events.Muon.eta))

Issue: coffea.nanoevents.methods.vector will be removed and replaced with scikit-hep vector. Nanoevents schemas internal to coffea will be migrated. Otherwise please consider using that package!.
  from coffea.nanoevents.methods import vector


In [6]:
# Extract the node list from the filled histogram's task graph; the returned
# list is shown as the cell output.
extract_pushdown(myhist)

{'getitem-f8ff719ba2ef2b31a45cbda6e474eae2': (<dask_awkward.lib.core.ArgsKwargsPackedFunction object at 0x000002C72AF95E50>, '__dask_blockwise__0', '__dask_blockwise__1')}
['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__contains__', '__copy__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__orig_bases__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_cached_dict', '_cull', '_cull_dependencies', '_dict', '_dims', '_repr_html_', 'annotations', 'clone', 'collection_annotations', 'concatenate', 'cull', 'dims', 'dsk', 'from_blockwise', 'get', 'get_dependencies', 'get_output_keys', 'has_been_unpickled', 'indices', 'io_deps', 'i

[node_info(name='getitem-f8ff719ba2ef2b31a45cbda6e474eae2', function_name='getitem', input_nodes=['from-uproot-16bb4126f2e8acd5aadb5203ff224585', 'any-c2dc4b16a8b57db82c1b72b4e8226b04']),
 node_info(name='any-c2dc4b16a8b57db82c1b72b4e8226b04', function_name='any', input_nodes=['bitwise-and-339f1ef696cd35c704cb209077a2a6e1']),
 node_info(name='bitwise-and-339f1ef696cd35c704cb209077a2a6e1', function_name='bitwise_and', input_nodes=['greater-d4c26721f337dbb96cb25e1caf1a6d4e', 'less-35d80e42713d5febbb503e2b5b318e8b']),
 node_info(name='greater-d4c26721f337dbb96cb25e1caf1a6d4e', function_name='greater', input_nodes=['pt-a0e1b763db4e4ef4f5cbf5c8d8d54d1c', 30]),
 node_info(name='pt-a0e1b763db4e4ef4f5cbf5c8d8d54d1c', function_name='getitem', input_nodes=['Muon-b944fd5cfe0593eea7308cb0662d916a', 'pt']),
 node_info(name='Muon-b944fd5cfe0593eea7308cb0662d916a', function_name='getitem', input_nodes=['from-uproot-16bb4126f2e8acd5aadb5203ff224585', 'Muon']),
 node_info(name='less-35d80e42713d5febbb5