In [None]:
import os
#Request 24 numba threads; this is set ahead of the hepaccelerate imports further down
os.environ["NUMBA_NUM_THREADS"] = "24"

In [None]:
import sys
import matplotlib.pyplot as plt
#Make modules in the parent directory importable
sys.path.append("../")

In [None]:
import hepaccelerate
import uproot
import numpy as np
import cupy as cp

import pandas
import copy

import hepaccelerate.kernels as kernels
import hepaccelerate.backend_cuda as backend_cuda
import hepaccelerate.backend_cpu as backend_cpu

In [None]:
#Uncomment to download the two input files (DataMuons.root and mc_147771.Zmumu.root) opened by the cells below
#!curl http://opendata.atlas.cern/release/samples/Data/DataMuons.root -o DataMuons.root
#!curl http://opendata.atlas.cern/release/samples/MC/mc_147771.Zmumu.root -o mc_147771.Zmumu.root

In [None]:
#Show the keys of the "mini" object in the data file (presumably the available branch names)
uproot.open("DataMuons.root").get("mini").keys()

In [None]:
def load_file(filename):
    """Read the analysis inputs from the "mini" object of a ROOT file.

    Parameters:
        filename: path of the ROOT file, passed to uproot.open.

    Returns:
        (arrdata, numev), where arrdata maps each branch name (as str) to
        the array loaded for that branch, and numev is the length of the
        "mini" object.
    """
    #Branches needed by the analysis: jets, leptons, vertex count and pileup weight
    branches = (
        b"jet_pt", b"jet_eta", b"jet_phi", b"jet_m",
        b"lep_pt", b"lep_eta", b"lep_phi", b"lep_E", b"lep_type", b"lep_charge",
        b"pvxp_n",
        b"scaleFactor_PILEUP",
    )

    tree = uproot.open(filename).get("mini")

    #Load arrays from ROOT file, keyed by the decoded branch name
    arrdata = {}
    for branch in branches:
        arrdata[branch.decode("ascii")] = tree.array(branch)

    return arrdata, len(tree)

In [None]:
#Load the data (_d) and simulation (_m) samples; process_analysis reads these globals
arrdata_d, numev_d = load_file("DataMuons.root")
arrdata_m, numev_m = load_file("mc_147771.Zmumu.root")

In [None]:
def compute_inv_mass(offsets, pt, eta, phi, m, mask_content, nplib, backend):
    """Invariant mass of the summed four-momentum of the masked objects, per event.

    Parameters:
        offsets: offsets array handed to kernels.sum_in_offsets for the per-event sums.
        pt, eta, phi, m: per-object kinematics passed to backend.spherical_to_cartesian.
        mask_content: per-object mask forwarded to kernels.sum_in_offsets.
        nplib: array library providing sqrt (numpy or cupy in this notebook).
        backend: hepaccelerate backend module.

    Returns:
        Array of sqrt(E^2 - |p|^2) built from the summed components; negative
        E^2 - |p|^2 values are set to zero before the square root.
    """
    #Convert the object momenta to cartesian components
    px, py, pz, e = backend.spherical_to_cartesian(pt, eta, phi, m)

    def event_sum(component):
        #Add up one four-momentum component on a per-event basis
        return kernels.sum_in_offsets(backend, offsets, component, mask_content=mask_content)

    pxtot = event_sum(px)
    pytot = event_sum(py)
    pztot = event_sum(pz)
    etot = event_sum(e)

    mass2 = etot**2 - (pxtot**2 + pytot**2 + pztot**2)
    #Guard the square root against negative values
    mass2[mass2<0] = 0.0
    return nplib.sqrt(mass2)

In [None]:
def to_device(arr, nplib):
    """Return arr as a cupy array when nplib is cupy, otherwise return arr unchanged."""
    return cp.array(arr) if nplib == cp else arr

In [None]:
def process_array_data(arrdata, numev, nplib, backend, parameters):
    """Apply the lepton/jet/event selection to one sample and fill histograms.

    Parameters:
        arrdata: dict of branch name -> array as returned by load_file. The
            jet_* and lep_* entries are read through .content and .offsets.
        numev: number of events; used as the length of the event weight array.
        nplib: array library (numpy or cupy in this notebook).
        backend: hepaccelerate backend module used by the kernels.
        parameters: dict with the keys "lep_pt_cut", "jet_pt_cut",
            "jet_eta_cut", "jet_lepton_dr", "min_num_jet" and "is_mc".

    Returns:
        dict with "numev" and the histograms "hist_jet_pt", "hist_jet_eta",
        "hist_jet_phi", "hist_sum_pt", "hist_inv_mass_jets" and
        "hist_inv_mass_leps", each as produced by
        kernels.histogram_from_vector_several.
    """
    #Move arrays to GPU if applicable; pt and mass are rescaled by 1/1000
    #(presumably MeV -> GeV, consistent with the lepton masses set below)
    jet_pt = to_device(arrdata["jet_pt"].content/1000.0, nplib)
    jet_m = to_device(arrdata["jet_m"].content/1000.0, nplib)
    jet_eta = to_device(arrdata["jet_eta"].content, nplib)
    jet_phi = to_device(arrdata["jet_phi"].content, nplib)
    jet_offsets = to_device(arrdata["jet_pt"].offsets, nplib)

    lep_pt = to_device(arrdata["lep_pt"].content/1000.0, nplib)
    lep_eta = to_device(arrdata["lep_eta"].content, nplib)
    lep_phi = to_device(arrdata["lep_phi"].content, nplib)
    lep_type = to_device(arrdata["lep_type"].content, nplib)
    lep_charge = to_device(arrdata["lep_charge"].content, nplib)
    lep_offsets = to_device(arrdata["lep_pt"].offsets, nplib)

    #Set the lepton masses by type (the values match the electron and muon masses);
    #the four-vectors are built from these masses, so lep_E is not needed here
    lep_m = nplib.zeros_like(lep_pt)
    lep_m[lep_type==11] = 0.510/1000.0
    lep_m[lep_type==13] = 105.658/1000.0

    #Lepton selection: pt threshold plus the opposite-sign requirement
    sel_leps = lep_pt > parameters["lep_pt_cut"]
    leps_opposite_charge = kernels.select_opposite_sign(backend, lep_offsets, lep_charge, sel_leps)
    sel_leps = sel_leps & leps_opposite_charge
    num_leps = kernels.sum_in_offsets(backend, lep_offsets, sel_leps, dtype=nplib.int8)
    inv_mass_leps = compute_inv_mass(lep_offsets, lep_pt, lep_eta, lep_phi, lep_m, sel_leps, nplib, backend)

    #Find jets that pass the selection cuts
    sel_jets = (
        (jet_pt > parameters["jet_pt_cut"]) &
        (nplib.abs(jet_eta) < parameters["jet_eta_cut"])
    )

    #Mask the jets that are closer than a certain dR value to selected leptons.
    #The lepton collection must carry the lepton offsets: the lepton eta/phi
    #arrays are laid out per lepton, so jet offsets would slice them wrongly.
    jet_dr_masked = kernels.mask_deltar_first(
        backend,
        {"eta": jet_eta, "phi": jet_phi, "offsets": jet_offsets}, sel_jets,
        {"eta": lep_eta, "phi": lep_phi, "offsets": lep_offsets}, sel_leps,
        parameters["jet_lepton_dr"]
    )
    sel_jets = sel_jets & jet_dr_masked

    #Find events with a minimum number of jets and at least two selected leptons
    num_jets = kernels.sum_in_offsets(backend, jet_offsets, sel_jets, dtype=nplib.int8)
    sel_ev = (num_jets >= parameters["min_num_jet"]) & (num_leps >= 2)

    #Compute the total pt of selected jets for all events that pass the selection
    sum_pt = kernels.sum_in_offsets(backend, jet_offsets, jet_pt, mask_rows=sel_ev, mask_content=sel_jets)

    #Create per-event weights: unity for data, the pileup scale factor for simulation
    weights_ev = nplib.ones(numev, dtype=nplib.float32)
    if parameters["is_mc"]:
        weights_ev *= nplib.array(arrdata["scaleFactor_PILEUP"])

    #Create a per-jet array of the event weights using broadcasting
    weights_jet = nplib.ones(len(jet_pt), dtype=nplib.float32)
    kernels.broadcast(backend, jet_offsets, weights_ev, weights_jet)

    #Prepare histograms of jet properties
    hists_jet = kernels.histogram_from_vector_several(
        backend,
        [
            (jet_pt, nplib.linspace(0, 500, 100, dtype=nplib.float32)),
            (jet_eta, nplib.linspace(-5, 5, 100, dtype=nplib.float32)),
            (jet_phi, nplib.linspace(-4, 4, 100, dtype=nplib.float32)),
        ],
        weights_jet,
        mask=sel_jets
    )

    #Compute the invariant mass of the selected jets in the event
    inv_mass_jets = compute_inv_mass(jet_offsets, jet_pt, jet_eta, jet_phi, jet_m, sel_jets, nplib, backend)

    #Prepare histograms of per-event quantities for the selected events
    hists_ev = kernels.histogram_from_vector_several(
        backend,
        [
            (sum_pt, nplib.linspace(0, 1000, 100, dtype=nplib.float32)),
            (inv_mass_jets, nplib.linspace(0, 1000, 40, dtype=nplib.float32)),
            (inv_mass_leps, nplib.linspace(0, 200, 40, dtype=nplib.float32)),
        ],
        weights_ev,
        mask=sel_ev
    )

    return {
        "numev": numev,
        "hist_jet_pt": hists_jet[0],
        "hist_jet_eta": hists_jet[1],
        "hist_jet_phi": hists_jet[2],
        "hist_sum_pt": hists_ev[0],
        "hist_inv_mass_jets": hists_ev[1],
        "hist_inv_mass_leps": hists_ev[2],
    }

In [None]:
def process_analysis(pars, nplib, backend):
    """Process the data and simulation samples with the same selection parameters.

    Each sample gets its own deep copy of pars with "is_mc" set (False for
    data, True for simulation), so the caller's dict is left untouched.

    Parameters:
        pars: dict of selection parameters forwarded to process_array_data.
        nplib: array library forwarded to process_array_data.
        backend: hepaccelerate backend module forwarded to process_array_data.

    Returns:
        dict with the process_array_data result for each sample under the
        keys "data" and "mc".
    """
    #(result key, arrays, number of events, is_mc flag) for each sample, data first
    samples = (
        ("data", arrdata_d, numev_d, False),
        ("mc", arrdata_m, numev_m, True),
    )

    results = {}
    for label, arrays, nevents, is_mc in samples:
        sample_pars = copy.deepcopy(pars)
        sample_pars["is_mc"] = is_mc
        results[label] = process_array_data(arrays, nevents, nplib, backend, sample_pars)
    return results

In [None]:
#Selection parameters read by process_array_data
pars = {
    #Leptons must have pt above this value
    "lep_pt_cut": 40.0,
    #dR threshold used when masking jets against selected leptons
    "jet_lepton_dr": 0.4,
    #Jets must have pt above this value
    "jet_pt_cut": 20,
    #Jets must have |eta| below this value
    "jet_eta_cut": 2.5,
    #Minimum number of selected jets per event
    "min_num_jet": 3,
}

In [None]:
#Run the analysis once with cupy arrays and the CUDA backend; r is used by the plots and timing cells below
r = process_analysis(pars, cp, backend_cuda)

In [None]:
#Jet pt histogram for data and simulation: element [0] plotted against element [2] without
#its last entry (presumably bin contents against lower bin edges)
plt.plot(r["data"]["hist_jet_pt"][2][:-1], r["data"]["hist_jet_pt"][0], label="data")
plt.plot(r["mc"]["hist_jet_pt"][2][:-1], r["mc"]["hist_jet_pt"][0], label="mc")
plt.xlabel("jet pt [GeV]")
plt.ylabel("jets / bin")
plt.legend()
plt.show()

In [None]:
#Invariant mass of the selected leptons: data drawn as black markers, simulation as a line
plt.plot(r["data"]["hist_inv_mass_leps"][2][:-1], r["data"]["hist_inv_mass_leps"][0], lw=0, marker="o", color="black", label="data")
plt.plot(r["mc"]["hist_inv_mass_leps"][2][:-1], r["mc"]["hist_inv_mass_leps"][0], label="mc")
plt.xlabel("lepton invariant mass [GeV]")
plt.ylabel("events / bin")
plt.legend()
plt.show()

In [None]:
#Time the full data+MC analysis with cupy and the CUDA backend; -o returns the timing result
tr = %timeit -o process_analysis(pars, cp, backend_cuda)
#Events handled per call (data + MC), taken from the result r of the earlier run
numev = r["data"]["numev"] + r["mc"]["numev"]
#Throughput in millions of events per second, from the average time per run
print("Event processing speed: {0:.2f} MHz".format(numev/tr.average/1e6))

In [None]:
#Repeat the timing with numpy and the CPU backend; numev is reused from the previous cell
tr = %timeit -o process_analysis(pars, np, backend_cpu)
print("Event processing speed: {0:.2f} MHz".format(numev/tr.average/1e6))