In [None]:
# Set up notebook and check compression of parquet file
import cupy
import cudf
import pandas as pd
import awkward as ak
import pyarrow.parquet as pq
# Parquet file used by every benchmark below (the file name suggests zstd level-3
# compression with PLAIN encoding -- the expression at the end of this cell checks the codec).
filepath = "/home/fstrug/uscmshome/nobackup/temp/Run2012B_SingleMu_compressed_zstdlv3_Pv2-6_PPv2-0_PLAIN.parquet"
pqfile = pq.ParquetFile(filepath)

# Show the compression codec recorded in the metadata of the first column
# chunk of the first row group.
(
    pqfile.metadata
    .row_group(0)
    .column(0)
    .compression
)

# Benchmarking

## Jet pt read

In [None]:
# ---------------------------------------------------------------
# Jet pt: load the same "Jet_pt" column once with each library.

# cuDF, using its own parquet engine.
jet_pt_cudf_togpu = cudf.read_parquet(
    filepath,
    engine="cudf",
    columns=["Jet_pt"],
)

# pandas.
jet_pt_pandas_read = pd.read_parquet(filepath, columns=["Jet_pt"])

# Awkward Array.
jet_pt_awk = ak.from_parquet(filepath, columns=["Jet_pt"])

### Squaring an array - Jet pt

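cuDF does not yet fully support jagged arrays, so the squaring benchmarks in this section compare only pandas and Awkward Array (CPU and CUDA backends).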

In [None]:
%%timeit
# pandas baseline: square the DataFrame that pd.read_parquet loaded for "Jet_pt".
jet_pt_pandas_read ** 2

In [None]:
%%timeit
# Awkward Array: square the "Jet_pt" field of the array loaded by ak.from_parquet.
jet_pt_awk["Jet_pt"] ** 2

In [None]:
%%timeit
ak.to_backend(jet_pt_awk["Jet_pt"], "cuda")**2

## Read and square an array - MET pt

In [None]:
def square_array(array):
    """Return ``array`` raised to the second power.

    The ``**`` operator is applied to ``array`` as a whole, so the type of
    the result is whatever that operator produces for the given argument.
    """
    squared = array ** 2
    return squared
    
# Explicitly switch cuDF's "copy_on_write" option off before the MET pt benchmarks run.
cudf.set_option("copy_on_write", False)

In [None]:
%%timeit
# CUDF - Read to GPU
with cupy.cuda.Device() as d:
    MET_pt_cudf_togpu = cudf.read_parquet(filepath, engine="cudf", columns = ["MET_pt"])
    MET_pt_cudf_togpu.MET_pt.apply(square_array)
    d.synchronize()

In [None]:
%%timeit
# CUDF - Read to CPU, Copy to GPU
with cupy.cuda.Device() as d:
    MET_pt_arrow = pq.read_table(filepath, columns = ["MET_pt"])
    MET_pt_cudf_cputogpu = cudf.DataFrame.from_arrow(MET_pt_arrow)
    MET_pt_cudf_cputogpu.MET_pt ** 2
    d.synchronize()

In [None]:
%%timeit
# Awkward - Read to CPU, Copy to GPU
with cupy.cuda.Device() as d:
    MET_pt_arrow = pq.read_table(filepath, columns = ["MET_pt"])
    MET_pt_ak = ak.from_arrow(MET_pt_arrow)
    MET_pt_ak_cuda = ak.to_backend(MET_pt_ak, "cuda")
    MET_pt_ak_cuda.MET_pt**2
    d.synchronize()