# Uproot - GPU Decompression of Multiple Branches

In [1]:
import uproot
import numpy as np
import awkward as ak
from kvikio.nvcomp_codec import NvCompBatchCodec
import cramjam

def basket_to_compcont_border(data: bytes, dtype: np.dtype, expected_entries: int):
    """Locate the compressed payload of a raw basket without decompressing it.

    ``data`` is parsed as three consecutive pieces: a key header (first
    ``fKeylen`` bytes, whose tail holds the basket fields), a compression
    block header, and the compressed payload that runs to the end of ``data``.

    Args:
        data: Raw bytes of exactly one basket as read from the file.
        dtype: Element dtype of the basket contents; only ``itemsize`` is used.
        expected_entries: Number of elements the caller expects in the basket.

    Returns:
        A ``(compressed_content, border)`` tuple: the bytes following the
        compression header, and the size in bytes of the contents region
        within the uncompressed data (``fLast - fKeylen``).

    Raises:
        AssertionError: If any consistency check fails (length mismatch,
            basket not compressed, algorithm tag other than ``b"ZS"``,
            unexpected entry count, or sizes that disagree).
    """
    # ---- key header ------------------------------------------------------
    key_struct = uproot.models.TBasket._tbasket_format1
    (
        fNbytes,
        _key_version,
        fObjlen,
        _fDatime,
        fKeylen,
        _fCycle,
    ) = key_struct.unpack(data[: key_struct.size])
    assert fNbytes == len(data)
    # Equality would hold only for an uncompressed basket, which is rejected.
    assert fNbytes - fKeylen != fObjlen  # this is only true for uncompressed baskets

    # ---- basket fields, stored just before the final byte of the key ------
    basket_struct = uproot.models.TBasket._tbasket_format2
    fields_stop = fKeylen - 1
    fields_start = fKeylen - basket_struct.size - 1
    (
        _fVersion,
        _fBufferSize,
        _fNevBufSize,
        _fNevBuf,
        fLast,
    ) = basket_struct.unpack(data[fields_start:fields_stop])
    # Border between the contents and the offsets in the uncompressed data.
    border = fLast - fKeylen
    assert border <= fObjlen, f"{border} <= {fObjlen}"
    assert expected_entries == border // dtype.itemsize

    # ---- compression block header ------------------------------------------
    block_struct = uproot.compression._decompress_header_format
    payload_start = fKeylen + block_struct.size
    algo, _method, c1, c2, c3, u1, u2, u3 = block_struct.unpack(
        data[fKeylen:payload_start]
    )
    # Only zstd is handled for now; more algorithms can be supported later.
    assert algo == b"ZS", f"Unsupported algorithm: {algo}"  # zstd, we can support more later
    # Both sizes are stored as three bytes, least-significant byte first.
    block_compressed_bytes = (c3 << 16) + (c2 << 8) + c1
    block_uncompressed_bytes = (u3 << 16) + (u2 << 8) + u1
    assert fObjlen == block_uncompressed_bytes
    # Single-block assumption; may not be true for baskets larger than 16 MiB.
    assert len(data) == fKeylen + block_struct.size + block_compressed_bytes

    return (data[payload_start:], border)

def basket_to_array(data: bytes, dtype: np.dtype, expected_entries: int):
    """Decompress one raw zstd basket on the CPU and return its contents.

    Performs the same header parsing and consistency checks as the
    payload-only variant, then decompresses with ``cramjam`` and views the
    leading contents region as ``dtype``.

    Args:
        data: Raw bytes of exactly one basket as read from the file.
        dtype: Element dtype used to interpret the decompressed contents.
        expected_entries: Number of elements the caller expects in the basket.

    Returns:
        A NumPy array of ``border // dtype.itemsize`` elements created with
        ``np.frombuffer`` over the decompressed buffer.

    Raises:
        AssertionError: If any consistency check on the headers fails.
    """
    # Key header: total length, uncompressed length and key length.
    key_layout = uproot.models.TBasket._tbasket_format1
    key_values = key_layout.unpack(data[: key_layout.size])
    (
        fNbytes,
        _key_version,
        fObjlen,
        _fDatime,
        fKeylen,
        _fCycle,
    ) = key_values
    assert fNbytes == len(data)
    assert fNbytes - fKeylen != fObjlen  # this is only true for uncompressed baskets

    # Basket fields occupy the bytes right before the last byte of the key.
    basket_layout = uproot.models.TBasket._tbasket_format2
    basket_values = basket_layout.unpack(
        data[fKeylen - basket_layout.size - 1 : fKeylen - 1]
    )
    (
        _fVersion,
        _fBufferSize,
        _fNevBufSize,
        _fNevBuf,
        fLast,
    ) = basket_values
    # Border between the contents and the offsets in the uncompressed data.
    border = fLast - fKeylen
    assert border <= fObjlen, f"{border} <= {fObjlen}"
    n_items = border // dtype.itemsize
    assert expected_entries == n_items

    # Compression block header: algorithm tag plus two 3-byte sizes.
    block_layout = uproot.compression._decompress_header_format
    header_end = fKeylen + block_layout.size
    block_values = block_layout.unpack(data[fKeylen:header_end])
    algo, _method, c1, c2, c3, u1, u2, u3 = block_values
    assert algo == b"ZS", f"Unsupported algorithm: {algo}"  # zstd, we can support more later
    block_compressed_bytes = (c3 << 16) + (c2 << 8) + c1
    block_uncompressed_bytes = (u3 << 16) + (u2 << 8) + u1
    assert fObjlen == block_uncompressed_bytes
    # Single-block assumption; may not be true for baskets larger than 16 MiB.
    assert len(data) == header_end + block_compressed_bytes

    # Decompress everything, but expose only the contents region: whatever
    # follows the border (the offsets) is left out of the returned array.
    raw_content = cramjam.zstd.decompress(
        data[header_end:], output_len=block_uncompressed_bytes
    )
    return np.frombuffer(raw_content, dtype=dtype, count=border // dtype.itemsize)

def get_counts(filehandle, branch):
    """Read a whole flat branch on the CPU, one basket at a time.

    Used to obtain the per-entry counts of a jagged branch from its count
    branch.

    Args:
        filehandle: Seekable binary file object positioned on the ROOT file.
        branch: Branch whose ``all_members`` provide ``fBasketSeek``,
            ``fBasketBytes`` and ``fBasketEntry`` and whose interpretation
            exposes ``from_dtype`` / ``to_dtype``.

    Returns:
        A NumPy array with one value per entry, converted to the
        interpretation's ``to_dtype``.

    Raises:
        AssertionError: If the basket arrays do not end with the expected
            zero terminator, or a basket fails its consistency checks.
    """
    members = branch.all_members
    seek_positions = members["fBasketSeek"]
    byte_lengths = members["fBasketBytes"]
    entry_edges = members["fBasketEntry"]
    # These arrays are one element longer than the number of baskets; the
    # extra trailing slot of the seek/length arrays must be zero.
    assert seek_positions[-1] == 0
    assert byte_lengths[-1] == 0

    interpretation = branch.interpretation
    disk_dtype = interpretation.from_dtype
    flat = np.empty(entry_edges[-1], dtype=disk_dtype)

    n_baskets = len(seek_positions) - 1
    for idx in range(n_baskets):
        filehandle.seek(seek_positions[idx])
        raw = filehandle.read(byte_lengths[idx])
        first_entry = entry_edges[idx]
        stop_entry = entry_edges[idx + 1]
        # It would be better to decompress straight into ``flat`` instead of
        # going through a temporary array per basket.
        flat[first_entry:stop_entry] = basket_to_array(
            raw, disk_dtype, stop_entry - first_entry
        )

    return flat.astype(interpretation.to_dtype)

In [5]:
def _collect_branch_baskets(filehandle, branch, dtype, item_offsets=None):
    """Read every basket of ``branch`` and record where its contents belong.

    Args:
        filehandle: Seekable binary file object positioned on the ROOT file.
        branch: Branch whose ``all_members`` provide ``fBasketSeek``,
            ``fBasketBytes`` and ``fBasketEntry``.
        dtype: On-disk element dtype of the basket contents.
        item_offsets: For jagged branches, the cumulative item offsets indexed
            by entry number; ``None`` for flat branches, where entry numbers
            are used directly as destination indices.

    Returns:
        A dict with ``N_baskets`` plus the parallel lists
        ``compressed_contents``, ``borders``, ``put_starts`` and ``put_stops``.
    """
    basket_starts = branch.all_members["fBasketSeek"]
    basket_lengths = branch.all_members["fBasketBytes"]
    basket_entryoffsets = branch.all_members["fBasketEntry"]
    # These arrays are one element longer than the number of baskets; the
    # extra trailing slot of the seek/length arrays must be zero.
    assert basket_starts[-1] == 0
    assert basket_lengths[-1] == 0

    n_baskets = len(basket_starts) - 1
    compressed_contents = []
    borders = []
    put_starts = []
    put_stops = []
    for i in range(n_baskets):
        filehandle.seek(basket_starts[i])
        data = filehandle.read(basket_lengths[i])
        put_start = basket_entryoffsets[i]
        put_stop = basket_entryoffsets[i + 1]
        if item_offsets is not None:
            # Jagged branch: translate entry numbers into flat item positions.
            put_start = item_offsets[put_start]
            put_stop = item_offsets[put_stop]
        compressed_content, border = basket_to_compcont_border(
            data, dtype, put_stop - put_start
        )
        compressed_contents.append(compressed_content)
        borders.append(border)
        put_starts.append(put_start)
        put_stops.append(put_stop)

    return {
        "N_baskets": n_baskets,
        "compressed_contents": compressed_contents,
        "borders": borders,
        "put_starts": put_starts,
        "put_stops": put_stops,
    }


def GPU_all_baskets_to_array(filehandle, branches):
    """Decompress all baskets of several branches in one nvCOMP zstd batch.

    The function works in three phases: (1) read each branch's baskets from
    ``filehandle`` and strip their headers, (2) hand every compressed payload
    to ``NvCompBatchCodec("zstd").decode_batch`` in a single call, and
    (3) copy the contents region of each decompressed basket into its
    branch's output buffer.

    Each branch keeps its own on-disk dtype, so branches with different
    element types can be mixed in one call. Branches are keyed by name; a
    branch that appears more than once is decoded once.

    Args:
        filehandle: Seekable binary file object positioned on the ROOT file.
        branches: Iterable of uproot branches interpreted as ``AsJagged`` or
            ``AsDtype``.

    Returns:
        A dict mapping branch name to its array: an Awkward array built with
        ``ak.unflatten`` for jagged branches, a NumPy array for flat ones.

    Raises:
        NotImplementedError: If a branch has any other interpretation.
        AssertionError: If a basket or branch fails its consistency checks.
    """
    # ---- phase 1: gather compressed payloads and placement info -----------
    branches_metadatas = {}
    for branch in branches:
        interpretation = branch.interpretation
        if isinstance(interpretation, uproot.interpretation.jagged.AsJagged):
            counts = get_counts(filehandle, branch.count_branch)
            assert len(counts) == branch.all_members["fBasketEntry"][-1]
            offsets = np.zeros(len(counts) + 1, dtype=np.int64)
            offsets[1:] = np.cumsum(counts)
            dtype = interpretation.content.from_dtype
            to_dtype = interpretation.content.to_dtype
            n_items = offsets[-1]
        elif isinstance(interpretation, uproot.interpretation.numerical.AsDtype):
            counts = None
            offsets = None
            dtype = interpretation.from_dtype
            to_dtype = interpretation.to_dtype
            n_items = branch.all_members["fBasketEntry"][-1]
        else:
            raise NotImplementedError(
                f"Unsupported interpretation for branch {branch.name!r}: "
                f"{interpretation!r}"
            )

        branch_metadata = _collect_branch_baskets(filehandle, branch, dtype, offsets)
        branch_metadata["counts"] = counts
        branch_metadata["content"] = np.empty(n_items, dtype=dtype)
        # Stored per branch so that phase 3 never reuses another branch's dtype.
        branch_metadata["dtype"] = dtype
        branch_metadata["to_dtype"] = to_dtype
        branches_metadatas[branch.name] = branch_metadata

    # ---- phase 2: one batched decompression for every basket --------------
    all_compressed_content = [
        compressed_content
        for branch_metadata in branches_metadatas.values()
        for compressed_content in branch_metadata["compressed_contents"]
    ]
    if all_compressed_content:
        codec = NvCompBatchCodec("zstd")
        all_decompressed_content = codec.decode_batch(all_compressed_content)
    else:
        # Nothing to decompress (no branches, or no baskets at all).
        all_decompressed_content = []

    # ---- phase 3: scatter decompressed contents into the output buffers ---
    # The decoded buffers are relied upon to come back in submission order,
    # so they are consumed in the same order the payloads were queued.
    arrays = {}
    cursor = 0
    for branch_name, branch_metadata in branches_metadatas.items():
        n_baskets = branch_metadata["N_baskets"]
        branch_decompressed_contents = all_decompressed_content[
            cursor : cursor + n_baskets
        ]
        cursor += n_baskets

        dtype = branch_metadata["dtype"]
        content = branch_metadata["content"]
        for decompressed, border, put_start, put_stop in zip(
            branch_decompressed_contents,
            branch_metadata["borders"],
            branch_metadata["put_starts"],
            branch_metadata["put_stops"],
        ):
            # Keep only the contents region; bytes past the border are offsets.
            content[put_start:put_stop] = decompressed.view(dtype)[
                : border // dtype.itemsize
            ]

        converted = content.astype(branch_metadata["to_dtype"])
        counts = branch_metadata["counts"]
        if counts is None:
            arrays[branch_name] = converted
        else:
            arrays[branch_name] = ak.unflatten(converted, counts)

    return arrays


In [18]:
%%timeit
# Benchmark cell: times opening the file with uproot, selecting one branch of
# the "Events" tree, and decoding it through GPU_all_baskets_to_array.
# Source dataset of the local zstd copy used below:
# /store/user/IDAP/RunIISummer20UL18NanoAODv9/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/40000/BCB3E2FC-D575-0341-A211-5C9A8D8798B9.root
filename = "/home/fstrug/uscmshome/nobackup/GPU/kvikio_playground/TTToSemiLeptonic_UL18JMENanoAOD-zstd.root"
# uproot supplies the branch metadata (basket positions, interpretation).
file = uproot.open(filename)
tree = file["Events"]
branches = [tree["Muon_pt"]]
# The basket bytes themselves are read through a plain binary file handle.
with open(filename, "rb") as rawfile:
    myarrays = GPU_all_baskets_to_array(rawfile, branches)



636 ms ± 5.45 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
# Cross-check every decoded branch against uproot's own reader.
for branch in branches:
    decoded = myarrays[branch.name]
    reference = branch.array(library="ak")
    assert ak.all(decoded == reference)