In [1]:
import sys
import numpy as np
from versioned_hdf5.subchunk_map import as_subchunk_map

# The plan classes live in versioned_hdf5.staged_changes, which is not
# importable on every branch; import them when available.
try:
    from versioned_hdf5.staged_changes import (
        ChangesPlan,
        GetItemPlan,
        ResizePlan,
        SetItemPlan,
        np_hsize_t,
    )
except ImportError:
    # master branch: staged_changes cannot be imported. Set the plan classes to
    # None so that the cells below skip them and only time as_subchunk_map.
    ChangesPlan = None
    GetItemPlan = None
    ResizePlan = None
    SetItemPlan = None
    # Stand-in dtype for the slab index/offset arrays built below.
    # NOTE(review): presumably the same width as the real np_hsize_t - confirm.
    np_hsize_t = np.uint64

# NOTE(review): no cell visible in this notebook draws random numbers, so this
# seed currently has no effect on the results - confirm before removing.
np.random.seed(0)

# Show which builds are being benchmarked. .get() prints None when
# staged_changes was never loaded; subchunk_map is mandatory.
print(sys.modules.get("versioned_hdf5.staged_changes"))
print(sys.modules["versioned_hdf5.subchunk_map"])

<module 'versioned_hdf5.staged_changes' from '/home/crusaderky/github/versioned-hdf5/versioned_hdf5/staged_changes.cpython-312-x86_64-linux-gnu.so'>
<module 'versioned_hdf5.subchunk_map' from '/home/crusaderky/github/versioned-hdf5/versioned_hdf5/subchunk_map.cpython-312-x86_64-linux-gnu.so'>


# GetItemPlan benchmarks

In [2]:
def bench_getitem_plan(chunk_shape, perc):
    if chunk_shape == "square":
        chunk_size = (1000, 1000)
        nchunks = (100, 100)
    elif chunk_shape == "flat":
        chunk_size = (10, 100_000)
        nchunks = (10_000, 1)
    else:
        raise ValueError(chunk_shape)

    shape = (100_000, 100_000)
    assert all(n * c == s for n, c, s in zip(nchunks, chunk_size, shape))

    idx = (slice(int(shape[0] * perc)), slice(int(shape[1] * perc)))

    # Brand new dense array
    slab_indices = np.ones(nchunks, dtype=np_hsize_t)
    slab_offsets = np.arange(
        0,
        slab_indices.size * chunk_size[0],
        chunk_size[0],
        dtype=np_hsize_t,
    ).reshape(slab_indices.shape)

    args = (idx, shape, chunk_size, slab_indices, slab_offsets)

    print(f"{chunk_shape} {perc*100:.0f}%")
    if GetItemPlan is not None:
        print(GetItemPlan(*args).head)
        %timeit GetItemPlan(*args)

    print("as_subchunk_map")
    %timeit list(as_subchunk_map(idx=idx, shape=shape, chunk_size=chunk_size))

    print()

Performance is impacted by the number of modified chunks.


All benchmarks run on a dataset 80 GB in size (assuming double or int64 data), split into 10,000 chunks of 8 MB each, and plan the execution of reading either the whole dataset (`a[:, :]`) or the first 5% of each axis (`a[:5000, :5000]`).

#### Legend
Shape of the chunks:
- **square**: 1000x1000 points (8MB) square chunks, 100 chunks per axis
- **flat**: 10x100,000 points (8MB) chunks; a single chunk spans all the columns back to back, with 10,000 chunks along the rows. Typical e.g. after a conversion from Pandas or PyArrow.

In [3]:
bench_getitem_plan("square", 1)
bench_getitem_plan("flat", 1)
bench_getitem_plan("square", 0.05)
bench_getitem_plan("flat", 0.05)

square 100%
GetItemPlan<output_shape=(100000, 100000), output_view=[:, :], 10000 slice transfers among 1 slab pairs>
545 μs ± 7.03 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
as_subchunk_map
15.8 ms ± 220 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)

flat 100%
GetItemPlan<output_shape=(100000, 100000), output_view=[:, :], 10000 slice transfers among 1 slab pairs>
589 μs ± 9.56 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
as_subchunk_map
19.2 ms ± 193 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)

square 5%
GetItemPlan<output_shape=(5000, 5000), output_view=[:, :], 25 slice transfers among 1 slab pairs>
181 μs ± 2.78 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
as_subchunk_map
111 μs ± 911 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)

flat 5%
GetItemPlan<output_shape=(5000, 5000), output_view=[:, :], 500 slice transfers among 1 slab pairs>
211 μs ± 6.7 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops e

# More plans benchmarks
The benchmarks below mirror those in `wrapper_benchmark.ipynb`.

In [4]:
# Geometry shared by all the plan benchmarks below
shape = (20_000, 20_000)  # ~3 GiB, assuming 8-byte elements
chunk_size = (100, 100)  # ~78 KiB per chunk, assuming 8-byte elements
# Chunks per axis: ceiling division, so a partial edge chunk counts as one
nchunks = tuple(-(-s // c) for s, c in zip(shape, chunk_size))
# Simulate newly created dense versioned_hdf5 dataset: every chunk points at
# slab index 1, at offsets that are consecutive multiples of chunk_size[0]
slab_indices = np.ones(nchunks, dtype=np_hsize_t)
slab_offsets = np.arange(
    0, slab_indices.size * chunk_size[0], chunk_size[0], dtype=np_hsize_t
).reshape(nchunks)

In [5]:
# setitem of whole chunks not in memory:
# a[100:] = ..., i.e. everything except the first row of chunks
idx = slice(chunk_size[0], None, None)
# NOTE(review): the meaning of the trailing positional arguments (2, 1) is not
# visible in this notebook - confirm against the SetItemPlan signature.
args = (idx, shape, chunk_size, slab_indices, slab_offsets, 2, 1)
# SetItemPlan is None when staged_changes could not be imported
if SetItemPlan is not None:
    splan = SetItemPlan(*args)
    print(splan.head)
    %timeit SetItemPlan(*args)
# Baseline: exhaust the as_subchunk_map iterator for the same selection
print("as_subchunk_map")
%timeit list(as_subchunk_map(idx=idx, shape=shape, chunk_size=chunk_size))

SetItemPlan<value_shape=(19900, 20000), value_view=[:, :], append 1 empty slabs, 39800 slice transfers among 1 slab pairs, drop 0 slabs>
4.63 ms ± 56 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
as_subchunk_map
63.3 ms ± 827 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
# setitem of parts of chunks (target all chunks):
# a[::100, ::100] = ..., i.e. one point in every chunk
idx = tuple(slice(None, None, c) for c in chunk_size)
args = (idx, shape, chunk_size, slab_indices, slab_offsets, 2, 1)
# SetItemPlan is None when staged_changes could not be imported
if SetItemPlan is not None:
    splan = SetItemPlan(*args)
    print(splan.head)
    %timeit SetItemPlan(*args)
# Baseline: exhaust the as_subchunk_map iterator for the same selection
print("as_subchunk_map")
%timeit list(as_subchunk_map(idx=idx, shape=shape, chunk_size=chunk_size))

SetItemPlan<value_shape=(200, 200), value_view=[:, :], append 1 empty slabs, 40200 slice transfers among 2 slab pairs, drop 1 slabs>
4.84 ms ± 75 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
as_subchunk_map
61.4 ms ± 160 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
# setitem of parts of chunks (target every other chunk on outer axis):
# a[::200, ::100] = ..., i.e. one point per chunk, skipping every other row
# of chunks
idx = (slice(None, None, chunk_size[0] * 2), slice(None, None, chunk_size[1]))
args = (idx, shape, chunk_size, slab_indices, slab_offsets, 2, 1)
# SetItemPlan is None when staged_changes could not be imported
if SetItemPlan is not None:
    splan = SetItemPlan(*args)
    print(splan.head)
    %timeit SetItemPlan(*args)
# Baseline: exhaust the as_subchunk_map iterator for the same selection
print("as_subchunk_map")
%timeit list(as_subchunk_map(idx=idx, shape=shape, chunk_size=chunk_size))

SetItemPlan<value_shape=(100, 200), value_view=[:, :], append 0 empty slabs, 20000 slice transfers among 1 slab pairs, drop 0 slabs>
1.04 ms ± 15 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
as_subchunk_map
32.1 ms ± 479 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
# setitem of parts of chunks (target every other chunk on inner axis):
# a[::100, ::200] = ..., i.e. one point per chunk, skipping every other column
# of chunks
idx = (slice(None, None, chunk_size[0]), slice(None, None, chunk_size[1] * 2))
args = (idx, shape, chunk_size, slab_indices, slab_offsets, 2, 1)
# SetItemPlan is None when staged_changes could not be imported
if SetItemPlan is not None:
    splan = SetItemPlan(*args)
    print(splan.head)
    %timeit SetItemPlan(*args)
# Baseline: exhaust the as_subchunk_map iterator for the same selection
print("as_subchunk_map")
%timeit list(as_subchunk_map(idx=idx, shape=shape, chunk_size=chunk_size))

SetItemPlan<value_shape=(200, 100), value_view=[:, :], append 0 empty slabs, 20000 slice transfers among 1 slab pairs, drop 0 slabs>
1.04 ms ± 28.2 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
as_subchunk_map
31.9 ms ± 317 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
# setitem of parts of chunks (checkers pattern):
# a[::200, ::200] = ..., i.e. one point in every other chunk along both axes
# (one chunk out of every 2x2 group of chunks)
idx = (slice(None, None, chunk_size[0] * 2), slice(None, None, chunk_size[1] * 2))
args = (idx, shape, chunk_size, slab_indices, slab_offsets, 2, 1)
# SetItemPlan is None when staged_changes could not be imported
if SetItemPlan is not None:
    splan = SetItemPlan(*args)
    print(splan.head)
    %timeit SetItemPlan(*args)
# Baseline: exhaust the as_subchunk_map iterator for the same selection
print("as_subchunk_map")
%timeit list(as_subchunk_map(idx=idx, shape=shape, chunk_size=chunk_size))

SetItemPlan<value_shape=(100, 100), value_view=[:, :], append 0 empty slabs, 10000 slice transfers among 1 slab pairs, drop 0 slabs>
687 μs ± 14.1 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
as_subchunk_map
16.1 ms ± 348 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
# commit a fully modified array
# The whole cell is skipped when staged_changes could not be imported; there
# is no as_subchunk_map baseline for this benchmark.
if SetItemPlan is not None:
    # idx=() selects the whole array; the plan's updated slab_indices and
    # slab_offsets are the input of the ChangesPlan being timed
    splan = SetItemPlan((), shape, chunk_size, slab_indices, slab_offsets, 2, 1)
    args = (shape, chunk_size, splan.slab_indices, splan.slab_offsets)
    cplan = ChangesPlan(*args)
    print(cplan.head)
    %timeit ChangesPlan(*args)

ChangesPlan<40000 chunks>
13.4 ms ± 58.6 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [11]:
# getitem after a single chunk has been changed
# idx=() reads the whole array
idx = ()
# SetItemPlan is None when staged_changes could not be imported
if SetItemPlan is not None:
    # Write the single point a[0, 0], then plan the read using the
    # slab_indices and slab_offsets updated by that write
    splan = SetItemPlan((0, 0), shape, chunk_size, slab_indices, slab_offsets, 2, 1)
    args = (idx, shape, chunk_size, splan.slab_indices, splan.slab_offsets)
    gplan = GetItemPlan(*args)
    print(gplan.head)
    %timeit GetItemPlan(*args)
# Baseline: exhaust the as_subchunk_map iterator for the same selection
print("as_subchunk_map")
%timeit list(as_subchunk_map(idx=idx, shape=shape, chunk_size=chunk_size))

GetItemPlan<output_shape=(20000, 20000), output_view=[:, :], 40000 slice transfers among 1 slab pairs>
1.51 ms ± 4.53 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
as_subchunk_map
62.7 ms ± 1.22 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [12]:
# resize to add 1 row along axis 0
# NOTE(review): with shape[0] = 20_000 and 100-row chunks, growing from 19_998
# to 19_999 rows stays within the last row of chunks, so no chunk is added;
# only the existing edge chunks are affected. Confirm this is the intended case.
old_shape = (shape[0] - 2, shape[1])
new_shape = (shape[0] - 1, shape[1])
# NOTE(review): the meaning of the trailing positional arguments (2, 1) is not
# visible in this notebook - confirm against the ResizePlan signature.
args = (old_shape, new_shape, chunk_size, slab_indices, slab_offsets, 2, 1)
# ResizePlan is None when staged_changes could not be imported
if ResizePlan is not None:
    rplan = ResizePlan(*args)
    print(rplan.head)
    %timeit ResizePlan(*args)
# Baseline: exhaust as_subchunk_map over the whole resized array (idx=())
print("as_subchunk_map")
%timeit list(as_subchunk_map(idx=(), shape=new_shape, chunk_size=chunk_size))

ResizePlan<append 0 empty slabs, 200 slice transfers among 1 slab pairs, drop 0 slabs>
321 μs ± 3.43 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
as_subchunk_map
60.1 ms ± 444 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
