In [1]:
import importlib
import sys

import numpy as np
from versioned_hdf5.staged_changes import StagedChangesArray

# Show which file backs each submodule used by this benchmark.
# Import every submodule explicitly instead of indexing `sys.modules`, which
# raises KeyError unless `staged_changes` happened to import it as a side effect.
for submodule in ("hyperspace", "staged_changes", "subchunk_map"):
    print(importlib.import_module(f"versioned_hdf5.{submodule}"))

<module 'versioned_hdf5.hyperspace' from '/home/crusaderky/github/versioned-hdf5/versioned_hdf5/hyperspace.cpython-312-x86_64-linux-gnu.so'>
<module 'versioned_hdf5.staged_changes' from '/home/crusaderky/github/versioned-hdf5/versioned_hdf5/staged_changes.cpython-312-x86_64-linux-gnu.so'>
<module 'versioned_hdf5.subchunk_map' from '/home/crusaderky/github/versioned-hdf5/versioned_hdf5/subchunk_map.cpython-312-x86_64-linux-gnu.so'>


### Create a sparse array of 1 million chunks

In [2]:
%%time
s = StagedChangesArray.full(
    shape=(2000, 2000),
    chunk_size=(2, 2),
    fill_value=1337,
)
s[3, 2] = 1

CPU times: user 151 μs, sys: 11 ms, total: 11.2 ms
Wall time: 10.3 ms


In [3]:
# Top-left 5x5 window: only the element written at [3, 2] differs from the fill value.
s[:5, :5]

array([[1337, 1337, 1337, 1337, 1337],
       [1337, 1337, 1337, 1337, 1337],
       [1337, 1337, 1337, 1337, 1337],
       [1337, 1337,    1, 1337, 1337],
       [1337, 1337, 1337, 1337, 1337]])

### Extract all non-full chunks

In [4]:
%%time
list(s.changes())

CPU times: user 2.34 ms, sys: 1.02 ms, total: 3.35 ms
Wall time: 3.18 ms


[((slice(2, 4, 1), slice(2, 4, 1)),
  array([[1337, 1337],
         [   1, 1337]]))]

### Change fill_value for all full chunks

In [6]:
# refill(2) hands back a new array (s2); `s` itself still reads 1337 further down.
%time s2 = s.refill(2)

CPU times: user 855 μs, sys: 950 μs, total: 1.8 ms
Wall time: 1.66 ms


In [7]:
# Same 5x5 window as for `s`: the fill value now reads 2, the write at [3, 2] is kept.
s2[:5, :5]

array([[2, 2, 2, 2, 2],
       [2, 2, 2, 2, 2],
       [2, 2, 2, 2, 2],
       [2, 2, 1, 2, 2],
       [2, 2, 2, 2, 2]])

In [8]:
# Inspect the stored chunks: a None entry plus the single 2x2 chunk that was
# written to, whose untouched elements also read 2 after the refill.
# NOTE(review): the None entry presumably stands for the full chunks -- confirm.
s2.chunk_values

[None,
 array([[2, 2],
        [1, 2]])]

### `__getitem__` of 1 million chunks (all but one full)

In [9]:
# Time reading the whole 2000x2000 array back in a single __getitem__ call.
%time s[:]

CPU times: user 733 ms, sys: 184 ms, total: 917 ms
Wall time: 914 ms


array([[1337, 1337, 1337, ..., 1337, 1337, 1337],
       [1337, 1337, 1337, ..., 1337, 1337, 1337],
       [1337, 1337, 1337, ..., 1337, 1337, 1337],
       ...,
       [1337, 1337, 1337, ..., 1337, 1337, 1337],
       [1337, 1337, 1337, ..., 1337, 1337, 1337],
       [1337, 1337, 1337, ..., 1337, 1337, 1337]])

### Create 1 million real chunks in memory, replacing full ones

In [10]:
# Print the `head` line of the plan that the private `_setitem_plan` helper
# builds for the same index, then time the whole-array assignment itself.
print(s._setitem_plan(()).head)
%time s[()] = 3

SetItemPlan<shape=(2000, 2000), 0 loads from base into 0 chunks, 0 appends of empty chunks, 0 appends of full chunks, 999999 appends from __setitem__ value, 0 replaces with empty chunks, 1 replaces from __setitem__ value, 0 updates>
CPU times: user 1.7 s, sys: 600 ms, total: 2.3 s
Wall time: 2.3 s


### Replace 1 million real chunks in memory

In [11]:
# Same whole-array assignment again: after the previous cell the plan lists
# only replaces of existing chunks, no appends.
print(s._setitem_plan(()).head)
%time s[()] = 4

SetItemPlan<shape=(2000, 2000), 0 loads from base into 0 chunks, 0 appends of empty chunks, 0 appends of full chunks, 0 appends from __setitem__ value, 0 replaces with empty chunks, 1000000 replaces from __setitem__ value, 0 updates>
CPU times: user 1.05 s, sys: 177 ms, total: 1.23 s
Wall time: 1.23 s


### Update 1 million real chunks in memory

In [12]:
# Assign every other row, i.e. one row out of each 2-row chunk; the plan
# reports these as updates rather than replaces.
print(s._setitem_plan(slice(None, None, 2)).head)
%time s[::2] = 5

SetItemPlan<shape=(1000, 2000), 0 loads from base into 0 chunks, 0 appends of empty chunks, 0 appends of full chunks, 0 appends from __setitem__ value, 0 replaces with empty chunks, 0 replaces from __setitem__ value, 1000000 updates>
CPU times: user 2.03 s, sys: 259 ms, total: 2.29 s
Wall time: 2.28 s
