# Try some basic InMemoryDataset operations and show the data_dict afterwards
This notebook can be run against this PR's branch as well as against master.

In [1]:
import pprint
import sys
import tempfile
from textwrap import indent

import h5py
import numpy as np

from versioned_hdf5 import VersionedHDF5File

# Show where the two modules were loaded from: a path ending in ".so" means
# the compiled extension is in use, None means the module was never imported.
for module_name in ("versioned_hdf5.staged_changes", "versioned_hdf5.subchunk_map"):
    print(sys.modules.get(module_name))

<module 'versioned_hdf5.staged_changes' from '/home/crusaderky/github/versioned-hdf5/versioned_hdf5/staged_changes.cpython-312-x86_64-linux-gnu.so'>
<module 'versioned_hdf5.subchunk_map' from '/home/crusaderky/github/versioned-hdf5/versioned_hdf5/subchunk_map.cpython-312-x86_64-linux-gnu.so'>


In [2]:
def debug_data_dict(dset):
    """Print a dataset's full contents followed by its chunk -> data mapping.

    Parameters
    ----------
    dset
        Staged dataset object. It must support ``dset[:]`` and expose the
        mapping either as a ``data_dict`` attribute (legacy) or through a
        ``build_data_dict()`` method (new). Each key of the mapping must have
        a ``.raw`` attribute holding a tuple of slices.

    Notes
    -----
    One line is printed per mapping entry, sorted by key. NumPy array values
    are printed in full, with continuation lines aligned under the first one
    (this works for arrays of any dimensionality); any other value is printed
    as ``raw[start:stop]`` from its ``.start`` and ``.stop`` attributes.
    """
    try:
        data_dict = dset.data_dict  # legacy
    except AttributeError:
        data_dict = dset.build_data_dict()  # new

    # In legacy, must acquire data_dict *before* reading the full array,
    # as that will alter the data_dict
    print(dset[:])
    print()

    # Sort on the key only, so that values (arrays / slices) are never compared.
    entries = sorted(
        ((k.raw, v) for k, v in data_dict.items()),
        key=lambda entry: entry[0],
    )
    for k, v in entries:
        k_str = "[" + ", ".join(f"{ki.start}:{ki.stop}" for ki in k) + "] = "
        if isinstance(v, np.ndarray):
            pad = " " * len(k_str)
            # Indent every line, then drop the pad from the first one: it
            # follows k_str on the same output line. Unlike testing for a
            # leading "[[", this also handles 0-d and 1-d arrays.
            v_str = indent(str(v), pad, lambda line: True)[len(pad):]
        else:
            v_str = f"raw[{v.start}:{v.stop}]"
        print(k_str + v_str)


# Scratch directory holding the HDF5 file used by all the cells below.
# Keep `tmpdir` bound: the directory is deleted when the object is cleaned up.
tmpdir = tempfile.TemporaryDirectory()
path = tmpdir.name + "/data.h5"

In [3]:
# Version "r0": a 10x10 dataset holding 0..99, stored in 4x4 chunks.
initial_data = np.arange(100).reshape((10, 10))
with h5py.File(path, "w") as f:
    vf = VersionedHDF5File(f)
    with vf.stage_version("r0") as sv:
        sv.create_dataset(
            "value",
            data=initial_data,
            chunks=(4, 4),
        )

In [4]:
with h5py.File(path, "r+") as f:
    vf = VersionedHDF5File(f)
    # Stage version "r1"; the changes made inside the block are committed
    # when the block exits.
    with vf.stage_version("r1") as sv:
        dset = sv["value"]

        print("\n=== Fresh dataset ===\n")
        debug_data_dict(dset)

        # With 4x4 chunks, columns 4:8 line up with one chunk column, while
        # rows 0:5 cover chunk rows 0:4 completely and only row 4 of 4:8.
        dset[:5, 4:8] = 123
        print("\n=== After __setitem__ (full and partial chunks) ===\n")
        debug_data_dict(dset)


=== Fresh dataset ===

[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47 48 49]
 [50 51 52 53 54 55 56 57 58 59]
 [60 61 62 63 64 65 66 67 68 69]
 [70 71 72 73 74 75 76 77 78 79]
 [80 81 82 83 84 85 86 87 88 89]
 [90 91 92 93 94 95 96 97 98 99]]

[0:4, 0:4] = raw[0:4]
[0:4, 4:8] = raw[4:8]
[0:4, 8:10] = raw[8:12]
[4:8, 0:4] = raw[12:16]
[4:8, 4:8] = raw[16:20]
[4:8, 8:10] = raw[20:24]
[8:10, 0:4] = raw[24:26]
[8:10, 4:8] = raw[28:30]
[8:10, 8:10] = raw[32:34]

=== After __setitem__ (full and partial chunks) ===

[[  0   1   2   3 123 123 123 123   8   9]
 [ 10  11  12  13 123 123 123 123  18  19]
 [ 20  21  22  23 123 123 123 123  28  29]
 [ 30  31  32  33 123 123 123 123  38  39]
 [ 40  41  42  43 123 123 123 123  48  49]
 [ 50  51  52  53  54  55  56  57  58  59]
 [ 60  61  62  63  64  65  66  67  68  69]
 [ 70  71  72  73  74  75  76  77  78  79]
 [ 80  81  82  83  84  85  86

In [5]:
with h5py.File(path, "r+") as f:
    vf = VersionedHDF5File(f)
    # Stage "r2" without modifying anything, only to inspect how the data
    # written in "r1" is mapped once it has been committed.
    with vf.stage_version("r2") as sv:
        dset = sv["value"]
        print("\n=== After commit ===\n")
        debug_data_dict(dset)


=== After commit ===

[[  0   1   2   3 123 123 123 123   8   9]
 [ 10  11  12  13 123 123 123 123  18  19]
 [ 20  21  22  23 123 123 123 123  28  29]
 [ 30  31  32  33 123 123 123 123  38  39]
 [ 40  41  42  43 123 123 123 123  48  49]
 [ 50  51  52  53  54  55  56  57  58  59]
 [ 60  61  62  63  64  65  66  67  68  69]
 [ 70  71  72  73  74  75  76  77  78  79]
 [ 80  81  82  83  84  85  86  87  88  89]
 [ 90  91  92  93  94  95  96  97  98  99]]

[0:4, 0:4] = raw[0:4]
[0:4, 4:8] = raw[36:40]
[0:4, 8:10] = raw[8:12]
[4:8, 0:4] = raw[12:16]
[4:8, 4:8] = raw[40:44]
[4:8, 8:10] = raw[20:24]
[8:10, 0:4] = raw[24:26]
[8:10, 4:8] = raw[28:30]
[8:10, 8:10] = raw[32:34]


In [6]:
with h5py.File(path, "r+") as f:
    vf = VersionedHDF5File(f)
    with vf.stage_version("r3") as sv:
        dset = sv["value"]
        # Shrink the rows (10 -> 7) and grow the columns (10 -> 13) in one go.
        dset.resize((7, 13))
        # Write a single point inside the area added by the resize.
        dset[5, 12] = 1337
        print("\n=== After resize() plus __setitem__ encroaching the filled area ===\n")
        debug_data_dict(dset)


=== After resize() plus __setitem__ encroaching the filled area ===

[[   0    1    2    3  123  123  123  123    8    9    0    0    0]
 [  10   11   12   13  123  123  123  123   18   19    0    0    0]
 [  20   21   22   23  123  123  123  123   28   29    0    0    0]
 [  30   31   32   33  123  123  123  123   38   39    0    0    0]
 [  40   41   42   43  123  123  123  123   48   49    0    0    0]
 [  50   51   52   53   54   55   56   57   58   59    0    0 1337]
 [  60   61   62   63   64   65   66   67   68   69    0    0    0]]

[0:4, 0:4] = raw[0:4]
[0:4, 4:8] = raw[36:40]
[0:4, 8:12] = [[ 8  9  0  0]
               [18 19  0  0]
               [28 29  0  0]
               [38 39  0  0]]
[4:7, 0:4] = raw[12:15]
[4:7, 4:8] = raw[40:43]
[4:7, 8:12] = [[48 49  0  0]
               [58 59  0  0]
               [68 69  0  0]]
[4:7, 12:13] = [[   0]
                [1337]
                [   0]]


In [7]:
with h5py.File(path, "r+") as f:
    vf = VersionedHDF5File(f)
    # Stage "r4" without modifying anything, only to inspect how the resized
    # dataset from "r3" is mapped once it has been committed.
    with vf.stage_version("r4") as sv:
        dset = sv["value"]
        print("\n=== After commit again ===\n")
        debug_data_dict(dset)


=== After commit again ===

[[   0    1    2    3  123  123  123  123    8    9    0    0    0]
 [  10   11   12   13  123  123  123  123   18   19    0    0    0]
 [  20   21   22   23  123  123  123  123   28   29    0    0    0]
 [  30   31   32   33  123  123  123  123   38   39    0    0    0]
 [  40   41   42   43  123  123  123  123   48   49    0    0    0]
 [  50   51   52   53   54   55   56   57   58   59    0    0 1337]
 [  60   61   62   63   64   65   66   67   68   69    0    0    0]]

[0:4, 0:4] = raw[0:4]
[0:4, 4:8] = raw[36:40]
[0:4, 8:12] = raw[44:48]
[4:7, 0:4] = raw[12:15]
[4:7, 4:8] = raw[40:43]
[4:7, 8:12] = raw[48:51]
[4:7, 12:13] = raw[52:55]
