In [1]:
import mmap
import numpy as np

# @typing.no_type_check
def get_memory_mapped_data(
    file_path: str,
    data_type: str,
    offset_bytes: int,
    skip_that_many_bytes_stride: tuple[int, ...],
    data_shape: tuple[int, ...],
) -> np.ndarray:
    """Read typed, strided data from a file through a read-only memory map.

    The whole file is mapped read-only, an array view described by
    ``data_type``, ``offset_bytes``, ``skip_that_many_bytes_stride`` and
    ``data_shape`` is laid over the mapping, and a copy of that view is
    returned. Only the bytes addressed by the view are touched, so regularly
    spaced records can be extracted without reading the bytes in between.

    Args:
        file_path: Path of the binary file to read.
        data_type: NumPy dtype specifier of a single element, e.g. ``"<f4"``.
        offset_bytes: Byte position in the file of the first element.
        skip_that_many_bytes_stride: Step in bytes between consecutive
            elements along each axis; one entry per axis of ``data_shape``.
        data_shape: Shape of the array to return.

    Returns:
        A new array that owns its memory, i.e. it stays valid after the file
        and the mapping have been closed.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is empty (an empty file cannot be mapped) or
            NumPy rejects the shape/strides/offset for the size of the file.
        TypeError: If NumPy reports the mapped buffer as too small for the
            requested array.
    """
    # I/O access details:
    # https://stackoverflow.com/questions/60493766/read-binary-flatfile-and-skip-bytes

    with (
        open(file_path, "rb") as fp,
        mmap.mmap(fp.fileno(), length=0, access=mmap.ACCESS_READ) as memory_mapped,
    ):
        # Examples for a C-ordered (n, m) matrix with
        # strides = (row_stride, col_stride):
        #   every 3rd row: shape=(n // 3, m), strides=(row_stride * 3, col_stride)
        #   every 3rd col: shape=(n, m // 3), strides=(row_stride, col_stride * 3)
        #
        # Keep the view as an unnamed temporary: a view that is still
        # referenced when the ``with`` block exits keeps the mmap's buffer
        # exported, and closing the mmap then fails with BufferError.
        return np.ndarray(
            shape=data_shape,
            dtype=data_type,
            buffer=memory_mapped,
            offset=offset_bytes,
            strides=skip_that_many_bytes_stride,
        ).copy()

In [26]:
file_path = "data/utils/memory_mapped_io.raw"
data_type_literal = "<f4"  # little-endian 32-bit float
item_size = np.dtype(data_type_literal).itemsize  # bytes per element

# Layout of the demo matrix. Every shape, stride and offset below is derived
# from these values so the reads stay consistent with what was written.
n_rows, n_cols = 6, 4
row_stride = n_cols * item_size  # bytes from one row to the next (C order)

# Values 1..24 laid out row-major as a 6 x 4 matrix.
write_data = np.arange(
    1, n_rows * n_cols + 1, dtype=data_type_literal
).reshape(n_rows, n_cols)
print("original test data")
print(f"{np.shape(write_data)}, {write_data.dtype}, {write_data}")
write_data.tofile(file_path)
del write_data  # drop the in-memory array so the reads below cannot reuse it

print(f"item_size {item_size}")
print("two-dimensional, read back as is")
read_data = get_memory_mapped_data(
    file_path, data_type_literal, 0, (row_stride, item_size), (n_rows, n_cols)
)
print(f"{np.shape(read_data)}, {read_data.dtype}, {read_data}")
np.testing.assert_array_equal(
    read_data, np.arange(1, n_rows * n_cols + 1).reshape(n_rows, n_cols)
)
del read_data

print("one-dimensional implicit, read back as is")
read_data = get_memory_mapped_data(
    file_path, data_type_literal, 0, (item_size,), (n_rows * n_cols,)
)
print(f"{np.shape(read_data)}, {read_data.dtype}, {read_data}")
np.testing.assert_array_equal(read_data, np.arange(1, n_rows * n_cols + 1))
del read_data

for column_idx in range(n_cols):  # extract each column on its own
    print(f"column_idx {column_idx}")
    # 1-D read of a single column: start at the column's element in the first
    # row and advance one full row per element. The explicit 2-D equivalent is
    # strides=(row_stride, item_size) with shape=(n_rows, 1) at the same offset.
    read_data = get_memory_mapped_data(
        file_path,
        data_type_literal,
        column_idx * item_size,
        (row_stride,),
        (n_rows,),
    )

    print(f"{np.shape(read_data)}, {read_data.dtype}, {read_data}")
    np.testing.assert_array_equal(
        read_data, np.arange(column_idx + 1, n_rows * n_cols + 1, n_cols)
    )
    del read_data

original test data
(6, 4), float32, [[ 1.  2.  3.  4.]
 [ 5.  6.  7.  8.]
 [ 9. 10. 11. 12.]
 [13. 14. 15. 16.]
 [17. 18. 19. 20.]
 [21. 22. 23. 24.]]
item_size 4
two-dimensional, read back as is
(6, 4), float32, [[ 1.  2.  3.  4.]
 [ 5.  6.  7.  8.]
 [ 9. 10. 11. 12.]
 [13. 14. 15. 16.]
 [17. 18. 19. 20.]
 [21. 22. 23. 24.]]
one-dimensional implicit, read back as is
(24,), float32, [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18.
 19. 20. 21. 22. 23. 24.]
column_idx 0
(6,), float32, [ 1.  5.  9. 13. 17. 21.]
column_idx 1
(6,), float32, [ 2.  6. 10. 14. 18. 22.]
column_idx 2
(6,), float32, [ 3.  7. 11. 15. 19. 23.]
column_idx 3
(6,), float32, [ 4.  8. 12. 16. 20. 24.]


In [6]:
# Sanity check: size in bytes of one little-endian float32 ("<f4") element.
print(np.dtype("<f4").itemsize)

4
