From 68b72bfb4270bb78cce57ddf858173270cd48cee Mon Sep 17 00:00:00 2001 From: Carter Francis Date: Tue, 23 Jan 2024 11:54:28 -0600 Subject: [PATCH] Add Changelog and document reason for changes. --- rsciio/_hierarchical.py | 11 +++++------ upcoming_changes/211.bugfix.rst | 1 + 2 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 upcoming_changes/211.bugfix.rst diff --git a/rsciio/_hierarchical.py b/rsciio/_hierarchical.py index d58d38ba..47ac2b7c 100644 --- a/rsciio/_hierarchical.py +++ b/rsciio/_hierarchical.py @@ -263,13 +263,12 @@ def _read_array(group, dataset_key): key = "ragged_shapes" if key in group: ragged_shape = group[key] - # if the data is chunked saved array we must first - # cast to a numpy array to avoid multiple calls to - # _decode_chunk in zarr (or h5py) + # Use same chunks as data so that apply_gufunc doesn't rechunk + # Reduces the transfer of data between workers which + # significantly improves performance for distributed loading data = da.from_array(data, chunks=data.chunks) - shape = da.from_array( - ragged_shape, chunks=data.chunks - ) # same chunks as data + shape = da.from_array(ragged_shape, chunks=data.chunks) + data = da.apply_gufunc(unflatten_data, "(),()->()", data, shape) return data diff --git a/upcoming_changes/211.bugfix.rst b/upcoming_changes/211.bugfix.rst new file mode 100644 index 00000000..fd8dffc8 --- /dev/null +++ b/upcoming_changes/211.bugfix.rst @@ -0,0 +1 @@ +Fix saving ragged arrays of vectors from/to a chunked ``hspy`` and ``zspy`` store. Greatly increases the speed of saving and loading ragged arrays from chunked datasets. \ No newline at end of file