glue-viz · dhomeier · Jun 24, 2022 · May 26, 2022 · Jun 22, 2022 · Jun 23, 2022
diff --git a/CHANGES.md b/CHANGES.md
@@ -11,6 +11,9 @@ v1.4.0 (unreleased)
 
 * Add support for using degrees in full-sphere projections. [#2279]
 
+* Fixed a bug in `compute_statistic` when a subset is defined and the array is
+  large enough to need chunking. [#2302]
+
 v1.3.0 (2022-04-22)
 -------------------
 

diff --git a/glue/core/data.py b/glue/core/data.py
@@ -1694,6 +1694,7 @@ def compute_statistic(self, statistic, cid, subset_state=None, axis=None,
         # later we will need to pad out the result of compute_statistic.
         subarray_slices = None
 
+        chunk_view = None
         if subset_state:
             if isinstance(subset_state, SliceSubsetState) and view is None:
                 mask = None
@@ -1766,6 +1767,8 @@ def compute_statistic(self, statistic, cid, subset_state=None, axis=None,
                                 mask_idim += 1
                             else:
                                 new_view.append(view[idim])
+                        # This is the chunk view, which we'll need later
+                        chunk_view = view
                         view = tuple(new_view)
                     else:  # pragma: nocover
                         # This should probably never happen, but just in case!
@@ -1828,9 +1831,16 @@ def compute_statistic(self, statistic, cid, subset_state=None, axis=None,
             # of the if statement above.
             if not isinstance(axis, tuple):
                 axis = (axis,)
-            full_shape = [self.shape[idim] for idim in range(self.ndim) if idim not in axis]
-            full_result = np.zeros(full_shape) * np.nan
             result_slices = tuple([subarray_slices[idim] for idim in range(self.ndim) if idim not in axis])
+
+            if chunk_view is None:
+                full_shape = [self.shape[idim] for idim in range(self.ndim) if idim not in axis]
+            else:
+                chunk_shape = subset_state.to_mask(self, chunk_view).shape
+                full_shape = [chunk_shape[idim] for idim in range(self.ndim) if idim not in axis]
+                view_start = [chunk_view[idim].start for idim in range(self.ndim) if idim not in axis][0]
+
+            full_result = np.zeros(full_shape) * np.nan
             full_result[result_slices] = result
             return full_result
 

diff --git a/glue/core/tests/test_data.py b/glue/core/tests/test_data.py
@@ -855,9 +855,54 @@ def test_compute_statistic_chunks(shape):
     data = Data(x=np.random.random(shape))
 
     axis = tuple(range(data.ndim - 1))
+
     assert_allclose(data.compute_statistic('mean', data.id['x'], axis=axis),
                     data.compute_statistic('mean', data.id['x'], axis=axis, n_chunk_max=10))
 
+    subset_state = SliceSubsetState(data, [slice(5)])
+    stats = data.compute_statistic('mean', data.id['x'], axis=axis, subset_state=subset_state)
+    chunked = data.compute_statistic('mean', data.id['x'], axis=axis, subset_state=subset_state,
+                                     n_chunk_max=10)
+    assert_allclose(stats, chunked)
+
+    subset_state = data.id['x'] > 0.25
+    stats = data.compute_statistic('mean', data.id['x'], axis=axis, subset_state=subset_state)
+    chunked = data.compute_statistic('mean', data.id['x'], axis=axis, subset_state=subset_state,
+                                     n_chunk_max=10)
+    assert_allclose(stats, chunked)
+
+    roi = RangeROI('x', min=0.1, max=0.95)
+    subset_state = roi_to_subset_state(roi, x_att='x')
+    stats = data.compute_statistic('mean', data.id['x'], axis=axis, subset_state=subset_state)
+    chunked = data.compute_statistic('mean', data.id['x'], axis=axis, subset_state=subset_state,
+                                     n_chunk_max=10)
+    assert_allclose(stats, chunked)
+
+    if data.ndim < 3:
+        return
+
+    assert_allclose(data.compute_statistic('mean', data.id['x'], axis=2),
+                    data.compute_statistic('mean', data.id['x'], axis=2, n_chunk_max=10))
+
+    subset_state = SliceSubsetState(data, [slice(5)])
+    stats = data.compute_statistic('mean', data.id['x'], axis=2, subset_state=subset_state)
+    chunked = data.compute_statistic('mean', data.id['x'], axis=2, subset_state=subset_state,
+                                     n_chunk_max=10)
+    assert_allclose(stats, chunked)
+
+    subset_state = data.id['x'] > 0.25
+    stats = data.compute_statistic('mean', data.id['x'], axis=2, subset_state=subset_state)
+    chunked = data.compute_statistic('mean', data.id['x'], axis=2, subset_state=subset_state,
+                                     n_chunk_max=10)
+    assert_allclose(stats, chunked)
+
+    roi = RangeROI('x', min=0.1, max=0.95)
+    subset_state = roi_to_subset_state(roi, x_att='x')
+    stats = data.compute_statistic('mean', data.id['x'], axis=2, subset_state=subset_state)
+    chunked = data.compute_statistic('mean', data.id['x'], axis=2, subset_state=subset_state,
+                                     n_chunk_max=10)
+    assert_allclose(stats, chunked)
+
 
 def test_compute_statistic_random_subset():
 
@@ -991,6 +1036,47 @@ def test_compute_statistic_shape():
     assert result.shape == (20,)
 
 
+def test_compute_statistic_shape_view():
+
+    # Check the shape of compute_statistic results when the `view` parameter
+    # sub-slices the dataset, for both inequality and ROI subset states.
+
+    array = np.ones(10 * 20 * 30).reshape((10, 20, 30))
+    array[3:5, 6:14, 10:21] += 1
+
+    data = Data(x=array, y=array)
+
+    subset_state = data.id['y'] > 1.5
+
+    view = (slice(0, 5), slice(4, 12), slice(0, 10))
+    subset_state = data.id['y'] > 1.5
+
+    result = data.compute_statistic('sum', data.id['x'], subset_state=subset_state, view=view)
+    assert np.isscalar(result)
+
+    result = data.compute_statistic('sum', data.id['x'], subset_state=subset_state,
+                                    axis=1, view=view)
+    assert result.shape == (5, 10)
+
+    result = data.compute_statistic('sum', data.id['x'], subset_state=subset_state,
+                                    axis=(0, 2), view=view)
+    assert result.shape == (8,)
+
+    roi = RangeROI('x', min=1.5, max=2.0)
+    subset_state = roi_to_subset_state(roi, x_att='x')
+
+    result = data.compute_statistic('sum', data.id['x'], subset_state=subset_state, view=view)
+    assert np.isscalar(result)
+
+    result = data.compute_statistic('sum', data.id['x'], subset_state=subset_state,
+                                    axis=1, view=view)
+    assert result.shape == (5, 10)
+
+    result = data.compute_statistic('sum', data.id['x'], subset_state=subset_state,
+                                    axis=(0, 2), view=view)
+    assert result.shape == (8,)
+
+
 def test_compute_histogram_log():
 
     # Make sure that the returned histogram is NaN everywhere if either of the