Skip to content

Commit

Permalink
Added operation_kwargs Histogram property (#3921)
Browse files Browse the repository at this point in the history
* Added private _operation_kwargs Histogram property. For Histogram elements created using the histogram operation, this holds a dict of operation kwargs that can be used to recreate the histogram from the same dataset. It is None if the Histogram was created directly using the Histogram constructor.
  • Loading branch information
jonmmease committed Sep 3, 2019
1 parent 996e7c4 commit 82acc56
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 3 deletions.
5 changes: 4 additions & 1 deletion holoviews/element/chart.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numpy as np

import copy
import param

from ..core import util
Expand Down Expand Up @@ -182,6 +182,8 @@ def __init__(self, data, edges=None, **params):
elif isinstance(data, tuple) and len(data) == 2 and len(data[0])+1 == len(data[1]):
data = data[::-1]

self._operation_kwargs = params.pop('_operation_kwargs', None)

dataset = params.pop("dataset", None)
super(Histogram, self).__init__(data, **params)

Expand All @@ -204,6 +206,7 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides):
data=data,
shared_data=shared_data,
new_type=new_type,
_operation_kwargs=copy.deepcopy(self._operation_kwargs),
*args,
**overrides
)
Expand Down
12 changes: 12 additions & 0 deletions holoviews/operation/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,18 @@ def _process(self, element, key=None):
hist = np.cumsum(hist)
if self.p.normed in (True, 'integral'):
hist *= edges[1]-edges[0]

# Save off the kwargs needed to reproduce this Histogram later.
# We remove the properties that are used as instructions for how to
# calculate the bins, and replace those with the explicit list of bin
# edges. This way, not only can we regenerate this exact histogram
# from the same data set, but we can also generate a histogram using
# a different dataset that will share the exact same bins.
exclusions = {'log', 'bin_range', 'num_bins'}
params['_operation_kwargs'] = {
k: v for k, v in self.p.items() if k not in exclusions
}
params['_operation_kwargs']['bins'] = list(edges)
return Histogram((edges, hist), kdims=[element.get_dimension(selected_dim)],
label=element.label, dataset=element.dataset, **params)

Expand Down
44 changes: 42 additions & 2 deletions holoviews/tests/operation/testoperation.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,49 @@ def test_points_histogram(self):
vdims=('x_frequency', 'Frequency'))
self.assertEqual(op_hist, hist)

def test_histogram_operation_kwargs(self):
    """Check the ``_operation_kwargs`` recorded on histogram output.

    A Histogram produced by the ``histogram`` operation is expected to
    carry the operation kwargs with the explicit bin edges under
    ``'bins'`` (and without ``num_bins`` / ``bin_range``), to keep them
    through ``clone()``, and a Histogram built directly from the
    constructor is expected to carry ``None``.
    """
    bin_edges = (0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)
    expected_kwargs = {
        'dimension': 'y',
        'normed': False,
        'dynamic': False,
        'bins': list(bin_edges),
    }

    # The value i (for i in 0..9) is repeated 2*i times, so the bin
    # starting at i is expected to hold 2*i samples.
    samples = []
    for i in range(10):
        samples.extend([float(i)] * (2 * i))
    points = Points(samples)

    op_hist = histogram(
        points,
        dimension='y',
        normed=False,
        num_bins=10,
        bin_range=[0, 10],
    )

    # Reference histogram built straight from the constructor.
    hist = Histogram(
        (list(bin_edges), [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]),
        vdims=('y_count', 'Count'),
        kdims='y',
    )

    # The operation must reproduce the reference histogram.
    self.assertEqual(op_hist, hist)

    # Kwargs recorded on the element generated by the operation.
    self.assertEqual(op_hist._operation_kwargs, expected_kwargs)

    # Cloning must carry the recorded kwargs along.
    self.assertEqual(op_hist.clone()._operation_kwargs, expected_kwargs)

    # A histogram created directly from the constructor records nothing.
    self.assertIsNone(hist._operation_kwargs)

@da_skip
def test_dataset_histogram_dask(self):
import dask.array as da
import dask.array as da
ds = Dataset((da.from_array(np.array(range(10), dtype='f'), chunks=(3)),),
['x'], datatype=['dask'])
op_hist = histogram(ds, num_bins=3)
Expand All @@ -161,7 +201,7 @@ def test_dataset_histogram_dask(self):

@da_skip
def test_dataset_cumulative_histogram_dask(self):
import dask.array as da
import dask.array as da
ds = Dataset((da.from_array(np.array(range(10), dtype='f'), chunks=(3)),),
['x'], datatype=['dask'])
op_hist = histogram(ds, num_bins=3, cumulative=True)
Expand Down

0 comments on commit 82acc56

Please sign in to comment.