Skip to content

Commit

Permalink
Support select on all dims in .dataset for Histogram elements
Browse files Browse the repository at this point in the history
When select is called on a Histogram element whose .dataset and ._operation_kwargs are both set (not None), the selection is applied to the dataset and the histogram is regenerated from the selected data.
  • Loading branch information
jonmmease committed Sep 3, 2019
1 parent 400fbc8 commit 7083f6b
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 24 deletions.
60 changes: 44 additions & 16 deletions holoviews/element/chart.py
Expand Up @@ -220,22 +220,50 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides):

return new_element

def select(self, selection_specs=None, **selection):
    """
    Apply a selection to the histogram and keep its dataset in step.

    The selection is delegated to the parent class. If the result is
    not a scalar, its data differs from this element's data, and it
    carries a dataset, that dataset is replaced by a selection of
    ``self.dataset`` using only the keys that are dimensions of this
    element (or the special ``'selection_mask'`` key).
    """
    result = super(Histogram, self).select(
        selection_specs=selection_specs, **selection
    )

    # Nothing to synchronise when the selection produced a scalar or
    # left the histogram bins untouched.
    if np.isscalar(result) or np.array_equal(result.data, self.data):
        return result

    if result._dataset is None:
        return result

    # Only forward the keys this element knows about to the dataset.
    allowed_keys = self.dimensions() + ['selection_mask']
    dataset_selection = {
        key: value for key, value in selection.items()
        if key in allowed_keys
    }
    result._dataset = self.dataset.select(**dataset_selection)
    return result
def select(self, selection_expr=None, selection_specs=None, **selection):
    """
    Apply a selection to the histogram, regenerating it when possible.

    Returns ``self`` unchanged when ``selection_specs`` is given and
    none of the specs match this element, or when neither keyword
    selections nor a ``selection_expr`` are supplied.

    When both ``self.dataset`` and ``self._operation_kwargs`` are not
    None, the selection is applied to the dataset and a new histogram
    is computed from the selected data by calling ``histogram`` with
    the stored operation kwargs.

    Otherwise the selection is applied to the histogram itself through
    the parent class; if that changed the data and the result carries
    a dataset, the dataset is replaced by a selection of
    ``self.dataset`` restricted to keys that are dimensions of this
    element (or ``'selection_mask'``).
    """
    from ..operation import histogram

    # Accept a single spec as well as a list/tuple of specs.
    if selection_specs is not None and not isinstance(selection_specs, (list, tuple)):
        selection_specs = [selection_specs]

    # Early exits: specs given but none match, or nothing to select on.
    if selection_specs and not any(self.matches(spec) for spec in selection_specs):
        return self
    if not selection and not selection_expr:
        return self

    if self.dataset is not None and self._operation_kwargs is not None:
        # Select on the underlying dataset and rebuild the histogram
        # from the selected rows.
        subset = self.dataset.select(selection_expr, **selection)
        regenerated = histogram(subset, **self._operation_kwargs)
        regenerated._dataset = (
            subset if subset.dataset is None else subset.dataset
        )
        return regenerated

    # No dataset/operation kwargs available: select directly on the
    # histogram.
    binned = super(Histogram, self).select(
        selection_expr=selection_expr,
        selection_specs=selection_specs,
        **selection
    )

    # The dataset only needs updating when the selection actually
    # changed the bins and the result has a dataset attached.
    if np.isscalar(binned) or np.array_equal(binned.data, self.data):
        return binned
    if binned._dataset is None:
        return binned

    allowed_keys = self.dimensions() + ['selection_mask']
    dataset_selection = {
        key: value for key, value in selection.items()
        if key in allowed_keys
    }
    binned._dataset = self.dataset.select(
        selection_expr=selection_expr, **dataset_selection
    )
    return binned

def __setstate__(self, state):
"""
Expand Down
58 changes: 50 additions & 8 deletions holoviews/tests/core/testdatasetproperty.py
@@ -1,7 +1,10 @@
from holoviews.element.comparison import ComparisonTestCase
import pandas as pd
import numpy as np
from holoviews import Dataset, Curve, Dimension, Scatter, Distribution
import dask.dataframe as dd
from holoviews.operation import histogram
from holoviews import dim

class DatasetPropertyTestCase(ComparisonTestCase):

Expand Down Expand Up @@ -186,24 +189,63 @@ def test_select_single(self):

def test_select_multi(self):
# NOTE(review): this span looks like diff residue that interleaves the
# pre-commit and post-commit versions of the test (the +/- markers and the
# indentation were lost). The two comment variants and the contradictory
# assertions below are presumably the removed and added sides of the
# change -- confirm against the repository before relying on either.
# Add second selection on b. b is a dimension in hist.dataset but
# not in hist. Make sure that we only apply the a selection (and not
# the b selection) to the .dataset property
# not in hist. Make sure that we apply the selection on both
# properties.
sub_hist = self.hist.select(a=(1, None), b=100)

# NOTE(review): presumably the removed (pre-commit) assertion, which
# expected the b selection NOT to reach the dataset -- TODO confirm.
self.assertNotEqual(
sub_hist.dataset,
self.ds.select(a=(1, None), b=100)
)

# NOTE(review): the two expected values below (without and with b=100)
# appear to be the old and new second argument of a single assertEqual
# call; as written there is no comma between them -- TODO confirm.
self.assertEqual(
sub_hist.dataset,
self.ds.select(a=(1, None))
self.ds.select(a=(1, None), b=100)
)

def test_hist_to_curve(self):
    """Smoke test: calling ``.to.curve()`` on the histogram must not raise."""
    self.hist.to.curve()

def test_hist_selection_all_dims(self):
    """
    Selecting on a histogram regenerates it from the selected dataset.

    The selection uses both the binned dimension (x) and a dimension
    that exists only on the underlying dataset (y).
    """
    # The value i occurs 2*i times, for i in 0..9.
    xs = [float(i) for i in range(10) for _ in range(2 * i)]
    ys = [idx % 3 for idx in range(len(xs))]
    ds = Dataset(pd.DataFrame({'x': xs, 'y': ys}))

    hist1 = histogram(
        ds,
        dimension='x',
        normed=False,
        num_bins=10,
        bin_range=[0, 10],
    )

    # The histogram keeps a reference to the data it was built from.
    self.assertEqual(hist1.dataset, ds)

    # Eleven unit-spaced edges 0..10; bin i holds the 2*i samples equal to i.
    bin_edges = np.arange(11, dtype=float)
    full_counts = 2 * np.arange(10)
    self.assertEqual(
        hist1.data,
        {'x': bin_edges, 'x_count': full_counts}
    )

    # Select a histogram subset using both the x and y dimensions.
    hist2 = hist1.select(x=(4, None), y=2)

    # The dataset must be down-selected in the same way.
    self.assertEqual(hist2.dataset, ds.select(x=(4, None), y=2))

    # Bins are unchanged; counts drop relative to hist1 because of the
    # selection.
    selected_counts = np.array([0, 0, 0, 0, 2, 4, 4, 4, 6, 6])
    self.assertEqual(
        hist2.data,
        {'x': bin_edges, 'x_count': selected_counts}
    )

    # An equivalent dim expression must give the same histogram.
    hist3 = hist1.select((dim('x') >= 4) & (dim('y') == 2))
    self.assertEqual(hist3, hist2)


class DistributionTestCase(DatasetPropertyTestCase):

Expand Down

0 comments on commit 7083f6b

Please sign in to comment.