Skip to content

Commit

Permalink
Merge 0a917a7 into b20132f
Browse files Browse the repository at this point in the history
  • Loading branch information
jonmmease committed Sep 14, 2019
2 parents b20132f + 0a917a7 commit 3841ee5
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 26 deletions.
18 changes: 13 additions & 5 deletions holoviews/core/data/__init__.py
Expand Up @@ -391,18 +391,26 @@ def select(self, selection_expr=None, selection_specs=None, **selection):

if selection_specs is not None and not isinstance(selection_specs, (list, tuple)):
selection_specs = [selection_specs]

# Get reference to the dataset that selections will be applied to
if (self.dataset is not None
and self.interface == self.dataset.interface):
# We can operate directly on self.dataset so select has access to
# all of the dimensions in dataset
dataset = self.dataset
else:
dataset = self

selection = {dim_name: sel for dim_name, sel in selection.items()
if dim_name in self.dimensions()+['selection_mask']}
if dim_name in dataset.dimensions()+['selection_mask']}
if (selection_specs and not any(self.matches(sp) for sp in selection_specs)
or (not selection and not selection_expr)):
return self

# Handle selection dim expression
if selection_expr is not None:
mask = selection_expr.apply(self, compute=False, keep_index=True)
dataset = self[mask]
else:
dataset = self
mask = selection_expr.apply(dataset, compute=False, keep_index=True)
dataset = dataset[mask]

# Handle selection kwargs
if selection:
Expand Down
61 changes: 48 additions & 13 deletions holoviews/element/chart.py
Expand Up @@ -236,6 +236,13 @@ def __init__(self, data, edges=None, **params):
# This is so that dataset contains the data needed to reconstruct
# the element.
self._dataset = dataset.clone()
elif self.dataset is None:
kdim = self.kdims[0].name
vdim = self.vdims[0].name
self._dataset = Dataset({
kdim: self.dimension_values(kdim),
vdim: self.dimension_values(vdim),
}, kdims=kdim, vdims=vdim)

def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides):
if 'dataset' in overrides:
Expand Down Expand Up @@ -263,22 +270,50 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides):

return new_element

def select(self, selection_specs=None, **selection):
selected = super(Histogram, self).select(
selection_specs=selection_specs, **selection
)
def select(self, selection_expr=None, selection_specs=None, **selection):
from ..operation import histogram
# Handle selection_specs and early exit
if selection_specs is not None and not isinstance(selection_specs, (list, tuple)):
selection_specs = [selection_specs]
if (selection_specs and not any(self.matches(sp) for sp in selection_specs)
or (not selection and not selection_expr)):
return self

if self.dataset is not None and self._operation_kwargs is not None:
# We have what we need to perform selection on dataset and
# regenerate the histogram.
selected_dataset = self.dataset.select(selection_expr, **selection)
selected = histogram(selected_dataset, **self._operation_kwargs)
if selected_dataset.dataset is not None:
selected._dataset = selected_dataset.dataset
else:
selected._dataset = selected_dataset
return selected
else:
# Perform selection directly on histogram
selected = super(Histogram, self).select(
selection_expr=selection_expr,
selection_specs=selection_specs,
**selection
)

if not np.isscalar(selected) and not np.array_equal(selected.data, self.data):
# Selection changed histogram bins, so update dataset
selection = {
dim: sel for dim, sel in selection.items()
if dim in self.dimensions()+['selection_mask']
}
# Handle updating dataset
if (not np.isscalar(selected)
and not np.array_equal(selected.data, self.data)
and selected._dataset is not None):

# Selection changed histogram bins, so update dataset
selection = {
dim: sel for dim, sel in selection.items()
if dim in self.dimensions() + ['selection_mask']
}

if selected._dataset is not None:
selected._dataset = self.dataset.select(**selection)
if selected._dataset is not None:
selected._dataset = self.dataset.select(
selection_expr=selection_expr, **selection
)

return selected
return selected

def _get_selection_expr_for_stream_value(self, **kwargs):
from ..util.transform import dim
Expand Down
78 changes: 70 additions & 8 deletions holoviews/tests/core/testdatasetproperty.py
@@ -1,7 +1,10 @@
from holoviews.element.comparison import ComparisonTestCase
import pandas as pd
import numpy as np
from holoviews import Dataset, Curve, Dimension, Scatter, Distribution
import dask.dataframe as dd
from holoviews.operation import histogram
from holoviews import dim

class DatasetPropertyTestCase(ComparisonTestCase):

Expand Down Expand Up @@ -147,6 +150,26 @@ def test_select_curve(self):
self.ds.select(b=10)
)

def test_select_curve_all_dimensions(self):
    # A curve built from self.ds should accept selections on any dimension
    # of that dataset: here 'b' is a value dimension of the curve, while
    # 'c' belongs to the original dataset only (neither kdim nor vdim of
    # the curve).
    base_curve = self.ds.to.curve('a', 'b', groupby=[])

    # The curve's dataset property must be the dataset it was built from
    self.assertEqual(base_curve.dataset, self.ds)

    # Select on the curve using keyword arguments
    criteria = dict(b=10, c='A')
    by_kwargs = base_curve.select(**criteria)

    # Selecting on the curve must match selecting on the dataset first
    # and only then constructing the curve
    expected = self.ds.select(**criteria).to.curve('a', 'b', groupby=[])
    self.assertEqual(by_kwargs, expected)

    # The same selection written as a dim expression gives the same curve
    expression = (dim('b') == 10) & (dim('c') == 'A')
    by_expression = base_curve.select(expression)
    self.assertEqual(by_expression, by_kwargs)

class HistogramTestCase(DatasetPropertyTestCase):

Expand All @@ -166,24 +189,63 @@ def test_select_single(self):

def test_select_multi(self):
# Add second selection on b. b is a dimension in hist.dataset but
# not in hist. Make sure that we only apply the a selection (and not
# the b selection) to the .dataset property
# not in hist. Make sure that we apply the selection on both
# properties.
sub_hist = self.hist.select(a=(1, None), b=100)

self.assertNotEqual(
sub_hist.dataset,
self.ds.select(a=(1, None), b=100)
)

self.assertEqual(
sub_hist.dataset,
self.ds.select(a=(1, None))
self.ds.select(a=(1, None), b=100)
)

def test_hist_to_curve(self):
    # Smoke test: converting the histogram fixture to a curve only has to
    # complete without raising; the returned element is not inspected.
    conversion = self.hist.to
    conversion.curve()

def test_hist_selection_all_dims(self):
    # Selecting on a histogram should work with every dimension of the
    # dataset it was computed from ('x' is binned, 'y' is not).

    # Value i (as a float) appears 2*i times, for i in 0..9
    x_values = []
    for i in range(10):
        x_values.extend([float(i)] * (2 * i))
    y_values = [position % 3 for position in range(len(x_values))]
    frame = pd.DataFrame({'x': x_values, 'y': y_values})

    source = Dataset(frame)
    full_hist = histogram(
        source, dimension='x', normed=False, num_bins=10, bin_range=[0, 10]
    )

    # The unselected histogram keeps the original dataset
    self.assertEqual(full_hist.dataset, source)

    # Unselected histogram: unit-width bins with 2*i samples in bin i
    expected_full = {
        'x': np.array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]),
        'x_count': np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18]),
    }
    self.assertEqual(full_hist.data, expected_full)

    # Down select using both the x and the y dimension
    by_kwargs = full_hist.select(x=(4, None), y=2)

    # The dataset property must reflect the same down selection
    self.assertEqual(by_kwargs.dataset, source.select(x=(4, None), y=2))

    # Bin edges should be unchanged while the counts shrink relative to
    # the unselected histogram
    expected_selected = {
        'x': np.array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]),
        'x_count': np.array([0, 0, 0, 0, 2, 4, 4, 4, 6, 6]),
    }
    self.assertEqual(by_kwargs.data, expected_selected)

    # An equivalent dim expression must produce the same histogram
    expression = (dim('x') >= 4) & (dim('y') == 2)
    by_expression = full_hist.select(expression)
    self.assertEqual(by_expression, by_kwargs)


class DistributionTestCase(DatasetPropertyTestCase):

Expand Down

0 comments on commit 3841ee5

Please sign in to comment.