Skip to content

Commit

Permalink
Merge branch 'master' into histogram_operation_kwargs
Browse files Browse the repository at this point in the history
  • Loading branch information
jonmmease committed Sep 3, 2019
2 parents e36d295 + 996e7c4 commit c2018e2
Show file tree
Hide file tree
Showing 27 changed files with 957 additions and 172 deletions.
29 changes: 21 additions & 8 deletions examples/user_guide/15-Large_Data.ipynb
Expand Up @@ -194,7 +194,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"![](http://assets.holoviews.org/gifs/guides/user_guide/Large_Data/rasterize_color_range.gif)"
"<img src=\"http://assets.holoviews.org/gifs/guides/user_guide/Large_Data/rasterize_color_range.gif\"></img>"
]
},
{
Expand Down Expand Up @@ -363,7 +363,13 @@
"\n",
"# Hover info\n",
"\n",
"As you can see in the examples above, converting the data to an image using Datashader makes it feasible to work with even very large datasets interactively. One unfortunate side effect is that the original datapoints and line segments can no longer be used to support \"tooltips\" or \"hover\" information directly for RGB images generated with `datashade`; that data simply is not present at the browser level, and so the browser cannot unambiguously report information about any specific datapoint. Luckily, you can still provide hover information that reports properties of a subset of the data in a separate layer (as above), or you can provide information for a spatial region of the plot rather than for specific datapoints. For instance, in some small rectangle you can provide statistics such as the mean, count, standard deviation, etc:"
"As you can see in the examples above, converting the data to an image using Datashader makes it feasible to work with even very large datasets interactively. One unfortunate side effect is that the original datapoints and line segments can no longer be used to support \"tooltips\" or \"hover\" information directly for RGB images generated with `datashade`; that data simply is not present at the browser level, and so the browser cannot unambiguously report information about any specific datapoint. \n",
"\n",
"If you do need hover information, there are two good options available:\n",
"\n",
"1) Use the ``rasterize`` operation without `shade`, which will let the plotting code handle the conversion to colors while still having the actual aggregated data to support hovering\n",
"\n",
"2) Overlay a separate layer as a ``QuadMesh`` or ``Image`` containing the hover information"
]
},
{
Expand All @@ -374,20 +380,24 @@
"source": [
"from holoviews.streams import RangeXY\n",
"\n",
"rasterized = rasterize(points, width=400, height=400)\n",
"\n",
"fixed_hover = (datashade(points, width=400, height=400) * \n",
" hv.QuadMesh(rasterize(points, width=10, height=10, dynamic=False)))\n",
"\n",
"dynamic_hover = (datashade(points, width=400, height=400) * \n",
" hv.util.Dynamic(rasterize(points, width=10, height=10, streams=[RangeXY]), operation=hv.QuadMesh))\n",
" rasterize(points, width=10, height=10, streams=[RangeXY]).apply(hv.QuadMesh))\n",
"\n",
"(fixed_hover + dynamic_hover).opts(opts.QuadMesh(tools=['hover'], alpha=0, hover_alpha=0.2))"
"(rasterized + fixed_hover + dynamic_hover).opts(\n",
" opts.QuadMesh(tools=['hover'], alpha=0, hover_alpha=0.2), \n",
" opts.Image(tools=['hover']))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the above examples, the plot on the left provides hover information at a fixed spatial scale, while the one on the right reports on an area that scales with the zoom level so that arbitrarily small regions of data space can be examined, which is generally more useful (but requires a live Python server). Note that you can activate the hover tool for `Image` elements output by the `rasterize` operation."
"In the above examples, the plot on the left provides hover information directly on the aggregated ``Image``. The middle plot displays hover information as a ``QuadMesh`` at a fixed spatial scale, while the one on the right reports on an area that scales with the zoom level so that arbitrarily small regions of data space can be examined, which is generally more useful (but requires a live Python server)."
]
},
{
Expand Down Expand Up @@ -444,7 +454,7 @@
"opts.defaults(\n",
" opts.Image(aspect=1, axiswise=True, xaxis='bare', yaxis='bare'),\n",
" opts.RGB(aspect=1, axiswise=True, xaxis='bare', yaxis='bare'),\n",
" opts.Layout(vspace=0.1, hspace=0.1, sublabel_format=\"\"))\n",
" opts.Layout(vspace=0.1, hspace=0.1, sublabel_format=\"\", fig_size=80))\n",
"\n",
"np.random.seed(12)\n",
"N=100\n",
Expand All @@ -464,6 +474,9 @@
"\n",
"shadeable = [elemtype(pts) for elemtype in [hv.Curve, hv.Scatter, hv.Points]]\n",
"shadeable += [hv.Path([pts])]\n",
"shadeable += [hv.Spikes(np.random.randn(10000))]\n",
"shadeable += [hv.Area(np.random.randn(10000).cumsum())]\n",
"shadeable += [hv.Spread((np.arange(10000), np.random.randn(10000).cumsum(), np.random.randn(10000)*10))]\n",
"shadeable += [hv.Image((x,y,z)), hv.QuadMesh((x,y,z))]\n",
"shadeable += [hv.Graph(((np.zeros(N), np.arange(N)),))]\n",
"shadeable += [tri.edgepaths]\n",
Expand Down Expand Up @@ -491,8 +504,8 @@
"metadata": {},
"outputs": [],
"source": [
"rgb_opts = opts.RGB(aspect=1, axiswise=True, xaxis='bare', yaxis='bare')\n",
"hv.Layout([e.relabel(e.__class__.name).opts(rgb_opts) for e in shadeable + rasterizable]).cols(6)"
"el_opts = dict(aspect=1, axiswise=True, xaxis='bare', yaxis='bare')\n",
"hv.Layout([e.relabel(e.__class__.name).opts(**el_opts) for e in shadeable + rasterizable]).cols(6)"
]
},
{
Expand Down
58 changes: 50 additions & 8 deletions holoviews/core/data/__init__.py
Expand Up @@ -14,7 +14,7 @@
from ..element import Element
from ..ndmapping import OrderedDict
from ..spaces import HoloMap, DynamicMap
from .interface import Interface, iloc, ndloc
from .interface import Interface, iloc, ndloc, DataError
from .array import ArrayInterface
from .dictionary import DictInterface
from .grid import GridInterface
Expand Down Expand Up @@ -153,6 +153,8 @@ def __call__(self, new_type, kdims=None, vdims=None, groupby=None,
params['group'] = selected.group
params.update(kwargs)
if len(kdims) == selected.ndims or not groupby:
# Propagate dataset
params['dataset'] = self._element.dataset
element = new_type(selected, **params)
return element.sort() if sort else element
group = selected.groupby(groupby, container_type=HoloMap,
Expand Down Expand Up @@ -335,7 +337,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs):
return self.clone(data, **dimensions)


def select(self, selection_specs=None, **selection):
def select(self, selection_expr=None, selection_specs=None, **selection):
"""Applies selection by dimension name
Applies a selection along the dimensions of the object using
Expand All @@ -360,7 +362,14 @@ def select(self, selection_specs=None, **selection):
ds.select(x=[0, 1, 2])
* predicate expression: A holoviews.dim expression, e.g.:
from holoviews import dim
ds.select(selection_expr=dim('x') % 2 == 0)
Args:
selection_expr: holoviews.dim predicate expression
specifying selection.
selection_specs: List of specs to match on
A list of types, functions, or type[.group][.label]
strings specifying which objects to apply the
Expand All @@ -373,15 +382,33 @@ def select(self, selection_specs=None, **selection):
Returns a Dimensioned object containing the selected data
or a scalar if a single value was selected
"""
from ...util.transform import dim
if selection_expr is not None and not isinstance(selection_expr, dim):
raise ValueError("""\
The first positional argument to the Dataset.select method is expected to be a
holoviews.util.transform.dim expression. Use the selection_specs keyword
argument to specify a selection specification""")

if selection_specs is not None and not isinstance(selection_specs, (list, tuple)):
selection_specs = [selection_specs]
selection = {dim: sel for dim, sel in selection.items()
if dim in self.dimensions()+['selection_mask']}
selection = {dim_name: sel for dim_name, sel in selection.items()
if dim_name in self.dimensions()+['selection_mask']}
if (selection_specs and not any(self.matches(sp) for sp in selection_specs)
or not selection):
or (not selection and not selection_expr)):
return self

data = self.interface.select(self, **selection)
# Handle selection dim expression
if selection_expr is not None:
mask = selection_expr.apply(self, compute=False, keep_index=True)
dataset = self[mask]
else:
dataset = self

# Handle selection kwargs
if selection:
data = dataset.interface.select(dataset, **selection)
else:
data = dataset.data

if np.isscalar(data):
return data
Expand Down Expand Up @@ -453,7 +480,7 @@ def __getitem__(self, slices):
object.
"""
slices = util.process_ellipses(self, slices, vdim_selection=True)
if isinstance(slices, np.ndarray) and slices.dtype.kind == 'b':
if getattr(getattr(slices, 'dtype', None), 'kind', None) == 'b':
if not len(slices) == len(self):
raise IndexError("Boolean index must match length of sliced object")
return self.clone(self.select(selection_mask=slices))
Expand Down Expand Up @@ -852,8 +879,23 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides):
if 'datatype' not in overrides:
datatypes = [self.interface.datatype] + self.datatype
overrides['datatype'] = list(util.unique_iterator(datatypes))
return super(Dataset, self).clone(data, shared_data, new_type, *args, **overrides)

if 'dataset' in overrides:
dataset = overrides.pop('dataset')
else:
dataset = self.dataset

new_dataset = super(Dataset, self).clone(data, shared_data, new_type, *args, **overrides)

if dataset is not None:
try:
new_dataset._dataset = dataset.clone(data=new_dataset.data, dataset=None)
except DataError:
# New dataset doesn't have the necessary dimensions to
# propagate dataset. Do nothing
pass

return new_dataset

@property
def iloc(self):
Expand Down
4 changes: 3 additions & 1 deletion holoviews/core/data/array.py
Expand Up @@ -123,7 +123,9 @@ def sort(cls, dataset, by=[], reverse=False):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
):
data = dataset.data
dim_idx = dataset.get_dimension_index(dim)
if data.ndim == 1:
Expand Down
18 changes: 16 additions & 2 deletions holoviews/core/data/dask.py
Expand Up @@ -90,12 +90,23 @@ def sort(cls, dataset, by=[], reverse=False):
return dataset.data

@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls,
dataset,
dim,
expanded=True,
flat=True,
compute=True,
keep_index=False,
):
dim = dataset.get_dimension(dim)
data = dataset.data[dim.name]
if not expanded:
data = data.unique()
return data.compute().values if compute else data.values
if keep_index:
return data.compute() if compute else data
else:
return data.compute().values if compute else data.values

@classmethod
def select_mask(cls, dataset, selection):
Expand Down Expand Up @@ -164,6 +175,9 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
kdims=element_dims)
group_kwargs.update(kwargs)

# Propagate dataset
group_kwargs['dataset'] = dataset.dataset

data = []
group_by = [d.name for d in index_dims]
groupby = dataset.data.groupby(group_by)
Expand Down
4 changes: 3 additions & 1 deletion holoviews/core/data/dictionary.py
Expand Up @@ -246,7 +246,9 @@ def range(cls, dataset, dimension):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
):
dim = dataset.get_dimension(dim).name
values = dataset.data.get(dim)
if isscalar(values):
Expand Down
4 changes: 3 additions & 1 deletion holoviews/core/data/grid.py
Expand Up @@ -338,7 +338,9 @@ def ndloc(cls, dataset, indices):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
):
dim = dataset.get_dimension(dim, strict=True)
if dim in dataset.vdims or dataset.data[dim.name].ndim > 1:
data = dataset.data[dim.name]
Expand Down
4 changes: 3 additions & 1 deletion holoviews/core/data/image.py
Expand Up @@ -156,7 +156,9 @@ def range(cls, obj, dim):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
):
"""
The set of samples available along a particular dimension.
"""
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/interface.py
Expand Up @@ -63,7 +63,7 @@ def __getitem__(self, index):
rows, cols = index
if rows is Ellipsis:
rows = slice(None)
data = self.dataset.interface.iloc(self.dataset, (rows, cols))
data = self.dataset.interface.iloc(self.dataset.dataset, (rows, cols))
kdims = self.dataset.kdims
vdims = self.dataset.vdims
if np.isscalar(data):
Expand Down
14 changes: 12 additions & 2 deletions holoviews/core/data/multipath.py
Expand Up @@ -281,7 +281,15 @@ def redim(cls, dataset, dimensions):
return new_data

@classmethod
def values(cls, dataset, dimension, expanded=True, flat=True, compute=True):
def values(
cls,
dataset,
dimension,
expanded=True,
flat=True,
compute=True,
keep_index=False,
):
"""
Returns a single concatenated array of all subpaths separated
by NaN values. If expanded keyword is False an array of arrays
Expand All @@ -293,7 +301,9 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True):
ds = cls._inner_dataset_template(dataset)
for d in dataset.data:
ds.data = d
dvals = ds.interface.values(ds, dimension, expanded, flat, compute)
dvals = ds.interface.values(
ds, dimension, expanded, flat, compute, keep_index
)
if not len(dvals):
continue
elif expanded:
Expand Down
22 changes: 19 additions & 3 deletions holoviews/core/data/pandas.py
Expand Up @@ -190,6 +190,9 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
kdims=element_dims)
group_kwargs.update(kwargs)

# Propagate dataset
group_kwargs['dataset'] = dataset.dataset

group_by = [d.name for d in index_dims]
data = [(k, group_type(v, **group_kwargs)) for k, v in
dataset.data.groupby(group_by, sort=False)]
Expand Down Expand Up @@ -266,20 +269,33 @@ def select(cls, dataset, selection_mask=None, **selection):
df = dataset.data
if selection_mask is None:
selection_mask = cls.select_mask(dataset, selection)

indexed = cls.indexed(dataset, selection)
df = df.iloc[selection_mask]
if isinstance(selection_mask, pd.Series):
df = df[selection_mask]
else:
df = df.iloc[selection_mask]
if indexed and len(df) == 1 and len(dataset.vdims) == 1:
return df[dataset.vdims[0].name].iloc[0]
return df


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(
cls,
dataset,
dim,
expanded=True,
flat=True,
compute=True,
keep_index=False,
):
dim = dataset.get_dimension(dim, strict=True)
data = dataset.data[dim.name]
if not expanded:
return data.unique()
return data.values

return data if keep_index else data.values


@classmethod
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/xarray.py
Expand Up @@ -303,7 +303,7 @@ def coords(cls, dataset, dimension, ordered=False, expanded=False, edges=False):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False):
dim = dataset.get_dimension(dim, strict=True)
data = dataset.data[dim.name].data
irregular = cls.irregular(dataset, dim) if dim in dataset.kdims else False
Expand Down

0 comments on commit c2018e2

Please sign in to comment.