Merge branch 'master' into histogram_operation_kwargs

holoviz · Sep 3, 2019 · c2018e2 · c2018e2
2 parents e36d295 + 996e7c4
commit c2018e2
Show file tree

Hide file tree

Showing 27 changed files with 957 additions and 172 deletions.
diff --git a/examples/user_guide/15-Large_Data.ipynb b/examples/user_guide/15-Large_Data.ipynb
@@ -194,7 +194,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "![](http://assets.holoviews.org/gifs/guides/user_guide/Large_Data/rasterize_color_range.gif)"
+    "<img src=\"http://assets.holoviews.org/gifs/guides/user_guide/Large_Data/rasterize_color_range.gif\"></img>"
    ]
   },
   {
@@ -363,7 +363,13 @@
     "\n",
     "# Hover info\n",
     "\n",
-    "As you can see in the examples above, converting the data to an image using Datashader makes it feasible to work with even very large datasets interactively.  One unfortunate side effect is that the original datapoints and line segments can no longer be used to support \"tooltips\" or \"hover\" information directly for RGB images generated with `datashade`; that data simply is not present at the browser level, and so the browser cannot unambiguously report information about any specific datapoint. Luckily, you can still provide hover information that reports properties of a subset of the data in a separate layer (as above), or you can provide information for a spatial region of the plot rather than for specific datapoints.  For instance, in some small rectangle you can provide statistics such as the mean, count, standard deviation, etc:"
+    "As you can see in the examples above, converting the data to an image using Datashader makes it feasible to work with even very large datasets interactively.  One unfortunate side effect is that the original datapoints and line segments can no longer be used to support \"tooltips\" or \"hover\" information directly for RGB images generated with `datashade`; that data simply is not present at the browser level, and so the browser cannot unambiguously report information about any specific datapoint. \n",
+    "\n",
+    "If you do need hover information, there are two good options available:\n",
+    "\n",
+    "1) Use the ``rasterize`` operation without `shade`, which lets the plotting code handle the conversion to colors while still retaining the actual aggregated data to support hovering.\n",
+    "\n",
+    "2) Overlay a separate layer as a ``QuadMesh`` or ``Image`` containing the hover information."
    ]
   },
   {
@@ -374,20 +380,24 @@
    "source": [
     "from holoviews.streams import RangeXY\n",
     "\n",
+    "rasterized = rasterize(points, width=400, height=400)\n",
+    "\n",
     "fixed_hover = (datashade(points, width=400, height=400) *  \n",
     "               hv.QuadMesh(rasterize(points, width=10, height=10, dynamic=False)))\n",
     "\n",
     "dynamic_hover = (datashade(points, width=400, height=400) * \n",
-    "                 hv.util.Dynamic(rasterize(points, width=10, height=10, streams=[RangeXY]), operation=hv.QuadMesh))\n",
+    "                 rasterize(points, width=10, height=10, streams=[RangeXY]).apply(hv.QuadMesh))\n",
     "\n",
-    "(fixed_hover + dynamic_hover).opts(opts.QuadMesh(tools=['hover'], alpha=0, hover_alpha=0.2))"
+    "(rasterized + fixed_hover + dynamic_hover).opts(\n",
+    "    opts.QuadMesh(tools=['hover'], alpha=0, hover_alpha=0.2), \n",
+    "    opts.Image(tools=['hover']))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "In the above examples, the plot on the left provides hover information at a fixed spatial scale, while the one on the right reports on an area that scales with the zoom level so that arbitrarily small regions of data space can be examined, which is generally more useful (but requires a live Python server). Note that you can activate the hover tool for `Image` elements output by the `rasterize` operation."
+    "In the above examples, the plot on the left provides hover information directly on the aggregated ``Image``. The middle plot displays hover information as a ``QuadMesh`` at a fixed spatial scale, while the one on the right reports on an area that scales with the zoom level so that arbitrarily small regions of data space can be examined, which is generally more useful (but requires a live Python server)."
    ]
   },
   {
@@ -444,7 +454,7 @@
     "opts.defaults(\n",
     "    opts.Image(aspect=1, axiswise=True, xaxis='bare', yaxis='bare'),\n",
     "    opts.RGB(aspect=1, axiswise=True, xaxis='bare', yaxis='bare'),\n",
-    "    opts.Layout(vspace=0.1, hspace=0.1, sublabel_format=\"\"))\n",
+    "    opts.Layout(vspace=0.1, hspace=0.1, sublabel_format=\"\", fig_size=80))\n",
     "\n",
     "np.random.seed(12)\n",
     "N=100\n",
@@ -464,6 +474,9 @@
     "\n",
     "shadeable  = [elemtype(pts) for elemtype in [hv.Curve, hv.Scatter, hv.Points]]\n",
     "shadeable += [hv.Path([pts])]\n",
+    "shadeable += [hv.Spikes(np.random.randn(10000))]\n",
+    "shadeable += [hv.Area(np.random.randn(10000).cumsum())]\n",
+    "shadeable += [hv.Spread((np.arange(10000), np.random.randn(10000).cumsum(), np.random.randn(10000)*10))]\n",
     "shadeable += [hv.Image((x,y,z)), hv.QuadMesh((x,y,z))]\n",
     "shadeable += [hv.Graph(((np.zeros(N), np.arange(N)),))]\n",
     "shadeable += [tri.edgepaths]\n",
@@ -491,8 +504,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "rgb_opts = opts.RGB(aspect=1, axiswise=True, xaxis='bare', yaxis='bare')\n",
-    "hv.Layout([e.relabel(e.__class__.name).opts(rgb_opts) for e in shadeable + rasterizable]).cols(6)"
+    "el_opts = dict(aspect=1, axiswise=True, xaxis='bare', yaxis='bare')\n",
+    "hv.Layout([e.relabel(e.__class__.name).opts(**el_opts) for e in shadeable + rasterizable]).cols(6)"
    ]
   },
   {

diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
@@ -14,7 +14,7 @@
 from ..element import Element
 from ..ndmapping import OrderedDict
 from ..spaces import HoloMap, DynamicMap
-from .interface import Interface, iloc, ndloc
+from .interface import Interface, iloc, ndloc, DataError
 from .array import ArrayInterface
 from .dictionary import DictInterface
 from .grid import GridInterface
@@ -153,6 +153,8 @@ def __call__(self, new_type, kdims=None, vdims=None, groupby=None,
             params['group'] = selected.group
         params.update(kwargs)
         if len(kdims) == selected.ndims or not groupby:
+            # Propagate dataset
+            params['dataset'] = self._element.dataset
             element = new_type(selected, **params)
             return element.sort() if sort else element
         group = selected.groupby(groupby, container_type=HoloMap,
@@ -335,7 +337,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs):
         return self.clone(data, **dimensions)
 
 
-    def select(self, selection_specs=None, **selection):
+    def select(self, selection_expr=None, selection_specs=None, **selection):
         """Applies selection by dimension name
 
         Applies a selection along the dimensions of the object using
@@ -360,7 +362,14 @@ def select(self, selection_specs=None, **selection):
 
             ds.select(x=[0, 1, 2])
 
+        * predicate expression: A holoviews.dim expression, e.g.:
+
+            from holoviews import dim
+            ds.select(selection_expr=dim('x') % 2 == 0)
+
         Args:
+            selection_expr: holoviews.dim predicate expression
+                specifying selection.
             selection_specs: List of specs to match on
                 A list of types, functions, or type[.group][.label]
                 strings specifying which objects to apply the
@@ -373,15 +382,33 @@ def select(self, selection_specs=None, **selection):
             Returns a Dimensioned object containing the selected data
             or a scalar if a single value was selected
         """
+        from ...util.transform import dim
+        if selection_expr is not None and not isinstance(selection_expr, dim):
+            raise ValueError("""\
+The first positional argument to the Dataset.select method is expected to be a
+holoviews.util.transform.dim expression. Use the selection_specs keyword
+argument to specify a selection specification""")
+
         if selection_specs is not None and not isinstance(selection_specs, (list, tuple)):
             selection_specs = [selection_specs]
-        selection = {dim: sel for dim, sel in selection.items()
-                     if dim in self.dimensions()+['selection_mask']}
+        selection = {dim_name: sel for dim_name, sel in selection.items()
+                     if dim_name in self.dimensions()+['selection_mask']}
         if (selection_specs and not any(self.matches(sp) for sp in selection_specs)
-            or not selection):
+                or (not selection and not selection_expr)):
             return self
 
-        data = self.interface.select(self, **selection)
+        # Handle selection dim expression
+        if selection_expr is not None:
+            mask = selection_expr.apply(self, compute=False, keep_index=True)
+            dataset = self[mask]
+        else:
+            dataset = self
+
+        # Handle selection kwargs
+        if selection:
+            data = dataset.interface.select(dataset, **selection)
+        else:
+            data = dataset.data
 
         if np.isscalar(data):
             return data
@@ -453,7 +480,7 @@ def __getitem__(self, slices):
                object.
         """
         slices = util.process_ellipses(self, slices, vdim_selection=True)
-        if isinstance(slices, np.ndarray) and slices.dtype.kind == 'b':
+        if getattr(getattr(slices, 'dtype', None), 'kind', None) == 'b':
             if not len(slices) == len(self):
                 raise IndexError("Boolean index must match length of sliced object")
             return self.clone(self.select(selection_mask=slices))
@@ -852,8 +879,23 @@ def clone(self, data=None, shared_data=True, new_type=None, *args, **overrides):
         if 'datatype' not in overrides:
             datatypes = [self.interface.datatype] + self.datatype
             overrides['datatype'] = list(util.unique_iterator(datatypes))
-        return super(Dataset, self).clone(data, shared_data, new_type, *args, **overrides)
 
+        if 'dataset' in overrides:
+            dataset = overrides.pop('dataset')
+        else:
+            dataset = self.dataset
+
+        new_dataset = super(Dataset, self).clone(data, shared_data, new_type, *args, **overrides)
+
+        if dataset is not None:
+            try:
+                new_dataset._dataset = dataset.clone(data=new_dataset.data, dataset=None)
+            except DataError:
+                # New dataset doesn't have the necessary dimensions to
+                # propagate dataset. Do nothing
+                pass
+
+        return new_dataset
 
     @property
     def iloc(self):

diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py
@@ -123,7 +123,9 @@ def sort(cls, dataset, by=[], reverse=False):
 
 
     @classmethod
-    def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
+    def values(
+            cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
+    ):
         data = dataset.data
         dim_idx = dataset.get_dimension_index(dim)
         if data.ndim == 1:

diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py
@@ -90,12 +90,23 @@ def sort(cls, dataset, by=[], reverse=False):
         return dataset.data
 
     @classmethod
-    def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
+    def values(
+            cls,
+            dataset,
+            dim,
+            expanded=True,
+            flat=True,
+            compute=True,
+            keep_index=False,
+    ):
         dim = dataset.get_dimension(dim)
         data = dataset.data[dim.name]
         if not expanded:
             data = data.unique()
-        return data.compute().values if compute else data.values
+        if keep_index:
+            return data.compute() if compute else data
+        else:
+            return data.compute().values if compute else data.values
 
     @classmethod
     def select_mask(cls, dataset, selection):
@@ -164,6 +175,9 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
                                 kdims=element_dims)
         group_kwargs.update(kwargs)
 
+        # Propagate dataset
+        group_kwargs['dataset'] = dataset.dataset
+
         data = []
         group_by = [d.name for d in index_dims]
         groupby = dataset.data.groupby(group_by)

diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py
@@ -246,7 +246,9 @@ def range(cls, dataset, dimension):
 
 
     @classmethod
-    def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
+    def values(
+            cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
+    ):
         dim = dataset.get_dimension(dim).name
         values = dataset.data.get(dim)
         if isscalar(values):

diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py
@@ -338,7 +338,9 @@ def ndloc(cls, dataset, indices):
 
 
     @classmethod
-    def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
+    def values(
+            cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
+    ):
         dim = dataset.get_dimension(dim, strict=True)
         if dim in dataset.vdims or dataset.data[dim.name].ndim > 1:
             data = dataset.data[dim.name]

diff --git a/holoviews/core/data/image.py b/holoviews/core/data/image.py
@@ -156,7 +156,9 @@ def range(cls, obj, dim):
 
 
     @classmethod
-    def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
+    def values(
+            cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False
+    ):
         """
         The set of samples available along a particular dimension.
         """

diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py
@@ -63,7 +63,7 @@ def __getitem__(self, index):
         rows, cols = index
         if rows is Ellipsis:
             rows = slice(None)
-        data = self.dataset.interface.iloc(self.dataset, (rows, cols))
+        data = self.dataset.interface.iloc(self.dataset.dataset, (rows, cols))
         kdims = self.dataset.kdims
         vdims = self.dataset.vdims
         if np.isscalar(data):

diff --git a/holoviews/core/data/multipath.py b/holoviews/core/data/multipath.py
@@ -281,7 +281,15 @@ def redim(cls, dataset, dimensions):
         return new_data
 
     @classmethod
-    def values(cls, dataset, dimension, expanded=True, flat=True, compute=True):
+    def values(
+            cls,
+            dataset,
+            dimension,
+            expanded=True,
+            flat=True,
+            compute=True,
+            keep_index=False,
+    ):
         """
         Returns a single concatenated array of all subpaths separated
         by NaN values. If expanded keyword is False an array of arrays
@@ -293,7 +301,9 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True):
         ds = cls._inner_dataset_template(dataset)
         for d in dataset.data:
             ds.data = d
-            dvals = ds.interface.values(ds, dimension, expanded, flat, compute)
+            dvals = ds.interface.values(
+                ds, dimension, expanded, flat, compute, keep_index
+            )
             if not len(dvals):
                 continue
             elif expanded:

diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py
@@ -190,6 +190,9 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
                                 kdims=element_dims)
         group_kwargs.update(kwargs)
 
+        # Propagate dataset
+        group_kwargs['dataset'] = dataset.dataset
+
         group_by = [d.name for d in index_dims]
         data = [(k, group_type(v, **group_kwargs)) for k, v in
                 dataset.data.groupby(group_by, sort=False)]
@@ -266,20 +269,33 @@ def select(cls, dataset, selection_mask=None, **selection):
         df = dataset.data
         if selection_mask is None:
             selection_mask = cls.select_mask(dataset, selection)
+
         indexed = cls.indexed(dataset, selection)
-        df = df.iloc[selection_mask]
+        if isinstance(selection_mask, pd.Series):
+            df = df[selection_mask]
+        else:
+            df = df.iloc[selection_mask]
         if indexed and len(df) == 1 and len(dataset.vdims) == 1:
             return df[dataset.vdims[0].name].iloc[0]
         return df
 
 
     @classmethod
-    def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
+    def values(
+            cls,
+            dataset,
+            dim,
+            expanded=True,
+            flat=True,
+            compute=True,
+            keep_index=False,
+    ):
         dim = dataset.get_dimension(dim, strict=True)
         data = dataset.data[dim.name]
         if not expanded:
             return data.unique()
-        return data.values
+
+        return data if keep_index else data.values
 
 
     @classmethod

diff --git a/holoviews/core/data/xarray.py b/holoviews/core/data/xarray.py
@@ -303,7 +303,7 @@ def coords(cls, dataset, dimension, ordered=False, expanded=False, edges=False):
 
 
     @classmethod
-    def values(cls, dataset, dim, expanded=True, flat=True, compute=True):
+    def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False):
         dim = dataset.get_dimension(dim, strict=True)
         data = dataset.data[dim.name].data
         irregular = cls.irregular(dataset, dim) if dim in dataset.kdims else False