Merge 730ee9f into fb71e9e
philippjfr committed Mar 5, 2020
2 parents fb71e9e + 730ee9f commit b9032e3
Showing 16 changed files with 77 additions and 87 deletions.
1 change: 0 additions & 1 deletion doc/user_guide/Custom_Interactivity.rst
@@ -2,5 +2,4 @@ Custom Interactivity
____________________

.. notebook:: holoviews ../../examples/user_guide/13-Custom_Interactivity.ipynb
- :skip_execute: True
:offset: 1
2 changes: 1 addition & 1 deletion examples/user_guide/06-Building_Composite_Objects.ipynb
@@ -67,7 +67,7 @@
"\n",
"penguins = hv.RGB.load_image('../reference/elements/assets/penguins.png').relabel(group=\"Family\", label=\"Penguin\")\n",
"\n",
"layout = gridspace + penguins"
"layout = gridspace + penguins.opts(axiswise=True)"
]
},
{
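The new `axiswise=True` opt decouples the RGB's normalization from the rest of the Layout. A minimal sketch of the same pattern (the curves here are illustrative, not from the notebook):

```python
import numpy as np
import holoviews as hv
hv.extension('bokeh')

# By default a Layout shares normalization across subplots; axiswise=True
# lets an element compute its own ranges independently of its neighbours.
small = hv.Curve(np.random.randn(50).cumsum(), label='small')
large = hv.Curve(100 * np.random.randn(50).cumsum(), label='large')
layout = small + large.opts(axiswise=True)
```
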
4 changes: 2 additions & 2 deletions examples/user_guide/07-Live_Data.ipynb
@@ -43,8 +43,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"<center><div class=\"alert alert-info\" role=\"alert\">To visualize and use a <b>DynamicMap</b> you need to be running a live Jupyter server.<br>This guide assumes that it will be run in a live notebook environment.<br>\n",
"When viewed statically, DynamicMaps will only show the first available Element,<br> and will thus not have any slider widgets, making it difficult to follow the descriptions below.<br><br>\n",
"\n",
"<center><div class=\"alert alert-info\" role=\"alert\">To use visualize and use a <b>DynamicMap</b> you need to be running a live Jupyter server.<br>When viewing this user guide as part of the documentation DynamicMaps will be sampled with a limited number of states.<br>\n",
"It's also best to run this notebook one cell at a time, not via \"Run All\",<br> so that subsequent cells can reflect your dynamic interaction with widgets in previous cells.</div></center>"
]
},
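For readers following along, a minimal sketch of the behaviour the revised alert describes. Live in Jupyter this DynamicMap renders with a 'frequency' slider; in the static docs it is sampled with a limited number of states:

```python
import numpy as np
import holoviews as hv
hv.extension('bokeh')

def sine(frequency):
    xs = np.linspace(0, np.pi * 4, 100)
    return hv.Curve((xs, np.sin(xs * frequency)), 'x', 'y')

# The callable is evaluated lazily for each slider position.
dmap = hv.DynamicMap(sine, kdims='frequency').redim.range(frequency=(1, 5))
```
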
8 changes: 5 additions & 3 deletions examples/user_guide/08-Tabular_Datasets.ipynb
@@ -29,6 +29,8 @@
"\n",
"* Dask DataFrames\n",
"\n",
"* cuDF Dataframes\n",
"\n",
"A number of additonal standard constructors are supported:\n",
"\n",
"* A tuple of array (or array-like) objects\n",
@@ -53,7 +55,7 @@
"\n",
"hv.extension('bokeh', 'matplotlib')\n",
"\n",
"opts.defaults(opts.Scatter(size=10, padding=0.1))"
"opts.defaults(opts.Scatter(size=10))"
]
},
{
@@ -366,7 +368,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tabularizing space containers\n",
"## Collapsing dimensioned containers\n",
"\n",
"Even deeply nested objects can be deconstructed in this way, serializing them to make it easier to get your raw data out of a collection of specialized ``Element`` types. Let's say we want to make multiple observations of a noisy signal. We can collect the data into a ``HoloMap`` to visualize it and then call ``.collapse()`` to get a ``Dataset`` object to which we can apply operations or transformations to other ``Element`` types. Deconstructing nested data in this way only works if the data is homogeneous. In practical terms this requires that your data structure contains Elements (of any type) held in these Container types: ``NdLayout``, ``GridSpace``, ``HoloMap``, and ``NdOverlay``, with all dimensions consistent throughout (so that they can all fit into the same set of columns). To read more about these containers see the [Dimensioned Containers](./Dimensioned_Containers.ipynb) guide.\n",
"\n",
@@ -442,7 +444,7 @@
"metadata": {},
"outputs": [],
"source": [
"noise_layout.table()"
"hv.Table(noise_layout.collapse())"
]
},
{
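The last hunk replaces the removed `.table()` method with an explicit collapse-then-wrap idiom. A sketch of the equivalent pattern, with a small HoloMap standing in for the guide's `noise_layout`:

```python
import numpy as np
import holoviews as hv

xs = np.linspace(0, np.pi * 2, 20)
noise_hmap = hv.HoloMap({i: hv.Curve((xs, np.sin(xs) + 0.1 * np.random.randn(20)))
                         for i in range(3)}, kdims='Observation')

# .collapse() deconstructs the container into a single Dataset, adding the
# container key dimension as a column; hv.Table supplies the tabular view
# that .table() used to return directly.
table = hv.Table(noise_hmap.collapse())
```
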
14 changes: 7 additions & 7 deletions examples/user_guide/10-Indexing_and_Selecting_Data.ipynb
@@ -429,7 +429,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"A HoloMap provides additional functionality to perform regular sampling on your data. In this case we'll take 3x3 subsamples of each of the Images."
"A `HoloMap` may not be sampled directly, instead we can use the `.apply` method to sample each element in the HoloMap and consequently use the `.collapse` method to produce a single `Dataset`. In this case we'll take 3x3 subsamples of each of the Images:"
]
},
{
@@ -442,9 +442,9 @@
"\n",
"sample_style = dict(edgecolors='k', alpha=1)\n",
"all_samples = obs_hmap.collapse().to.scatter3d().opts(alpha=0.15, xticks=4)\n",
"sampled = obs_hmap.sample((3,3))\n",
"sampled = obs_hmap.apply.sample((3,3)).collapse()\n",
"subsamples = sampled.to.scatter3d().opts(**sample_style)\n",
"all_samples * subsamples + sampled"
"all_samples * subsamples + hv.Table(sampled)"
]
},
{
@@ -460,9 +460,9 @@
"metadata": {},
"outputs": [],
"source": [
"sampled = obs_hmap.sample((3,3), bounds=(2,5,5,10))\n",
"sampled = obs_hmap.apply.sample((3,3), bounds=(2,5,5,10)).collapse()\n",
"subsamples = sampled.to.scatter3d().opts(xticks=4, **sample_style)\n",
"all_samples * subsamples + sampled"
"all_samples * subsamples + hv.Table(sampled)"
]
},
{
@@ -490,10 +490,10 @@
" for i in np.linspace(0.5, 1.5, 3)},\n",
" kdims='Observation')\n",
"all_samples = curve.collapse().to.points()\n",
"sampled = curve.sample([0, 2, 4, 6, 8])\n",
"sampled = curve.apply.sample([0, 2, 4, 6, 8]).collapse()\n",
"sample_points = sampled.to.points(extents=extents)\n",
"sampling = all_samples * sample_points.opts(color='red')\n",
"sampling + sampled"
"sampling + hv.Table(sampled)"
]
},
{
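The three code hunks apply the same substitution: `HoloMap.sample` is gone, and sampling is now routed through the `.apply` accessor with the result flattened by `.collapse()`. The pattern in isolation:

```python
import numpy as np
import holoviews as hv

obs_hmap = hv.HoloMap({i: hv.Image(np.random.rand(10, 10))
                       for i in range(3)}, kdims='Observation')

# .apply.sample maps Dataset.sample over every Image, yielding a HoloMap
# of Tables; .collapse() concatenates them into a single Dataset.
sampled = obs_hmap.apply.sample((3, 3)).collapse()
hv.Table(sampled)
```
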
3 changes: 1 addition & 2 deletions examples/user_guide/12-Responding_to_Events.ipynb
@@ -28,8 +28,7 @@
"\n",
"In this user guide we will explore the HoloViews streams system that allows *any* sort of value to be supplied from *anywhere*. This system opens a huge set of new possible visualization types, including continuously updating plots that reflect live data as well as dynamic visualizations that can be interacted with directly, as described in the [Custom Interactivity](./13-Custom_Interactivity.ipynb) guide.\n",
"\n",
"<center><div class=\"alert alert-info\" role=\"alert\">To use visualize and use a <b>DynamicMap</b> you need to be running a live Jupyter server.<br>This user guide assumes that it will be run in a live notebook environment.<br>\n",
"When viewed statically, DynamicMaps will only show the first available Element.<br></div></center>"
"<center><div class=\"alert alert-info\" role=\"alert\">To use visualize and use a <b>DynamicMap</b> you need to be running a live Jupyter server.<br>When viewing this user guide as part of the documentation DynamicMaps will be sampled with a limited number of states.<br></div></center>"
]
},
{
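For context, the streams system described above supplies values to a DynamicMap from outside its key dimensions; a minimal sketch:

```python
import holoviews as hv
from holoviews import streams
hv.extension('bokeh')

# PointerXY supplies cursor coordinates from the plot itself; the callback
# re-runs whenever the pointer moves, updating the plot in place.
pointer = streams.PointerXY(x=0, y=0)
dmap = hv.DynamicMap(lambda x, y: hv.Points([(x, y)]), streams=[pointer])
```
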
4 changes: 4 additions & 0 deletions examples/user_guide/15-Large_Data.ipynb
@@ -599,6 +599,10 @@
"\n",
"The expensive operations are all of type `ResamplingOperation`, which has a parameter `precompute` (see `hv.help(hv.operation.datashader.rasterize)`, etc.) Precompute can be used to get faster performance in interactive usage by caching the last set of data used in plotting (*after* any transformations needed) and reusing it when it is requested again. This is particularly useful when your data is not in one of the supported data formats already and needs to be converted. `precompute` is False by default, because it requires using memory to store the cached data, but if you have enough memory, you can enable it so that repeated interactions (such as zooming and panning) will be much faster than the first one. In practice, most Datashader-plots don't need to do extensive precomputing, but enabling it for TriMesh and Polygon plots can greatly speed up interactive usage.\n",
"\n",
"### Use GPU support\n",
"\n",
"Many elements now also support aggregation directly on a GPU-based datastructure such as a [cuDF DataFrame](https://github.com/rapidsai/cudf) or an Xarray DataArray backed by a [cupy](https://github.com/cupy/cupy) array. These data structures can be passed directly to the appropriate HoloViews elements just as you would use a Pandas or other Xarray object. For instance, a cuDF can be used on elements like `hv.Points` and `hv.Curve`, while a cupy-backed DataArray raster or quadmesh can be passed to `hv.QuadMesh` elements. When used with Datashader, the GPU implementation can result in 10-100x speedups, as well as avoiding having to transfer the data out of the GPU for plotting (sending only the final rendered plot out of the GPU's memory). To see which HoloViews elements are supported, see the [datashader performance guide](https://datashader.org/user_guide/Performance.html). As of the Datashader 0.11 release, all point, line, area, and quadmesh aggregations are supported when using a GPU backed datastructure, including raster objects like `hv.Image` if first converted to `hv.Quadmesh`.\n",
"\n",
"### Project data only once\n",
"\n",
"If you are working with geographic data using [GeoViews](http://geoviews.org) that needs to be projected before display and/or before datashading, GeoViews will have to do this every time you update a plot, which can drown out the performance improvement you get by using Datashader. GeoViews allows you to project the entire dataset at once using `gv.operation.project`, and once you do this you should be able to use Datashader at full speed.\n",
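A sketch tying the new GPU section to the precompute advice above. It assumes a CUDA-capable machine with cudf and Datashader 0.11+ installed; `df` is a hypothetical pandas DataFrame with 'x' and 'y' columns:

```python
import cudf
import holoviews as hv
from holoviews.operation.datashader import rasterize
hv.extension('bokeh')

gdf = cudf.from_pandas(df)           # move the data onto the GPU
points = hv.Points(gdf, ['x', 'y'])  # a cuDF is accepted like any DataFrame

# precompute=True caches the transformed data after the first render, so
# repeated zoom/pan interactions skip the conversion step.
agg = rasterize(points, precompute=True)
```
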
2 changes: 1 addition & 1 deletion examples/user_guide/Linked_Brushing.ipynb
@@ -448,7 +448,7 @@
"curve = autompg_ds.aggregate('yr', function=np.mean).to(hv.Curve, 'yr', 'mpg')\n",
"spread = autompg_ds.aggregate('yr', function=np.mean, spreadfn=np.std).to(hv.Spread, 'yr', ['mpg', 'mpg_std'])\n",
"distribution = hv.Distribution(autompg_ds, 'weight')\n",
"img = rasterize(hv.Points(autompg_ds, ['hp', 'displ']), dynamic=False, width=20, height=20)\n",
"img = hd.rasterize(hv.Points(autompg_ds, ['hp', 'displ']), dynamic=False, width=20, height=20)\n",
"heatmap = hv.HeatMap(autompg_ds, ['yr', 'origin'], 'accel').aggregate(function=np.mean)\n",
"hextiles = hv.HexTiles(autompg_ds, ['weight', 'displ'], []).opts(gridsize=20)\n",
"hist = autompg_ds.hist('displ', adjoin=False, normed=False)\n",
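The one-line fix qualifies `rasterize` with the module alias used elsewhere in the notebook, presumably along the lines of:

```python
import holoviews.operation.datashader as hd

img = hd.rasterize(hv.Points(autompg_ds, ['hp', 'displ']),
                   dynamic=False, width=20, height=20)
```
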
12 changes: 12 additions & 0 deletions holoviews/core/accessors.py
@@ -213,6 +213,7 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
for more information.
"""
kwargs['_method_args'] = (dimensions, function, spreadfn)
+ kwargs['per_element'] = True
return self.__call__('aggregate', **kwargs)

def opts(self, *args, **kwargs):
@@ -231,8 +232,19 @@ def reduce(self, dimensions=[], function=None, spreadfn=None, **kwargs):
for more information.
"""
kwargs['_method_args'] = (dimensions, function, spreadfn)
+ kwargs['per_element'] = True
return self.__call__('reduce', **kwargs)

+ def sample(self, samples=[], bounds=None, **kwargs):
+ """Samples element values at supplied coordinates.
+ See :py:meth:`Dataset.sample` and :py:meth:`Apply.__call__`
+ for more information.
+ """
+ kwargs['_method_args'] = (samples, bounds)
+ kwargs['per_element'] = True
+ return self.__call__('sample', **kwargs)

def select(self, **kwargs):
"""Applies a selection to all ViewableElement objects.
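The `per_element=True` flag added to these accessor methods makes them apply to each individual Element rather than to a composite container as a whole. A hypothetical usage sketch:

```python
import numpy as np
import holoviews as hv

hmap = hv.HoloMap({i: hv.Image(np.random.rand(10, 10)) for i in range(3)})

# Each Image is sampled separately, producing a HoloMap of sample Tables.
tables = hmap.apply.sample((3, 3))
```
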
6 changes: 4 additions & 2 deletions holoviews/core/data/__init__.py
@@ -721,6 +721,7 @@ def sample(self, samples=[], bounds=None, closest=True, **kwargs):
# may be replaced with more general handling
# see https://github.com/ioam/holoviews/issues/1173
from ...element import Table, Curve
+ datatype = ['dataframe', 'dictionary', 'dask']
if len(samples) == 1:
sel = {kd.name: s for kd, s in zip(self.kdims, samples[0])}
dims = [kd for kd, v in sel.items() if not np.isscalar(v)]
@@ -737,7 +738,7 @@ def sample(self, samples=[], bounds=None, closest=True, **kwargs):
if np.isscalar(selection):
selection = [samples[0]+(selection,)]
else:
- reindexed = selection.clone(new_type=Dataset).reindex(kdims)
+ reindexed = selection.clone(new_type=Dataset, datatype=datatype).reindex(kdims)
selection = tuple(reindexed.columns(kdims+self.vdims).values())

datatype = list(util.unique_iterator(self.datatype+['dataframe', 'dict']))
@@ -754,7 +755,8 @@ def sample(self, samples=[], bounds=None, closest=True, **kwargs):
except NotImplementedError:
pass
samples = [util.wrap_tuple(s) for s in samples]
- return self.clone(self.interface.sample(self, samples), new_type=Table)
+ sampled = self.interface.sample(self, samples)
+ return self.clone(sampled, new_type=Table, datatype=datatype)


def reduce(self, dimensions=[], function=None, spreadfn=None, **reductions):
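Forcing `datatype` here means that sampling a gridded Dataset returns a Table backed by a columnar interface (dataframe, dictionary or dask) rather than inheriting the gridded one. Roughly:

```python
import numpy as np
import holoviews as hv

img = hv.Image(np.random.rand(10, 10))

# Point samples come back as an hv.Table of (x, y, z) rows stored in a
# columnar interface, never a gridded one.
samples = img.sample([(0, 0), (0.25, 0.25)])
```
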
16 changes: 14 additions & 2 deletions holoviews/core/data/xarray.py
@@ -1,6 +1,8 @@
from __future__ import absolute_import

+ import sys
+ import types

from collections import OrderedDict

import numpy as np
@@ -568,14 +570,24 @@ def length(cls, dataset):

@classmethod
def dframe(cls, dataset, dimensions):
- data = dataset.data.to_dataframe().reset_index()
+ import xarray as xr
+ if cls.packed(dataset):
+ bands = {vd.name: dataset.data[..., i].drop('band')
+ for i, vd in enumerate(dataset.vdims)}
+ data = xr.Dataset(bands)
+ else:
+ data = dataset.data
+ data = data.to_dataframe().reset_index()
if dimensions:
return data[dimensions]
return data

@classmethod
def sample(cls, dataset, samples=[]):
- raise NotImplementedError
+ names = [kd.name for kd in dataset.kdims]
+ samples = [dataset.data.sel(**{k: [v] for k, v in zip(names, s)}).to_dataframe().reset_index()
+ for s in samples]
+ return util.pd.concat(samples)

@classmethod
def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
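The new xarray `sample` implementation selects each coordinate with `.sel` and concatenates the resulting rows. A standalone sketch of the same idea:

```python
import pandas as pd
import xarray as xr

da = xr.DataArray([[1, 2], [3, 4]], coords={'y': [0, 1], 'x': [0, 1]},
                  dims=['y', 'x'], name='z')

# List-valued indexers keep the coordinates as columns when converting
# each selected sample to a DataFrame.
rows = [da.sel(x=[x], y=[y]).to_dataframe().reset_index()
        for x, y in [(0, 1), (1, 0)]]
sampled = pd.concat(rows)
```
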
6 changes: 6 additions & 0 deletions holoviews/core/ndmapping.py
@@ -887,6 +887,12 @@ def collapse(self, dimensions=None, function=None, spreadfn=None, **kwargs):

collapsed = groups.clone(shared_data=False)
for key, group in groups.items():
+ last = group.values()[-1]
+ if isinstance(last, UniformNdMapping):
+ group_data = OrderedDict([
+ (k, v.collapse()) for k, v in group.items()
+ ])
+ group = group.clone(group_data)
if hasattr(group.values()[-1], 'interface'):
group_data = concat(group)
if function:
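The added branch lets `collapse` flatten nested mappings by collapsing each inner container first, mirroring the new test added in testndmapping.py below:

```python
from holoviews import Dataset
from holoviews.core.ndmapping import UniformNdMapping

inner1 = UniformNdMapping({1: Dataset([(1, 2)], ['x', 'y'])}, 'Y')
inner2 = UniformNdMapping({1: Dataset([(3, 4)], ['x', 'y'])}, 'Y')
outer = UniformNdMapping({1: inner1, 2: inner2}, 'X')

# Both levels of keys ('X' and 'Y') become columns of the collapsed result.
ds = outer.collapse()
```
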
62 changes: 1 addition & 61 deletions holoviews/element/raster.py
@@ -116,7 +116,7 @@ def collapse_data(cls, data_list, function, kdims=None, **kwargs):
return function(np.dstack(data_list), axis=-1, **kwargs)


- def sample(self, samples=[], **sample_values):
+ def sample(self, samples=[], bounds=None, **sample_values):
"""
Sample the Raster along one or both of its dimensions,
returning a reduced dimensionality type, which is either
@@ -131,8 +131,6 @@ def sample(self, samples=[], **sample_values):

params = dict(self.param.get_param_values(onlychanged=True),
vdims=self.vdims)
- params.pop('extents', None)
- params.pop('bounds', None)
if len(sample_values) == self.ndims or len(samples):
if not len(samples):
samples = zip(*[c if isinstance(c, list) else [c] for _, c in
@@ -486,64 +484,6 @@ def select(self, selection_specs=None, **selection):
ydensity=self.ydensity, bounds=bounds)


- def sample(self, samples=[], **kwargs):
- """
- Allows sampling of an Image as an iterator of coordinates
- matching the key dimensions, returning a new object containing
- just the selected samples. Alternatively may supply kwargs to
- sample a coordinate on an object. On an Image the coordinates
- are continuously indexed and will always snap to the nearest
- coordinate.
- """
- kwargs = {k: v for k, v in kwargs.items() if k != 'closest'}
- if kwargs and samples:
- raise Exception('Supply explicit list of samples or kwargs, not both.')
- elif kwargs:
- sample = [slice(None) for _ in range(self.ndims)]
- for dim, val in kwargs.items():
- sample[self.get_dimension_index(dim)] = val
- samples = [tuple(sample)]
-
- # If a 1D cross-section of 2D space return Curve
- shape = self.interface.shape(self, gridded=True)
- if len(samples) == 1:
- dims = [kd for kd, v in zip(self.kdims, samples[0])
- if not (np.isscalar(v) or isinstance(v, util.datetime_types))]
- if len(dims) == 1:
- kdims = [self.get_dimension(kd) for kd in dims]
- sample = tuple(np.datetime64(s) if isinstance(s, util.datetime_types) else s
- for s in samples[0])
- sel = {kd.name: s for kd, s in zip(self.kdims, sample)}
- dims = [kd for kd, v in sel.items() if not np.isscalar(v)]
- selection = self.select(**sel)
- selection = tuple(selection.columns(kdims+self.vdims).values())
- datatype = list(util.unique_iterator(self.datatype+['dataframe', 'dict']))
- return self.clone(selection, kdims=kdims, new_type=Curve,
- datatype=datatype)
- else:
- kdims = self.kdims
- else:
- kdims = self.kdims
-
- xs, ys = zip(*samples)
- if isinstance(xs[0], util.datetime_types):
- xs = np.array(xs).astype(np.datetime64)
- if isinstance(ys[0], util.datetime_types):
- ys = np.array(ys).astype(np.datetime64)
- yidx, xidx = self.sheet2matrixidx(np.array(xs), np.array(ys))
- yidx = shape[0]-yidx-1
-
- # Detect out-of-bounds indices
- out_of_bounds= (yidx<0) | (xidx<0) | (yidx>=shape[0]) | (xidx>=shape[1])
- if out_of_bounds.any():
- coords = [samples[idx] for idx in np.where(out_of_bounds)[0]]
- raise IndexError('Coordinate(s) %s out of bounds for %s with bounds %s' %
- (coords, type(self).__name__, self.bounds.lbrt()))
-
- data = self.interface.ndloc(self, (yidx, xidx))
- return self.clone(data, new_type=Table, datatype=['dataframe', 'dictionary'])


def closest(self, coords=[], **kwargs):
"""
Given a single coordinate or multiple coordinates as
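With the Image-specific override removed, sampling flows through the general `Dataset.sample` shown earlier, preserving the old behaviour. For instance:

```python
import numpy as np
import holoviews as hv

img = hv.Image(np.random.rand(10, 10))

curve = img.sample(x=0)                    # 1D cross-section -> hv.Curve
table = img.sample([(0, 0), (0.1, 0.2)])   # point samples -> hv.Table
```
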
6 changes: 4 additions & 2 deletions holoviews/element/util.py
@@ -8,8 +8,10 @@
from ..core.data import default_datatype
from ..core.operation import Operation
from ..core.sheetcoords import Slice
- from ..core.util import (is_nan, sort_topologically, one_to_one,
- cartesian_product, is_cyclic, datetime_types)
+ from ..core.util import (
+ cartesian_product, datetime_types, is_cyclic, is_nan,
+ one_to_one, sort_topologically
+ )

try:
import pandas as pd
16 changes: 14 additions & 2 deletions holoviews/tests/core/testndmapping.py
@@ -1,7 +1,9 @@
from collections import OrderedDict

from holoviews.core import Dimension
- from holoviews.core.ndmapping import MultiDimensionalMapping, NdMapping
+ from holoviews.core.ndmapping import (
+ MultiDimensionalMapping, NdMapping, UniformNdMapping
+ )
from holoviews.element.comparison import ComparisonTestCase
from holoviews import HoloMap, Dataset
import numpy as np
@@ -200,6 +202,17 @@ def test_setitem_nested_2(self):
self.assertEqual(ndmap['A'].data, nested_clone.data)


+ class UniformNdMappingTest(ComparisonTestCase):
+
+ def test_collapse_nested(self):
+ inner1 = UniformNdMapping({1: Dataset([(1, 2)], ['x', 'y'])}, 'Y')
+ inner2 = UniformNdMapping({1: Dataset([(3, 4)], ['x', 'y'])}, 'Y')
+ outer = UniformNdMapping({1: inner1, 2: inner2}, 'X')
+ collapsed = outer.collapse()
+ expected = Dataset([(1, 1, 1, 2), (2, 1, 3, 4)], ['X', 'Y', 'x', 'y'])
+ self.assertEqual(collapsed, expected)


class HoloMapTest(ComparisonTestCase):

def setUp(self):
@@ -232,7 +245,6 @@ def test_columns_collapse_heterogeneous(self):
expected = Dataset({'x':self.xs, 'y': self.ys * 4.5}, kdims=['x'], vdims=['y'])
self.compare_dataset(collapsed, expected)


def test_columns_sample_homogeneous(self):
samples = self.columns.sample([0, 5, 10]).dimension_values('y')
self.assertEqual(samples, np.array([0, 10, 20]))
2 changes: 1 addition & 1 deletion setup.py
@@ -49,7 +49,7 @@
'cyordereddict', 'ipython==5.4.1']

extras_require['doc'] = extras_require['examples'] + [
- 'nbsite>0.5.2', 'sphinx', 'sphinx_holoviz_theme', 'mpl_sample_data', 'awscli']
+ 'nbsite>0.5.2', 'sphinx', 'sphinx_holoviz_theme', 'mpl_sample_data', 'awscli', 'pscript']

extras_require['build'] = ['param >=1.7.0', 'setuptools >=30.3.0', 'pyct >=0.4.4']

