Optimization for heatmap aggregation with pandas #1174

Merged (3 commits, Mar 5, 2017)
Changes from 2 commits

holoviews/core/data/__init__.py (8 additions & 4 deletions)

@@ -516,13 +516,17 @@ def get_dimension_type(self, dim):
         return self.interface.dimension_type(self, dim_obj)


-    def dframe(self, dimensions=None):
+    def dframe(self, dimensions=None, copy=True):

Contributor:

I'm not sure the copy argument makes much sense if the element isn't already using a dataframe-based interface - for other interfaces, don't you always have to create a new dataframe, which would be the same as copy being fixed to True?

Member Author:

That's true, it's more like avoid_copy, but I think providing a consistent API to get hold of a dataframe with minimal overhead is useful.

Member Author:

That said, I'd also be fine with having a utility for it instead.

"""
Returns the data in the form of a DataFrame.
Returns the data in the form of a DataFrame. Supplying a list
of dimensions filters the dataframe. If the data is already
a DataFrame copy=False may be supplied to avoid making a copy.
"""
if dimensions:
if pd is None:
raise Exception("Cannot return data as dataframe, pandas is not available")
elif dimensions:
dimensions = [self.get_dimension(d, strict=True).name for d in dimensions]
return self.interface.dframe(self, dimensions)
return self.interface.dframe(self, dimensions, copy)


def columns(self, dimensions=None):
Expand Down
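
For orientation, a minimal usage sketch of the signature added above; the element, data and dimension names are illustrative, and the behaviour described assumes the copy keyword as it stands at this commit:

    import numpy as np
    import holoviews as hv

    # Force a pandas-backed element so the no-copy path can apply.
    points = hv.Points((np.arange(5), np.arange(5) * 2.0), datatype=['dataframe'])

    df_copy = points.dframe()             # default copy=True: a private copy
    df_view = points.dframe(copy=False)   # the element's own DataFrame, no copy
    print(df_view is points.data)         # True for dataframe-backed data
    print(points.dframe(['x', 'y']))      # dimension filtering always copies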

holoviews/core/data/dask.py (1 addition & 1 deletion)

@@ -235,7 +235,7 @@ def concat(cls, columns_objs):
         return dd.concat([col.data for col in cast_objs])

     @classmethod
-    def dframe(cls, columns, dimensions):
+    def dframe(cls, columns, dimensions, copy):
         return columns.data.compute()

     @classmethod
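
Since DaskInterface.dframe just computes the dask graph, the new copy flag is accepted but has nothing to skip there, which is the reviewer's point above. A small sketch of why, using toy data:

    import pandas as pd
    import dask.dataframe as dd

    pdf = pd.DataFrame({'x': [0, 1, 2], 'y': [3.0, 4.0, 5.0]})
    ddf = dd.from_pandas(pdf, npartitions=2)

    # compute() always materialises a brand-new pandas DataFrame, so there is
    # no stored frame that copy=False could hand back without copying.
    print(ddf.compute() is pdf)            # False
    print(ddf.compute() is ddf.compute())  # False: a fresh object every time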

holoviews/core/data/interface.py (2 additions & 2 deletions)

@@ -192,8 +192,8 @@ def array(cls, dataset, dimensions):
         return Element.array(dataset, dimensions)

     @classmethod
-    def dframe(cls, dataset, dimensions):
-        return Element.dframe(dataset, dimensions)
+    def dframe(cls, dataset, dimensions, copy):
+        return Element.dframe(dataset, dimensions, copy)

     @classmethod
     def columns(cls, dataset, dimensions):

holoviews/core/data/pandas.py (3 additions & 3 deletions)

@@ -223,13 +223,13 @@ def add_dimension(cls, columns, dimension, dim_pos, values, vdim):


     @classmethod
-    def dframe(cls, columns, dimensions):
+    def dframe(cls, columns, dimensions, copy):
         if dimensions:
             dimensions = [columns.get_dimension(d, strict=True).name
                           for d in dimensions]
             return columns.reindex(dimensions).data.copy()
-        else:
-            return columns.data.copy()
+        else:
+            return columns.data.copy() if copy else columns.data


 Interface.register(PandasInterface)
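
To make the aliasing question from the review thread concrete, here is a self-contained paraphrase of the branch added to PandasInterface.dframe, written in plain pandas rather than the holoviews code itself:

    import pandas as pd

    # Stand-in for the DataFrame held by the interface (columns.data).
    stored = pd.DataFrame({'x': [0, 1, 2], 'z': [10.0, 20.0, 30.0]})

    def dframe(dimensions=None, copy=True):
        if dimensions:
            return stored[dimensions].copy()       # filtering always copies
        return stored.copy() if copy else stored   # only this path can alias

    alias = dframe(copy=False)
    print(alias is stored)     # True: in-place edits would show up on the element
    print(dframe() is stored)  # False: the default still returns a private copy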

holoviews/core/data/xarray.py (1 addition & 1 deletion)

@@ -239,7 +239,7 @@ def length(cls, dataset):
         return np.product(dataset[dataset.vdims[0].name].shape)

     @classmethod
-    def dframe(cls, dataset, dimensions):
+    def dframe(cls, dataset, dimensions, copy):
         if dimensions:
             return dataset.reindex(columns=dimensions).data.to_dataframe().reset_index(dimensions)
         else:

holoviews/core/element.py (1 addition & 1 deletion)

@@ -144,7 +144,7 @@ def table(self, datatype=None):
         return Table(self, **(dict(datatype=datatype) if datatype else {}))


-    def dframe(self, dimensions=None):
+    def dframe(self, dimensions=None, copy=True):
         import pandas as pd
         column_names = dimensions if dimensions else self.dimensions(label=True)
         dim_vals = OrderedDict([(dim, self[dim]) for dim in column_names])
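
The generic Element.dframe above always assembles a fresh DataFrame from the dimension values, which is why the reviewer notes that copy is effectively fixed to True for non-dataframe interfaces. A stripped-down sketch of that construction with stand-in values:

    from collections import OrderedDict
    import pandas as pd

    # Stand-ins for self.dimensions(label=True) and self[dim].
    column_names = ['x', 'y']
    values = {'x': [0, 1, 2], 'y': [3.0, 4.0, 5.0]}

    dim_vals = OrderedDict([(dim, values[dim]) for dim in column_names])
    df = pd.DataFrame(dim_vals)   # freshly allocated on every call; nothing to alias
    print(df)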

holoviews/element/util.py (14 additions & 3 deletions)

@@ -5,8 +5,13 @@

 from ..core import Dataset, OrderedDict
 from ..core.operation import ElementOperation
-from ..core.util import (pd, is_nan, sort_topologically,
-                         cartesian_product, is_cyclic, one_to_one)
+from ..core.util import (is_nan, sort_topologically, one_to_one,
+                         cartesian_product, is_cyclic, get_df_data)
+
+try:
+    import pandas as pd
+except:
+    pd = None

 try:
     import dask

@@ -134,7 +139,13 @@ def _aggregate_dataset(self, obj, xcoords, ycoords):
         dtype = 'dataframe' if pd else 'dictionary'
         dense_data = Dataset(data, kdims=obj.kdims, vdims=obj.vdims, datatype=[dtype])
         concat_data = obj.interface.concatenate([dense_data, obj], datatype=[dtype])
-        agg = concat_data.reindex([xdim, ydim], vdims).aggregate([xdim, ydim], reduce_fn)
+        reindexed = concat_data.reindex([xdim, ydim], vdims)
+        if pd:

Contributor:

Why not use reindexed.interface.dframe(dimensions=None, copy=False) instead of exposing the copy keyword argument at the element level? For copy=False to work, you are already assuming a dataframe type interface is being used...

Contributor:

I suppose the other thing you could do is complain if copy=False is passed to the dframe method of any interface that isn't based on dataframes.

Member Author:

> For copy=False to work, you are already assuming a dataframe type interface is being used...

Because then I need conditional branches for the "is already dataframe" and "convert to dataframe" paths again. I guess I agree copy is confusing because you might assume you can mutate the dataframe and have an effect on the original element if you don't make a copy, when the real point of it is to avoid making pointless copies.

Contributor:

Would there be any harm in the dataframe interfaces just avoiding pointless copies automatically? Then it doesn't have to be something the user ever needs to think about...

Member Author:

In my usage of dframe I often create it and then assign to it, so that would be a bit of a pain.

+            df = reindexed.dframe(copy=False)
+            df = df.groupby([xdim, ydim], sort=False).first().reset_index()
+            agg = reindexed.clone(df)
+        else:
+            agg = reindexed.aggregate([xdim, ydim], reduce_fn)

         # Convert data to a gridded dataset
         grid_data = {xdim: xcoords, ydim: ycoords}
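
The pandas branch above swaps the generic aggregate for a single groupby. A self-contained sketch of why first() does the right thing for the dense heatmap grid (toy data, not the holoviews objects):

    import numpy as np
    import pandas as pd

    xdim, ydim = 'x', 'y'

    # Dense NaN-filled grid rows concatenated ahead of the actual samples,
    # mirroring concatenate([dense_data, obj]) in _aggregate_dataset.
    dense = pd.DataFrame({'x': np.repeat([0, 1], 2),
                          'y': np.tile([0, 1], 2),
                          'z': np.nan})
    samples = pd.DataFrame({'x': [0, 1], 'y': [1, 0], 'z': [10.0, 20.0]})
    concat = pd.concat([dense, samples])

    # groupby().first() keeps the first non-null value per (x, y) cell, so real
    # samples win over the NaN placeholders and empty cells stay NaN.
    agg = concat.groupby([xdim, ydim], sort=False).first().reset_index()
    print(agg)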

holoviews/operation/datashader.py (3 additions & 8 deletions)

@@ -20,23 +20,19 @@

 from ..core import (ElementOperation, Element, Dimension, NdOverlay,
                     Overlay, CompositeOverlay, Dataset)
-from ..core.data import ArrayInterface, PandasInterface, DaskInterface
+from ..core.data import PandasInterface, DaskInterface
 from ..core.util import get_param_values, basestring
 from ..element import GridImage, Image, Path, Curve, Contours, RGB
 from ..streams import RangeXY

-DF_INTERFACES = [PandasInterface, DaskInterface]

 @dispatch(Element)
 def discover(dataset):
     """
     Allows datashader to correctly discover the dtypes of the data
     in a holoviews Element.
     """
-    if dataset.interface in DF_INTERFACES:
-        return dsdiscover(dataset.data)
-    else:
-        return dsdiscover(dataset.dframe())
+    return dsdiscover(dataset.dframe(copy=False))


 @bypixel.pipeline.register(Element)

@@ -135,7 +131,6 @@ def get_agg_data(cls, obj, category=None):
         kdims = obj.kdims
         vdims = obj.vdims
         x, y = obj.dimensions(label=True)[:2]
-        is_df = lambda x: isinstance(x, Dataset) and x.interface in DF_INTERFACES
         if isinstance(obj, Path):
             glyph = 'line'
             for p in obj.data:

@@ -146,7 +141,7 @@
         elif isinstance(obj, CompositeOverlay):
             for key, el in obj.data.items():
                 x, y, element, glyph = cls.get_agg_data(el)
-                df = element.data if is_df(element) else element.dframe()
+                df = element.dframe(copy=False)
                 if isinstance(obj, NdOverlay):
                     df = df.assign(**dict(zip(obj.dimensions('key', True), key)))
                 paths.append(df)
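
The discover and get_agg_data changes drop the DF_INTERFACES special case because dframe(copy=False) now gives every element a cheap DataFrame view. A schematic of the before/after control flow, written as plain functions rather than the datashader API:

    def to_dataframe_old(element, df_interfaces):
        # Old approach: sniff the interface and reuse .data only for known
        # dataframe-backed interfaces, otherwise pay for a converting copy.
        if element.interface in df_interfaces:
            return element.data
        return element.dframe()

    def to_dataframe_new(element):
        # New approach: a single code path; dataframe-backed interfaces skip
        # the copy internally, everything else converts as before.
        return element.dframe(copy=False)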

holoviews/plotting/bokeh/chart.py (1 addition & 1 deletion)

@@ -579,7 +579,7 @@ class BoxPlot(ChartPlot):
     def _init_chart(self, element, ranges):
         properties = self.style[self.cyclic_index]
         label = element.dimensions('key', True)
-        dframe = element.dframe()
+        dframe = element.dframe(copy=False)

         # Fix for displaying datetimes which are not handled by bokeh
         for kd in element.kdims: