Commit 0a66405: Merge b0b252c into 9f5425e

poplarShift committed Mar 9, 2020 (2 parents: 9f5425e + b0b252c)
Showing 20 changed files with 436 additions and 72 deletions.
29 changes: 27 additions & 2 deletions holoviews/core/accessors.py
@@ -205,11 +205,10 @@ def apply_function(object, **kwargs):
                    mapped.append((k, new_val))
                return self._obj.clone(mapped, link=link_inputs)

    def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
        """Applies an aggregate function to all ViewableElements.

-        See :py:meth:`Dimensioned.opts` and :py:meth:`Apply.__call__`
+        See :py:meth:`Dimensioned.aggregate` and :py:meth:`Apply.__call__`
        for more information.
        """
        kwargs['_method_args'] = (dimensions, function, spreadfn)
@@ -222,6 +221,14 @@ def opts(self, *args, **kwargs):
        See :py:meth:`Dimensioned.opts` and :py:meth:`Apply.__call__`
        for more information.
        """
        from ..util.transform import dim
        from ..streams import Params
        params = {}
        for arg in kwargs.values():
            if isinstance(arg, dim):
                params.update(arg.params)
        streams = Params.from_params(params, watch_only=True)
        kwargs['streams'] = kwargs.get('streams', []) + streams
        kwargs['_method_args'] = args
        return self.__call__('opts', **kwargs)
@@ -253,6 +260,24 @@ def select(self, **kwargs):
"""
return self.__call__('select', **kwargs)

def transform(self, *args, **kwargs):
"""Applies transforms to all Datasets.
See :py:meth:`Dataset.transform` and :py:meth:`Apply.__call__`
for more information.
"""
from ..util.transform import dim
from ..streams import Params
params = {}
for _, arg in list(args)+list(kwargs.items()):
if isinstance(arg, dim):
params.update(arg.params)
streams = Params.from_params(params, watch_only=True)
kwargs['streams'] = kwargs.get('streams', []) + streams
kwargs['_method_args'] = args
kwargs['per_element'] = True
return self.__call__('transform', **kwargs)


@add_metaclass(AccessorPipelineMeta)
class Redim(object):
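
The new Apply.transform accessor collects any param.Parameter instances referenced by the supplied dim expressions and registers them as watch-only Params streams, so the resulting pipeline re-evaluates whenever those parameters change. A minimal sketch of the intended usage, assuming a Panel slider as the parameter source (the widget and data are illustrative, not part of this commit):

import numpy as np
import holoviews as hv
import panel as pn

ds = hv.Dataset((np.arange(100), np.random.randn(100).cumsum()), 'x', 'y')
curve = hv.Curve(ds)

# A widget parameter referenced inside a dim expression
scale = pn.widgets.FloatSlider(name='scale', start=0.1, end=10.0, value=1.0)

# The accessor extracts scale.param.value from the expression and watches it,
# so dragging the slider re-applies the transform
scaled = curve.apply.transform(y=hv.dim('y') * scale.param.value)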
103 changes: 94 additions & 9 deletions holoviews/core/data/__init__.py
@@ -14,7 +14,7 @@
from .. import util
from ..accessors import Redim
from ..dimension import (
-    Dimension, process_dimensions, Dimensioned, LabelledData
+    Dimension, Dimensioned, LabelledData, dimension_name, process_dimensions
)
from ..element import Element
from ..ndmapping import OrderedDict, MultiDimensionalMapping
@@ -281,6 +281,16 @@ class Dataset(Element):
    _vdim_reductions = {}
    _kdim_reductions = {}

    def __new__(cls, data=None, kdims=None, vdims=None, **kwargs):
        """
        Allows casting a DynamicMap to an Element class like hv.Curve, by applying the
        class to each underlying element.
        """
        if isinstance(data, DynamicMap):
            return data.apply(cls, per_element=True, kdims=kdims, vdims=vdims, **kwargs)
        else:
            return super(Dataset, cls).__new__(cls)

    def __init__(self, data, kdims=None, vdims=None, **kwargs):
        from ...operation.element import (
            chain as chain_op, factory
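
The __new__ override makes Dataset subclasses such as hv.Curve act element-wise when handed a DynamicMap, delegating to .apply with per_element=True. A minimal sketch of what this enables (the sine DynamicMap is an illustrative assumption):

import numpy as np
import holoviews as hv

def sine(f):
    xs = np.linspace(0, 1, 100)
    return hv.Table((xs, np.sin(f * xs)), 'x', 'y')

dmap = hv.DynamicMap(sine, kdims='f').redim.range(f=(1, 10))

# Casting the DynamicMap to Curve now applies hv.Curve to each element
curves = hv.Curve(dmap)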
@@ -446,15 +456,14 @@ def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs):
        Requires the dimension name or object, the desired position in
        the key dimensions and a key value scalar or array of values,
-        matching the length o shape of the Dataset.
+        matching the length or shape of the Dataset.

        Args:
            dimension: Dimension or dimension spec to add
-            dim_pos (int) Integer index to insert dimension at
+            dim_pos (int): Integer index to insert dimension at
            dim_val (scalar or ndarray): Dimension value(s) to add
            vdim: Disabled, this type does not have value dimensions
            **kwargs: Keyword arguments passed to the cloned element

        Returns:
            Cloned object containing the new dimension
        """
@@ -798,24 +807,37 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
"""Aggregates data on the supplied dimensions.
Aggregates over the supplied key dimensions with the defined
function.
function or dim_transform specified as a tuple of the transformed
dimension name and dim transform.
Args:
dimensions: Dimension(s) to aggregate on
Default to all key dimensions
function: Aggregation function to apply, e.g. numpy.mean
function: Aggregation function or transform to apply
Supports both simple functions and dimension transforms
spreadfn: Secondary reduction to compute value spread
Useful for computing a confidence interval, spread, or
standard deviation.
**kwargs: Keyword arguments passed to the aggregation function
**kwargs: Keyword arguments either passed to the aggregation function
or to create new names for the transformed variables
Returns:
Returns the aggregated Dataset
"""
if function is None:
raise ValueError("The aggregate method requires a function to be specified")
from ...util.transform import dim
if dimensions is None: dimensions = self.kdims
elif not isinstance(dimensions, list): dimensions = [dimensions]
if isinstance(function, tuple) or any(isinstance(v, dim) for v in kwargs.values()):
dataset = self.clone(new_type=Dataset)
if dimensions:
dataset = dataset.groupby(dimensions)
args = () if function is None else (function,)
transformed = dataset.apply.transform(*args, drop=True, **kwargs)
if not isinstance(transformed, Dataset):
transformed = transformed.collapse()
return transformed.clone(new_type=type(self))

# Handle functions
kdims = [self.get_dimension(d, strict=True) for d in dimensions]
if not len(self):
if spreadfn:
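
With this change, aggregate accepts dim transforms in addition to plain functions: either as a (name, transform) tuple for the function argument or as keyword arguments naming the transformed outputs. A hedged sketch under assumed sample data (the output names vmean/vstd are illustrative, and it assumes the method-style dim('value').mean()/.std() transforms):

import numpy as np
import holoviews as hv
from holoviews import dim

ds = hv.Dataset((np.repeat(['A', 'B'], 50), np.random.randn(100)),
                ['category'], ['value'])

# Existing form: aggregate with a plain function
means = ds.aggregate('category', function=np.mean)

# New form: dim transforms passed as kwargs that name the new variables
stats = ds.aggregate('category',
                     vmean=dim('value').mean(),
                     vstd=dim('value').std())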
@@ -908,6 +930,69 @@ def load_subset(*args):
        return self.interface.groupby(self, dim_names, container_type,
                                      group_type, **kwargs)

    def transform(self, *args, **kwargs):
        """Transforms the Dataset according to a dimension transform.

        Transforms may be supplied as tuples consisting of the
        dimension(s) and the dim transform to apply or keyword
        arguments mapping from dimension(s) to dim transforms. If the
        arg or kwarg declares multiple dimensions the dim transform
        should return a tuple of values for each.

        A transform may override an existing dimension or add a new
        one in which case it will be added as an additional value
        dimension.

        Args:
            args: Specify the output arguments and transforms as a
                tuple of dimension specs and dim transforms
            drop (bool): Whether to drop all variables not part of the transform
            keep_index (bool): Whether to apply the transform on the
                underlying datastructure with its index intact, e.g. a
                pandas.Series or xarray.DataArray
            kwargs: Specify new dimensions in the form new_dim=dim_transform

        Returns:
            Transformed dataset with new dimensions
        """
        drop = kwargs.pop('drop', False)
        keep_index = kwargs.pop('keep_index', False)
        transforms = OrderedDict()
        for s, transform in list(args)+list(kwargs.items()):
            transforms[util.wrap_tuple(s)] = transform

        new_data = OrderedDict()
        for signature, transform in transforms.items():
            applied = transform.apply(
                self, compute=False, keep_index=keep_index
            )
            if len(signature) == 1:
                new_data[signature[0]] = applied
            else:
                for s, vals in zip(signature, applied):
                    new_data[s] = vals

        new_dims = []
        for d in new_data:
            if self.get_dimension(d) is None:
                new_dims.append(d)

        ds = self
        if ds.interface.datatype in ('image', 'array'):
            ds = ds.clone(datatype=[dt for dt in ds.datatype if dt != ds.interface.datatype])

        if drop:
            kdims = [ds.get_dimension(d) for d in new_data if d in ds.kdims]
            vdims = [ds.get_dimension(d) or d for d in new_data if d not in ds.kdims]
            data = OrderedDict([(dimension_name(d), values) for d, values in new_data.items()])
            return ds.clone(data, kdims=kdims, vdims=vdims)
        else:
            new_data = OrderedDict([(dimension_name(d), values) for d, values in new_data.items()])
            data = ds.interface.assign(ds, new_data)
            data, drop = data if isinstance(data, tuple) else (data, [])
            kdims = [kd for kd in self.kdims if kd.name not in drop]
            return ds.clone(data, kdims=kdims, vdims=ds.vdims+new_dims)

    def __len__(self):
        "Number of values in the Dataset."
        return self.interface.length(self)
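
In short, transform evaluates each dim expression against the Dataset and either appends the results as new value dimensions or, with drop=True, keeps only the transformed columns. A minimal sketch with assumed toy data:

import numpy as np
import holoviews as hv
from holoviews import dim

ds = hv.Dataset((np.arange(10), np.random.randn(10)), ['x'], ['y'])

# Keyword form: add a new value dimension computed from existing ones
with_z = ds.transform(z=dim('x') * dim('y'))

# drop=True keeps only the dimensions named in the transform
only_z = ds.transform(z=dim('x') * dim('y'), drop=True)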
12 changes: 12 additions & 0 deletions holoviews/core/data/array.py
@@ -239,6 +239,18 @@ def unpack_scalar(cls, dataset, data):
        return data

    @classmethod
    def assign(cls, dataset, new_data):
        data = dataset.data.copy()
        for d, arr in new_data.items():
            if dataset.get_dimension(d) is None:
                continue
            idx = dataset.get_dimension_index(d)
            data[:, idx] = arr
        new_cols = [arr for d, arr in new_data.items() if dataset.get_dimension(d) is None]
        return np.column_stack([data]+new_cols)

    @classmethod
    def aggregate(cls, dataset, dimensions, function, **kwargs):
        reindexed = dataset.reindex(dimensions)
9 changes: 8 additions & 1 deletion holoviews/core/data/dictionary.py
@@ -215,7 +215,7 @@ def concat(cls, datasets, dimensions, vdims):
        columns = defaultdict(list)
        for key, ds in datasets:
            for k, vals in ds.data.items():
-                columns[k].append(vals)
+                columns[k].append(np.atleast_1d(vals))
            for d, k in zip(dimensions, key):
                columns[d.name].append(np.full(len(ds), k))
@@ -270,6 +270,13 @@ def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index
        return values

    @classmethod
    def assign(cls, dataset, new_data):
        data = OrderedDict(dataset.data)
        data.update(new_data)
        return data

    @classmethod
    def reindex(cls, dataset, kdims, vdims):
        dimensions = [dataset.get_dimension(d).name for d in kdims+vdims]
24 changes: 20 additions & 4 deletions holoviews/core/data/grid.py
@@ -411,24 +411,26 @@ def ndloc(cls, dataset, indices):


    @classmethod
-    def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False):
+    def values(cls, dataset, dim, expanded=True, flat=True, compute=True,
+               keep_index=False, canonicalize=True):
        dim = dataset.get_dimension(dim, strict=True)
        if dim in dataset.vdims or dataset.data[dim.name].ndim > 1:
            vdim_tuple = cls.packed(dataset)
            if vdim_tuple:
                data = dataset.data[vdim_tuple][..., dataset.vdims.index(dim)]
            else:
                data = dataset.data[dim.name]
-            data = cls.canonicalize(dataset, data)
+            if canonicalize:
+                data = cls.canonicalize(dataset, data)
            da = dask_array_module()
            if compute and da and isinstance(data, da.Array):
                data = data.compute()
            return data.T.flatten() if flat else data
        elif expanded:
-            data = cls.coords(dataset, dim.name, expanded=True)
+            data = cls.coords(dataset, dim.name, expanded=True, ordered=canonicalize)
            return data.T.flatten() if flat else data
        else:
-            return cls.coords(dataset, dim.name, ordered=True)
+            return cls.coords(dataset, dim.name, ordered=canonicalize)


    @classmethod
@@ -798,5 +800,19 @@ def range(cls, dataset, dimension):
            column.sort()
        return column[0], column[-1]

    @classmethod
    def assign(cls, dataset, new_data):
        data = OrderedDict(dataset.data)
        for k, v in new_data.items():
            if k in dataset.kdims:
                coords = cls.coords(dataset, k)
                if not coords.ndim > 1 and np.all(coords[1:] < coords[:-1]):
                    v = v[::-1]
                data[k] = v
            else:
                data[k] = cls.canonicalize(dataset, v)
        return data



Interface.register(GridInterface)
5 changes: 4 additions & 1 deletion holoviews/core/data/pandas.py
@@ -179,7 +179,7 @@ def concat_fn(cls, dataframes, **kwargs):
            kwargs['sort'] = False
        return pd.concat(dataframes, **kwargs)

    @classmethod
    def concat(cls, datasets, dimensions, vdims):
        dataframes = []
@@ -345,6 +345,9 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
        data.insert(dim_pos, dimension.name, values)
        return data

    @classmethod
    def assign(cls, dataset, new_data):
        return dataset.data.assign(**new_data)
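
For the pandas backend, assign delegates directly to DataFrame.assign, which returns a new DataFrame and leaves the original untouched. A quick sketch of the equivalent pandas call (toy frame assumed):

import numpy as np
import pandas as pd

df = pd.DataFrame({'x': np.arange(5), 'y': np.random.randn(5)})

# Equivalent of PandasInterface.assign(dataset, {'z': df['x'] * df['y']})
new_df = df.assign(z=df['x'] * df['y'])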

    @classmethod
    def as_dframe(cls, dataset):
47 changes: 43 additions & 4 deletions holoviews/core/data/xarray.py
@@ -353,7 +353,9 @@ def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index
        if packed:
            data = dataset.data.data[..., dataset.vdims.index(dim)]
        else:
-            data = dataset.data[dim.name].data
+            data = dataset.data[dim.name]
+            if not keep_index:
+                data = data.data
        irregular = cls.irregular(dataset, dim) if dim in dataset.kdims else False
        irregular_kdims = [d for d in dataset.kdims if cls.irregular(dataset, d)]
        if irregular_kdims:
@@ -371,13 +373,16 @@
            if is_cupy(data):
                import cupy
                data = cupy.asnumpy(data)
-            data = cls.canonicalize(dataset, data, data_coords=data_coords,
-                                    virtual_coords=virtual_coords)
-            return data.T.flatten() if flat else data
+            if not keep_index:
+                data = cls.canonicalize(dataset, data, data_coords=data_coords,
+                                        virtual_coords=virtual_coords)
+            return data.T.flatten() if flat and not keep_index else data
        elif expanded:
            data = cls.coords(dataset, dim.name, expanded=True)
            return data.T.flatten() if flat else data
        else:
+            if keep_index:
+                return dataset[dim.name]
            return cls.coords(dataset, dim.name, ordered=True)


@@ -600,5 +605,39 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
                             dims=tuple(d.name for d in dataset.kdims[::-1]))
        return dataset.data.assign(**{dim: arr})

    @classmethod
    def assign(cls, dataset, new_data):
        import xarray as xr
        data = dataset.data
        prev_coords = set.intersection(*[
            set(var.coords) for var in data.data_vars.values()
        ])
        coords = OrderedDict()
        for k, v in new_data.items():
            if k not in dataset.kdims:
                continue
            elif isinstance(v, xr.DataArray):
                coords[k] = v.rename(**{v.name: k})
                continue
            coord_vals = cls.coords(dataset, k)
            if not coord_vals.ndim > 1 and np.all(coord_vals[1:] < coord_vals[:-1]):
                v = v[::-1]
            coords[k] = (k, v)
        if coords:
            data = data.assign_coords(**coords)
        dims = tuple(kd.name for kd in dataset.kdims[::-1])
        vars = OrderedDict()
        for k, v in new_data.items():
            if k in dataset.kdims:
                continue
            if isinstance(v, xr.DataArray):
                vars[k] = v
            else:
                vars[k] = (dims, cls.canonicalize(dataset, v, data_coords=dims))
        if vars:
            data = data.assign(vars)
        used_coords = set.intersection(*[set(var.coords) for var in data.data_vars.values()])
        drop_coords = set.symmetric_difference(used_coords, prev_coords)
        return data.drop(list(drop_coords)), list(drop_coords)
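
The xarray implementation routes overridden key dimensions through assign_coords and new value dimensions through assign, then returns any coordinates that became unused so the caller can drop the corresponding kdims. Roughly the underlying xarray calls (toy dataset assumed):

import numpy as np
import xarray as xr

ds = xr.Dataset({'z': (('y', 'x'), np.random.rand(3, 4))},
                coords={'x': np.arange(4), 'y': np.arange(3)})

# Overriding a key dimension maps to assign_coords...
ds2 = ds.assign_coords(x=ds['x'] * 2)
# ...while adding a value dimension maps to assign
ds3 = ds2.assign(z2=ds2['z'] ** 2)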


Interface.register(XArrayInterface)
