Commit 0a66405: Merge b0b252c into 9f5425e

poplarShift committed Mar 9, 2020 (2 parents: 9f5425e + b0b252c)
Showing 20 changed files with 436 additions and 72 deletions.
29 changes: 27 additions & 2 deletions holoviews/core/accessors.py
@@ -205,11 +205,10 @@ def apply_function(object, **kwargs):
                    mapped.append((k, new_val))
                return self._obj.clone(mapped, link=link_inputs)

    def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
        """Applies an aggregate function to all ViewableElements.

-        See :py:meth:`Dimensioned.opts` and :py:meth:`Apply.__call__`
+        See :py:meth:`Dimensioned.aggregate` and :py:meth:`Apply.__call__`
        for more information.
        """
        kwargs['_method_args'] = (dimensions, function, spreadfn)
@@ -222,6 +221,14 @@ def opts(self, *args, **kwargs):
        See :py:meth:`Dimensioned.opts` and :py:meth:`Apply.__call__`
        for more information.
        """
        from ..util.transform import dim
        from ..streams import Params
        params = {}
        for arg in kwargs.values():
            if isinstance(arg, dim):
                params.update(arg.params)
        streams = Params.from_params(params, watch_only=True)
        kwargs['streams'] = kwargs.get('streams', []) + streams
        kwargs['_method_args'] = args
        return self.__call__('opts', **kwargs)
@@ -253,6 +260,24 @@ def select(self, **kwargs):
"""
return self.__call__('select', **kwargs)

def transform(self, *args, **kwargs):
"""Applies transforms to all Datasets.
See :py:meth:`Dataset.transform` and :py:meth:`Apply.__call__`
for more information.
"""
from ..util.transform import dim
from ..streams import Params
params = {}
for _, arg in list(args)+list(kwargs.items()):
if isinstance(arg, dim):
params.update(arg.params)
streams = Params.from_params(params, watch_only=True)
kwargs['streams'] = kwargs.get('streams', []) + streams
kwargs['_method_args'] = args
kwargs['per_element'] = True
return self.__call__('transform', **kwargs)


@add_metaclass(AccessorPipelineMeta)
class Redim(object):
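
The new Apply.transform accessor collects any param.Parameter instances referenced by the supplied dim expressions and registers them as watch-only Params streams, so the resulting pipeline re-evaluates whenever those parameters change. A minimal sketch of the intended usage, assuming a Panel slider as the parameter source (the widget and data are illustrative, not part of this commit):

import numpy as np
import holoviews as hv
import panel as pn

ds = hv.Dataset((np.arange(100), np.random.randn(100).cumsum()), 'x', 'y')
curve = hv.Curve(ds)

# A widget parameter referenced inside a dim expression
scale = pn.widgets.FloatSlider(name='scale', start=0.1, end=10.0, value=1.0)

# The accessor extracts scale.param.value from the expression and watches it,
# so dragging the slider re-applies the transform
scaled = curve.apply.transform(y=hv.dim('y') * scale.param.value)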
103 changes: 94 additions & 9 deletions holoviews/core/data/__init__.py
@@ -14,7 +14,7 @@
from .. import util
from ..accessors import Redim
from ..dimension import (
-    Dimension, process_dimensions, Dimensioned, LabelledData
+    Dimension, Dimensioned, LabelledData, dimension_name, process_dimensions
)
from ..element import Element
from ..ndmapping import OrderedDict, MultiDimensionalMapping
@@ -281,6 +281,16 @@ class Dataset(Element):
    _vdim_reductions = {}
    _kdim_reductions = {}

    def __new__(cls, data=None, kdims=None, vdims=None, **kwargs):
        """
        Allows casting a DynamicMap to an Element class like hv.Curve, by applying the
        class to each underlying element.
        """
        if isinstance(data, DynamicMap):
            return data.apply(cls, per_element=True, kdims=kdims, vdims=vdims, **kwargs)
        else:
            return super(Dataset, cls).__new__(cls)

    def __init__(self, data, kdims=None, vdims=None, **kwargs):
        from ...operation.element import (
            chain as chain_op, factory
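
The __new__ override makes Dataset subclasses such as hv.Curve act element-wise when handed a DynamicMap, delegating to .apply with per_element=True. A minimal sketch of what this enables (the sine DynamicMap is an illustrative assumption):

import numpy as np
import holoviews as hv

def sine(f):
    xs = np.linspace(0, 1, 100)
    return hv.Table((xs, np.sin(f * xs)), 'x', 'y')

dmap = hv.DynamicMap(sine, kdims='f').redim.range(f=(1, 10))

# Casting the DynamicMap to Curve now applies hv.Curve to each element
curves = hv.Curve(dmap)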
@@ -446,15 +456,14 @@ def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs):
        Requires the dimension name or object, the desired position in
        the key dimensions and a key value scalar or array of values,
-        matching the length o shape of the Dataset.
+        matching the length or shape of the Dataset.

        Args:
            dimension: Dimension or dimension spec to add
-            dim_pos (int) Integer index to insert dimension at
+            dim_pos (int): Integer index to insert dimension at
            dim_val (scalar or ndarray): Dimension value(s) to add
            vdim: Disabled, this type does not have value dimensions
            **kwargs: Keyword arguments passed to the cloned element

        Returns:
            Cloned object containing the new dimension
        """
@@ -798,24 +807,37 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
"""Aggregates data on the supplied dimensions.
Aggregates over the supplied key dimensions with the defined
function.
function or dim_transform specified as a tuple of the transformed
dimension name and dim transform.
Args:
dimensions: Dimension(s) to aggregate on
Default to all key dimensions
function: Aggregation function to apply, e.g. numpy.mean
function: Aggregation function or transform to apply
Supports both simple functions and dimension transforms
spreadfn: Secondary reduction to compute value spread
Useful for computing a confidence interval, spread, or
standard deviation.
**kwargs: Keyword arguments passed to the aggregation function
**kwargs: Keyword arguments either passed to the aggregation function
or to create new names for the transformed variables
Returns:
Returns the aggregated Dataset
"""
if function is None:
raise ValueError("The aggregate method requires a function to be specified")
from ...util.transform import dim
if dimensions is None: dimensions = self.kdims
elif not isinstance(dimensions, list): dimensions = [dimensions]
if isinstance(function, tuple) or any(isinstance(v, dim) for v in kwargs.values()):
dataset = self.clone(new_type=Dataset)
if dimensions:
dataset = dataset.groupby(dimensions)
args = () if function is None else (function,)
transformed = dataset.apply.transform(*args, drop=True, **kwargs)
if not isinstance(transformed, Dataset):
transformed = transformed.collapse()
return transformed.clone(new_type=type(self))

# Handle functions
kdims = [self.get_dimension(d, strict=True) for d in dimensions]
if not len(self):
if spreadfn:
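
With this change, aggregate accepts dim transforms in addition to plain functions: either as a (name, transform) tuple for the function argument or as keyword arguments naming the transformed outputs. A hedged sketch under assumed sample data (the output names vmean/vstd are illustrative, and it assumes the method-style dim('value').mean()/.std() transforms):

import numpy as np
import holoviews as hv
from holoviews import dim

ds = hv.Dataset((np.repeat(['A', 'B'], 50), np.random.randn(100)),
                ['category'], ['value'])

# Existing form: aggregate with a plain function
means = ds.aggregate('category', function=np.mean)

# New form: dim transforms passed as kwargs that name the new variables
stats = ds.aggregate('category',
                     vmean=dim('value').mean(),
                     vstd=dim('value').std())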
@@ -908,6 +930,69 @@ def load_subset(*args):
        return self.interface.groupby(self, dim_names, container_type,
                                      group_type, **kwargs)

    def transform(self, *args, **kwargs):
        """Transforms the Dataset according to a dimension transform.

        Transforms may be supplied as tuples consisting of the
        dimension(s) and the dim transform to apply or keyword
        arguments mapping from dimension(s) to dim transforms. If the
        arg or kwarg declares multiple dimensions the dim transform
        should return a tuple of values for each.

        A transform may override an existing dimension or add a new
        one in which case it will be added as an additional value
        dimension.

        Args:
            args: Specify the output arguments and transforms as a
                tuple of dimension specs and dim transforms
            drop (bool): Whether to drop all variables not part of the transform
            keep_index (bool): Whether to apply the transform on the
                underlying datastructure with its index intact, e.g. a
                pandas.Series or xarray.DataArray
            kwargs: Specify new dimensions in the form new_dim=dim_transform

        Returns:
            Transformed dataset with new dimensions
        """
        drop = kwargs.pop('drop', False)
        keep_index = kwargs.pop('keep_index', False)
        transforms = OrderedDict()
        for s, transform in list(args)+list(kwargs.items()):
            transforms[util.wrap_tuple(s)] = transform

        new_data = OrderedDict()
        for signature, transform in transforms.items():
            applied = transform.apply(
                self, compute=False, keep_index=keep_index
            )
            if len(signature) == 1:
                new_data[signature[0]] = applied
            else:
                for s, vals in zip(signature, applied):
                    new_data[s] = vals

        new_dims = []
        for d in new_data:
            if self.get_dimension(d) is None:
                new_dims.append(d)

        ds = self
        if ds.interface.datatype in ('image', 'array'):
            ds = ds.clone(datatype=[dt for dt in ds.datatype if dt != ds.interface.datatype])

        if drop:
            kdims = [ds.get_dimension(d) for d in new_data if d in ds.kdims]
            vdims = [ds.get_dimension(d) or d for d in new_data if d not in ds.kdims]
            data = OrderedDict([(dimension_name(d), values) for d, values in new_data.items()])
            return ds.clone(data, kdims=kdims, vdims=vdims)
        else:
            new_data = OrderedDict([(dimension_name(d), values) for d, values in new_data.items()])
            data = ds.interface.assign(ds, new_data)
            data, drop = data if isinstance(data, tuple) else (data, [])
            kdims = [kd for kd in self.kdims if kd.name not in drop]
            return ds.clone(data, kdims=kdims, vdims=ds.vdims+new_dims)

    def __len__(self):
        "Number of values in the Dataset."
        return self.interface.length(self)
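
In short, transform evaluates each dim expression against the Dataset and either appends the results as new value dimensions or, with drop=True, keeps only the transformed columns. A minimal sketch with assumed toy data:

import numpy as np
import holoviews as hv
from holoviews import dim

ds = hv.Dataset((np.arange(10), np.random.randn(10)), ['x'], ['y'])

# Keyword form: add a new value dimension computed from existing ones
with_z = ds.transform(z=dim('x') * dim('y'))

# drop=True keeps only the dimensions named in the transform
only_z = ds.transform(z=dim('x') * dim('y'), drop=True)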
12 changes: 12 additions & 0 deletions holoviews/core/data/array.py
@@ -239,6 +239,18 @@ def unpack_scalar(cls, dataset, data):
        return data

    @classmethod
    def assign(cls, dataset, new_data):
        data = dataset.data.copy()
        for d, arr in new_data.items():
            if dataset.get_dimension(d) is None:
                continue
            idx = dataset.get_dimension_index(d)
            data[:, idx] = arr
        new_cols = [arr for d, arr in new_data.items() if dataset.get_dimension(d) is None]
        return np.column_stack([data]+new_cols)

    @classmethod
    def aggregate(cls, dataset, dimensions, function, **kwargs):
        reindexed = dataset.reindex(dimensions)
9 changes: 8 additions & 1 deletion holoviews/core/data/dictionary.py
@@ -215,7 +215,7 @@ def concat(cls, datasets, dimensions, vdims):
        columns = defaultdict(list)
        for key, ds in datasets:
            for k, vals in ds.data.items():
-                columns[k].append(vals)
+                columns[k].append(np.atleast_1d(vals))
            for d, k in zip(dimensions, key):
                columns[d.name].append(np.full(len(ds), k))
@@ -270,6 +270,13 @@ def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index
        return values

    @classmethod
    def assign(cls, dataset, new_data):
        data = OrderedDict(dataset.data)
        data.update(new_data)
        return data

    @classmethod
    def reindex(cls, dataset, kdims, vdims):
        dimensions = [dataset.get_dimension(d).name for d in kdims+vdims]
24 changes: 20 additions & 4 deletions holoviews/core/data/grid.py
@@ -411,24 +411,26 @@ def ndloc(cls, dataset, indices):


    @classmethod
-    def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False):
+    def values(cls, dataset, dim, expanded=True, flat=True, compute=True,
+               keep_index=False, canonicalize=True):
        dim = dataset.get_dimension(dim, strict=True)
        if dim in dataset.vdims or dataset.data[dim.name].ndim > 1:
            vdim_tuple = cls.packed(dataset)
            if vdim_tuple:
                data = dataset.data[vdim_tuple][..., dataset.vdims.index(dim)]
            else:
                data = dataset.data[dim.name]
-            data = cls.canonicalize(dataset, data)
+            if canonicalize:
+                data = cls.canonicalize(dataset, data)
            da = dask_array_module()
            if compute and da and isinstance(data, da.Array):
                data = data.compute()
            return data.T.flatten() if flat else data
        elif expanded:
-            data = cls.coords(dataset, dim.name, expanded=True)
+            data = cls.coords(dataset, dim.name, expanded=True, ordered=canonicalize)
            return data.T.flatten() if flat else data
        else:
-            return cls.coords(dataset, dim.name, ordered=True)
+            return cls.coords(dataset, dim.name, ordered=canonicalize)


    @classmethod
@@ -798,5 +800,19 @@ def range(cls, dataset, dimension):
            column.sort()
        return column[0], column[-1]

    @classmethod
    def assign(cls, dataset, new_data):
        data = OrderedDict(dataset.data)
        for k, v in new_data.items():
            if k in dataset.kdims:
                coords = cls.coords(dataset, k)
                if not coords.ndim > 1 and np.all(coords[1:] < coords[:-1]):
                    v = v[::-1]
                data[k] = v
            else:
                data[k] = cls.canonicalize(dataset, v)
        return data



Interface.register(GridInterface)
5 changes: 4 additions & 1 deletion holoviews/core/data/pandas.py
@@ -179,7 +179,7 @@ def concat_fn(cls, dataframes, **kwargs):
            kwargs['sort'] = False
        return pd.concat(dataframes, **kwargs)

    @classmethod
    def concat(cls, datasets, dimensions, vdims):
        dataframes = []
@@ -345,6 +345,9 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
        data.insert(dim_pos, dimension.name, values)
        return data

    @classmethod
    def assign(cls, dataset, new_data):
        return dataset.data.assign(**new_data)
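
For the pandas backend, assign delegates directly to DataFrame.assign, which returns a new DataFrame and leaves the original untouched. A quick sketch of the equivalent pandas call (toy frame assumed):

import numpy as np
import pandas as pd

df = pd.DataFrame({'x': np.arange(5), 'y': np.random.randn(5)})

# Equivalent of PandasInterface.assign(dataset, {'z': df['x'] * df['y']})
new_df = df.assign(z=df['x'] * df['y'])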

    @classmethod
    def as_dframe(cls, dataset):
47 changes: 43 additions & 4 deletions holoviews/core/data/xarray.py
@@ -353,7 +353,9 @@ def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index
        if packed:
            data = dataset.data.data[..., dataset.vdims.index(dim)]
        else:
-            data = dataset.data[dim.name].data
+            data = dataset.data[dim.name]
+            if not keep_index:
+                data = data.data
        irregular = cls.irregular(dataset, dim) if dim in dataset.kdims else False
        irregular_kdims = [d for d in dataset.kdims if cls.irregular(dataset, d)]
        if irregular_kdims:
@@ -371,13 +373,16 @@
            if is_cupy(data):
                import cupy
                data = cupy.asnumpy(data)
-            data = cls.canonicalize(dataset, data, data_coords=data_coords,
-                                    virtual_coords=virtual_coords)
-            return data.T.flatten() if flat else data
+            if not keep_index:
+                data = cls.canonicalize(dataset, data, data_coords=data_coords,
+                                        virtual_coords=virtual_coords)
+            return data.T.flatten() if flat and not keep_index else data
        elif expanded:
            data = cls.coords(dataset, dim.name, expanded=True)
            return data.T.flatten() if flat else data
        else:
+            if keep_index:
+                return dataset[dim.name]
            return cls.coords(dataset, dim.name, ordered=True)


@@ -600,5 +605,39 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
                             dims=tuple(d.name for d in dataset.kdims[::-1]))
        return dataset.data.assign(**{dim: arr})

    @classmethod
    def assign(cls, dataset, new_data):
        import xarray as xr
        data = dataset.data
        prev_coords = set.intersection(*[
            set(var.coords) for var in data.data_vars.values()
        ])
        coords = OrderedDict()
        for k, v in new_data.items():
            if k not in dataset.kdims:
                continue
            elif isinstance(v, xr.DataArray):
                coords[k] = v.rename(**{v.name: k})
                continue
            coord_vals = cls.coords(dataset, k)
            if not coord_vals.ndim > 1 and np.all(coord_vals[1:] < coord_vals[:-1]):
                v = v[::-1]
            coords[k] = (k, v)
        if coords:
            data = data.assign_coords(**coords)
        dims = tuple(kd.name for kd in dataset.kdims[::-1])
        vars = OrderedDict()
        for k, v in new_data.items():
            if k in dataset.kdims:
                continue
            if isinstance(v, xr.DataArray):
                vars[k] = v
            else:
                vars[k] = (dims, cls.canonicalize(dataset, v, data_coords=dims))
        if vars:
            data = data.assign(vars)
        used_coords = set.intersection(*[set(var.coords) for var in data.data_vars.values()])
        drop_coords = set.symmetric_difference(used_coords, prev_coords)
        return data.drop(list(drop_coords)), list(drop_coords)
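
The xarray implementation routes overridden key dimensions through assign_coords and new value dimensions through assign, then returns any coordinates that became unused so the caller can drop the corresponding kdims. Roughly the underlying xarray calls (toy dataset assumed):

import numpy as np
import xarray as xr

ds = xr.Dataset({'z': (('y', 'x'), np.random.rand(3, 4))},
                coords={'x': np.arange(4), 'y': np.arange(3)})

# Overriding a key dimension maps to assign_coords...
ds2 = ds.assign_coords(x=ds['x'] * 2)
# ...while adding a value dimension maps to assign
ds3 = ds2.assign(z2=ds2['z'] ** 2)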


Interface.register(XArrayInterface)
