Multi-dimensional dim transforms on data sets #4080

Status: Merged (Mar 9, 2020), 50 commits

Commits (the diff below reflects changes from 49 of the 50 commits):
5b9718f enable vdim insertion in datasets (poplarShift, Oct 30, 2019)
ec4f186 add transform to add new dimensions (poplarShift, Oct 30, 2019)
ad264aa add option to traverse only first level of children (poplarShift, Oct 30, 2019)
555ef28 add dim transform to dimensioned containers (poplarShift, Oct 30, 2019)
5cb209c drop_dimensions method on pandas and xr interfaces (poplarShift, Oct 30, 2019)
c076a1b drop remaining dimensions after transform (poplarShift, Oct 30, 2019)
bf0f451 drop duplicate values after dropping dimensions (poplarShift, Oct 30, 2019)
cc23478 move computation of multi-output transform out of loop (poplarShift, Oct 30, 2019)
db7e8be annotation (poplarShift, Oct 30, 2019)
c9314f2 fix dimension insertion index (poplarShift, Oct 30, 2019)
89f26eb enable dropping of dims and duplicates in ndmapping transforms (poplarShift, Oct 30, 2019)
31cca92 Enable Dataset aggregation using dim transforms (poplarShift, Oct 30, 2019)
71eac0d aggregation defaults to all kdims (poplarShift, Oct 30, 2019)
eb57903 handle dataset without dim restrictions during aggregation (poplarShift, Oct 30, 2019)
eb78673 allow single string for transform output signature (poplarShift, Oct 30, 2019)
029fb3f enable bokeh hextiles aggregation with dim transforms (poplarShift, Oct 30, 2019)
347f592 enable overwriting of dataset dimensions (poplarShift, Oct 31, 2019)
805907b enable passing dim transforms to hex tiles (poplarShift, Oct 31, 2019)
ea00cb5 fix dim insertion position (poplarShift, Oct 31, 2019)
3a8d4d2 Transforms improvements (philippjfr, Jan 16, 2020)
0e191a6 Implement assign based transform method (philippjfr, Mar 6, 2020)
7bf8fcd Further cleanup (philippjfr, Mar 6, 2020)
0109fd2 Fixed flake (philippjfr, Mar 6, 2020)
6a97267 Added tests (philippjfr, Mar 6, 2020)
2a4cbef Fixed flakes (philippjfr, Mar 6, 2020)
d5924f2 Python2 fix (philippjfr, Mar 6, 2020)
b6bd400 Fix handling of grid transforms (philippjfr, Mar 6, 2020)
47ae6ba Fix flakes (philippjfr, Mar 6, 2020)
96f1559 Small fix (philippjfr, Mar 6, 2020)
e35af03 Add more tests (philippjfr, Mar 6, 2020)
d867e64 Allow arbitrary dim expressions (philippjfr, Mar 6, 2020)
b2f6051 Allow applying transform method to indexed datastructure (philippjfr, Mar 6, 2020)
99ce8bb Fixed flakes (philippjfr, Mar 6, 2020)
6e83b97 Add support for dropping coords (philippjfr, Mar 6, 2020)
cb634cb Defer NumPy function calls to method on data (philippjfr, Mar 6, 2020)
6221ae6 Fixed flakes (philippjfr, Mar 6, 2020)
680b5f2 Fixed py2 issue (philippjfr, Mar 6, 2020)
315025c Minor fixes for numpy transforms (philippjfr, Mar 6, 2020)
995d0d4 Better error handling on transforms (philippjfr, Mar 6, 2020)
39d1980 Fixed flakes (philippjfr, Mar 6, 2020)
a79cc5c Resolve parameters in dim expressions (philippjfr, Mar 6, 2020)
c10f014 Implement Dataset.__new__ to allow casting DynamicMaps (philippjfr, Mar 7, 2020)
584d46a Generalized transform watching to apply.opts (philippjfr, Mar 7, 2020)
3497917 Apply suggestions from code review (philippjfr, Mar 8, 2020)
0e9e28d Made __new__ backward compatible (philippjfr, Mar 8, 2020)
f0fad57 Fix issue with BaseShape.__new__ (philippjfr, Mar 8, 2020)
c292267 Pass assign_coords as kwargs (philippjfr, Mar 8, 2020)
b0b252c Pass assign_coords as kwargs (philippjfr, Mar 8, 2020)
a1c1969 Enable keep_index by default (philippjfr, Mar 9, 2020)
a3376e5 Fixes for xarray assign (philippjfr, Mar 9, 2020)
29 changes: 27 additions & 2 deletions holoviews/core/accessors.py
@@ -205,11 +205,10 @@ def apply_function(object, **kwargs):
mapped.append((k, new_val))
return self._obj.clone(mapped, link=link_inputs)


def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
"""Applies a aggregate function to all ViewableElements.

See :py:meth:`Dimensioned.opts` and :py:meth:`Apply.__call__`
See :py:meth:`Dimensioned.aggregate` and :py:meth:`Apply.__call__`
for more information.
"""
kwargs['_method_args'] = (dimensions, function, spreadfn)
@@ -222,6 +221,14 @@ def opts(self, *args, **kwargs):
See :py:meth:`Dimensioned.opts` and :py:meth:`Apply.__call__`
for more information.
"""
from ..util.transform import dim
from ..streams import Params
params = {}
for arg in kwargs.values():
if isinstance(arg, dim):
params.update(arg.params)
streams = Params.from_params(params, watch_only=True)
kwargs['streams'] = kwargs.get('streams', []) + streams
kwargs['_method_args'] = args
return self.__call__('opts', **kwargs)

@@ -253,6 +260,24 @@ def select(self, **kwargs):
"""
return self.__call__('select', **kwargs)

def transform(self, *args, **kwargs):
"""Applies transforms to all Datasets.

See :py:meth:`Dataset.transform` and :py:meth:`Apply.__call__`
for more information.
"""
from ..util.transform import dim
from ..streams import Params
params = {}
for _, arg in list(args)+list(kwargs.items()):
if isinstance(arg, dim):
params.update(arg.params)
streams = Params.from_params(params, watch_only=True)
kwargs['streams'] = kwargs.get('streams', []) + streams
kwargs['_method_args'] = args
kwargs['per_element'] = True
return self.__call__('transform', **kwargs)


@add_metaclass(AccessorPipelineMeta)
class Redim(object):
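The new `Apply.transform` accessor above collects any `param` parameters referenced by the supplied dim expressions and registers them as watch-only `Params` streams, so a transformed pipeline re-evaluates when a parameter changes. A minimal sketch of the intended usage (the `Scale` class and its `factor` parameter are illustrative, not part of this PR):

```python
import param
import holoviews as hv
from holoviews import dim

class Scale(param.Parameterized):
    factor = param.Number(default=1)

scale = Scale()
curve = hv.Curve([(0, 0), (1, 1), (2, 4)], 'x', 'y')

# The accessor finds scale.param.factor inside the expression and wires
# it up as a watch-only stream, so setting scale.factor re-runs the transform.
scaled = curve.apply.transform(y=dim('y') * scale.param.factor)
scale.factor = 3  # pipeline updates dynamically
```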
105 changes: 96 additions & 9 deletions holoviews/core/data/__init__.py
@@ -14,7 +14,7 @@
from .. import util
from ..accessors import Redim
from ..dimension import (
Dimension, process_dimensions, Dimensioned, LabelledData
Dimension, Dimensioned, LabelledData, dimension_name, process_dimensions
)
from ..element import Element
from ..ndmapping import OrderedDict, MultiDimensionalMapping
@@ -281,6 +281,16 @@ class Dataset(Element):
_vdim_reductions = {}
_kdim_reductions = {}

def __new__(cls, data=None, kdims=None, vdims=None, **kwargs):
"""
Allows casting a DynamicMap to an Element class like hv.Curve, by applying the
class to each underlying element.
"""
if isinstance(data, DynamicMap):
return data.apply(cls, per_element=True, kdims=kdims, vdims=vdims, **kwargs)
else:
return super(Dataset, cls).__new__(cls)

def __init__(self, data, kdims=None, vdims=None, **kwargs):
from ...operation.element import (
chain as chain_op, factory
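The `Dataset.__new__` override above lets element constructors double as casts on dynamic pipelines. A hedged sketch of what this enables (`make_table` is a made-up callable):

```python
import numpy as np
import holoviews as hv

def make_table(freq):
    xs = np.linspace(0, 1, 100)
    return hv.Table((xs, np.sin(freq * xs)), 'x', 'y')

dmap = hv.DynamicMap(make_table, kdims='freq').redim.range(freq=(1, 10))

# Previously this would error; now Dataset.__new__ detects the DynamicMap
# and applies hv.Curve to each element via dmap.apply(..., per_element=True).
curves = hv.Curve(dmap)
```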
@@ -446,15 +456,14 @@ def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs):

Requires the dimension name or object, the desired position in
the key dimensions and a key value scalar or array of values,
matching the length o shape of the Dataset.
matching the length or shape of the Dataset.

Args:
dimension: Dimension or dimension spec to add
dim_pos (int) Integer index to insert dimension at
dim_pos (int): Integer index to insert dimension at
dim_val (scalar or ndarray): Dimension value(s) to add
vdim: Disabled, this type does not have value dimensions
**kwargs: Keyword arguments passed to the cloned element

Returns:
Cloned object containing the new dimension
"""
@@ -798,24 +807,37 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
"""Aggregates data on the supplied dimensions.

Aggregates over the supplied key dimensions with the defined
function.
function or dim_transform specified as a tuple of the transformed
dimension name and dim transform.

Args:
dimensions: Dimension(s) to aggregate on
Default to all key dimensions
function: Aggregation function to apply, e.g. numpy.mean
function: Aggregation function or transform to apply
Supports both simple functions and dimension transforms
spreadfn: Secondary reduction to compute value spread
Useful for computing a confidence interval, spread, or
standard deviation.
**kwargs: Keyword arguments passed to the aggregation function
**kwargs: Keyword arguments either passed to the aggregation function
or to create new names for the transformed variables

Returns:
Returns the aggregated Dataset
"""
if function is None:
raise ValueError("The aggregate method requires a function to be specified")
from ...util.transform import dim
if dimensions is None: dimensions = self.kdims
elif not isinstance(dimensions, list): dimensions = [dimensions]
if isinstance(function, tuple) or any(isinstance(v, dim) for v in kwargs.values()):
dataset = self.clone(new_type=Dataset)
if dimensions:
dataset = dataset.groupby(dimensions)
args = () if function is None else (function,)
transformed = dataset.apply.transform(*args, drop=True, **kwargs)
if not isinstance(transformed, Dataset):
transformed = transformed.collapse()
return transformed.clone(new_type=type(self))

# Handle functions
kdims = [self.get_dimension(d, strict=True) for d in dimensions]
if not len(self):
if spreadfn:
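With the branch above, `aggregate` accepts dim transforms in addition to plain functions, grouping by the requested dimensions and reducing via `apply.transform`. A short sketch of both call styles (dataset contents are illustrative):

```python
import numpy as np
import holoviews as hv
from holoviews import dim

ds = hv.Dataset({'group': ['a', 'a', 'b', 'b'],
                 'value': [1, 2, 3, 4]},
                kdims='group', vdims='value')

# Plain-function form, as before
means = ds.aggregate('group', function=np.mean)

# New dim-transform form: the keyword names the output dimension
means_named = ds.aggregate('group', value_mean=dim('value').mean())
```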
@@ -908,6 +930,71 @@ def load_subset(*args):
return self.interface.groupby(self, dim_names, container_type,
group_type, **kwargs)

def transform(self, *args, **kwargs):
"""Transforms the Dataset according to a dimension transform.

Transforms may be supplied as tuples consisting of the
dimension(s) and the dim transform to apply or keyword
arguments mapping from dimension(s) to dim transforms. If the
arg or kwarg declares multiple dimensions the dim transform
should return a tuple of values for each.

A transform may override an existing dimension or add a new
one in which case it will be added as an additional value
dimension.

Args:
args: Specify the output arguments and transforms as a
tuple of dimension specs and dim transforms
drop (bool): Whether to drop all variables not part of the transform
keep_index (bool): Whether to keep indexes
Whether to apply transform on datastructure with
index, e.g. pandas.Series or xarray.DataArray,
(important for dask datastructures where index may
be required to align datasets).
kwargs: Specify new dimensions in the form new_dim=dim_transform

Returns:
Transformed dataset with new dimensions
"""
drop = kwargs.pop('drop', False)
keep_index = kwargs.pop('keep_index', True)
transforms = OrderedDict()
for s, transform in list(args)+list(kwargs.items()):
transforms[util.wrap_tuple(s)] = transform

new_data = OrderedDict()
for signature, transform in transforms.items():
applied = transform.apply(
self, compute=False, keep_index=keep_index
)
if len(signature) == 1:
new_data[signature[0]] = applied
else:
for s, vals in zip(signature, applied):
new_data[s] = vals

new_dims = []
for d in new_data:
if self.get_dimension(d) is None:
new_dims.append(d)

ds = self
if ds.interface.datatype in ('image', 'array'):
ds = ds.clone(datatype=[dt for dt in ds.datatype if dt != ds.interface.datatype])

if drop:
kdims = [ds.get_dimension(d) for d in new_data if d in ds.kdims]
vdims = [ds.get_dimension(d) or d for d in new_data if d not in ds.kdims]
data = OrderedDict([(dimension_name(d), values) for d, values in new_data.items()])
return ds.clone(data, kdims=kdims, vdims=vdims)
else:
new_data = OrderedDict([(dimension_name(d), values) for d, values in new_data.items()])
data = ds.interface.assign(ds, new_data)
data, drop = data if isinstance(data, tuple) else (data, [])
kdims = [kd for kd in self.kdims if kd.name not in drop]
return ds.clone(data, kdims=kdims, vdims=ds.vdims+new_dims)

def __len__(self):
"Number of values in the Dataset."
return self.interface.length(self)
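To make the signature described in the `transform` docstring concrete, a short sketch of the keyword and positional forms (dataset contents are illustrative):

```python
import holoviews as hv
from holoviews import dim

ds = hv.Dataset({'x': [1, 2, 3], 'y': [2, 4, 6]}, kdims='x', vdims='y')

# Keyword form: new_dim=dim_transform adds 'ratio' as a value dimension
with_ratio = ds.transform(ratio=dim('y') / dim('x'))

# Positional form: (output spec, transform) tuples; drop=True discards
# all dimensions that are not part of the transform output
only_ratio = ds.transform(('ratio', dim('y') / dim('x')), drop=True)
```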
12 changes: 12 additions & 0 deletions holoviews/core/data/array.py
@@ -239,6 +239,18 @@ def unpack_scalar(cls, dataset, data):
return data


@classmethod
def assign(cls, dataset, new_data):
data = dataset.data.copy()
for d, arr in new_data.items():
if dataset.get_dimension(d) is None:
continue
idx = dataset.get_dimension_index(d)
data[:, idx] = arr
new_cols = [arr for d, arr in new_data.items() if dataset.get_dimension(d) is None]
return np.column_stack([data]+new_cols)


@classmethod
def aggregate(cls, dataset, dimensions, function, **kwargs):
reindexed = dataset.reindex(dimensions)
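`ArrayInterface.assign` overwrites the column of any existing dimension in place and stacks genuinely new columns on the right; in plain NumPy terms the operation amounts to this (a standalone illustration, not the interface code itself):

```python
import numpy as np

data = np.array([[1., 2.], [3., 4.]])  # columns: x, y
data[:, 1] = np.array([20., 40.])      # assign to the existing 'y' column
result = np.column_stack([data, np.array([5., 6.])])  # append a new column
```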
9 changes: 8 additions & 1 deletion holoviews/core/data/dictionary.py
@@ -215,7 +215,7 @@ def concat(cls, datasets, dimensions, vdims):
columns = defaultdict(list)
for key, ds in datasets:
for k, vals in ds.data.items():
columns[k].append(vals)
columns[k].append(np.atleast_1d(vals))
for d, k in zip(dimensions, key):
columns[d.name].append(np.full(len(ds), k))

@@ -270,6 +270,13 @@ def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index
return values


@classmethod
def assign(cls, dataset, new_data):
data = OrderedDict(dataset.data)
data.update(new_data)
return data


@classmethod
def reindex(cls, dataset, kdims, vdims):
dimensions = [dataset.get_dimension(d).name for d in kdims+vdims]
24 changes: 20 additions & 4 deletions holoviews/core/data/grid.py
@@ -411,24 +411,26 @@ def ndloc(cls, dataset, indices):


@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False):
def values(cls, dataset, dim, expanded=True, flat=True, compute=True,
keep_index=False, canonicalize=True):
dim = dataset.get_dimension(dim, strict=True)
if dim in dataset.vdims or dataset.data[dim.name].ndim > 1:
vdim_tuple = cls.packed(dataset)
if vdim_tuple:
data = dataset.data[vdim_tuple][..., dataset.vdims.index(dim)]
else:
data = dataset.data[dim.name]
data = cls.canonicalize(dataset, data)
if canonicalize:
data = cls.canonicalize(dataset, data)
da = dask_array_module()
if compute and da and isinstance(data, da.Array):
data = data.compute()
return data.T.flatten() if flat else data
elif expanded:
data = cls.coords(dataset, dim.name, expanded=True)
data = cls.coords(dataset, dim.name, expanded=True, ordered=canonicalize)
return data.T.flatten() if flat else data
else:
return cls.coords(dataset, dim.name, ordered=True)
return cls.coords(dataset, dim.name, ordered=canonicalize)


@classmethod
@@ -798,5 +800,19 @@ def range(cls, dataset, dimension):
column.sort()
return column[0], column[-1]

@classmethod
def assign(cls, dataset, new_data):
data = OrderedDict(dataset.data)
for k, v in new_data.items():
if k in dataset.kdims:
coords = cls.coords(dataset, k)
if not coords.ndim > 1 and np.all(coords[1:] < coords[:-1]):
v = v[::-1]
data[k] = v
else:
data[k] = cls.canonicalize(dataset, v)
return data



Interface.register(GridInterface)
5 changes: 4 additions & 1 deletion holoviews/core/data/pandas.py
@@ -179,7 +179,7 @@ def concat_fn(cls, dataframes, **kwargs):
kwargs['sort'] = False
return pd.concat(dataframes, **kwargs)


@classmethod
def concat(cls, datasets, dimensions, vdims):
dataframes = []
@@ -345,6 +345,9 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
data.insert(dim_pos, dimension.name, values)
return data

@classmethod
def assign(cls, dataset, new_data):
return dataset.data.assign(**new_data)

@classmethod
def as_dframe(cls, dataset):
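For the pandas interface the whole operation maps directly onto `DataFrame.assign`, which returns a copy with the given columns added or overwritten:

```python
import pandas as pd

df = pd.DataFrame({'x': [1, 2, 3], 'y': [2, 4, 6]})
df2 = df.assign(ratio=df['y'] / df['x'])  # adds a 'ratio' column, df unchanged
```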
47 changes: 43 additions & 4 deletions holoviews/core/data/xarray.py
@@ -353,7 +353,9 @@ def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index
if packed:
data = dataset.data.data[..., dataset.vdims.index(dim)]
else:
data = dataset.data[dim.name].data
data = dataset.data[dim.name]
if not keep_index:
data = data.data
irregular = cls.irregular(dataset, dim) if dim in dataset.kdims else False
irregular_kdims = [d for d in dataset.kdims if cls.irregular(dataset, d)]
if irregular_kdims:
@@ -371,13 +373,16 @@
if is_cupy(data):
import cupy
data = cupy.asnumpy(data)
data = cls.canonicalize(dataset, data, data_coords=data_coords,
virtual_coords=virtual_coords)
return data.T.flatten() if flat else data
if not keep_index:
data = cls.canonicalize(dataset, data, data_coords=data_coords,
virtual_coords=virtual_coords)
return data.T.flatten() if flat and not keep_index else data
elif expanded:
data = cls.coords(dataset, dim.name, expanded=True)
return data.T.flatten() if flat else data
else:
if keep_index:
return dataset[dim.name]
return cls.coords(dataset, dim.name, ordered=True)


@@ -600,5 +605,39 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
dims=tuple(d.name for d in dataset.kdims[::-1]))
return dataset.data.assign(**{dim: arr})

@classmethod
def assign(cls, dataset, new_data):
import xarray as xr
data = dataset.data
prev_coords = set.intersection(*[
set(var.coords) for var in data.data_vars.values()
])
coords = OrderedDict()
for k, v in new_data.items():
if k not in dataset.kdims:
continue
elif isinstance(v, xr.DataArray):
coords[k] = v.rename(**{v.name: k})
coord_vals = cls.coords(dataset, k)
if not coord_vals.ndim > 1 and np.all(coord_vals[1:] < coord_vals[:-1]):
v = v[::-1]
coords[k] = (k, v)
if coords:
data = data.assign_coords(**coords)
dims = tuple(kd.name for kd in dataset.kdims[::-1])
vars = OrderedDict()
for k, v in new_data.items():
if k in dataset.kdims:
continue
if isinstance(v, xr.DataArray):
vars[k] = v
else:
vars[k] = (dims, cls.canonicalize(dataset, v, data_coords=dims))
if vars:
data = data.assign(vars)
used_coords = set.intersection(*[set(var.coords) for var in data.data_vars.values()])
drop_coords = set.symmetric_difference(used_coords, prev_coords)
return data.drop(list(drop_coords)), list(drop_coords)


Interface.register(XArrayInterface)
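The xarray `assign` above routes key-dimension updates through `assign_coords`, adds value dimensions as data variables via `assign`, and finally drops coordinates that no data variable still uses. A standalone illustration of the two underlying xarray calls (not the interface code itself):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({'z': (('y', 'x'), np.arange(6.).reshape(2, 3))},
                coords={'x': [0, 1, 2], 'y': [0, 1]})

# Key dimensions (coordinates) are updated with assign_coords...
ds = ds.assign_coords(x=[10, 11, 12])

# ...while new value dimensions become data variables via assign
ds = ds.assign(z2=(('y', 'x'), np.sqrt(ds['z'].values)))
```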