Skip to content

Commit

Permalink
Merge pull request #1154 from ioam/groupby_fixes
Browse files Browse the repository at this point in the history
Implement dropping dimensions in gridded groupby
  • Loading branch information
jlstevens committed Feb 27, 2017
2 parents fd65ac7 + 203a09b commit 336ccf6
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 19 deletions.
25 changes: 18 additions & 7 deletions holoviews/core/data/__init__.py
Expand Up @@ -83,13 +83,16 @@ def __call__(self, new_type, kdims=None, vdims=None, groupby=None,
raise ValueError('Cannot supply both mdims and groupby')
else:
self._element.warning("'mdims' keyword has been renamed "
"to groupby; the name mdims is "
"to 'groupby'; the name mdims is "
"deprecated and will be removed "
"after version 1.7.")
groupby = kwargs['mdims']
groupby = kwargs.pop('mdims')

if kdims is None:
kdims = self._element.kdims
kd_filter = groupby or []
if not isinstance(kd_filter, list):
kd_filter = [groupby]
kdims = [kd for kd in self._element.kdims if kd not in kd_filter]
elif kdims and not isinstance(kdims, list): kdims = [kdims]
if vdims is None:
vdims = self._element.vdims
Expand All @@ -99,7 +102,10 @@ def __call__(self, new_type, kdims=None, vdims=None, groupby=None,
elif groupby and not isinstance(groupby, list):
groupby = [groupby]

selected = self._element.reindex(groupby+kdims, vdims)
if self._element.interface.gridded:
selected = self._element
else:
selected = self._element.reindex(groupby+kdims, vdims)
params = {'kdims': [selected.get_dimension(kd, strict=True) for kd in kdims],
'vdims': [selected.get_dimension(vd, strict=True) for vd in vdims],
'label': selected.label}
Expand Down Expand Up @@ -428,15 +434,20 @@ def groupby(self, dimensions=[], container_type=HoloMap, group_type=None,

if dynamic:
group_dims = [d.name for d in self.kdims if d not in dimensions]
group_kwargs = dict(util.get_param_values(self), **kwargs)
group_kwargs['kdims'] = [self.get_dimension(d) for d in group_dims]
kdims = [self.get_dimension(d) for d in group_dims]
group_kwargs = dict(util.get_param_values(self), kdims=kdims)
group_kwargs.update(kwargs)
drop_dim = len(kdims) != len(group_kwargs['kdims'])
def load_subset(*args):
constraint = dict(zip(dim_names, args))
group = self.select(**constraint)
if np.isscalar(group):
return group_type(([group],), group=self.group,
label=self.label, vdims=self.vdims)
return group_type(group.reindex(group_dims), **group_kwargs)
data = group.reindex(group_dims)
if drop_dim and self.interface.gridded:
data = data.columns()
return group_type(data, **group_kwargs)
dynamic_dims = [d(values=list(self.interface.values(self, d.name, False)))
for d in dimensions]
return DynamicMap(load_subset, kdims=dynamic_dims)
Expand Down
9 changes: 8 additions & 1 deletion holoviews/core/data/grid.py
Expand Up @@ -188,13 +188,20 @@ def groupby(cls, dataset, dim_names, container_type, group_type, **kwargs):
group_kwargs['kdims'] = kdims
group_kwargs.update(kwargs)

drop_dim = len(group_kwargs['kdims']) != len(kdims)

# Find all the keys along supplied dimensions
keys = [dataset.data[d.name] for d in dimensions]

# Iterate over the unique entries applying selection masks
grouped_data = []
for unique_key in zip(*util.cartesian_product(keys)):
group_data = cls.select(dataset, **dict(zip(dim_names, unique_key)))
select = dict(zip(dim_names, unique_key))
if drop_dim:
group_data = dataset.select(**select)
group_data = group_data if np.isscalar(group_data) else group_data.columns()
else:
group_data = cls.select(dataset, **select)
if np.isscalar(group_data):
group_data = {dataset.vdims[0].name: np.atleast_1d(group_data)}
for dim, v in zip(dim_names, unique_key):
Expand Down
18 changes: 15 additions & 3 deletions holoviews/core/data/iris.py
Expand Up @@ -11,6 +11,7 @@
from .interface import Interface
from .grid import GridInterface
from ..dimension import Dimension
from ..element import Element
from ..ndmapping import (NdMapping, item_check, sorted_context)
from ..spaces import HoloMap
from .. import util
Expand Down Expand Up @@ -178,14 +179,25 @@ def groupby(cls, dataset, dims, container_type=HoloMap, group_type=None, **kwarg
constraints = [d.name for d in dims]
slice_dims = [d for d in dataset.kdims if d not in dims]

# Update the kwargs appropriately for Element group types
group_kwargs = {}
group_type = dict if group_type == 'raw' else group_type
if issubclass(group_type, Element):
group_kwargs.update(util.get_param_values(dataset))
group_kwargs['kdims'] = slice_dims
group_kwargs.update(kwargs)

drop_dim = len(group_kwargs['kdims']) != len(slice_dims)

unique_coords = product(*[cls.values(dataset, d, expanded=False)
for d in dims])
data = []
for key in unique_coords:
constraint = iris.Constraint(**dict(zip(constraints, key)))
cube = dataset.clone(dataset.data.extract(constraint),
new_type=group_type,
**dict(kwargs, kdims=slice_dims))
extracted = dataset.data.extract(constraint)
if drop_dim:
extracted = group_type(extracted, kdims=slice_dims).columns()
cube = group_type(extracted, **group_kwargs)
data.append((key, cube))
if issubclass(container_type, NdMapping):
with item_check(False), sorted_context(False):
Expand Down
21 changes: 13 additions & 8 deletions holoviews/core/data/xarray.py
Expand Up @@ -105,19 +105,26 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
kdims=element_dims)
group_kwargs.update(kwargs)

drop_dim = len(group_kwargs['kdims']) != len(element_dims)

# XArray 0.7.2 does not support multi-dimensional groupby
# Replace custom implementation when
# https://github.com/pydata/xarray/pull/818 is merged.
group_by = [d.name for d in index_dims]
data = []
if len(dimensions) == 1:
data = [(k, group_type(v, **group_kwargs)) for k, v in
dataset.data.groupby(index_dims[0].name)]
for k, v in dataset.data.groupby(index_dims[0].name):
if drop_dim:
v = v.to_dataframe().reset_index()
data.append((k, group_type(v, **group_kwargs)))
else:
unique_iters = [cls.values(dataset, d, False) for d in group_by]
indexes = zip(*util.cartesian_product(unique_iters))
data = [(k, group_type(dataset.data.sel(**dict(zip(group_by, k))),
**group_kwargs))
for k in indexes]
for k in indexes:
sel = dataset.data.sel(**dict(zip(group_by, k)))
if drop_dim:
sel = sel.to_dataframe().reset_index()
data.append((k, group_type(sel, **group_kwargs)))

if issubclass(container_type, NdMapping):
with item_check(False), sorted_context(False):
Expand Down Expand Up @@ -233,10 +240,8 @@ def length(cls, dataset):

@classmethod
def dframe(cls, dataset, dimensions):
dimensions = [dataset.get_dimension(d, strict=True).name
for d in dimensions]
if dimensions:
return dataset.reindex(columns=dimensions)
return dataset.reindex(columns=dimensions).data.to_dataframe().reset_index(dimensions)
else:
return dataset.data.to_dataframe().reset_index(dimensions)

Expand Down
36 changes: 36 additions & 0 deletions tests/testdataset.py
Expand Up @@ -23,6 +23,20 @@
dd = None


class DatatypeContext(object):
    """
    Context manager that temporarily overrides the global ``Dataset.datatype``
    priority list, restoring the previous value on exit.

    Useful in tests that need a Dataset constructed with a specific set of
    interfaces without leaking that setting into other tests.
    """

    def __init__(self, datatypes):
        # Datatype priority list to install while the context is active.
        self.datatypes = datatypes
        self._old_datatypes = None

    def __enter__(self):
        # Save the current global setting so __exit__ can restore it.
        self._old_datatypes = Dataset.datatype
        Dataset.datatype = self.datatypes
        # Return self so `with DatatypeContext(...) as ctx:` binds the
        # manager instead of None (the original returned None implicitly).
        return self

    def __exit__(self, *args):
        # Always restore, even if the body raised; returning None (falsy)
        # lets any in-flight exception propagate.
        Dataset.datatype = self._old_datatypes


class HomogeneousColumnTypes(object):
"""
Tests for data formats that require all dataset to have the same
Expand Down Expand Up @@ -593,6 +607,8 @@ class GridDatasetTest(HomogeneousColumnTypes, ComparisonTestCase):
Test of the Grid array interface
"""

datatype = 'grid'

def setUp(self):
self.restore_datatype = Dataset.datatype
Dataset.datatype = ['grid']
Expand Down Expand Up @@ -845,12 +861,30 @@ def test_dataset_groupby_multiple_dims(self):
for c, d in keys:
self.assertEqual(grouped[c, d], dataset.select(c=c, d=d).reindex(['a', 'b']))

def test_dataset_groupby_drop_dims(self):
    """Grouping a 3D gridded dataset over 'y' while keeping only 'x'
    should drop the 'z' dimension, flattening it into the value array."""
    values = np.random.rand(3, 20, 10)
    data = {'x': range(10), 'y': range(20), 'z': range(3), 'Val': values}
    dataset = Dataset(data, kdims=['x', 'y', 'z'], vdims=['Val'])
    datatypes = [self.datatype, 'columns', 'dataframe']
    with DatatypeContext(datatypes):
        grouped = dataset.to(Dataset, kdims=['x'], vdims=['Val'], groupby='y')
    # The last group corresponds to the final 'y' slice of the array.
    expected = values[:, -1, :].T.flatten()
    self.assertEqual(grouped.last['Val'], expected)

def test_dataset_groupby_drop_dims_dynamic(self):
    """Same as the non-dynamic variant, but the groupby returns a
    DynamicMap which is evaluated at the last 'y' coordinate (19)."""
    values = np.random.rand(3, 20, 10)
    data = {'x': range(10), 'y': range(20), 'z': range(3), 'Val': values}
    dataset = Dataset(data, kdims=['x', 'y', 'z'], vdims=['Val'])
    datatypes = [self.datatype, 'columns', 'dataframe']
    with DatatypeContext(datatypes):
        dmap = dataset.to(Dataset, kdims=['x'], vdims=['Val'],
                          groupby='y', dynamic=True)
        # Evaluate the dynamic callback while the datatype override is
        # still active, then compare against the final 'y' slice.
        expected = values[:, -1, :].T.flatten()
        self.assertEqual(dmap[19]['Val'], expected)


class IrisDatasetTest(GridDatasetTest):
"""
Tests for Iris interface
"""

datatype = 'cube'

def setUp(self):
import iris
self.restore_datatype = Dataset.datatype
Expand Down Expand Up @@ -901,6 +935,8 @@ class XArrayDatasetTest(GridDatasetTest):
Tests for XArray interface
"""

datatype = 'xarray'

def setUp(self):
import xarray
self.restore_datatype = Dataset.datatype
Expand Down

0 comments on commit 336ccf6

Please sign in to comment.