Skip to content

Commit

Permalink
Merge d08de32 into de0a124
Browse files Browse the repository at this point in the history
  • Loading branch information
philippjfr committed Oct 23, 2018
2 parents de0a124 + d08de32 commit 0067418
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 7 deletions.
3 changes: 2 additions & 1 deletion holoviews/core/data/__init__.py
Expand Up @@ -50,7 +50,8 @@

if 'array' not in datatypes:
datatypes.append('array')

if 'multitabular' not in datatypes:
datatypes.append('multitabular')


def concat(datasets, datatype=None):
Expand Down
5 changes: 4 additions & 1 deletion holoviews/core/data/dictionary.py
Expand Up @@ -43,7 +43,10 @@ def init(cls, eltype, data, kdims, vdims):
vdims = eltype.vdims

dimensions = [dimension_name(d) for d in kdims + vdims]
if isinstance(data, tuple):
if (isinstance(data, list) and all(isinstance(d, dict) for d in data) and
not all(c in d for d in data for c in dimensions)):
raise ValueError('DictInterface could not find specified dimensions in the data.')
elif isinstance(data, tuple):
data = {d: v for d, v in zip(dimensions, data)}
elif util.is_dataframe(data) and all(d in data for d in dimensions):
data = {d: data[d] for d in dimensions}
Expand Down
48 changes: 44 additions & 4 deletions holoviews/core/data/multipath.py
@@ -1,6 +1,8 @@
import numpy as np

from ..util import max_range
from .. import util
from ..element import Element
from ..ndmapping import NdMapping, item_check, sorted_context
from .dictionary import DictInterface
from .interface import Interface, DataError

Expand Down Expand Up @@ -110,7 +112,7 @@ def range(cls, dataset, dim):
for d in dataset.data:
ds.data = d
ranges.append(ds.interface.range(ds, dim))
return max_range(ranges)
return util.max_range(ranges)


@classmethod
Expand Down Expand Up @@ -177,7 +179,45 @@ def aggregate(cls, dataset, dimensions, function, **kwargs):

@classmethod
def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
    """Group the multi-path dataset along the supplied dimensions.

    Every groupby dimension must be scalar per path (one value for the
    whole path); paths sharing the same key tuple are collected into a
    single group of type ``group_type`` and returned wrapped in
    ``container_type``.

    Raises:
        ValueError: if any supplied dimension varies within a path.
    """
    # Resolve dimension specs to Dimension objects and compute the key
    # dimensions each group will retain.
    dimensions = [dataset.get_dimension(d) for d in dimensions]
    kdims = [kdim for kdim in dataset.kdims if kdim not in dimensions]

    # Update the kwargs appropriately for Element group types
    group_kwargs = {}
    group_type = list if group_type == 'raw' else group_type
    if issubclass(group_type, Element):
        group_kwargs.update(util.get_param_values(dataset))
        group_kwargs['kdims'] = kdims
    group_kwargs.update(kwargs)

    # Find all the keys along supplied dimensions
    values = []
    for d in dimensions:
        if not cls.isscalar(dataset, d):
            # Grouping on a dimension that varies within a path is
            # ambiguous, so reject it explicitly.
            raise ValueError('MultiInterface can only apply groupby '
                             'on scalar dimensions, %s dimension '
                             'is not scalar' % d)
        vals = cls.values(dataset, d, False, True)
        values.append(vals)
    values = tuple(values)

    # Iterate over the unique entries applying selection masks
    from . import Dataset
    ds = Dataset(values, dimensions)
    # One key tuple per path; use len(dataset.data) rather than the
    # loop variable leaked from the values loop above, which is
    # undefined when no groupby dimensions were supplied.
    keys = (tuple(vals[i] for vals in values)
            for i in range(len(dataset.data)))
    grouped_data = []
    for unique_key in util.unique_iterator(keys):
        # Boolean mask selecting the paths that match this key tuple.
        mask = ds.interface.select_mask(ds, dict(zip(dimensions, unique_key)))
        selection = [data for data, m in zip(dataset.data, mask) if m]
        group_data = group_type(selection, **group_kwargs)
        grouped_data.append((unique_key, group_data))

    if issubclass(container_type, NdMapping):
        # Suppress key validation/sorting while constructing the mapping.
        with item_check(False), sorted_context(False):
            return container_type(grouped_data, kdims=dimensions)
    else:
        return container_type(grouped_data)

@classmethod
def sample(cls, dataset, samples=[]):
Expand Down Expand Up @@ -293,7 +333,7 @@ def split(cls, dataset, start, end, datatype, **kwargs):
def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
if not len(dataset.data):
return dataset.data
elif values is None or np.isscalar(values):
elif values is None or util.isscalar(values):
values = [values]*len(dataset.data)
elif not len(values) == len(dataset.data):
raise ValueError('Added dimension values must be scalar or '
Expand Down
3 changes: 2 additions & 1 deletion holoviews/core/data/pandas.py
Expand Up @@ -126,7 +126,8 @@ def init(cls, eltype, data, kdims, vdims):
if not cls.expanded(data):
raise ValueError('PandasInterface expects data to be of uniform shape.')
data = pd.DataFrame(dict(zip(columns, data)), columns=columns)
elif isinstance(data, dict) and any(c not in data for c in columns):
elif ((isinstance(data, dict) and any(c not in data for c in columns)) or
(isinstance(data, list) and any(isinstance(d, dict) and c not in d for d in data for c in columns))):
raise ValueError('PandasInterface could not find specified dimensions in the data.')
else:
data = pd.DataFrame(data, columns=columns)
Expand Down
28 changes: 28 additions & 0 deletions holoviews/tests/core/data/testmultiinterface.py
Expand Up @@ -5,6 +5,7 @@
from unittest import SkipTest

import numpy as np
from holoviews.core.data import Dataset
from holoviews.core.data.interface import DataError
from holoviews.element import Path
from holoviews.element.comparison import ComparisonTestCase
Expand Down Expand Up @@ -155,3 +156,30 @@ def test_multi_split_empty(self):
def test_multi_values_empty(self):
    # An empty multi-path dataset should still yield an empty value array.
    empty_path = Path([], kdims=['x', 'y'], datatype=['multitabular'])
    self.assertEqual(empty_path.dimension_values(0), np.array([]))

def test_multi_dict_groupby(self):
    # Two dict-backed paths, each carrying a scalar 'y' value to group on.
    paths = [{'x': np.arange(j, j+2), 'y': j} for j in range(2)]
    grouped = Dataset(paths, kdims=['x', 'y'],
                      datatype=['multitabular']).groupby('y')
    for idx, (key, group) in enumerate(grouped.items()):
        self.assertEqual(key, paths[idx]['y'])
        self.assertEqual(group, Dataset([paths[idx]], kdims=['x']))

def test_multi_dict_groupby_non_scalar(self):
    # Grouping on 'x' must fail: it varies within each path.
    paths = [{'x': np.arange(j, j+2), 'y': j} for j in range(2)]
    mds = Dataset(paths, kdims=['x', 'y'], datatype=['multitabular'])
    self.assertRaises(ValueError, mds.groupby, 'x')

def test_multi_array_groupby(self):
    # Two array-backed paths; column 1 ('y') is constant per path.
    paths = [np.array([(1+j, j), (2+j, j), (3+j, j)]) for j in range(2)]
    grouped = Dataset(paths, kdims=['x', 'y'],
                      datatype=['multitabular']).groupby('y')
    for idx, (key, group) in enumerate(grouped.items()):
        self.assertEqual(key, paths[idx][0, 1])
        self.assertEqual(group, Dataset([paths[idx]], kdims=['x']))

def test_multi_array_groupby_non_scalar(self):
    # Grouping on 'x' must fail: it varies within each path.
    paths = [np.array([(1+j, j), (2+j, j), (3+j, j)]) for j in range(2)]
    mds = Dataset(paths, kdims=['x', 'y'], datatype=['multitabular'])
    self.assertRaises(ValueError, mds.groupby, 'x')

0 comments on commit 0067418

Please sign in to comment.