Skip to content

Commit

Permalink
Made groupby operation iterative
Browse files Browse the repository at this point in the history
Results in a severalfold speedup for multi-dimensional
groupby operations.
  • Loading branch information
philippjfr committed May 11, 2015
1 parent 5644a43 commit 7578c51
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 4 deletions.
7 changes: 4 additions & 3 deletions holoviews/core/ndmapping.py
Expand Up @@ -11,7 +11,7 @@

from . import traversal
from .dimension import OrderedDict, Dimension, Dimensioned, ViewableElement
from .util import unique_iterator, sanitize_identifier, dimension_sort
from .util import unique_iterator, sanitize_identifier, dimension_sort, group_select, iterative_select


class item_check(object):
Expand Down Expand Up @@ -271,8 +271,9 @@ def groupby(self, dimensions, container_type=None, group_type=None, **kwargs):
selects = unique_iterator(itemgetter(*inds)(key) if len(inds) > 1 else (key[inds[0]],)
for key in self.data.keys())
with item_check(False):
groups = [(sel, group_type(self.select(**dict(zip(dimensions, sel))).reindex(inames), **kwargs))
for sel in selects]
selects = group_select(list(selects))
groups = [(k, group_type(v.reindex(inames), **kwargs))
for k, v in iterative_select(self, dimensions, selects)]
return container_type(groups, key_dimensions=dims)


Expand Down
39 changes: 38 additions & 1 deletion holoviews/core/util.py
@@ -1,4 +1,4 @@
import sys, warnings
import sys, warnings, operator
import numbers
import itertools
import string
Expand Down Expand Up @@ -507,3 +507,40 @@ def layer_groups(ordering, length=2):
for el in ordering:
group_orderings[el[:length]].append(el)
return group_orderings


def group_select(selects, length=None, depth=None):
    """
    Given a list of key tuples to select, groups them into sensible
    chunks to avoid duplicating indexing operations.

    The keys are grouped level by level: the result is a nested
    mapping keyed on the first tuple component, then the second and
    so on, whose innermost values are lists of the full key tuples
    sharing all but their last component. Single-component keys are
    returned as a flat list.

    Parameters
    ----------
    selects : list of tuple
        The key tuples to group; all are expected to have the same
        number of components.
    length : int, optional
        Number of trailing components still to be grouped. Used by
        the recursion; defaults to ``depth``.
    depth : int, optional
        Total number of components per key. Defaults to the length
        of the first key.

    Returns
    -------
    defaultdict or list
        A nested mapping when more than one component remains to be
        grouped, otherwise a list of the key tuples. An empty input
        with no explicit ``depth`` yields an empty list.
    """
    if depth is None:
        if not selects:
            # No key available to infer the depth from, nothing to group.
            return []
        depth = len(selects[0])
    if length is None:
        length = depth

    if length <= 1:
        return list(selects)

    # Index of the key component being grouped at this level.
    getter = operator.itemgetter(depth - length)
    grouped_selects = defaultdict(dict)
    # itertools.groupby only merges adjacent items, hence the sort.
    for k, v in itertools.groupby(sorted(selects, key=getter), getter):
        grouped_selects[k] = group_select(list(v), length - 1, depth)
    return grouped_selects


def iterative_select(obj, dimensions, selects, depth=None):
    """
    Applies the grouped selections produced by group_select to obj
    one dimension at a time, so that each shared key prefix is only
    selected once.

    A dict level of selects narrows obj on the dimension for that
    level and recurses into the narrowed object; a non-dict level is
    treated as an iterable of full key tuples, each selected on the
    last dimension using its last component.

    Returns a flat list of (key tuple, selected object) pairs.
    """
    total = len(dimensions)
    if depth is None:
        depth = total

    if not isinstance(selects, dict):
        # Innermost level: only the final dimension is left to select.
        return [(key, obj.select(**{dimensions[-1]: key[-1]}))
                for key in selects]

    results = []
    for value, remaining in selects.items():
        # Narrow once on this level's dimension, then reuse the
        # narrowed object for every key sharing this prefix.
        narrowed = obj.select(**{dimensions[total - depth]: value})
        results.extend(iterative_select(narrowed, dimensions,
                                        remaining, depth - 1))
    return results

0 comments on commit 7578c51

Please sign in to comment.