From 7578c51f519bffc688531b950bb1312caf4165cd Mon Sep 17 00:00:00 2001
From: philippjfr
Date: Mon, 11 May 2015 17:12:24 +0100
Subject: [PATCH] Made groupby operation iterative

Results in several fold speedup for multi-dimensional groupby
operations.
---
 holoviews/core/ndmapping.py |  7 ++++---
 holoviews/core/util.py      | 50 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py
index c56c741f3b..d51963f34f 100644
--- a/holoviews/core/ndmapping.py
+++ b/holoviews/core/ndmapping.py
@@ -11,7 +11,7 @@
 
 from . import traversal
 from .dimension import OrderedDict, Dimension, Dimensioned, ViewableElement
-from .util import unique_iterator, sanitize_identifier, dimension_sort
+from .util import unique_iterator, sanitize_identifier, dimension_sort, group_select, iterative_select
 
 
 class item_check(object):
@@ -271,8 +271,9 @@ def groupby(self, dimensions, container_type=None, group_type=None, **kwargs):
         selects = unique_iterator(itemgetter(*inds)(key) if len(inds) > 1 else (key[inds[0]],)
                                   for key in self.data.keys())
         with item_check(False):
-            groups = [(sel, group_type(self.select(**dict(zip(dimensions, sel))).reindex(inames), **kwargs))
-                      for sel in selects]
+            selects = group_select(list(selects))
+            groups = [(k, group_type(v.reindex(inames), **kwargs))
+                      for k, v in iterative_select(self, dimensions, selects)]
             return container_type(groups, key_dimensions=dims)
 
 
diff --git a/holoviews/core/util.py b/holoviews/core/util.py
index db831db973..7122601a43 100644
--- a/holoviews/core/util.py
+++ b/holoviews/core/util.py
@@ -1,4 +1,4 @@
-import sys, warnings
+import sys, warnings, operator
 import numbers
 import itertools
 import string
@@ -507,3 +507,51 @@ def layer_groups(ordering, length=2):
    for el in ordering:
        group_orderings[el[:length]].append(el)
    return group_orderings
+
+
+def group_select(selects, length=None, depth=None):
+    """
+    Given a list of key tuples to select, groups them into sensible
+    chunks to avoid duplicating indexing operations.
+
+    Returns nested dictionaries with one level per leading key
+    component, whose innermost values are lists of the full key
+    tuples sharing that prefix. One-dimensional keys and an empty
+    list of selects yield a flat list. The length and depth arguments
+    track the recursion and are not meant to be supplied by callers.
+    """
+    if length is None and depth is None:
+        if not selects:
+            # Nothing to group; selects[0] below would raise IndexError
+            return []
+        length = depth = len(selects[0])
+    if length <= 1:
+        return list(selects)
+    getter = operator.itemgetter(depth-length)
+    # itertools.groupby only merges adjacent items, so sort first
+    selects = sorted(selects, key=getter)
+    return dict((k, group_select(list(v), length-1, depth))
+                for k, v in itertools.groupby(selects, getter))
+
+
+def iterative_select(obj, dimensions, selects, depth=None):
+    """
+    Takes the output of group_select selecting subgroups iteratively,
+    avoiding duplicating select operations.
+
+    Returns a list of (key, selection) tuples, where each selection
+    is built by selecting one dimension at a time, sharing the
+    intermediate selection between all keys with a common prefix.
+    """
+    ndims = len(dimensions)
+    depth = depth if depth is not None else ndims
+    items = []
+    if isinstance(selects, dict):
+        for k, v in selects.items():
+            items += iterative_select(obj.select(**{dimensions[ndims-depth]: k}),
+                                      dimensions, v, depth-1)
+    else:
+        for s in selects:
+            items.append((s, obj.select(**{dimensions[-1]: s[-1]})))
+    return items
+