From 7578c51f519bffc688531b950bb1312caf4165cd Mon Sep 17 00:00:00 2001
From: philippjfr
Date: Mon, 11 May 2015 17:12:24 +0100
Subject: [PATCH] Made groupby operation iterative
Results in a several-fold speedup for multi-dimensional
groupby operations.
---
holoviews/core/ndmapping.py | 7 ++++---
holoviews/core/util.py | 39 ++++++++++++++++++++++++++++++++++++-
2 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py
index c56c741f3b..d51963f34f 100644
--- a/holoviews/core/ndmapping.py
+++ b/holoviews/core/ndmapping.py
@@ -11,7 +11,7 @@
from . import traversal
from .dimension import OrderedDict, Dimension, Dimensioned, ViewableElement
-from .util import unique_iterator, sanitize_identifier, dimension_sort
+from .util import unique_iterator, sanitize_identifier, dimension_sort, group_select, iterative_select
class item_check(object):
@@ -271,8 +271,9 @@ def groupby(self, dimensions, container_type=None, group_type=None, **kwargs):
selects = unique_iterator(itemgetter(*inds)(key) if len(inds) > 1 else (key[inds[0]],)
for key in self.data.keys())
with item_check(False):
- groups = [(sel, group_type(self.select(**dict(zip(dimensions, sel))).reindex(inames), **kwargs))
- for sel in selects]
+ selects = group_select(list(selects))
+ groups = [(k, group_type(v.reindex(inames), **kwargs))
+ for k, v in iterative_select(self, dimensions, selects)]
return container_type(groups, key_dimensions=dims)
diff --git a/holoviews/core/util.py b/holoviews/core/util.py
index db831db973..7122601a43 100644
--- a/holoviews/core/util.py
+++ b/holoviews/core/util.py
@@ -1,4 +1,4 @@
-import sys, warnings
+import sys, warnings, operator
import numbers
import itertools
import string
@@ -507,3 +507,40 @@ def layer_groups(ordering, length=2):
for el in ordering:
group_orderings[el[:length]].append(el)
return group_orderings
+
+
+def group_select(selects, length=None, depth=None):
+ """
+ Given a list of key tuples to select, groups them into sensible
+ chunks to avoid duplicating indexing operations.
+ """
+ if length == None and depth == None:
+ length = depth = len(selects[0])
+ getter = operator.itemgetter(depth-length)
+ if length > 1:
+ selects = sorted(selects, key=getter)
+ grouped_selects = defaultdict(dict)
+ for k, v in itertools.groupby(selects, getter):
+ grouped_selects[k] = group_select(list(v), length-1, depth)
+ return grouped_selects
+ else:
+ return list(selects)
+
+
+def iterative_select(obj, dimensions, selects, depth=None):
+ """
+ Takes the output of group_select selecting subgroups iteratively,
+ avoiding duplicating select operations.
+ """
+ ndims = len(dimensions)
+ depth = depth if depth is not None else ndims
+ items = []
+ if isinstance(selects, dict):
+ for k, v in selects.items():
+ items += iterative_select(obj.select(**{dimensions[ndims-depth]: k}),
+ dimensions, v, depth-1)
+ else:
+ for s in selects:
+ items.append((s, obj.select(**{dimensions[-1]: s[-1]})))
+ return items
+