Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
Nested merge now takes a list of key combos to group by
Browse files Browse the repository at this point in the history
  • Loading branch information
nick-gravgaard committed Jul 31, 2014
1 parent ea2c43e commit a870139
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 34 deletions.
54 changes: 31 additions & 23 deletions backdrop/core/nested_merge.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from .errors import InvalidOperationError

from operator import itemgetter, add
from operator import add
import itertools


def _multi_itemgetter(*items):
"""Like operator.itemgetter, but the callable always returns
a sequence of lookup values (regardless of items' length)"""
return lambda obj: tuple(obj[item] for item in items)


def nested_merge(keys, collect, data):
if len(keys) > 1:
data = group_by(data, keys)
def group_by(data, keys):
    """Recursively group a list of result rows by successive key combinations.

    data: a list of dictionaries as returned by MongoDriver.group
    keys: a list of combinations of keys (each a list of field names) to
        group by, outermost combination first
    """
    key_combo = keys[0]

    def combo_values(doc):
        # Always a tuple, even for a single-key combo (unlike a bare
        # operator.itemgetter lookup).
        return tuple(doc[field] for field in key_combo)

    # itertools.groupby only merges adjacent rows, so sort first.
    data = sorted(data, key=combo_values)

    if len(keys) > 1:
        data = [
            dict(zip(key_combo, values),
                 _subgroup=group_by(
                     [{k: v for k, v in doc.items() if k not in key_combo}
                      for doc in rows],
                     keys[1:]))
            for values, rows in itertools.groupby(data, combo_values)
        ]

    return data


def remove_keys_from_all(groups, keys):
    """Return a copy of each group with the given keys dropped.

    groups: a list of groups (dictionaries); the originals are not modified
    keys: the keys to remove from every group
    """
    return [
        dict((field, value)
             for field, value in group.items()
             if field not in keys)
        for group in groups
    ]


def remove_keys(doc, keys):
    """Return a new document with the keys in ``keys`` removed.

    doc: the source dictionary (left unmodified)
    keys: a collection of keys to remove; a bare string is treated as a
        single key — otherwise ``k not in keys`` would do substring
        matching and silently remove unrelated keys

    >>> doc = {'a':1, 'b':2}
    >>> remove_keys(doc, ['a'])
    {'b': 2}
    >>> # Show that the original document is not affected
    >>> doc['a']
    1
    """
    if isinstance(keys, str):
        # Guard: some call sites pass a single key string (e.g.
        # apply_collect_to_group's "remove_keys(group, key)"); without
        # this, membership tests against the string match substrings.
        keys = [keys]
    return dict(
        (k, v) for k, v in doc.items() if k not in keys)


def apply_counts(groups):
Expand Down Expand Up @@ -104,7 +111,7 @@ def apply_collect_to_group(group, collect):

# remove left over collect keys
for key, _ in collect:
group = remove_key(group, key)
group = remove_keys(group, key)

# Hack in the old way
for key, method in collect:
Expand Down Expand Up @@ -203,7 +210,8 @@ def collect_reducer_mean(values):


def sort_all(data, keys):
    """Sort grouped results at every nesting level by their key combination.

    Subgroups are re-sorted in place on each result dict; the top level is
    returned as a new sorted list.
    """
    head, rest = keys[0], keys[1:]
    if rest:
        for entry in data:
            entry['_subgroup'] = sort_all(entry['_subgroup'], rest)
    return sorted(data, key=lambda doc: tuple(doc[field] for field in head))
13 changes: 7 additions & 6 deletions backdrop/core/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,25 +50,26 @@ def collect_fields(self):

@property
def group_keys(self):
    """Return a list of lists of combinations of fields that are being
    grouped on

    This is kinda coupled to how we group with Mongo but these keys
    are in the returned results and are used in the nested merge to
    create the hierarchical response.

    >>> from ..core.timeseries import WEEK
    >>> Query.create(group_by=['foo']).group_keys
    [['foo']]
    >>> Query.create(period=WEEK).group_keys
    [['_week_start_at']]
    >>> Query.create(group_by=['foo'], period=WEEK).group_keys
    [['foo'], ['_week_start_at']]
    """
    combos = []
    if self.group_by:
        # group_by is already a list of fields: one combination.
        combos.append(self.group_by)
    if self.period:
        # The period's start key forms its own single-field combination.
        combos.append([self.period.start_at_key])
    return combos

@property
Expand Down
3 changes: 2 additions & 1 deletion backdrop/core/storage/mongo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import logging
import datetime
import itertools

import pymongo
from pymongo.errors import AutoReconnect, CollectionInvalid
Expand Down Expand Up @@ -99,7 +100,7 @@ def _execute_query(self, data_set_id, query):
return self._basic_query(data_set_id, query)

def _group_query(self, data_set_id, query):
keys = query.group_keys
keys = list(itertools.chain.from_iterable(query.group_keys))
spec = get_mongo_spec(query)
collect_fields = query.collect_fields

Expand Down
11 changes: 11 additions & 0 deletions features/read_api/group.feature
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@ Feature: grouping queries for read api
and the "1st" result should be "{"authority": "Camden", "_count": 2}"
and the "2nd" result should be "{"authority": "Westminster", "_count": 4}"


Scenario: grouping by multiple keys
Given "licensing_2.json" is in "foo" data_set
when I go to "/foo?group_by=authority&group_by=licence_name"
then I should get back a status of "200"
and the JSON should have "3" results
and the "1st" result should be "{"authority": "Camden", "licence_name": "Temporary events notice", "_count": 1}"
and the "2nd" result should be "{"authority": "Westminster", "licence_name": "Cat herding licence", "_count": 1}"
and the "3rd" result should be "{"authority": "Westminster", "licence_name": "Temporary events notice", "_count": 3}"


Scenario: grouping and filtering by different keys
Given "licensing_2.json" is in "foo" data_set
when I go to "/foo?group_by=authority&filter_by=licence_name:Temporary%20events%20notice"
Expand Down
16 changes: 16 additions & 0 deletions tests/core/test_data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,22 @@ def test_month_and_group_query(self):
assert_that(data,
has_item(has_entries({"values": has_length(3)})))

def test_month_and_groups_query(self):
    """Grouping by two keys plus a month period nests one entry per month."""
    # Five raw rows over two (some_group, another_group) combinations:
    # ('val1', 'val3') spans 2 distinct months, ('val2', 'val3') spans 3.
    self.mock_storage.execute_query.return_value = [
        {'some_group': 'val1', 'another_group': 'val3', '_month_start_at': d(2013, 1, 1), '_count': 1},
        {'some_group': 'val1', 'another_group': 'val3', '_month_start_at': d(2013, 2, 1), '_count': 5},
        {'some_group': 'val2', 'another_group': 'val3', '_month_start_at': d(2013, 3, 1), '_count': 2},
        {'some_group': 'val2', 'another_group': 'val3', '_month_start_at': d(2013, 4, 1), '_count': 6},
        {'some_group': 'val2', 'another_group': 'val3', '_month_start_at': d(2013, 7, 1), '_count': 6},
    ]

    data = self.data_set.execute_query(Query.create(period=MONTH,
                                       group_by=['some_group', 'another_group']))
    # Each grouped result's "values" should hold one entry per month seen.
    assert_that(data,
                has_item(has_entries({"values": has_length(2)})))
    assert_that(data,
                has_item(has_entries({"values": has_length(3)})))

def test_month_and_group_query_with_start_and_end_at(self):
self.mock_storage.execute_query.return_value = [
{'some_group': 'val1', '_month_start_at': d(2013, 1, 1), '_count': 1},
Expand Down
8 changes: 4 additions & 4 deletions tests/core/test_nested_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_one_level_grouping_with_collect(self):
datum(name='Jack', age=[34, 34]),
datum(name='John', age=[56, 65])
]
results = nested_merge(['name'], [('age', 'mean')], data)
results = nested_merge([['name']], [('age', 'mean')], data)

assert_that(results,
contains(
Expand All @@ -46,7 +46,7 @@ def test_two_level_grouping_with_collect(self):
datum(name='James', place='Kettering', age=[43, 87], count=2),
datum(name='Jill', place='Keswick', age=[76, 32], count=2),
]
results = nested_merge(['name', 'place'], [('age', 'mean')], data)
results = nested_merge([['name'], ['place']], [('age', 'mean')], data)

assert_that(results,
contains(
Expand Down Expand Up @@ -97,7 +97,7 @@ def test_one_level_grouping(self):
datum(name='Jack', age=[34, 34]),
datum(name='John', age=[56, 65])
]
results = group_by(data, ['name'])
results = group_by(data, [['name']])

assert_that(results,
contains(
Expand All @@ -112,7 +112,7 @@ def test_two_level_grouping(self):
datum(name='James', place='Kettering', age=[43, 87], count=2),
datum(name='Jill', place='Keswick', age=[76, 32], count=2),
]
results = group_by(data, ['name', 'place'])
results = group_by(data, [['name'], ['place']])

assert_that(results,
contains(
Expand Down

0 comments on commit a870139

Please sign in to comment.