
Add ability to specify different collection methods
Methods are passed in the collect query param as field:method.
alexmuller committed Jul 2, 2013
1 parent b3a4086 commit 1dfb708
Showing 9 changed files with 156 additions and 23 deletions.
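
To make the new query syntax concrete, here is a minimal sketch of how each collect argument is turned into a (field, method) tuple, mirroring the change to backdrop/read/query.py below; the helper name and the sample values are illustrative and not part of the commit.

def parse_collect_args(collect_args):
    # Each collect argument is either "field" (which keeps the old behaviour
    # by defaulting to the "set" method) or "field:method".
    parsed = []
    for collect_arg in collect_args:
        if ':' in collect_arg:
            field, method = collect_arg.split(':')
            parsed.append((field, method))
        else:
            parsed.append((collect_arg, 'set'))
    return parsed

# e.g. /foo?group_by=type&collect=value:sum&collect=value:mean&collect=name
print(parse_collect_args(["value:sum", "value:mean", "name"]))
# [('value', 'sum'), ('value', 'mean'), ('name', 'set')]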
42 changes: 34 additions & 8 deletions backdrop/core/database.py
@@ -144,7 +144,8 @@ def _require_keys_in_query(self, keys, query):
return query

def _group(self, keys, query, sort=None, limit=None, collect=None):
results = self._mongo.group(keys, query, collect)
collect_fields = unique_collect_fields(collect)
results = self._mongo.group(keys, query, list(collect_fields))

results = nested_merge(keys, collect, results)

@@ -174,22 +175,47 @@ class InvalidSortError(ValueError):

def extract_collected_values(collect, result):
collected = {}
for collect_field in collect:
for collect_field in unique_collect_fields(collect):
collected[collect_field] = result.pop(collect_field)
return collected, result


def insert_collected_values(collected, group):
for collect_field in collected.keys():
if collect_field not in group:
group[collect_field] = set()
group[collect_field].update(collected[collect_field])
group[collect_field] = []
group[collect_field] += collected[collect_field]


def convert_collected_values_to_list(collect, groups):
def apply_collection_methods(collect, groups):
for group in groups:
for collected_field in collect:
group[collected_field] = sorted(list(group[collected_field]))
for collect_field, collect_method in collect:
collect_key = '{0}:{1}'.format(collect_field, collect_method)
group[collect_key] = apply_collection_method(
group[collect_field], collect_method)
for collect_field in unique_collect_fields(collect):
del group[collect_field]
# This is to provide backwards compatibility with earlier interface
if (collect_field, 'set') in collect:
group[collect_field] = group['{0}:set'.format(collect_field)]


def apply_collection_method(collected_data, collect_method):
if "sum" == collect_method:
return sum(collected_data)
elif "count" == collect_method:
return len(collected_data)
elif "set" == collect_method:
return sorted(list(set(collected_data)))
elif "mean" == collect_method:
return sum(collected_data) / float(len(collected_data))
else:
raise ValueError("Unknown collection method")


def unique_collect_fields(collect):
"""Return the unique set of field names to collect."""
return set([collect_field for collect_field, _ in collect])


def nested_merge(keys, collect, results):
@@ -201,7 +227,7 @@ def nested_merge(keys, collect, results):

insert_collected_values(collected, group)

convert_collected_values_to_list(collect, groups)
apply_collection_methods(collect, groups)
return groups


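As a rough end-to-end illustration of the helpers added above, the sketch below applies two collection methods to a single group; apply_collection_method is copied in (in reduced form) so the snippet runs on its own, and the sample data is invented to reproduce the 27 and 6.75 asserted in the feature test further down.

def apply_collection_method(collected_data, collect_method):
    # Reduced copy of the new helper, for illustration only.
    if collect_method == "sum":
        return sum(collected_data)
    elif collect_method == "count":
        return len(collected_data)
    elif collect_method == "set":
        return sorted(set(collected_data))
    elif collect_method == "mean":
        return sum(collected_data) / float(len(collected_data))
    raise ValueError("Unknown collection method")

group = {"type": "wild", "value": [2, 5, 8, 12]}
for field, method in [("value", "sum"), ("value", "mean")]:
    key = "{0}:{1}".format(field, method)
    group[key] = apply_collection_method(group[field], method)
del group["value"]  # the raw collected list is dropped; keyed results remain
print(group)  # e.g. {'type': 'wild', 'value:sum': 27, 'value:mean': 6.75}

Note that when the method is "set", apply_collection_methods also copies the result back under the bare field name, which is why the nested_merge test later in this commit expects both 'b:set' and 'b'.
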
7 changes: 6 additions & 1 deletion backdrop/read/query.py
@@ -37,7 +37,12 @@ def parse_request_args(request_args):

args['limit'] = if_present(int, request_args.get('limit'))

args['collect'] = request_args.getlist('collect')
args['collect'] = []
for collect_arg in request_args.getlist('collect'):
if ':' in collect_arg:
args['collect'].append(tuple(collect_arg.split(':')))
else:
args['collect'].append((collect_arg, 'set'))

return args

5 changes: 5 additions & 0 deletions backdrop/read/validation.py
@@ -176,6 +176,11 @@ def validate(self, request_args, context):
validate_field_value=self.validate_field_value)

def validate_field_value(self, value, request_args, _):
if ":" in value:
value, operator = value.split(":")
if operator not in ["sum", "count", "set", "mean"]:
self.add_error("Unknown collection method")

if not key_is_valid(value):
self.add_error('Cannot collect an invalid field name')
if value.startswith('_'):
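A reduced sketch of the new validation branch, using a plain error list in place of the validator's add_error; the field names here are hypothetical. The existing field-name checks (key_is_valid and the leading-underscore rule) still run on the remaining value, as the diff above shows.

VALID_COLLECTION_METHODS = ["sum", "count", "set", "mean"]

def collect_method_errors(value):
    # Only the new branch: strip an optional ":method" suffix and check it
    # against the known collection methods.
    errors = []
    if ":" in value:
        value, operator = value.split(":")
        if operator not in VALID_COLLECTION_METHODS:
            errors.append("Unknown collection method")
    return errors

print(collect_method_errors("downtime:mean"))      # []
print(collect_method_errors("downtime:infinity"))  # ['Unknown collection method']
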
8 changes: 8 additions & 0 deletions features/read_api/collect.feature
@@ -23,8 +23,16 @@ Feature: collect fields into grouped responses
when I go to "/foo?collect=authority"
then I should get back a status of "400"


Scenario: should be able to collect false values
Given "licensing_2.json" is in "foo" bucket
when I go to "/foo?group_by=licence_name&filter_by=isPaymentRequired:false&collect=isPaymentRequired"
then I should get back a status of "200"
and the "1st" result should have "isPaymentRequired" with item "false"

Scenario: should be able to perform maths on collect
Given "sort_and_limit.json" is in "foo" bucket
when I go to "/foo?group_by=type&filter_by=type:wild&collect=value:sum&collect=value:mean"
then I should get back a status of "200"
and the "1st" result should have "value:sum" with json "27"
and the "1st" result should have "value:mean" with json "6.75"
7 changes: 7 additions & 0 deletions features/steps/read_api.py
@@ -106,6 +106,13 @@ def step(context, nth, key, value):
assert_that(the_data[i][key], has_item(json.loads(value)))


@then('the "{nth}" result should have "{key}" with json "{expected_json}"')
def impl(context, nth, key, expected_json):
the_data = json.loads(context.response.data)['data']
i = parse_position(nth, the_data)
assert_that(the_data[i][key], is_(json.loads(expected_json)))


@then('the "{header}" header should be "{value}"')
def step(context, header, value):
assert_that(context.response.headers.get(header), is_(value))
19 changes: 10 additions & 9 deletions tests/core/integration/test_database_integration.py
@@ -295,32 +295,33 @@ def test_grouping_by_multiple_keys(self):
def test_grouping_with_collect(self):
self.setUpPeopleLocationData()

results = self.repo.group("person", Query.create(), None, None, ["place"])
results = self.repo.group("person", Query.create(), None, None, [("place", "set")])

assert_that(results, has_item(has_entries({
"person": "John",
"place": has_items("Kettering", "Kennington")
"place:set": has_items("Kettering", "Kennington")
})))

def test_another_grouping_with_collect(self):
self.setUpPeopleLocationData()

results = self.repo.group("place", Query.create(), None, None, ["person"])
results = self.repo.group("place", Query.create(), None, None, [("person", "set")])

assert_that(results, has_item(has_entries({
"place": "Kettering",
"person": has_items("Jack", "John")
"person:set": has_items("Jack", "John")
})))

def test_grouping_with_collect_two_fields(self):
self.setUpPeopleLocationData()

results = self.repo.group("place", Query.create(), None, None, ["person", "hair"])
results = self.repo.group("place", Query.create(), None, None,
[("person", "set"), ("hair", "set")])

assert_that(results, has_item(has_entries({
"place": "Kettering",
"person": ["Jack", "John"],
"hair": ["blond", "dark", "red"]
"person:set": ["Jack", "John"],
"hair:set": ["blond", "dark", "red"]
})))

def test_grouping_on_non_existent_keys(self):
@@ -426,12 +427,12 @@ def test_multi_group_with_collect(self):
"place",
"_week_start_at",
Query.create(),
collect=["person"]
collect=[("person", "set")]
)

assert_that(results, has_item(has_entries({
"place": "Kettering",
"person": ["Jack", "John"]
"person:set": ["Jack", "John"]
})))


52 changes: 51 additions & 1 deletion tests/core/test_database.py
@@ -3,7 +3,7 @@
from mock import Mock, patch
from pymongo.errors import AutoReconnect
from backdrop.core import database
from backdrop.core.database import Repository, InvalidSortError, MongoDriver
from backdrop.core.database import Repository, InvalidSortError, MongoDriver, apply_collection_method
from backdrop.read.query import Query
from tests.support.test_helpers import d_tz

@@ -71,6 +71,56 @@ def test_nested_merge_squashes_duplicates(self):
{'a': 2}
]))

def test_nested_merge_collected_values(self):
stub_dictionaries = [
{'a': 1, 'b': [2], 'c': 3},
{'a': 1, 'b': [1], 'c': 3},
{'a': 2, 'b': [1], 'c': 3}
]
output = database.nested_merge(['a'], [('b', 'set')], stub_dictionaries)
assert_that(output, is_([
{'a': 1, 'b:set': [1, 2], 'b': [1, 2]},
{'a': 2, 'b:set': [1], 'b': [1]}
]))

def test_nested_merge_collect_sum(self):
stub_dictionaries = [
{'a': 1, 'b': [2]},
{'a': 1, 'b': [1]},
{'a': 2, 'b': [1]}
]
output = database.nested_merge(['a'], [('b', 'sum')], stub_dictionaries)
assert_that(output, is_([
{'a': 1, 'b:sum': 3},
{'a': 2, 'b:sum': 1}
]))


class TestApplyCollectionMethod(unittest.TestCase):
def test_sum(self):
data = [2, 5, 8]
response = apply_collection_method(data, "sum")
assert_that(response, is_(15))

def test_count(self):
data = ['Sheep', 'Elephant', 'Wolf', 'Dog']
response = apply_collection_method(data, "count")
assert_that(response, is_(4))

def test_set(self):
data = ['Badger', 'Badger', 'Badger', 'Snake']
response = apply_collection_method(data, "set")
assert_that(response, is_(['Badger', 'Snake']))

def test_mean(self):
data = [13, 19, 15, 2]
response = apply_collection_method(data, "mean")
assert_that(response, is_(12.25))

def test_unknown_collection_method_raises_error(self):
self.assertRaises(ValueError,
apply_collection_method, ['foo'], "unknown")


class TestRepository(unittest.TestCase):
def setUp(self):
18 changes: 14 additions & 4 deletions tests/read/test_parse_request_args.py
@@ -103,21 +103,31 @@ def test_limit_is_parsed(self):

assert_that(args['limit'], is_(123))

def test_one_collect_is_parsed(self):
def test_one_collect_is_parsed_with_default_method(self):
request_args = MultiDict([
("collect", "some_key")
])

args = parse_request_args(request_args)

assert_that(args['collect'], is_(["some_key"]))
assert_that(args['collect'], is_([("some_key", "set")]))

def test_two_collects_are_parsed(self):
def test_two_collects_are_parsed_with_default_methods(self):
request_args = MultiDict([
("collect", "some_key"),
("collect", "some_other_key")
])

args = parse_request_args(request_args)

assert_that(args['collect'], is_(["some_key", "some_other_key"]))
assert_that(args['collect'], is_([("some_key", "set"),
("some_other_key", "set")]))

def test_one_collect_is_parsed_with_custom_method(self):
request_args = MultiDict([
("collect", "some_key:mean")
])

args = parse_request_args(request_args)

assert_that(args['collect'], is_([("some_key", "mean")]))
21 changes: 21 additions & 0 deletions tests/read/test_validation.py
@@ -306,6 +306,27 @@ def test_that_queries_with_invalid_timezone_are_disallowed(self):
assert_that(validation_result, is_invalid_with_message(
"start_at is not a valid datetime"))

def test_that_collect_queries_with_valid_methods_are_allowed(self):
valid_collection_methods = ["sum", "count", "set", "mean"]

for method in valid_collection_methods:
validation_result = validate_request_args({
'group_by': 'foo',
'collect': 'field:{0}'.format(method),
})

assert_that(validation_result, is_valid())

def test_that_collect_queries_with_invalid_method_are_disallowed(self):
validation_result = validate_request_args({
'group_by': 'foo',
'collect': 'field:infinity',
})

assert_that(validation_result, is_invalid_with_message((
"Unknown collection method"
)))


class TestValidationHelpers(TestCase):
def test_timestamp_is_valid_method(self):
