Merge e2a793c into 9e90c73
alexmuller committed Jul 3, 2013
2 parents 9e90c73 + e2a793c commit eea2696
Showing 12 changed files with 268 additions and 54 deletions.
19 changes: 19 additions & 0 deletions README.md
@@ -45,3 +45,22 @@ This is the oauth flow we are using to authenticate users with Signonotron2
 - **GET** (to signonotron) `/user.json` uses access token to get user data and see if they have permissions to sign in to backdrop
 4. User is now signed in
 
+## Requesting data
+
+Requests return a JSON object containing a `data` array.
+
+`GET /bucket_name` will return an array of data. Each element is an object.
+
+`GET /bucket_name?collect=score&group_by=name` will return an array. In this
+case, each element of the array is an object containing a `name` value, a
+`score` array with the scores for that name, and a `_count` value with the
+number of scores.
+
+`GET /bucket_name?filter_by=name:Foo` returns all elements with `name` equal to "Foo".
+
+Other parameters:
+
+- `start_at` (YYYY-MM-DDTHH:MM:SS+HH:MM) and `end_at` (YYYY-MM-DDTHH:MM:SS+HH:MM)
+- `period` ("week", "month")
+- `sort_by` (field)
+- `limit` (number)
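
A quick sketch of consuming the grouped form from Python 2 (matching the codebase); the host, port, bucket name and field values here are invented for illustration:

```python
import json
import urllib2

# Hypothetical local read API; "foo", "score" and "name" are made up.
url = "http://localhost:3038/foo?collect=score&group_by=name"
data = json.load(urllib2.urlopen(url))["data"]

for element in data:
    # e.g. {"name": "Alice", "score": [3, 5], "_count": 2.0}
    print element["name"], element["score"], element["_count"]
```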
92 changes: 70 additions & 22 deletions backdrop/core/database.py
@@ -49,31 +49,32 @@ def _ignore_docs_without_grouping_keys(self, keys, query):
             query[key] = {"$ne": None}
         return query
 
-    def group(self, keys, query, collect):
+    def group(self, keys, query, collect_fields):
         return self._collection.group(
             key=keys,
             condition=self._ignore_docs_without_grouping_keys(keys, query),
-            initial=self._build_accumulator_initial_state(collect),
-            reduce=self._build_reducer_function(collect)
+            initial=self._build_accumulator_initial_state(collect_fields),
+            reduce=self._build_reducer_function(collect_fields)
         )
 
-    def _build_collector_code(self, collect):
+    def _build_collector_code(self, collect_fields):
         template = "if (current.{c} !== undefined) " \
                    "{{ previous.{c}.push(current.{c}); }}"
-        code = [template.format(c=collect_me) for collect_me in collect]
+        code = [template.format(c=collect_field)
+                for collect_field in collect_fields]
         return "\n".join(code)
 
-    def _build_accumulator_initial_state(self, collect):
+    def _build_accumulator_initial_state(self, collect_fields):
         initial = {'_count': 0}
-        for collect_me in collect:
-            initial.update({collect_me: []})
+        for collect_field in collect_fields:
+            initial.update({collect_field: []})
         return initial
 
-    def _build_reducer_function(self, collect):
+    def _build_reducer_function(self, collect_fields):
         reducer_skeleton = "function (current, previous)" + \
                            "{{ previous._count++; {collectors} }}"
         reducer_code = reducer_skeleton.format(
-            collectors=self._build_collector_code(collect)
+            collectors=self._build_collector_code(collect_fields)
         )
         reducer = Code(reducer_code)
         return reducer
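
As an illustration of the three builders above, for `collect_fields = ['score']` (a made-up field) the accumulator and generated JavaScript reducer would be as follows; this is reconstructed from the templates, not captured output:

```python
# Initial accumulator from _build_accumulator_initial_state(['score']):
initial = {'_count': 0, 'score': []}

# Reducer from _build_reducer_function(['score']):
reducer_js = (
    "function (current, previous)"
    "{ previous._count++; "
    "if (current.score !== undefined) "
    "{ previous.score.push(current.score); } }"
)
# MongoDB evaluates this once per matching document, incrementing the
# group's _count and pushing each document's score onto the accumulator.
```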
@@ -143,7 +144,8 @@ def _require_keys_in_query(self, keys, query):
         return query
 
     def _group(self, keys, query, sort=None, limit=None, collect=None):
-        results = self._mongo.group(keys, query, collect)
+        collect_fields = unique_collect_fields(collect)
+        results = self._mongo.group(keys, query, list(collect_fields))
 
         results = nested_merge(keys, collect, results)
 
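For example (tuples invented), requesting two methods over the same field only needs that field fetched once from Mongo:

```python
collect = [('score', 'sum'), ('score', 'mean')]
unique_collect_fields(collect)  # => set(['score'])
```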
@@ -173,22 +175,60 @@ class InvalidSortError(ValueError):
 
 def extract_collected_values(collect, result):
     collected = {}
-    for collect_field in collect:
+    for collect_field in unique_collect_fields(collect):
         collected[collect_field] = result.pop(collect_field)
     return collected, result
 
 
 def insert_collected_values(collected, group):
     for collect_field in collected.keys():
         if collect_field not in group:
-            group[collect_field] = set()
-        group[collect_field].update(collected[collect_field])
+            group[collect_field] = []
+        group[collect_field] += collected[collect_field]
 
 
-def convert_collected_values_to_list(collect, groups):
+def apply_collection_methods(collect, groups):
     for group in groups:
-        for collected_field in collect:
-            group[collected_field] = sorted(list(group[collected_field]))
+        for collect_field, collect_method in collect:
+            if collect_method == 'default':
+                collect_keys = [collect_field, '{0}:set'.format(collect_field)]
+            else:
+                collect_keys = ['{0}:{1}'.format(collect_field,
+                                                 collect_method)]
+            for collect_key in collect_keys:
+                group[collect_key] = apply_collection_method(
+                    group[collect_field], collect_method)
+        for collect_field in unique_collect_fields(collect):
+            del group[collect_field]
+            # This is to provide backwards compatibility with earlier interface
+            if (collect_field, 'default') in collect:
+                group[collect_field] = group['{0}:set'.format(collect_field)]
+
+
+def apply_collection_method(collected_data, collect_method):
+    if "sum" == collect_method:
+        try:
+            return sum(collected_data)
+        except TypeError:
+            raise InvalidOperationError("Unable to sum that data")
+    elif "count" == collect_method:
+        return len(collected_data)
+    elif "set" == collect_method:
+        return sorted(list(set(collected_data)))
+    elif "mean" == collect_method:
+        try:
+            return sum(collected_data) / float(len(collected_data))
+        except TypeError:
+            raise InvalidOperationError("Unable to find the mean of that data")
+    elif "default" == collect_method:
+        return sorted(list(set(collected_data)))
+    else:
+        raise ValueError("Unknown collection method")
+
+
+def unique_collect_fields(collect):
+    """Return the unique set of field names to collect."""
+    return set([collect_field for collect_field, _ in collect])
 
 
 def nested_merge(keys, collect, results):
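
A worked sketch of `apply_collection_method` on made-up data:

```python
apply_collection_method([3, 9, 9, 6], "sum")    # => 27
apply_collection_method([3, 9, 9, 6], "count")  # => 4
apply_collection_method([3, 9, 9, 6], "set")    # => [3, 6, 9]
apply_collection_method([3, 9, 9, 6], "mean")   # => 6.75 (27 / 4.0)
apply_collection_method(["a", 1], "sum")        # raises InvalidOperationError
```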
@@ -200,7 +240,7 @@ def nested_merge(keys, collect, results):
 
         insert_collected_values(collected, group)
 
-    convert_collected_values_to_list(collect, groups)
+    apply_collection_methods(collect, groups)
     return groups
 
 
@@ -213,7 +253,7 @@ def _merge(groups, keys, result):
     group = _find_group(group for group in groups if group[key] == value)
     if not group:
         if is_leaf:
-            group = _new_leaf_node(key, value, result)
+            group = _new_leaf_node(key, value, result.get('_count'))
         else:
             group = _new_branch_node(key, value)
         groups.append(group)
@@ -240,10 +280,14 @@ def _new_branch_node(key, value):
     }
 
 
-def _new_leaf_node(key, value, result):
+def _new_leaf_node(key, value, count=None):
     """Create a new node that has no further sub-groups"""
-    result[key] = value
-    return result
+    r = {
+        key: value,
+    }
+    if count is not None:
+        r['_count'] = count
+    return r
 
 
 def _merge_and_sort_subgroup(group, keys, result):
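
For instance (key and values invented), the rewritten constructor builds a fresh dict instead of mutating the Mongo result, and only includes `_count` when one was supplied:

```python
_new_leaf_node('name', 'Alice', count=2.0)  # => {'name': 'Alice', '_count': 2.0}
_new_leaf_node('name', 'Bob')               # => {'name': 'Bob'}
```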
@@ -254,3 +298,7 @@ def _merge_and_sort_subgroup(group, keys, result):
 def _add_branch_node_counts(group):
     group['_count'] = sum(doc.get('_count', 0) for doc in group['_subgroup'])
     group['_group_count'] = len(group['_subgroup'])
+
+
+class InvalidOperationError(TypeError):
+    pass
15 changes: 12 additions & 3 deletions backdrop/read/api.py
@@ -12,6 +12,7 @@
 from .validation import validate_request_args
 from ..core import database, log_handler, cache_control
 from ..core.bucket import Bucket
+from ..core.database import InvalidOperationError
 
 
 def setup_logging():
@@ -69,6 +70,11 @@ def health_check():
                        message='cannot connect to database'), 500
 
 
+def log_error_and_respond(message, status_code):
+    app.logger.error(message)
+    return jsonify(status='error', message=message), status_code
+
+
 @app.route('/<bucket_name>', methods=['GET', 'OPTIONS'])
 @cache_control.set("max-age=3600, must-revalidate")
 @cache_control.etag
@@ -84,11 +90,14 @@ def query(bucket_name):
                                    raw_queries_allowed(bucket_name))
 
     if not result.is_valid:
-        app.logger.error(result.message)
-        return jsonify(status='error', message=result.message), 400
+        return log_error_and_respond(result.message, 400)
 
     bucket = Bucket(db, bucket_name)
-    result_data = bucket.query(Query.parse(request.args)).data()
 
+    try:
+        result_data = bucket.query(Query.parse(request.args)).data()
+    except InvalidOperationError:
+        return log_error_and_respond('invalid collect for that data', 400)
+
     # Taken from flask.helpers.jsonify to add JSONEncoder
     # NB. this can be removed once fix #471 works its way into a release
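A sketch of exercising the new error path from a client; the host and port are invented, while the path and message come from the feature test further down:

```python
import json
import urllib2

url = "http://localhost:3038/foo?group_by=type&collect=name:sum"
try:
    urllib2.urlopen(url)
except urllib2.HTTPError as e:
    assert e.code == 400
    # Body produced by log_error_and_respond:
    # {"status": "error", "message": "invalid collect for that data"}
    print json.load(e)["message"]
```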
7 changes: 6 additions & 1 deletion backdrop/read/query.py
@@ -37,7 +37,12 @@ def parse_request_args(request_args):
 
     args['limit'] = if_present(int, request_args.get('limit'))
 
-    args['collect'] = request_args.getlist('collect')
+    args['collect'] = []
+    for collect_arg in request_args.getlist('collect'):
+        if ':' in collect_arg:
+            args['collect'].append(tuple(collect_arg.split(':')))
+        else:
+            args['collect'].append((collect_arg, 'default'))
 
     return args
 
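Extracted as a standalone sketch (the helper name is ours, not the module's), the loop turns each `collect` argument into a `(field, method)` tuple, defaulting the method when there is no colon:

```python
def parse_collect_args(collect_args):
    # Mirrors the loop in parse_request_args, for illustration only.
    collect = []
    for collect_arg in collect_args:
        if ':' in collect_arg:
            collect.append(tuple(collect_arg.split(':')))
        else:
            collect.append((collect_arg, 'default'))
    return collect

assert parse_collect_args(['value:sum', 'name']) == [
    ('value', 'sum'), ('name', 'default')]
```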
5 changes: 5 additions & 0 deletions backdrop/read/validation.py
@@ -176,6 +176,11 @@ def validate(self, request_args, context):
             validate_field_value=self.validate_field_value)
 
     def validate_field_value(self, value, request_args, _):
+        if ":" in value:
+            value, operator = value.split(":")
+            if operator not in ["sum", "count", "set", "mean"]:
+                self.add_error("Unknown collection method")
+
         if not key_is_valid(value):
             self.add_error('Cannot collect an invalid field name')
         if value.startswith('_'):
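How the split in the validator behaves (field names invented):

```python
value = "score:mean"
if ":" in value:
    value, operator = value.split(":")
assert (value, operator) == ("score", "mean")
# A value with two or more colons, e.g. "a:b:c", would raise
# ValueError at the unpacking above.
```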
14 changes: 14 additions & 0 deletions features/read_api/collect.feature
@@ -23,8 +23,22 @@ Feature: collect fields into grouped responses
         when I go to "/foo?collect=authority"
         then I should get back a status of "400"
 
+
     Scenario: should be able to collect false values
         Given "licensing_2.json" is in "foo" bucket
         when I go to "/foo?group_by=licence_name&filter_by=isPaymentRequired:false&collect=isPaymentRequired"
         then I should get back a status of "200"
         and the "1st" result should have "isPaymentRequired" with item "false"
+
+    Scenario: should be able to perform maths on collect
+        Given "sort_and_limit.json" is in "foo" bucket
+        when I go to "/foo?group_by=type&filter_by=type:wild&collect=value:sum&collect=value:mean"
+        then I should get back a status of "200"
+        and the "1st" result should have "value:sum" with json "27"
+        and the "1st" result should have "value:mean" with json "6.75"
+
+    Scenario: should receive a nice error when performing invalid operation
+        Given "dinosaurs.json" is in "foo" bucket
+        when I go to "/foo?group_by=type&collect=name:sum"
+        then I should get back a status of "400"
+        and the error message should be "invalid collect for that data"
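
The two assertions in the maths scenario are mutually consistent; without reading the fixture they imply the filtered "wild" group holds four numeric values summing to 27:

```python
assert 27 / 4.0 == 6.75  # mean == sum / count
```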
1 change: 1 addition & 0 deletions features/read_api/group.feature
@@ -57,6 +57,7 @@ Feature: grouping queries for read api
         then I should get back a status of "200"
         and the JSON should have "1" results
         and the "1st" result should have "values" with item "{"_start_at": "2013-03-11T00:00:00+00:00", "_end_at": "2013-03-18T00:00:00+00:00", "_count": 2.0}"
+        and the "1st" result should have "values" with item "{"_start_at": "2013-03-18T00:00:00+00:00", "_end_at": "2013-03-25T00:00:00+00:00", "_count": 1.0}"
 
 
     Scenario: grouping data by time period (week) and a name that doesn't exist
13 changes: 13 additions & 0 deletions features/steps/read_api.py
@@ -106,6 +106,19 @@ def step(context, nth, key, value):
     assert_that(the_data[i][key], has_item(json.loads(value)))
 
 
+@then('the "{nth}" result should have "{key}" with json "{expected_json}"')
+def impl(context, nth, key, expected_json):
+    the_data = json.loads(context.response.data)['data']
+    i = parse_position(nth, the_data)
+    assert_that(the_data[i][key], is_(json.loads(expected_json)))
+
+
 @then('the "{header}" header should be "{value}"')
 def step(context, header, value):
     assert_that(context.response.headers.get(header), is_(value))
+
+
+@then(u'the error message should be "{expected_message}"')
+def impl(context, expected_message):
+    error_message = json.loads(context.response.data)['message']
+    assert_that(error_message, is_(expected_message))