Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
Merge pull request #233 from alphagov/skip-blank-values
Browse files Browse the repository at this point in the history
Skip blank values
  • Loading branch information
roc committed Jan 29, 2014
2 parents 0afe054 + 4cb6163 commit e16dbc8
Show file tree
Hide file tree
Showing 24 changed files with 417 additions and 306 deletions.
6 changes: 4 additions & 2 deletions backdrop/read/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,15 @@ def fetch(bucket_config):
bucket = Bucket(db, bucket_config)

try:
result_data = bucket.query(Query.parse(request.args)).data()
query = Query.parse(request.args)
data = bucket.query(query).data()

except InvalidOperationError:
return log_error_and_respond(
bucket.name, 'invalid collect function',
400)

response = jsonify(data=result_data)
response = jsonify(data=data)

# allow requests from any origin
response.headers['Access-Control-Allow-Origin'] = '*'
Expand Down
94 changes: 57 additions & 37 deletions backdrop/read/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
from backdrop.read.response import *


def utc(dt):
return dt.replace(tzinfo=pytz.UTC)


def if_present(func, value):
"""Apply the given function to the value and return if it exists"""
if value is not None:
Expand All @@ -19,33 +15,16 @@ def if_present(func, value):
def parse_request_args(request_args):
args = dict()

args['period'] = if_present(parse_period,
request_args.get('period'))

if request_args.get('delta'):
# relative time range requested
delta = int(request_args['delta'])

date = if_present(parse_time_as_utc, request_args.get('date')) or now()
args['start_at'] = if_present(parse_time_as_utc,
request_args.get('start_at'))

period = args['period']
duration = period.delta * delta
args['end_at'] = if_present(parse_time_as_utc,
request_args.get('end_at'))

if delta > 0:
date = period.end(date)
args['start_at'] = date
args['end_at'] = date + duration
else:
date = period.start(date)
args['start_at'] = date + duration
args['end_at'] = date
else:
# absolute time range requested
args['start_at'] = if_present(parse_time_as_utc,
request_args.get('start_at'))
args['duration'] = if_present(int, request_args.get('duration'))

args['end_at'] = if_present(parse_time_as_utc,
request_args.get('end_at'))
args['period'] = if_present(parse_period,
request_args.get('period'))

def boolify(value):
return {
Expand Down Expand Up @@ -76,28 +55,65 @@ def parse_filter_by(filter_by):

return args


"""
This is the internal Query object
- Create list of attributes to build the query from
- We use delta internally, but the end user will use 'duration'
"""
_Query = namedtuple(
'_Query',
'start_at end_at filter_by period group_by sort_by limit collect'
)
['start_at', 'end_at', 'delta', 'period',
'filter_by', 'group_by', 'sort_by', 'limit', 'collect'])


class Query(_Query):
@classmethod
def create(cls,
start_at=None, end_at=None, filter_by=None, period=None,
group_by=None, sort_by=None, limit=None, collect=None):
return Query(start_at, end_at, filter_by or [], period,
group_by, sort_by, limit, collect or [])
start_at=None, end_at=None, duration=None, delta=None,
period=None, filter_by=None, group_by=None,
sort_by=None, limit=None, collect=None):
delta = None
if duration is not None:
date = start_at or end_at or now()
delta = duration if start_at else -duration
start_at, end_at = cls.__calculate_start_and_end(period, date,
delta)
return Query(start_at, end_at, delta, period,
filter_by or [], group_by, sort_by, limit, collect or [])

@classmethod
def parse(cls, request_args):
args = parse_request_args(request_args)
return Query(**args)
return Query.create(**args)

def to_mongo_query(self):
@staticmethod
def __calculate_start_and_end(period, date, delta):
    """Return the ``(start_at, end_at)`` pair covering ``delta`` periods.

    One boundary is ``period.start(date)``; the other is that value plus
    ``period.delta * delta``. The pair is sorted, so the smaller boundary
    always comes first whatever the sign of ``delta``.
    """
    span = period.delta * delta
    anchor = period.start(date)

    # Sorting copes with a negative delta, where the far end precedes anchor.
    bounds = [anchor, anchor + span]
    bounds.sort()

    return bounds[0], bounds[1]

def __skip_blank_periods(self, results, repository):
    """Re-run the query on a shifted date range when ``results`` asks for it.

    ``results.amount_to_shift(self.delta)`` gives the number of periods to
    move by. When it is zero, ``results`` is returned untouched; otherwise
    the shifted query is executed against ``repository`` and its results
    are returned instead.
    """
    shift = results.amount_to_shift(self.delta)
    if shift == 0:
        return results

    return self.get_shifted_query(shift=shift).execute(repository)

def get_shifted_query(self, shift):
    """Return a new Query whose date range is moved by ``shift`` periods.

    Both ``start_at`` and ``end_at`` are offset by ``self.period.delta *
    shift``; every other field is passed to ``Query.create`` unchanged.
    """
    fields = self._asdict()
    offset = self.period.delta * shift

    fields.update(
        start_at=fields['start_at'] + offset,
        end_at=fields['end_at'] + offset,
    )

    return Query.create(**fields)

def to_mongo_query(self):
mongo_query = {}
if self.start_at or self.end_at:
mongo_query["_timestamp"] = {}
Expand All @@ -118,6 +134,10 @@ def execute(self, repository):
result = self.__execute_period_query(repository)
else:
result = self.__execute_query(repository)

if self.delta:
result = self.__skip_blank_periods(result, repository)

return result

def __get_period_key(self):
Expand Down
35 changes: 35 additions & 0 deletions backdrop/read/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,21 @@ def create_period_group(doc, period):
return datum


def first_nonempty(data, is_reversed):
    """Locate the first entry of ``data`` whose ``'_count'`` is above zero.

    Returns that entry's offset from the start of ``data`` or, when
    ``is_reversed`` is true, the negated offset counted from the end.
    Returns 0 when no entry has a positive count.
    """
    ordered = reversed(data) if is_reversed else data

    offset = 0  # fallback when every entry is empty
    for position, datum in enumerate(ordered):
        if datum['_count'] > 0:
            offset = position
            break

    return -offset if is_reversed else offset


class SimpleData(object):
def __init__(self, cursor):
self._data = []
Expand All @@ -39,6 +54,10 @@ def __add(self, document):
def data(self):
return tuple(self._data)

def amount_to_shift(self, delta):
    """Always report a shift of 0: this response type is never shifted.

    ``delta`` is accepted but not used.
    """
    return 0


class PeriodData(object):
def __init__(self, cursor, period):
Expand Down Expand Up @@ -71,6 +90,11 @@ def __create_datum(self, doc):

return dict(datum.items() + doc.items())

def amount_to_shift(self, delta):
    """Return the offset of the first non-empty datum in this response.

    A negative ``delta`` makes ``first_nonempty`` scan from the end of the
    data, which yields a negative (or zero) offset.
    """
    return first_nonempty(self._data, delta < 0)


class GroupedData(object):
def __init__(self, cursor):
Expand All @@ -84,6 +108,10 @@ def __add(self, document):
def data(self):
return tuple(self._data)

def amount_to_shift(self, delta):
    """Always report a shift of 0: this response type is never shifted.

    ``delta`` is accepted but not used.
    """
    return 0


class PeriodGroupedData(object):
def __init__(self, cursor, period):
Expand Down Expand Up @@ -122,3 +150,10 @@ def fill_missing_periods(self, start_date, end_date, collect=None):
data=self._data[i]['values'],
default=default
)

def amount_to_shift(self, delta):
    """Return the smallest shift (by absolute value) needed by any group.

    Each group's ``'values'`` series is scanned with ``first_nonempty``
    (from the end when ``delta`` is negative) and the offset closest to
    zero is returned.

    Returns 0 when there are no groups at all, rather than letting
    ``min()`` raise ``ValueError`` on an empty sequence.
    """
    # An explicit guard is used instead of min(..., default=0) because the
    # ``default`` keyword does not exist on Python 2, which this code targets.
    if not self._data:
        return 0

    is_reversed = delta < 0

    shifts = [first_nonempty(group['values'], is_reversed)
              for group in self._data]

    return min(shifts, key=abs)
63 changes: 38 additions & 25 deletions backdrop/read/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,13 @@ def __init__(self, request_args):
self.allowed_parameters = set([
'start_at',
'end_at',
'filter_by',
'duration',
'period',
'filter_by',
'group_by',
'sort_by',
'limit',
'collect',
'date',
'delta',
])
super(ParameterValidator, self).__init__(request_args)

Expand All @@ -72,10 +71,20 @@ def validate(self, request_args, context):

class PeriodQueryValidator(Validator):
def validate(self, request_args, context):
if 'start_at' in request_args or 'end_at' in request_args:
if not ('start_at' in request_args and 'end_at' in request_args):
self.add_error("both 'start_at' and 'end_at' are required "
"for a period query")
if 'period' not in request_args:
return

if 'duration' not in request_args:
if 'start_at' not in request_args or 'end_at' not in request_args:
self.add_error("Either 'duration' or both 'start_at' and "
"'end_at' are required for a period query")

if 'group_by' not in request_args and 'limit' in request_args:
# When executing a grouped periodic query, the limit is
# applied to the list of groups rather than the time series
# inside them
self.add_error("A period query can only be limited if it is "
"grouped - please add 'group_by'")


class PositiveIntegerValidator(Validator):
Expand Down Expand Up @@ -262,31 +271,34 @@ def validate(self, request_args, context):
class RelativeTimeValidator(Validator):
def validate(self, request_args, context):

start_at = request_args.get('start_at')
end_at = request_args.get('end_at')
period = request_args.get('period')
date = request_args.get('date')
delta = request_args.get('delta')
duration = request_args.get('duration')

if (request_args.get('start_at') or request_args.get('end_at')) \
and (delta or date):
self.add_error("Absolute ('start_at' and 'end_at') and relative "
"('delta' and/or 'date') time cannot be requested "
"at the same time")
if start_at and end_at and duration:
self.add_error("Absolute and relative time cannot be requested at "
"the same time - either ask for 'start_at' and "
"'end_at', or ask for 'start_at'/'end_at' with "
"'duration'")

if date and not delta:
self.add_error("Use of 'date' requires 'delta'")
if start_at and end_at is None and duration is None:
self.add_error("Use of 'start_at' requires 'end_at' or 'duration'")

if delta:
if delta == '0':
self.add_error("'delta' must not be zero")
if not period:
self.add_error("If 'delta' is requested (for relative time), "
"'period' is required")
if end_at and start_at is None and duration is None:
self.add_error("Use of 'end_at' requires 'start_at' or 'duration'")

if delta:
if duration:
if duration == '0':
self.add_error("'duration' must not be zero")
if not period:
self.add_error("If 'duration' is requested (for relative "
"time), 'period' is required - please add a "
"period (like 'day', 'month' etc)")
try:
int(delta)
int(duration)
except ValueError:
self.add_error("'delta' is not a valid Integer")
self.add_error("'duration' is not a valid Integer")


def validate_request_args(request_args, raw_queries_allowed=False):
Expand All @@ -305,6 +317,7 @@ def validate_request_args(request_args, raw_queries_allowed=False):
SortByValidator(request_args),
GroupByValidator(request_args),
PositiveIntegerValidator(request_args, param_name='limit'),
PositiveIntegerValidator(request_args, param_name='duration'),
ParamDependencyValidator(request_args, param_name='collect',
depends_on=['group_by', 'period']),
RelativeTimeValidator(request_args),
Expand Down
2 changes: 1 addition & 1 deletion features/end_to_end.feature
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ Feature: end-to-end platform test
Given I have the data in "grouped_timestamps.json"
and I have a bucket named "flavour_events"
when I post the data to "/flavour_events"
and I go to "/flavour_events?period=week&group_by=flavour"
and I go to "/flavour_events?period=week&group_by=flavour&start_at=2013-03-18T00:00:00Z&end_at=2013-04-08T00:00:00Z"
then I should get back a status of "200"
and the JSON should have "4" result(s)
3 changes: 3 additions & 0 deletions features/fixtures/collectables.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
[
{
"_timestamp": "2013-08-05T10:10:10+00:00",
"_week_start_at": "2013-08-05T00:00:00+00:00",
"pickup": "mushroom"
},
{
"_timestamp": "2013-08-05T10:10:10+00:00",
"_week_start_at": "2013-08-05T00:00:00+00:00",
"pickup": "ring"
},
{
"_timestamp": "2013-08-12T10:10:10+00:00",
"_week_start_at": "2013-08-12T00:00:00+00:00",
"pickup": "1-up"
}
Expand Down
7 changes: 6 additions & 1 deletion features/fixtures/licensing_2.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
{
"_id": "1234",
"_timestamp": "2012-12-12T01:01:01+00:00",
"_day_start_at": "2012-12-12T00:00:00+00:00",
"_week_start_at": "2012-12-10T00:00:00+00:00",

"licence_name": "Temporary events notice",
Expand All @@ -13,6 +14,7 @@
{
"_id": "1235",
"_timestamp": "2012-12-12T01:01:01+00:00",
"_day_start_at": "2012-12-12T00:00:00+00:00",
"_week_start_at": "2012-12-10T00:00:00+00:00",

"licence_name": "Temporary events notice",
Expand All @@ -24,6 +26,7 @@
{
"_id": "1236",
"_timestamp": "2012-12-13T01:01:01+00:00",
"_day_start_at": "2012-12-13T00:00:00+00:00",
"_week_start_at": "2012-12-10T00:00:00+00:00",

"licence_name": "Temporary events notice",
Expand All @@ -35,6 +38,7 @@
{
"_id": "1237",
"_timestamp": "2012-12-14T01:01:01+00:00",
"_day_start_at": "2012-12-14T00:00:00+00:00",
"_week_start_at": "2012-12-10T00:00:00+00:00",

"licence_name": "Temporary events notice",
Expand All @@ -46,6 +50,7 @@
{
"_id": "1238",
"_timestamp": "2012-12-04T01:01:01+00:00",
"_day_start_at": "2012-12-04T00:00:00+00:00",
"_week_start_at": "2012-12-03T00:00:00+00:00",

"licence_name": "Cat herding licence",
Expand All @@ -54,4 +59,4 @@
"type": "success",
"isPaymentRequired": false
}
]
]

0 comments on commit e16dbc8

Please sign in to comment.