This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Merge pull request #438 from alphagov/get-latest-tx-data
Get latest tx data
leelongmore committed Dec 7, 2015
2 parents 9c5670e + c7052ed commit ad45fb6
Showing 4 changed files with 140 additions and 50 deletions.
69 changes: 51 additions & 18 deletions backdrop/transformers/tasks/latest_transaction_explorer_values.py
@@ -20,55 +20,71 @@
"type"
]

REQUIRED_FIELDS = [
"_timestamp",
]

admin_api = AdminAPI(
config.STAGECRAFT_URL,
config.STAGECRAFT_OAUTH_TOKEN)


def _get_latest_data_point(data, data_point_name):
def _get_latest_data_point(sorted_data, data_point_name):
def _use_data_point(data_point, name, ignore):
has_data = (name in data_point and data_point[name] is not None)
should_not_be_ignored = (ignore != data_point['type'])
return has_data and should_not_be_ignored
return should_not_be_ignored

name = data_point_name['name']
ignore = data_point_name['ignore']

data.sort(key=lambda item: item['_timestamp'], reverse=True)
for data_point in data:
# sorted_data should be pre sorted so
# the first returned is always the most recent
for data_point in sorted_data:
if _use_data_point(data_point, name, ignore):
return data_point
return None


def _up_to_date(latest_data_points,
latest_quarter,
latest_seasonally_adjusted):
if latest_data_points['type'] == 'seasonally-adjusted':
return latest_data_points['_timestamp'] == latest_seasonally_adjusted
else:
return latest_data_points['_timestamp'] == latest_quarter


def _get_stripped_down_data_for_data_point_name_only(
dashboard_config,
latest_data_points,
data_point_name):
data_point_name,
latest_quarter,
latest_seasonally_adjusted):
"""
Builds up backdrop ready datum for a single transaction explorer metric.
It does this by iterating through the passed in data_point_name
and all the REQUIRED_FIELDS and building up a new dict based on
and building up a new dict based on
these key: value pairings. We then loop through additional fields and add
those if present. If a REQUIRED_FIELD is not found we return None for
this data_point.
those if present. If a required_field is not found we return
a dict with a value of None for this data_point.
"""
required_fields = REQUIRED_FIELDS + [data_point_name['name']]
required_fields = [data_point_name['name']]
new_data = {}
for field in required_fields:
if field in latest_data_points:
if field in latest_data_points and _up_to_date(
latest_data_points,
latest_quarter,
latest_seasonally_adjusted):
new_data[field] = latest_data_points[field]
else:
new_data[field] = None
for field in ADDITIONAL_FIELDS:
if field in latest_data_points:
new_data[field] = latest_data_points[field]
new_data['dashboard_slug'] = dashboard_config['slug']

if latest_data_points['type'] == 'seasonally-adjusted':
new_data['_timestamp'] = latest_seasonally_adjusted
else:
new_data['_timestamp'] = latest_quarter

new_data['_id'] = encode_id(
new_data['dashboard_slug'],
data_point_name['name'])
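Read together, the reworked _get_latest_data_point and the new _up_to_date amount to a newest-first, skip-the-ignored-type, then-check-staleness rule. The snippet below is a rough standalone restatement of that rule with invented records and datetime timestamps, purely for orientation; it is not code from this module, and the helper names first_usable and is_up_to_date are made up here.

from datetime import datetime


def first_usable(sorted_points, ignore_type):
    # Return the first point whose type is not the ignored one. Assumes
    # sorted_points is already ordered newest-first, which is the contract
    # the comment in _get_latest_data_point spells out.
    for point in sorted_points:
        if point["type"] != ignore_type:
            return point
    return None


def is_up_to_date(point, latest_quarter, latest_seasonally_adjusted):
    # Mirrors the _up_to_date idea: a point only counts if it carries the
    # newest timestamp seen for its own type.
    if point["type"] == "seasonally-adjusted":
        return point["_timestamp"] == latest_seasonally_adjusted
    return point["_timestamp"] == latest_quarter


points = sorted(
    [
        {"_timestamp": datetime(2013, 4, 1), "type": "seasonally-adjusted"},
        {"_timestamp": datetime(2014, 1, 1), "type": "quarterly"},
        {"_timestamp": datetime(2012, 1, 1), "type": "quarterly"},
    ],
    key=lambda p: p["_timestamp"],
    reverse=True,
)

latest = first_usable(points, ignore_type="seasonally-adjusted")
print(latest["_timestamp"])  # 2014-01-01 00:00:00 -- the newest non-ignored point
print(is_up_to_date(latest, datetime(2014, 1, 1), datetime(2013, 4, 1)))  # True
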
@@ -93,10 +109,25 @@ def _get_dashboard_configs_with_data(ids_with_data):


def _get_data_points_for_each_tx_metric(data, transform, data_set_config):
ids_with_data = _service_ids_with_data(
data)
# This sort order should be preserved through the grouping by service id
# and then the creation of a (dashboard_config, data) tuple. That way,
# _get_latest_data_point should not need to sort itself.
data_ordered_by_timestamp = sorted(
data, key=lambda k: k['_timestamp'], reverse=True)
quarterly_data_ordered_by_timestamp = \
[datum for datum in data_ordered_by_timestamp
if datum['type'] == 'quarterly']
seasonally_adjusted_data_ordered_by_timestamp = \
[datum for datum in data_ordered_by_timestamp
if datum['type'] == 'seasonally-adjusted']
latest_quarter = quarterly_data_ordered_by_timestamp[0]['_timestamp']
latest_seasonally_adjusted = \
seasonally_adjusted_data_ordered_by_timestamp[0]['_timestamp']

ids_with_data = _service_ids_with_data(data_ordered_by_timestamp)
dashboard_configs_with_data = _get_dashboard_configs_with_data(
ids_with_data)

for data_point_name in REQUIRED_DATA_POINTS:
for dashboard_config, dashboard_data in dashboard_configs_with_data:
latest_data = _get_latest_data_point(
@@ -105,7 +136,9 @@ def _get_data_points_for_each_tx_metric(data, transform, data_set_config):
if not latest_data:
continue
datum = _get_stripped_down_data_for_data_point_name_only(
dashboard_config, latest_data, data_point_name)
dashboard_config, latest_data, data_point_name,
latest_quarter,
latest_seasonally_adjusted)
# we need to look at whether this is later than the latest
# data currently present on the output data set as
# for things like digital-takeup the transactions explorer
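The new plumbing in _get_data_points_for_each_tx_metric sorts the data once, partitions it by type, and reads the newest timestamp for each type off the head of its partition. Below is a minimal sketch of that flow with made-up records (lexicographic comparison of the ISO 8601 strings is enough here because the offsets match); the grouping by service id and the per-dashboard work that the real function goes on to do are omitted.

raw = [
    {"_timestamp": "2013-01-01T00:00:00+00:00", "type": "quarterly"},
    {"_timestamp": "2014-01-01T00:00:00+00:00", "type": "quarterly"},
    {"_timestamp": "2013-04-01T00:00:00+00:00", "type": "seasonally-adjusted"},
]

# Sort once, newest first; everything downstream relies on this ordering.
ordered = sorted(raw, key=lambda k: k["_timestamp"], reverse=True)

quarterly = [d for d in ordered if d["type"] == "quarterly"]
seasonal = [d for d in ordered if d["type"] == "seasonally-adjusted"]

# The head of each partition carries the newest timestamp for that type,
# which is what the up-to-date check compares against.
latest_quarter = quarterly[0]["_timestamp"]
latest_seasonally_adjusted = seasonal[0]["_timestamp"]

print(latest_quarter)              # 2014-01-01T00:00:00+00:00
print(latest_seasonally_adjusted)  # 2013-04-01T00:00:00+00:00
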
@@ -1,4 +1,24 @@
[
{
"_day_start_at": "2012-01-01T00:00:00+00:00",
"_hour_start_at": "2012-01-01T00:00:00+00:00",
"_id": "MjAxMi0wMS0wMSAwMDowMDowMDIwMTMtMDEtMDEgMDA6MDA6MDBiaXMtYW5udWFsLXJldHVybnM=",
"_month_start_at": "2012-01-01T00:00:00+00:00",
"_quarter_start_at": "2012-01-01T00:00:00+00:00",
"_timestamp": "2014-01-01T00:00:00+00:00",
"_updated_at": "2014-03-19T10:44:32.287000+00:00",
"_week_start_at": "2013-12-26T00:00:00+00:00",
"cost_per_transaction": null,
"digital_cost_per_transaction": 2.36,
"digital_takeup": null,
"end_at": "2013-01-01T00:00:00+00:00",
"number_of_digital_transactions": 2301214,
"number_of_transactions": 2358738,
"period": "year",
"service_id": "service-with-quarterly-not-latest",
"total_cost": null,
"type": "quarterly"
},
{
"_day_start_at": "2012-01-01T00:00:00+00:00",
"_hour_start_at": "2012-01-01T00:00:00+00:00",
@@ -124,7 +144,7 @@
"_id": "MjAxMS0wNC0wMSAwMDowMDowMDIwMTItMDQtMDEgMDA6MDA6MDBiaXMtYW5udWFsLXJldHVybnM=",
"_month_start_at": "2013-04-01T00:00:00+00:00",
"_quarter_start_at": "2013-04-01T00:00:00+00:00",
"_timestamp": "2013-04-01T00:00:00+00:00",
"_timestamp": "2013-01-01T00:00:00+00:00",
"_updated_at": "2014-03-19T10:44:32.286000+00:00",
"_week_start_at": "2013-03-28T00:00:00+00:00",
"cost_per_transaction": 5.2,
99 changes: 68 additions & 31 deletions tests/transformers/tasks/test_latest_transaction_explorer_values.py
@@ -23,25 +23,45 @@

data_to_post = [
{
"_id": encode_id('quarterly-nonsense', 'digital_cost_per_transaction'),
"_id": encode_id('quart', 'digital_cost_per_transaction'),
"_timestamp": "2014-12-12T00:00:00+00:00",
"digital_cost_per_transaction": 2.36,
"digital_cost_per_transaction": None,
"end_at": "2013-01-01T00:00:00+00:00",
"period": "year",
"service_id": "service-with-quarterly-data",
"dashboard_slug": "quarterly-nonsense",
"service_id": "service-with-quarterly-not-latest",
"dashboard_slug": "quart",
"type": "quarterly"
},
{
"_id": encode_id('quarterly-nonsense', 'digital_takeup'),
"_timestamp": "2013-12-12T00:00:00+00:00",
"digital_takeup": 0,
'_timestamp': u'2014-12-12T00:00:00+00:00',
"_id": encode_id('quart', 'digital_takeup'),
'period': u'year',
'end_at': u'2013-01-01T00:00:00+00:00',
'dashboard_slug': 'quart',
'service_id': u'service-with-quarterly-not-latest',
'digital_takeup': None,
"type": "quarterly"
},
{
"_id": encode_id('quarterly-nonsense', 'digital_cost_per_transaction'),
"_timestamp": "2014-12-12T00:00:00+00:00",
"digital_cost_per_transaction": 2.36,
"end_at": "2013-01-01T00:00:00+00:00",
"period": "year",
"service_id": "service-with-quarterly-data",
"dashboard_slug": "quarterly-nonsense",
"type": "quarterly"
},
{
'_timestamp': u'2014-12-12T00:00:00+00:00',
"_id": encode_id('quarterly-nonsense', 'digital_takeup'),
'period': u'year',
'end_at': u'2013-01-01T00:00:00+00:00',
'dashboard_slug': 'quarterly-nonsense',
'service_id': u'service-with-quarterly-data',
'digital_takeup': None,
'type': u'quarterly'
},
{
"_id": encode_id(
'quarterly-nonsense2',
@@ -55,19 +75,19 @@
"type": "quarterly"
},
{
'_timestamp': u'2014-12-12T00:00:00+00:00',
"_id": encode_id('quarterly-nonsense2', 'digital_takeup'),
"_timestamp": "2013-12-12T00:00:00+00:00",
"digital_takeup": 0,
"end_at": "2013-01-01T00:00:00+00:00",
"period": "year",
"service_id": "service-with-quarterly-data",
"dashboard_slug": "quarterly-nonsense2",
"type": "quarterly"
'period': u'year',
'end_at': u'2013-01-01T00:00:00+00:00',
'dashboard_slug': 'quarterly-nonsense2',
'service_id': u'service-with-quarterly-data',
'digital_takeup': None,
'type': u'quarterly'
},
{
"_id": encode_id('sorn', 'cost_per_transaction'),
"_timestamp": "2013-04-01T00:00:00+00:00",
"cost_per_transaction": 5.2,
"cost_per_transaction": None,
"end_at": "2012-04-01T00:00:00+00:00",
"period": "year",
"service_id": "sorn-innit",
@@ -77,7 +97,7 @@
{
"_id": encode_id('sorn', 'digital_cost_per_transaction'),
"_timestamp": "2013-04-01T00:00:00+00:00",
"digital_cost_per_transaction": 2.52,
"digital_cost_per_transaction": None,
"end_at": "2012-04-01T00:00:00+00:00",
"period": "year",
"service_id": "sorn-innit",
@@ -91,7 +111,7 @@
# the data_type is digital_takeup.
"_id": encode_id('sorn', 'digital_takeup'),
"_timestamp": "2013-04-01T00:00:00+00:00",
"digital_takeup": 0.965537995968002,
"digital_takeup": None,
"end_at": "2012-04-01T00:00:00+00:00",
"period": "year",
"service_id": "sorn-innit",
@@ -102,7 +122,7 @@
"_id": encode_id('sorn', 'number_of_digital_transactions'),
"_timestamp": "2013-04-01T00:00:00+00:00",
"end_at": "2012-04-01T00:00:00+00:00",
"number_of_digital_transactions": 2184914,
"number_of_digital_transactions": None,
"period": "year",
"service_id": "sorn-innit",
"dashboard_slug": "sorn",
@@ -115,9 +135,19 @@
"period": "year",
"service_id": "sorn-innit",
"dashboard_slug": "sorn",
"total_cost": 11767069.6,
"total_cost": None,
"type": "seasonally-adjusted"
},
{
'_timestamp': u'2013-04-01T00:00:00+00:00',
'period': u'year',
'end_at': u'2012-04-01T00:00:00+00:00',
'number_of_transactions': None,
'dashboard_slug': 'sorn',
'service_id': u'sorn-innit',
"_id": encode_id('sorn', 'number_of_transactions'),
'type': u'seasonally-adjusted'
},
{
"_id": encode_id('bis-returns', 'cost_per_transaction'),
"_timestamp": "2013-04-01T00:00:00+00:00",
@@ -148,16 +178,6 @@
"dashboard_slug": "bis-returns",
"type": "seasonally-adjusted"
},
{
"_id": encode_id('bis-returns', 'number_of_digital_transactions'),
"_timestamp": "2012-12-12T00:00:00+00:00",
"end_at": "2013-01-01T00:00:00+00:00",
"number_of_digital_transactions": 2301214,
"period": "year",
"service_id": "bis-annual-returns",
"dashboard_slug": "bis-returns",
"type": "seasonally-adjusted"
},
{
"_id": encode_id('bis-returns', 'number_of_transactions'),
"_timestamp": "2013-04-01T00:00:00+00:00",
@@ -177,6 +197,16 @@
"dashboard_slug": "bis-returns",
"total_cost": 11767069.6,
"type": "seasonally-adjusted"
},
{
'number_of_digital_transactions': None,
'_timestamp': u'2013-04-01T00:00:00+00:00',
'period': u'year',
'end_at': u'2012-04-01T00:00:00+00:00',
'dashboard_slug': 'bis-returns',
'service_id': u'bis-annual-returns',
"_id": encode_id('bis-returns', 'number_of_digital_transactions'),
'type': u'seasonally-adjusted'
}
]

@@ -196,6 +226,11 @@
'slug': 'quarterly-nonsense2'
}
]
quarterly_data_not_latest = [
{
'slug': 'quart'
}
]


class ComputeTestCase(unittest.TestCase):
@@ -218,7 +253,8 @@ def test_compute(self, mock_dashboard_finder, mock_dataset):
mock_dashboard_finder.side_effect = lambda x: {
'bis-annual-returns': bis_returns_dashboard_config,
'sorn-innit': sorn_dashboard_config,
'service-with-quarterly-data': quarterly_data_dashboard_config
'service-with-quarterly-data': quarterly_data_dashboard_config,
'service-with-quarterly-not-latest': quarterly_data_not_latest,
}.get(x, [])
transformed_data = compute(data, {'output': {
'data-group': 'transactions-explorer',
@@ -248,7 +284,8 @@ def test_compute_when_no_new_data(
mock_dashboard_finder.side_effect = lambda x: {
'bis-annual-returns': bis_returns_dashboard_config,
'sorn-innit': sorn_dashboard_config,
'service-with-quarterly-data': quarterly_data_dashboard_config
'service-with-quarterly-data': quarterly_data_dashboard_config,
'service-with-quarterly-not-latest': quarterly_data_not_latest,
}.get(x, [])
transformed_data = compute(data, {'output': {
'data-group': 'transactions-explorer',
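Both tests resolve dashboard configs per service id through a side_effect lookup that falls back to an empty list for unknown services. The snippet below shows that pattern in isolation using the standard-library unittest.mock; the exact patch target inside the transformer module is not visible in this excerpt, and the 2015-era project more likely used the standalone mock package, so treat this as an illustration of the idea rather than a copy of the test setup.

from unittest.mock import Mock

sorn_dashboard_config = [{"slug": "sorn"}]
quarterly_data_not_latest = [{"slug": "quart"}]

# side_effect turns the mock into a lookup table with a [] fallback,
# mirroring the lambdas wired onto mock_dashboard_finder above.
dashboard_finder = Mock()
dashboard_finder.side_effect = lambda service_id: {
    "sorn-innit": sorn_dashboard_config,
    "service-with-quarterly-not-latest": quarterly_data_not_latest,
}.get(service_id, [])

print(dashboard_finder("sorn-innit"))       # [{'slug': 'sorn'}]
print(dashboard_finder("unknown-service"))  # []
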

