Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
WIP: don't add the latest auto-collected datum if it has not yet been released
Browse files Browse the repository at this point in the history
Adding metric_to_period_mappings because collected data does not carry a
type, but we need the type to work out what the timestamp should be.

Perhaps collected data is never quarterly/yearly? If so, this mapping may be pointless to add.
  • Loading branch information
jcbashdown committed Jul 23, 2015
1 parent 4dd18da commit fd73afb
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 4 deletions.
50 changes: 48 additions & 2 deletions backdrop/transformers/tasks/latest_dataset_value.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,54 @@
import string

from .util import encode_id, is_latest_data
from .util import encode_id, is_latest_data, _get_read_params
from ..worker import config

from performanceplatform.client import AdminAPI
from performanceplatform.client import DataSet

# Maps a data type to the name of the field that carries its value in a
# datum of that type (e.g. a 'completion-rate' datum stores its value
# under 'rate').
data_type_to_value_mappings = {
    'completion-rate': 'rate',
    'digital-takeup': 'rate',
    'user-satisfaction-score': 'score',
}

# because collected data does not have a type
# Maps a metric name to its reporting period.
# NOTE(review): keys here use underscores ('digital_takeup') while the
# data types above use hyphens ('digital-takeup') — confirm which form
# the collected data actually uses before relying on lookups here.
metric_to_period_mappings = {
    'digital_cost_per_transaction': 'quarterly',
    'digital_takeup': 'quarterly'
    # everything else: seasonally-adjusted
}


def data_is_released(data_set_config,
                     transform,
                     latest_datum):
    """Return whether ``latest_datum`` may be posted to the aggregate set.

    Only applies when the transform writes to the
    service-aggregates/latest-dataset-value data set; for any other
    output it always returns True.  For aggregate output it queries the
    source data set for existing data of the same type around the
    datum's timestamp and returns False when that existing data is
    strictly older than ``latest_datum`` — i.e. the datum is newer than
    anything already released for its type.

    :param data_set_config: dict with 'data_group' and 'data_type' of
        the source data set to read back from.
    :param transform: transform definition; 'output' identifies the
        destination data-group/data-type.
    :param latest_datum: datum with '_timestamp' and 'type' keys.
    :return: True when the datum is safe to post, False otherwise.
    """
    targets_latest_aggregates = \
        (transform['output']['data-group'] == 'service-aggregates' and
         transform['output']['data-type'] == 'latest-dataset-value')
    # Anything not destined for the aggregate data set is always released.
    if not targets_latest_aggregates:
        return True

    data_set = DataSet.from_group_and_type(
        config.BACKDROP_READ_URL,
        data_set_config['data_group'],
        data_set_config['data_type']
    )
    generated_read_params = _get_read_params(
        {}, latest_datum['_timestamp'])

    # Checks against all of type that this is not newer than the type.
    # If there is already data for this service id we will never get here.
    # dict(mapping, **kwargs) merges without relying on list
    # concatenation of .items(), which breaks on Python 3 dict views.
    read_params = dict(
        generated_read_params,
        filter_by='type:{}'.format(latest_datum['type']))
    existing_data = data_set.get(query_parameters=read_params)
    if existing_data['data']:
        if (existing_data['data'][0]['_timestamp'] <
                latest_datum['_timestamp']):
            return False
    return True


def compute(new_data, transform, data_set_config):

Expand All @@ -19,7 +57,15 @@ def compute(new_data, transform, data_set_config):
latest_datum = new_data[0]

# Only continue if we are not back filling data.
if not is_latest_data(data_set_config, transform, latest_datum):
is_latest = is_latest_data(
data_set_config, transform, latest_datum)
if not is_latest:
pass

# This check ensures that we do not post data newer than the newest
# transactions explorer spreadsheet data for the type to
# service-aggregates/latest-dataset-values
if not data_is_released(data_set_config, transform, latest_datum):
pass

# Input data won't have a unique key for each type of value.
Expand Down
54 changes: 52 additions & 2 deletions tests/transformers/tasks/test_latest_dataset_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ def test_compute(self, mock_dashboard, mock_dataset):
}
mock_dataset.return_value = mockdata

transformed_data = compute(data, {}, {
transformed_data = compute(data, {'output': {
'data-group': 'transactions-explorer',
'data-type': 'spreadsheet'}}, {
'name': 'apply_carers_allowance_completion_rate',
'data_group': 'apply-carers-allowance',
'data_type': 'completion-rate'
Expand Down Expand Up @@ -116,7 +118,9 @@ def test_compute_old_date_range(self, mock_dashboard, mock_dataset):
}
mock_dataset.return_value = mockdata

transformed_data = compute(data, {}, {
transformed_data = compute(data, {'output': {
'data-group': 'transactions-explorer',
'data-type': 'spreadsheet'}}, {
'name': 'apply_carers_allowance_completion_rate',
'data_group': 'apply-carers-allowance',
'data_type': 'completion-rate'
Expand Down Expand Up @@ -159,3 +163,49 @@ def test_compute_old_date_period(self, mock_dashboard, mock_dataset):
})

assert_that(len(transformed_data), is_(0))

@patch("performanceplatform.client.DataSet.from_group_and_type")
@patch("performanceplatform.client.AdminAPI.get_data_set_dashboard")
def test_compute_when_new_data_not_released(
        self, mock_dashboard, mock_dataset):
    """compute() still emits the published dashboard's datum when the
    transform targets service-aggregates/latest-dataset-value and the
    existing data for the type is older than the incoming datum."""
    mock_dashboard.return_value = [
        {'published': True, 'slug': 'published'},
        {'published': False, 'slug': 'unpublished'},
    ]

    dataset_stub = Mock()
    dataset_stub.get.return_value = {'data': [{
        '_count': 1.0,
        '_end_at': '2012-01-19T00:00:00+00:00',
        '_timestamp': '2012-01-12T00:00:00+00:00',
    }]}
    mock_dataset.return_value = dataset_stub

    transform = {'output': {
        'data-group': 'service-aggregates',
        'data-type': 'latest-dataset-value',
    }}
    data_set_config = {
        'name': 'apply_carers_allowance_completion_rate',
        'data_group': 'apply-carers-allowance',
        'data_type': 'completion-rate',
    }
    transformed_data = compute(data, transform, data_set_config)

    assert_that(len(transformed_data), is_(1))
    assert_that(
        transformed_data[0]['_id'],
        is_('cHVibGlzaGVkX2NvbXBsZXRpb25fcmF0ZQ=='))
    assert_that(
        transformed_data[0]['_timestamp'],
        is_('2013-10-14T00:00:00+00:00'))
    assert_that(
        transformed_data[0]['completion_rate'], is_(0.29334396173774413))

0 comments on commit fd73afb

Please sign in to comment.