Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
wip - don't add latest auto collected if not yet released
Browse files Browse the repository at this point in the history
Adding metric_to_period_mappings because collected data does not have a
type, but we need the type to work out what the timestamp should be.

perhaps it's never quarterly/yearly? maybe it's pointless to add in?

If so, then maybe the refactor in 350cbe2 is
  enough for the PR and we just delete these transforms.

Or does the client not use this data because it has no type? Having it in the
dataset would be fine.
  • Loading branch information
jcbashdown committed Jul 23, 2015
1 parent 4dd18da commit ee6b1b4
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 4 deletions.
50 changes: 48 additions & 2 deletions backdrop/transformers/tasks/latest_dataset_value.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,54 @@
import string

from .util import encode_id, is_latest_data
from .util import encode_id, is_latest_data, _get_read_params
from ..worker import config

from performanceplatform.client import AdminAPI
from performanceplatform.client import DataSet

# Maps a data type to the name of the field that carries its value in a
# datum (e.g. a 'completion-rate' datum stores its value under 'rate').
data_type_to_value_mappings = {
    'completion-rate': 'rate',
    'digital-takeup': 'rate',
    'user-satisfaction-score': 'score',
}

# Maps a metric name to its collection period. Needed because collected
# data does not carry a type, yet the period determines what the
# timestamp should be. Any metric not listed here is assumed to be
# 'seasonally-adjusted'.
metric_to_period_mappings = {
    'digital_cost_per_transaction': 'quarterly',
    'digital_takeup': 'quarterly',
}


def data_is_released(data_set_config,
                     transform,
                     latest_datum):
    """Return whether latest_datum may be posted to the output data set.

    Only transforms writing to service-aggregates/latest-dataset-value
    are checked; any other output is always considered released (True).
    For the aggregate output, returns False when latest_datum is newer
    than the newest already-published datum of the same type, so we do
    not publish data ahead of the source spreadsheet.
    """
    targets_latest_aggregates = (
        transform['output']['data-group'] == 'service-aggregates' and
        transform['output']['data-type'] == 'latest-dataset-value')
    if not targets_latest_aggregates:
        return True

    data_set = DataSet.from_group_and_type(
        config.BACKDROP_READ_URL,
        data_set_config['data_group'],
        data_set_config['data_type']
    )
    # Copy before adding the filter so a dict shared by the helper is
    # never mutated. (The original dict(a.items() + b.items()) merge is
    # Python-2-only: dict views don't concatenate on Python 3.)
    read_params = dict(_get_read_params({}, latest_datum['_timestamp']))

    # Checks against all data of this type that this is not newer than
    # the type. If there is already data for this service id we will
    # never get here.
    read_params['filter_by'] = 'type:{}'.format(latest_datum['type'])

    existing_data = data_set.get(query_parameters=read_params)
    if existing_data['data']:
        if (existing_data['data'][0]['_timestamp'] <
                latest_datum['_timestamp']):
            return False
    return True


def compute(new_data, transform, data_set_config):

Expand All @@ -19,7 +57,15 @@ def compute(new_data, transform, data_set_config):
latest_datum = new_data[0]

# Only continue if we are not back filling data.
if not is_latest_data(data_set_config, transform, latest_datum):
is_latest = is_latest_data(
data_set_config, transform, latest_datum)
if not is_latest:
pass

# This check ensures that we do not post data newer than the newest
# transactions explorer spreadsheet data for the type to
# service-aggregates/latest-dataset-values
if not data_is_released(data_set_config, transform, latest_datum):
pass

# Input data won't have a unique key for each type of value.
Expand Down
54 changes: 52 additions & 2 deletions tests/transformers/tasks/test_latest_dataset_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ def test_compute(self, mock_dashboard, mock_dataset):
}
mock_dataset.return_value = mockdata

transformed_data = compute(data, {}, {
transformed_data = compute(data, {'output': {
'data-group': 'transactions-explorer',
'data-type': 'spreadsheet'}}, {
'name': 'apply_carers_allowance_completion_rate',
'data_group': 'apply-carers-allowance',
'data_type': 'completion-rate'
Expand Down Expand Up @@ -116,7 +118,9 @@ def test_compute_old_date_range(self, mock_dashboard, mock_dataset):
}
mock_dataset.return_value = mockdata

transformed_data = compute(data, {}, {
transformed_data = compute(data, {'output': {
'data-group': 'transactions-explorer',
'data-type': 'spreadsheet'}}, {
'name': 'apply_carers_allowance_completion_rate',
'data_group': 'apply-carers-allowance',
'data_type': 'completion-rate'
Expand Down Expand Up @@ -159,3 +163,49 @@ def test_compute_old_date_period(self, mock_dashboard, mock_dataset):
})

assert_that(len(transformed_data), is_(0))

@patch("performanceplatform.client.DataSet.from_group_and_type")
@patch("performanceplatform.client.AdminAPI.get_data_set_dashboard")
def test_compute_when_new_data_not_released(
        self, mock_dashboard, mock_dataset):
    # One published and one unpublished dashboard for the data set.
    mock_dashboard.return_value = [
        {
            'published': True,
            'slug': 'published'
        },
        {
            'published': False,
            'slug': 'unpublished'
        }
    ]

    # Existing aggregate data is older than the incoming datum.
    dataset_stub = Mock()
    dataset_stub.get.return_value = {
        'data': [
            {
                '_count': 1.0,
                '_end_at': '2012-01-19T00:00:00+00:00',
                '_timestamp': '2012-01-12T00:00:00+00:00'
            }
        ]
    }
    mock_dataset.return_value = dataset_stub

    aggregate_transform = {'output': {
        'data-group': 'service-aggregates',
        'data-type': 'latest-dataset-value'}}
    transformed_data = compute(data, aggregate_transform, {
        'name': 'apply_carers_allowance_completion_rate',
        'data_group': 'apply-carers-allowance',
        'data_type': 'completion-rate'
    })

    assert_that(len(transformed_data), is_(1))
    assert_that(
        transformed_data[0]['_id'],
        is_('cHVibGlzaGVkX2NvbXBsZXRpb25fcmF0ZQ=='))
    assert_that(
        transformed_data[0]['_timestamp'],
        is_('2013-10-14T00:00:00+00:00'))
    assert_that(
        transformed_data[0]['completion_rate'],
        is_(0.29334396173774413))

0 comments on commit ee6b1b4

Please sign in to comment.