From b38197b32e04b3edb68e4e177feda7e8b61dcb68 Mon Sep 17 00:00:00 2001 From: Leena Gupte Date: Wed, 28 Oct 2015 09:36:31 +0000 Subject: [PATCH] Format timestamp before creating id field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Slight differences in the way the _timestamp cell is formatted (in file uploads) result in duplicated data in the dataset. What’s happening is this: _timestamp is listed as an auto_id for the data set. All auto_id fields and values are joined together and then base64-encoded and stored in the _id field. AFTER the _id field has been created for the data record, the _timestamp is parsed and converted to UTC format. This means that the date format of the timestamp can differ from the date format of the timestamp in the _id. The _id is used as a unique key for the data set. The same _timestamp with a different date format will generate a different _id value and will be added as a new row in backdrop. The fix was just to move converting the _timestamp to UTC format to BEFORE the base64-encoded _id is generated. 
--- backdrop/core/data_set.py | 10 +++++----- tests/core/test_data_set.py | 9 +++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/backdrop/core/data_set.py b/backdrop/core/data_set.py index 60c0931e..b0bcadad 100644 --- a/backdrop/core/data_set.py +++ b/backdrop/core/data_set.py @@ -84,17 +84,17 @@ def store(self, records): # doesn't change data, no need to return records errors += validate_record_schema(record, self.config['schema']) + # Parse timestamps + records, timestamp_errors = separate_errors_and_records( + map(parse_timestamps, records)) + errors += timestamp_errors + # Add auto-id keys records, auto_id_errors = add_auto_ids( records, self.config.get('auto_ids', None)) errors += auto_id_errors - # Parse timestamps - records, timestamp_errors = separate_errors_and_records( - map(parse_timestamps, records)) - errors += timestamp_errors - # Custom record validations # doesn't change data, no need to return records errors += filter(None, map(validate_record, records)) diff --git a/tests/core/test_data_set.py b/tests/core/test_data_set.py index 13bad40e..8fa2b4a6 100644 --- a/tests/core/test_data_set.py +++ b/tests/core/test_data_set.py @@ -219,6 +219,15 @@ def test_store_does_not_get_auto_id_type_error_due_to_datetime( assert_that(add_period_keys_patch.called, is_(False)) assert_that(save_record_patch.called, is_(False)) + def test_store_parses_timestamp_to_utc_before_generating_auto_id(self): + self.setup_config({'auto_ids': ['_timestamp']}) + self.data_set.store([{"_timestamp": "2012-12-12T00:00:00Z"}]) + + self.mock_storage.save_record.assert_called_with( + 'test_data_set', match(has_entry( + '_id', + 'MjAxMi0xMi0xMiAwMDowMDowMCswMDowMA=='))) + class TestDataSet_execute_query(BaseDataSetTest):