From 24909dd02c47391449f1dafc8092b37ac07c81b9 Mon Sep 17 00:00:00 2001 From: Adam Kariv Date: Thu, 22 Nov 2018 13:18:15 +0200 Subject: [PATCH 1/2] Make sure proper dialect is applied to dumped data --- dataflows/processors/dumpers/file_dumper.py | 3 ++- dataflows/processors/dumpers/file_formats.py | 7 ++----- tests/test_lib.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/dataflows/processors/dumpers/file_dumper.py b/dataflows/processors/dumpers/file_dumper.py index 17e55e9..dd03e7c 100644 --- a/dataflows/processors/dumpers/file_dumper.py +++ b/dataflows/processors/dumpers/file_dumper.py @@ -25,7 +25,7 @@ def process_datapackage(self, datapackage): # Make sure all resources are proper CSVs resource: Resource = None - for resource in datapackage.resources: + for i, resource in enumerate(datapackage.resources): if self.force_format: file_format = self.forced_format else: @@ -39,6 +39,7 @@ def process_datapackage(self, datapackage): self.file_formatters[resource.name] = file_formatter self.file_formatters[resource.name].prepare_resource(resource) resource.commit() + datapackage.descriptor['resources'][i] = resource.descriptor return datapackage diff --git a/dataflows/processors/dumpers/file_formats.py b/dataflows/processors/dumpers/file_formats.py index ecf41e9..8b9d61e 100644 --- a/dataflows/processors/dumpers/file_formats.py +++ b/dataflows/processors/dumpers/file_formats.py @@ -4,8 +4,6 @@ import isodate import logging -from tableschema import Field - DATE_FORMAT = '%Y-%m-%d' DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ' @@ -34,9 +32,8 @@ def __init__(self, writer, schema): @classmethod def prepare_resource(cls, resource): - field: Field = None - for field in resource.schema.fields: - field.descriptor.update(cls.PYTHON_DIALECT.get(field.type, {})) + for field in resource.descriptor['schema']['fields']: + field.update(cls.PYTHON_DIALECT.get(field['type'], {})) def __transform_row(self, row): try: diff --git a/tests/test_lib.py b/tests/test_lib.py index 009aa7e..6a9b3bd 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -547,3 +547,20 @@ def test_load_xml(): {'publication-year': 1954, 'title': 'The Two Towers'}, {'publication-year': 1955, 'title': 'The Return of the King'} ] + +def test_save_load_dates(): + from dataflows import Flow, dump_to_path, load, set_type, printer + import datetime + + Flow( + [{'id': 1, 'ts': datetime.datetime.now()}, + {'id': 2, 'ts': datetime.datetime.now()}], + set_type('ts', type='datetime', format='%Y-%m-%d/%H:%M:%S'), + dump_to_path('data/test_save_load_dates') + ).process() + + res, _, _ = Flow( + load('data/test_save_load_dates/datapackage.json'), + printer() + ).results() + \ No newline at end of file From 0651d5b6ab3997dee0fefec7f3df21a953c59608 Mon Sep 17 00:00:00 2001 From: Adam Kariv Date: Thu, 22 Nov 2018 13:27:17 +0200 Subject: [PATCH 2/2] Fix other test --- tests/test_lib.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/test_lib.py b/tests/test_lib.py index 6a9b3bd..2eb19dc 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -464,10 +464,16 @@ def run_flow(datetime_format=None): except ValidationError: assert True - # must set format='any' to parse from datetime string - run_flow(datetime_format='any') - - out_now = datetime.datetime(_now.year, _now.month, _now.day, _now.hour, _now.minute, _now.second, tzinfo=tzutc()) + # Default is isoformat(), str() gives a slightly different format: + # >>> from datetime import datetime + # >>> n = datetime.now() + # >>> str(n) + # '2018-11-22 13:25:47.945209' + # >>> n.isoformat() + # '2018-11-22T13:25:47.945209' + run_flow(datetime_format='%Y-%m-%d %H:%M:%S.%f') + + out_now = datetime.datetime(_now.year, _now.month, _now.day, _now.hour, _now.minute, _now.second) assert Flow( load('data/dump_dates/datapackage.json'),