From 06b3e0ff6b8095486a952f6e08e333cbd85b00d1 Mon Sep 17 00:00:00 2001
From: roll
Date: Thu, 24 Sep 2020 16:26:06 +0300
Subject: [PATCH] Rebased BigqueryStorage on the new test package

---
 tests/plugins/test_bigquery.py | 185 ++++++++++++++++++++++-----------
 tests/plugins/test_pandas.py   |   2 +-
 2 files changed, 124 insertions(+), 63 deletions(-)

diff --git a/tests/plugins/test_bigquery.py b/tests/plugins/test_bigquery.py
index fb65fc102d..114ee87ade 100644
--- a/tests/plugins/test_bigquery.py
+++ b/tests/plugins/test_bigquery.py
@@ -24,96 +24,139 @@
 
 
 @pytest.mark.ci
-def test_storage():
+def test_storage_types():
 
     # Export/Import
-    source = Package("data/package-storage.json")
+    source = Package("data/storage/types.json")
     storage = source.to_bigquery(force=True, **OPTIONS)
     target = Package.from_bigquery(**OPTIONS)
 
     # Assert metadata
-
-    assert target.get_resource("article").schema == {
-        "fields": [
-            {"name": "id", "type": "integer", "constraints": {"required": True}},
-            {"name": "parent", "type": "integer"},
-            {"name": "name", "type": "string"},
-            {"name": "current", "type": "boolean"},
-            {"name": "rating", "type": "number"},
-        ],
-        # primary key removal
-        # foreign keys removal
-    }
-    assert target.get_resource("comment").schema == {
-        "fields": [
-            {"name": "entry_id", "type": "integer", "constraints": {"required": True}},
-            {"name": "user_id", "type": "integer", "constraints": {"required": True}},
-            {"name": "comment", "type": "string"},
-            {"name": "note", "type": "string"},  # type fallback
-        ],
-        # primary key removal
-        # foreign keys removal
-    }
-    assert target.get_resource("location").schema == {
-        "fields": [
-            {"name": "geojson", "type": "string"},  # type fallback
-            {"name": "geopoint", "type": "string"},  # type fallback
-        ]
-    }
-    assert target.get_resource("structure").schema == {
+    assert target.get_resource("main").schema == {
         "fields": [
-            {"name": "object", "type": "string"},  # type fallback
+            {"name": "any", "type": "string"},  # type fallback
             {"name": "array", "type": "string"},  # type fallback
-        ]
-    }
-    assert target.get_resource("temporal").schema == {
-        "fields": [
+            {"name": "boolean", "type": "boolean"},
             {"name": "date", "type": "date"},
             {"name": "date_year", "type": "date"},  # format removal
             {"name": "datetime", "type": "datetime"},
             {"name": "duration", "type": "string"},  # type fallback
+            {"name": "geojson", "type": "string"},  # type fallback
+            {"name": "geopoint", "type": "string"},  # type fallback
+            {"name": "integer", "type": "integer"},
+            {"name": "number", "type": "number"},
+            {"name": "object", "type": "string"},  # type fallback
+            {"name": "string", "type": "string"},
             {"name": "time", "type": "time"},
             {"name": "year", "type": "integer"},  # type downgrade
             {"name": "yearmonth", "type": "string"},  # type fallback
-        ]
+        ],
     }
 
     # Assert data
-
-    assert target.get_resource("article").read_rows() == [
-        {"id": 1, "parent": None, "name": "Taxes", "current": True, "rating": 9.5},
-        {"id": 2, "parent": 1, "name": "中国人", "current": False, "rating": 7},
-    ]
-    assert target.get_resource("comment").read_rows() == [
-        {"entry_id": 1, "user_id": 1, "comment": "good", "note": "note1"},
-        {"entry_id": 2, "user_id": 2, "comment": "bad", "note": "note2"},
-    ]
-    assert target.get_resource("location").read_rows() == [
-        {"geojson": '{"type": "Point", "coordinates": [33, 33.33]}', "geopoint": "30,70"},
-        {"geojson": '{"type": "Point", "coordinates": [55, 55.55]}', "geopoint": "90,40"},
-    ]
-    assert target.get_resource("structure").read_rows() == [
'["Mike", "John"]'}, - {"object": '{"chars": 970}', "array": '["Paul", "Alex"]'}, - ] - assert target.get_resource("temporal").read_rows() == [ + assert target.get_resource("main").read_rows() == [ { + "any": "note1", + "array": '["Mike", "John"]', + "boolean": True, "date": datetime.date(2015, 1, 1), "date_year": datetime.date(2015, 1, 1), "datetime": datetime.datetime(2015, 1, 1, 3, 0), "duration": "P1Y1M", + "geojson": '{"type": "Point", "coordinates": [33, 33.33]}', + "geopoint": "30,70", + "integer": 1, + "number": 7, + "object": '{"chars": 560}', + "string": "good", "time": datetime.time(3, 0), "year": 2015, "yearmonth": "2015-01", }, + ] + + # Cleanup storage + storage.delete_package(target.resource_names) + + +@pytest.mark.ci +def test_storage_integrity(): + + # Export/Import + source = Package("data/storage/integrity.json") + storage = source.to_bigquery(force=True, **OPTIONS) + target = Package.from_bigquery(**OPTIONS) + + # Assert metadata (main) + assert target.get_resource("main").schema == { + "fields": [ + # added required + {"name": "id", "type": "integer"}, + {"name": "parent", "type": "integer"}, + {"name": "description", "type": "string"}, + ], + # primary key removal + # foreign keys removal + } + + # Assert metadata (link) + assert target.get_resource("link").schema == { + "fields": [ + {"name": "main_id", "type": "integer"}, + {"name": "some_id", "type": "integer"}, # constraint removal + {"name": "description", "type": "string"}, # constraint removal + ], + # primary key removal + # foreign keys removal + } + + # Assert data (main) + assert target.get_resource("main").read_rows() == [ + {"id": 1, "parent": None, "description": "english"}, + {"id": 2, "parent": 1, "description": "中国人"}, + ] + + # Assert data (link) + assert target.get_resource("link").read_rows() == [ + {"main_id": 1, "some_id": 1, "description": "note1"}, + {"main_id": 2, "some_id": 2, "description": "note2"}, + ] + + # Cleanup storage + storage.delete_package(target.resource_names) + + +@pytest.mark.ci +def test_storage_constraints(): + + # Export/Import + source = Package("data/storage/constraints.json") + storage = source.to_bigquery(force=True, **OPTIONS) + target = Package.from_bigquery(**OPTIONS) + + # Assert metadata + assert target.get_resource("main").schema == { + "fields": [ + {"name": "required", "type": "string", "constraints": {"required": True}}, + {"name": "minLength", "type": "string"}, # constraint removal + {"name": "maxLength", "type": "string"}, # constraint removal + {"name": "pattern", "type": "string"}, # constraint removal + {"name": "enum", "type": "string"}, # constraint removal + {"name": "minimum", "type": "integer"}, # constraint removal + {"name": "maximum", "type": "integer"}, # constraint removal + ], + } + + # Assert data + assert target.get_resource("main").read_rows() == [ { - "date": datetime.date(2015, 12, 31), - "date_year": datetime.date(2015, 1, 1), - "datetime": datetime.datetime(2015, 12, 31, 15, 45, 33), - "duration": "P2Y2M", - "time": datetime.time(15, 45, 33), - "year": 2015, - "yearmonth": "2015-01", + "required": "passing", + "minLength": "passing", + "maxLength": "passing", + "pattern": "passing", + "enum": "passing", + "minimum": 5, + "maximum": 5, }, ] @@ -121,6 +164,24 @@ def test_storage(): storage.delete_package(target.resource_names) +# NOTE: can we add constraints support to BigQuery? 
+@pytest.mark.skip
+@pytest.mark.parametrize(
+    "field_name, cell",
+    [
+        ("required", ""),
+        ("minLength", "bad"),
+        ("maxLength", "badbadbad"),
+        ("pattern", "bad"),
+        ("enum", "bad"),
+        ("minimum", 3),
+        ("maximum", 9),
+    ],
+)
+def test_storage_constraints_not_valid_error(field_name, cell):
+    pass
+
+
 @pytest.mark.ci
 def test_storage_read_resource_not_existent_error():
     storage = BigqueryStorage(**OPTIONS)
diff --git a/tests/plugins/test_pandas.py b/tests/plugins/test_pandas.py
index e1fda5a62b..36d965ffd4 100644
--- a/tests/plugins/test_pandas.py
+++ b/tests/plugins/test_pandas.py
@@ -148,7 +148,7 @@ def test_storage_constraints():
     storage.delete_package(target.resource_names)
 
 
-# NOTE: can we add consratins support to Pandas?
+# NOTE: can we add constraints support to Pandas?
 @pytest.mark.skip
 @pytest.mark.parametrize(
     "field_name, cell",