Skip to content

Commit

Permalink
Rebased BigqueryStorage on the new test package
Browse files Browse the repository at this point in the history
  • Loading branch information
roll committed Sep 24, 2020
1 parent 5cf68b5 commit 06b3e0f
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 63 deletions.
185 changes: 123 additions & 62 deletions tests/plugins/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,103 +24,164 @@


@pytest.mark.ci
def test_storage():
def test_storage_types():

# Export/Import
source = Package("data/package-storage.json")
source = Package("data/storage/types.json")
storage = source.to_bigquery(force=True, **OPTIONS)
target = Package.from_bigquery(**OPTIONS)

# Assert metadata

assert target.get_resource("article").schema == {
"fields": [
{"name": "id", "type": "integer", "constraints": {"required": True}},
{"name": "parent", "type": "integer"},
{"name": "name", "type": "string"},
{"name": "current", "type": "boolean"},
{"name": "rating", "type": "number"},
],
# primary key removal
# foreign keys removal
}
assert target.get_resource("comment").schema == {
"fields": [
{"name": "entry_id", "type": "integer", "constraints": {"required": True}},
{"name": "user_id", "type": "integer", "constraints": {"required": True}},
{"name": "comment", "type": "string"},
{"name": "note", "type": "string"}, # type fallback
],
# primary key removal
# foreign keys removal
}
assert target.get_resource("location").schema == {
"fields": [
{"name": "geojson", "type": "string"}, # type fallback
{"name": "geopoint", "type": "string"}, # type fallback
]
}
assert target.get_resource("structure").schema == {
assert target.get_resource("main").schema == {
"fields": [
{"name": "object", "type": "string"}, # type fallback
{"name": "any", "type": "string"}, # type fallback
{"name": "array", "type": "string"}, # type fallback
]
}
assert target.get_resource("temporal").schema == {
"fields": [
{"name": "boolean", "type": "boolean"},
{"name": "date", "type": "date"},
{"name": "date_year", "type": "date"}, # format removal
{"name": "datetime", "type": "datetime"},
{"name": "duration", "type": "string"}, # type fallback
{"name": "geojson", "type": "string"}, # type fallback
{"name": "geopoint", "type": "string"}, # type fallback
{"name": "integer", "type": "integer"},
{"name": "number", "type": "number"},
{"name": "object", "type": "string"}, # type fallback
{"name": "string", "type": "string"},
{"name": "time", "type": "time"},
{"name": "year", "type": "integer"}, # type downgrade
{"name": "yearmonth", "type": "string"}, # type fallback
]
],
}

# Assert data

assert target.get_resource("article").read_rows() == [
{"id": 1, "parent": None, "name": "Taxes", "current": True, "rating": 9.5},
{"id": 2, "parent": 1, "name": "中国人", "current": False, "rating": 7},
]
assert target.get_resource("comment").read_rows() == [
{"entry_id": 1, "user_id": 1, "comment": "good", "note": "note1"},
{"entry_id": 2, "user_id": 2, "comment": "bad", "note": "note2"},
]
assert target.get_resource("location").read_rows() == [
{"geojson": '{"type": "Point", "coordinates": [33, 33.33]}', "geopoint": "30,70"},
{"geojson": '{"type": "Point", "coordinates": [55, 55.55]}', "geopoint": "90,40"},
]
assert target.get_resource("structure").read_rows() == [
{"object": '{"chars": 560}', "array": '["Mike", "John"]'},
{"object": '{"chars": 970}', "array": '["Paul", "Alex"]'},
]
assert target.get_resource("temporal").read_rows() == [
assert target.get_resource("main").read_rows() == [
{
"any": "note1",
"array": '["Mike", "John"]',
"boolean": True,
"date": datetime.date(2015, 1, 1),
"date_year": datetime.date(2015, 1, 1),
"datetime": datetime.datetime(2015, 1, 1, 3, 0),
"duration": "P1Y1M",
"geojson": '{"type": "Point", "coordinates": [33, 33.33]}',
"geopoint": "30,70",
"integer": 1,
"number": 7,
"object": '{"chars": 560}',
"string": "good",
"time": datetime.time(3, 0),
"year": 2015,
"yearmonth": "2015-01",
},
]

# Cleanup storage
storage.delete_package(target.resource_names)


@pytest.mark.ci
def test_storage_integrity():
    """Round-trip the integrity fixture package through BigQuery storage.

    Exports ``data/storage/integrity.json`` with ``to_bigquery``, reads the
    package back with ``from_bigquery`` and compares the schema and rows of
    the ``main`` and ``link`` resources with the expected values. The
    expected schemas contain plain fields only: no primary key, no foreign
    keys and no field constraints.
    """

    # Export/Import
    source = Package("data/storage/integrity.json")
    storage = source.to_bigquery(force=True, **OPTIONS)
    target = Package.from_bigquery(**OPTIONS)

    try:

        # Assert metadata (main)
        assert target.get_resource("main").schema == {
            "fields": [
                # NOTE(review): this spot used to say "added required", but no
                # "required" constraint is asserted for "id" - confirm intent
                {"name": "id", "type": "integer"},
                {"name": "parent", "type": "integer"},
                {"name": "description", "type": "string"},
            ],
            # primary key removal
            # foreign keys removal
        }

        # Assert metadata (link)
        assert target.get_resource("link").schema == {
            "fields": [
                {"name": "main_id", "type": "integer"},
                {"name": "some_id", "type": "integer"},  # constraint removal
                {"name": "description", "type": "string"},  # constraint removal
            ],
            # primary key removal
            # foreign keys removal
        }

        # Assert data (main)
        assert target.get_resource("main").read_rows() == [
            {"id": 1, "parent": None, "description": "english"},
            {"id": 2, "parent": 1, "description": "中国人"},
        ]

        # Assert data (link)
        assert target.get_resource("link").read_rows() == [
            {"main_id": 1, "some_id": 1, "description": "note1"},
            {"main_id": 2, "some_id": 2, "description": "note2"},
        ]

    finally:

        # Cleanup storage (runs even when an assertion above fails, so a
        # failing run does not leave the exported tables behind)
        storage.delete_package(target.resource_names)


@pytest.mark.ci
def test_storage_constraints():

# Export/Import
source = Package("data/storage/constraints.json")
storage = source.to_bigquery(force=True, **OPTIONS)
target = Package.from_bigquery(**OPTIONS)

# Assert metadata
assert target.get_resource("main").schema == {
"fields": [
{"name": "required", "type": "string", "constraints": {"required": True}},
{"name": "minLength", "type": "string"}, # constraint removal
{"name": "maxLength", "type": "string"}, # constraint removal
{"name": "pattern", "type": "string"}, # constraint removal
{"name": "enum", "type": "string"}, # constraint removal
{"name": "minimum", "type": "integer"}, # constraint removal
{"name": "maximum", "type": "integer"}, # constraint removal
],
}

# Assert data
assert target.get_resource("main").read_rows() == [
{
"date": datetime.date(2015, 12, 31),
"date_year": datetime.date(2015, 1, 1),
"datetime": datetime.datetime(2015, 12, 31, 15, 45, 33),
"duration": "P2Y2M",
"time": datetime.time(15, 45, 33),
"year": 2015,
"yearmonth": "2015-01",
"required": "passing",
"minLength": "passing",
"maxLength": "passing",
"pattern": "passing",
"enum": "passing",
"minimum": 5,
"maximum": 5,
},
]

# Cleanup storage
storage.delete_package(target.resource_names)


# NOTE: can we add constraints support to BigQuery?
@pytest.mark.skip
@pytest.mark.parametrize(
    "field_name, cell",
    [
        pytest.param("required", ""),
        pytest.param("minLength", "bad"),
        pytest.param("maxLength", "badbadbad"),
        pytest.param("pattern", "bad"),
        pytest.param("enum", "bad"),
        pytest.param("minimum", 3),
        pytest.param("maximum", 9),
    ],
)
def test_storage_constraints_not_valid_error(field_name, cell):
    """Placeholder: always skipped, with no assertions for the listed cells yet."""


@pytest.mark.ci
def test_storage_read_resource_not_existent_error():
storage = BigqueryStorage(**OPTIONS)
Expand Down
2 changes: 1 addition & 1 deletion tests/plugins/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def test_storage_constraints():
storage.delete_package(target.resource_names)


# NOTE: can we add consratins support to Pandas?
# NOTE: can we add constraints support to Pandas?
@pytest.mark.skip
@pytest.mark.parametrize(
"field_name, cell",
Expand Down

0 comments on commit 06b3e0f

Please sign in to comment.