Skip to content

Commit

Permalink
Merge 06b3e0f into 0d5a339
Browse files Browse the repository at this point in the history
  • Loading branch information
roll committed Sep 24, 2020
2 parents 0d5a339 + 06b3e0f commit bc74d41
Show file tree
Hide file tree
Showing 11 changed files with 986 additions and 422 deletions.
38 changes: 38 additions & 0 deletions data/storage/constraints.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"resources": [
{
"name": "main",
"schema": {
"fields": [
{"name": "required", "type": "string", "constraints": {"required": true}},
{"name": "minLength", "type": "string", "constraints": {"minLength": 4}},
{"name": "maxLength", "type": "string", "constraints": {"maxLength": 8}},
{"name": "pattern", "type": "string", "constraints": {"pattern": "passing"}},
{"name": "enum", "type": "string", "constraints": {"enum": ["passing"]}},
{"name": "minimum", "type": "integer", "constraints": {"minimum": 4}},
{"name": "maximum", "type": "integer", "constraints": {"maximum": 8}}
]
},
"data": [
[
"requried",
"minLength",
"maxLength",
"pattern",
"enum",
"minimum",
"maximum"
],
[
"passing",
"passing",
"passing",
"passing",
"passing",
5,
5
]
]
}
]
}
42 changes: 42 additions & 0 deletions data/storage/integrity.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"resources": [
{
"name": "main",
"schema": {
"fields": [
{"name": "id", "type": "integer"},
{"name": "parent", "type": "integer"},
{"name": "description", "type": "string"}
],
"primaryKey": ["id"],
"foreignKeys": [
{"fields": ["parent"], "reference": {"resource": "", "fields": ["id"]}}
]
},
"data": [
["id", "parent", "description"],
["1", "", "english"],
["2", "1", "中国人"]
]
},
{
"name": "link",
"schema": {
"fields": [
{"name": "main_id", "type": "integer"},
{"name": "some_id", "type": "integer", "constraints": {"unique": true}},
{"name": "description", "type": "string", "constraints": {"unique": true}}
],
"primaryKey": ["main_id", "some_id"],
"foreignKeys": [
{"fields": ["main_id"], "reference": {"resource": "main", "fields": ["id"]}}
]
},
"data": [
["main_id", "some_id", "description"],
["1", "1", "note1"],
["2", "2", "note2"]
]
}
]
}
65 changes: 65 additions & 0 deletions data/storage/types.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"resources": [
{
"name": "main",
"schema": {
"fields": [
{"name": "any", "type": "any"},
{"name": "array", "type": "array"},
{"name": "boolean", "type": "boolean"},
{"name": "date", "type": "date"},
{"name": "date_year", "type": "date", "format": "%Y"},
{"name": "datetime", "type": "datetime"},
{"name": "duration", "type": "duration"},
{"name": "geojson", "type": "geojson"},
{"name": "geopoint", "type": "geopoint"},
{"name": "integer", "type": "integer"},
{"name": "number", "type": "number"},
{"name": "object", "type": "object"},
{"name": "string", "type": "string"},
{"name": "time", "type": "time"},
{"name": "year", "type": "year"},
{"name": "yearmonth", "type": "yearmonth"}
]
},
"data": [
[
"any",
"array",
"boolean",
"date",
"date_year",
"datetime",
"duration",
"geojson",
"geopoint",
"integer",
"number",
"object",
"string",
"time",
"year",
"yearmonth"
],
[
"note1",
"[\"Mike\", \"John\"]",
"True",
"2015-01-01",
"2015",
"2015-01-01T03:00:00Z",
"P1Y1M",
"{\"type\": \"Point\", \"coordinates\": [33, 33.33]}",
"30,70",
"1",
"7",
"{\"chars\": 560}",
"good",
"03:00:00",
"2015",
"2015-01"
]
]
}
]
}
3 changes: 0 additions & 3 deletions frictionless/plugins/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,6 @@ def __read_convert_schema(self, sql_table):
res_name = element.column.table.name
resource = self.__read_convert_name(res_name)
foreign_fields.append(element.column.name)
if len(own_fields) == len(foreign_fields) == 1:
own_fields = own_fields.pop()
foreign_fields = foreign_fields.pop()
ref = {"resource": resource, "fields": foreign_fields}
schema.foreign_keys.append({"fields": own_fields, "reference": ref})

Expand Down
185 changes: 123 additions & 62 deletions tests/plugins/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,103 +24,164 @@


@pytest.mark.ci
def test_storage_types():
    """Round-trip data/storage/types.json through BigQuery storage and check
    how each Table Schema type survives: natively supported types come back
    unchanged, unsupported ones fall back to ``string`` (marked below), and
    ``year`` is downgraded to ``integer``.
    """

    # Export/Import
    source = Package("data/storage/types.json")
    storage = source.to_bigquery(force=True, **OPTIONS)
    target = Package.from_bigquery(**OPTIONS)

    # Assert metadata
    assert target.get_resource("main").schema == {
        "fields": [
            {"name": "any", "type": "string"},  # type fallback
            {"name": "array", "type": "string"},  # type fallback
            {"name": "boolean", "type": "boolean"},
            {"name": "date", "type": "date"},
            {"name": "date_year", "type": "date"},  # format removal
            {"name": "datetime", "type": "datetime"},
            {"name": "duration", "type": "string"},  # type fallback
            {"name": "geojson", "type": "string"},  # type fallback
            {"name": "geopoint", "type": "string"},  # type fallback
            {"name": "integer", "type": "integer"},
            {"name": "number", "type": "number"},
            {"name": "object", "type": "string"},  # type fallback
            {"name": "string", "type": "string"},
            {"name": "time", "type": "time"},
            {"name": "year", "type": "integer"},  # type downgrade
            {"name": "yearmonth", "type": "string"},  # type fallback
        ],
    }

    # Assert data
    assert target.get_resource("main").read_rows() == [
        {
            "any": "note1",
            "array": '["Mike", "John"]',
            "boolean": True,
            "date": datetime.date(2015, 1, 1),
            "date_year": datetime.date(2015, 1, 1),
            "datetime": datetime.datetime(2015, 1, 1, 3, 0),
            "duration": "P1Y1M",
            "geojson": '{"type": "Point", "coordinates": [33, 33.33]}',
            "geopoint": "30,70",
            "integer": 1,
            "number": 7,
            "object": '{"chars": 560}',
            "string": "good",
            "time": datetime.time(3, 0),
            "year": 2015,
            "yearmonth": "2015-01",
        },
    ]

    # Cleanup storage
    storage.delete_package(target.resource_names)


@pytest.mark.ci
def test_storage_integrity():
    """Round-trip data/storage/integrity.json through BigQuery storage and
    check what happens to integrity metadata: primary keys, foreign keys,
    and unique constraints are dropped on export, while the data itself is
    preserved.
    """

    # Export the fixture package to BigQuery, then read it back
    package = Package("data/storage/integrity.json")
    storage = package.to_bigquery(force=True, **OPTIONS)
    roundtrip = Package.from_bigquery(**OPTIONS)

    main = roundtrip.get_resource("main")
    link = roundtrip.get_resource("link")

    # Metadata: primary keys and foreign keys do not survive the round trip
    assert main.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "parent", "type": "integer"},
            {"name": "description", "type": "string"},
        ],
        # primary key removal
        # foreign keys removal
    }
    assert link.schema == {
        "fields": [
            {"name": "main_id", "type": "integer"},
            {"name": "some_id", "type": "integer"},  # constraint removal
            {"name": "description", "type": "string"},  # constraint removal
        ],
        # primary key removal
        # foreign keys removal
    }

    # Data: rows come back unchanged
    assert main.read_rows() == [
        {"id": 1, "parent": None, "description": "english"},
        {"id": 2, "parent": 1, "description": "中国人"},
    ]
    assert link.read_rows() == [
        {"main_id": 1, "some_id": 1, "description": "note1"},
        {"main_id": 2, "some_id": 2, "description": "note2"},
    ]

    # Cleanup storage
    storage.delete_package(roundtrip.resource_names)


@pytest.mark.ci
def test_storage_constraints():
    """Round-trip data/storage/constraints.json through BigQuery storage:
    only the ``required`` constraint survives the export; every other field
    constraint is removed (marked below), while the data is preserved.
    """

    # Export/Import
    source = Package("data/storage/constraints.json")
    storage = source.to_bigquery(force=True, **OPTIONS)
    target = Package.from_bigquery(**OPTIONS)

    # Assert metadata
    assert target.get_resource("main").schema == {
        "fields": [
            {"name": "required", "type": "string", "constraints": {"required": True}},
            {"name": "minLength", "type": "string"},  # constraint removal
            {"name": "maxLength", "type": "string"},  # constraint removal
            {"name": "pattern", "type": "string"},  # constraint removal
            {"name": "enum", "type": "string"},  # constraint removal
            {"name": "minimum", "type": "integer"},  # constraint removal
            {"name": "maximum", "type": "integer"},  # constraint removal
        ],
    }

    # Assert data
    assert target.get_resource("main").read_rows() == [
        {
            "required": "passing",
            "minLength": "passing",
            "maxLength": "passing",
            "pattern": "passing",
            "enum": "passing",
            "minimum": 5,
            "maximum": 5,
        },
    ]

    # Cleanup storage
    storage.delete_package(target.resource_names)


# NOTE: can we add constraints support to BigQuery?
@pytest.mark.skip
@pytest.mark.parametrize(
    "field_name, cell",
    [
        ("required", ""),
        ("minLength", "bad"),
        ("maxLength", "badbadbad"),
        ("pattern", "bad"),
        ("enum", "bad"),
        ("minimum", 3),
        ("maximum", 9),
    ],
)
def test_storage_constraints_not_valid_error(field_name, cell):
    """Placeholder: writing *cell*, which violates the *field_name*
    constraint, should eventually raise an error, but constraint
    enforcement is not implemented for BigQuery storage yet (see NOTE
    above), so the test is skipped and has no body.
    """
    pass


@pytest.mark.ci
def test_storage_read_resource_not_existent_error():
storage = BigqueryStorage(**OPTIONS)
Expand Down
Loading

0 comments on commit bc74d41

Please sign in to comment.