Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SAT: check for not allowed keywords allOf, not in connectors schema #9851

Merged
merged 7 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 0.1.45
Check for disallowed keywords `allOf` and `not` in connector schemas: [#9851](https://github.com/airbytehq/airbyte/pull/9851)

## 0.1.44
Fix incorrect name of primary_keys attribute: [#9768](https://github.com/airbytehq/airbyte/pull/9768)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./
COPY source_acceptance_test ./source_acceptance_test
RUN pip install .

LABEL io.airbyte.version=0.1.44
LABEL io.airbyte.version=0.1.45
LABEL io.airbyte.name=airbyte/source-acceptance-test

ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin", "-r", "fEsx"]
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from source_acceptance_test.base import BaseTest
from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig
from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, make_hashable, verify_records_schema
from source_acceptance_test.utils.common import find_key_inside_schema
from source_acceptance_test.utils.common import find_key_inside_schema, find_keyword_schema
from source_acceptance_test.utils.json_schema_helper import JsonSchemaHelper, get_expected_schema_structure, get_object_structure


Expand Down Expand Up @@ -200,6 +200,17 @@ def test_defined_refs_exist_in_schema(self, discovered_catalog: Mapping[str, Any

assert not schemas_errors, f"Found unresolved `$refs` values for selected streams: {tuple(schemas_errors)}."

@pytest.mark.parametrize("keyword", ["allOf", "not"])
def test_defined_keyword_exist_in_schema(self, keyword, discovered_catalog):
    """Fail when any discovered stream's JSON schema uses the given keyword.

    Runs once per parametrized keyword. Every stream in the catalog is checked
    with `find_keyword_schema`, and the names of all streams that use the
    keyword are reported together in the assertion message.
    """
    schemas_errors = [
        stream_name
        for stream_name, stream in discovered_catalog.items()
        if find_keyword_schema(stream.json_schema, key=keyword)
    ]

    assert not schemas_errors, f"Found not allowed `{keyword}` keyword for selected streams: {schemas_errors}."

def test_primary_keys_exist_in_schema(self, discovered_catalog: Mapping[str, Any]):
"""Check that all primary keys are present in catalog."""
for stream_name, stream in discovered_catalog.items():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,24 @@ def find_key_inside_schema(schema_item: Union[dict, list, str], key: str = "$ref
item = find_key_inside_schema(schema_object_value, key)
if item is not None:
return item


def find_keyword_schema(schema: Union[dict, list, str], key: str) -> bool:
    """Return True if `key` is used as a JSON-schema keyword anywhere in `schema`.

    Keys that sit directly inside the `properties` mapping of an object schema
    are property *names*, not keywords, so they are never reported; the
    sub-schemas they map to are still searched.

    :param schema: a JSON schema or schema fragment (dict, list or scalar).
        Scalars never match.
    :param key: the keyword to look for, e.g. "allOf" or "not".
    :return: True when at least one occurrence is found, False otherwise.
    """

    def _is_object_type(declared_type) -> bool:
        # `type` may be a single name or a list of names such as
        # ["null", "object"]; both forms describe an object schema.
        if isinstance(declared_type, list):
            return "object" in declared_type
        return declared_type == "object"

    def _contains_keyword(node, keys_are_property_names=False) -> bool:
        if isinstance(node, list):
            return any(_contains_keyword(item) for item in node)
        if not isinstance(node, dict):
            return False
        for name, value in node.items():
            if name == key and not keys_are_property_names:
                return True
            # Only a `properties` key in schema position opens a property-name
            # map; a field that happens to be called "properties" does not.
            enters_properties = (
                name == "properties" and not keys_are_property_names and _is_object_type(node.get("type"))
            )
            if _contains_keyword(value, enters_properties):
                return True
        return False

    return _contains_keyword(schema)
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,46 @@ def test_ref_in_discovery_schemas(schema, should_fail):
t.test_defined_refs_exist_in_schema(discovered_catalog)


@pytest.mark.parametrize(
    "schema, keyword, should_fail",
    [
        # --- allOf ---
        ({}, "allOf", False),
        ({"allOf": [{"type": "string"}, {"maxLength": 1}]}, "allOf", True),
        # A property that is merely *named* like the keyword is allowed.
        ({"type": "object", "properties": {"allOf": {"type": "string"}}}, "allOf", False),
        ({"type": "object", "properties": {"name": {"allOf": [{"type": "string"}, {"maxLength": 1}]}}}, "allOf", True),
        (
            {"type": "object", "properties": {"name": {"type": "array", "items": {"allOf": [{"type": "string"}, {"maxLength": 4}]}}}},
            "allOf",
            True,
        ),
        (
            {
                "type": "object",
                "properties": {
                    "name": {
                        "type": "array",
                        "items": {"anyOf": [{"type": "number"}, {"allOf": [{"type": "string"}, {"maxLength": 4}, {"minLength": 2}]}]},
                    }
                },
            },
            "allOf",
            True,
        ),
        # --- not ---
        ({"not": {"type": "string"}}, "not", True),
        ({"type": "object", "properties": {"not": {"type": "string"}}}, "not", False),
        ({"type": "object", "properties": {"name": {"not": {"type": "string"}}}}, "not", True),
    ],
)
def test_keyword_in_discovery_schemas(schema, keyword, should_fail):
    """Run the keyword check on a one-stream catalog built from `schema`.

    When `should_fail` is true the check must raise `AssertionError`;
    otherwise it must complete without raising.
    """
    discovery = _TestDiscovery()
    stream = AirbyteStream.parse_obj({"name": "test_stream", "json_schema": schema})
    catalog = {"test_stream": stream}

    if not should_fail:
        discovery.test_defined_keyword_exist_in_schema(keyword, catalog)
        return

    with pytest.raises(AssertionError):
        discovery.test_defined_keyword_exist_in_schema(keyword, catalog)


@pytest.mark.parametrize(
"schema, record, should_fail",
[
Expand Down