diff --git a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md index 0c3b4dcc7c6c..6203898acfb2 100644 --- a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md +++ b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.1.45 +Check for not allowed keywords `allOf`, `not` in connectors schema: [#9851](https://github.com/airbytehq/airbyte/pull/9851) + ## 0.1.44 Fix incorrect name of primary_keys attribute: [#9768](https://github.com/airbytehq/airbyte/pull/9768) diff --git a/airbyte-integrations/bases/source-acceptance-test/Dockerfile b/airbyte-integrations/bases/source-acceptance-test/Dockerfile index 849e15043350..ae1adebcf11b 100644 --- a/airbyte-integrations/bases/source-acceptance-test/Dockerfile +++ b/airbyte-integrations/bases/source-acceptance-test/Dockerfile @@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./ COPY source_acceptance_test ./source_acceptance_test RUN pip install . 
@pytest.mark.parametrize("keyword", ["allOf", "not"])
def test_defined_keyword_exist_in_schema(self, keyword, discovered_catalog):
    """Assert that no discovered stream's JSON schema uses the given keyword.

    Runs once per parametrized ``keyword``. Every stream in
    ``discovered_catalog`` has its ``json_schema`` passed to
    ``find_keyword_schema``; the names of the streams for which that helper
    reports a hit are collected and listed in the assertion message.
    """
    # Names of the streams whose schema contains the keyword.
    schemas_errors = [
        stream_name
        for stream_name, stream in discovered_catalog.items()
        if find_keyword_schema(stream.json_schema, key=keyword)
    ]

    assert not schemas_errors, f"Found not allowed `{keyword}` keyword for selected streams: {schemas_errors}."
def find_keyword_schema(schema: Union[dict, list, str], key: str) -> bool:
    """Report whether ``key`` is used as a keyword anywhere in a JSON schema.

    The schema is walked recursively through nested dicts and lists. Keys
    found directly inside the ``properties`` mapping of an object schema are
    property *names*, not schema keywords, so they are never reported
    themselves; the sub-schemas they map to are still searched.

    Args:
        schema: A JSON schema, or any fragment of one (dict, list or scalar).
        key: The keyword to look for, e.g. ``"allOf"`` or ``"not"``.

    Returns:
        ``True`` if at least one occurrence of ``key`` as a keyword is found,
        ``False`` otherwise.
    """

    def _is_object_type(type_value) -> bool:
        # "type" may be a single string or a list of alternatives such as
        # ["null", "object"]; both forms describe an object schema.
        if isinstance(type_value, list):
            return "object" in type_value
        return type_value == "object"

    def _find_keyword(node, names_are_properties: bool = False) -> bool:
        if isinstance(node, list):
            return any(_find_keyword(item) for item in node)
        if not isinstance(node, dict):
            # Scalars cannot contain keywords.
            return False
        for name, value in node.items():
            if name == key and not names_are_properties:
                return True
            # The direct children of an object's "properties" mapping are
            # keyed by property name, so the match above is skipped for them.
            child_names_are_properties = name == "properties" and _is_object_type(node.get("type"))
            if _find_keyword(value, child_names_are_properties):
                return True
        return False

    return _find_keyword(schema)
"string"}, {"maxLength": 1}]}, "allOf", True), + ({"type": "object", "properties": {"allOf": {"type": "string"}}}, "allOf", False), + ({"type": "object", "properties": {"name": {"allOf": [{"type": "string"}, {"maxLength": 1}]}}}, "allOf", True), + ( + {"type": "object", "properties": {"name": {"type": "array", "items": {"allOf": [{"type": "string"}, {"maxLength": 4}]}}}}, + "allOf", + True, + ), + ( + { + "type": "object", + "properties": { + "name": { + "type": "array", + "items": {"anyOf": [{"type": "number"}, {"allOf": [{"type": "string"}, {"maxLength": 4}, {"minLength": 2}]}]}, + } + }, + }, + "allOf", + True, + ), + ({"not": {"type": "string"}}, "not", True), + ({"type": "object", "properties": {"not": {"type": "string"}}}, "not", False), + ({"type": "object", "properties": {"name": {"not": {"type": "string"}}}}, "not", True), + ], +) +def test_keyword_in_discovery_schemas(schema, keyword, should_fail): + t = _TestDiscovery() + discovered_catalog = {"test_stream": AirbyteStream.parse_obj({"name": "test_stream", "json_schema": schema})} + if should_fail: + with pytest.raises(AssertionError): + t.test_defined_keyword_exist_in_schema(keyword, discovered_catalog) + else: + t.test_defined_keyword_exist_in_schema(keyword, discovered_catalog) + + @pytest.mark.parametrize( "schema, record, should_fail", [