Skip to content

Commit

Permalink
SAT: check that previous config schema validates against current conn…
Browse files Browse the repository at this point in the history
…ector spec (#15367)
  • Loading branch information
alafanechere committed Aug 9, 2022
1 parent 123705c commit 62303a8
Show file tree
Hide file tree
Showing 7 changed files with 1,065 additions and 1,065 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 0.1.60
Backward compatibility tests: validate fake previous config against current connector specification. [#15367](https://github.com/airbytehq/airbyte/pull/15367)

## 0.1.59
Backward compatibility tests: add syntactic validation of specs [#15194](https://github.com/airbytehq/airbyte/pull/15194/).

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./
COPY source_acceptance_test ./source_acceptance_test
RUN pip install .

LABEL io.airbyte.version=0.1.59
LABEL io.airbyte.version=0.1.60
LABEL io.airbyte.name=airbyte/source-acceptance-test

ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin", "-r", "fEsx"]
4 changes: 3 additions & 1 deletion airbyte-integrations/bases/source-acceptance-test/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
"jsonschema~=3.2.0",
"jsonref==0.2",
"deepdiff~=5.8.0",
"requests-mock",
"requests-mock~=1.9.3",
"pytest-mock~=3.6.1",
"hypothesis~=6.54.1",
"hypothesis-jsonschema~=0.20.1", # TODO alafanechere upgrade to latest when jsonschema lib is upgraded to >= 4.0.0 in airbyte-cdk and SAT
]

setuptools.setup(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from source_acceptance_test.base import BaseTest
from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig, SpecTestConfig
from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, make_hashable, verify_records_schema
from source_acceptance_test.utils.backward_compatibility import SpecDiffChecker, validate_previous_configs
from source_acceptance_test.utils.common import find_all_values_for_key_in_schema, find_keyword_schema
from source_acceptance_test.utils.json_schema_helper import JsonSchemaHelper, get_expected_schema_structure, get_object_structure

Expand All @@ -47,7 +48,12 @@ class TestSpec(BaseTest):

@staticmethod
def compute_spec_diff(actual_connector_spec: ConnectorSpecification, previous_connector_spec: ConnectorSpecification):
return DeepDiff(previous_connector_spec.dict(), actual_connector_spec.dict(), view="tree", ignore_order=True)
return DeepDiff(
previous_connector_spec.dict()["connectionSpecification"],
actual_connector_spec.dict()["connectionSpecification"],
view="tree",
ignore_order=True,
)

@pytest.fixture(name="skip_backward_compatibility_tests")
def skip_backward_compatibility_tests_fixture(self, inputs: SpecTestConfig, previous_connector_docker_runner: ConnectorRunner) -> bool:
Expand All @@ -61,16 +67,6 @@ def skip_backward_compatibility_tests_fixture(self, inputs: SpecTestConfig, prev
pytest.skip(f"Backward compatibility tests are disabled for version {previous_connector_version}.")
return False

@pytest.fixture(name="spec_diff")
def spec_diff_fixture(
self,
skip_backward_compatibility_tests: bool,
actual_connector_spec: ConnectorSpecification,
previous_connector_spec: ConnectorSpecification,
) -> DeepDiff:
assert isinstance(actual_connector_spec, ConnectorSpecification) and isinstance(previous_connector_spec, ConnectorSpecification)
return self.compute_spec_diff(actual_connector_spec, previous_connector_spec)

def test_config_match_spec(self, actual_connector_spec: ConnectorSpecification, connector_config: SecretDict):
"""Check that config matches the actual schema from the spec call"""
# Getting rid of technical variables that start with an underscore
Expand Down Expand Up @@ -182,104 +178,22 @@ def test_oauth_flow_parameters(self, actual_connector_spec: ConnectorSpecificati

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_new_required_field_declaration(self, spec_diff: DeepDiff):
"""Check if a 'required' field was added to the spec."""
added_required_fields = [
addition for addition in spec_diff.get("dictionary_item_added", []) if addition.path(output_format="list")[-1] == "required"
]
assert len(added_required_fields) == 0, f"The current spec has a new required field: {spec_diff.pretty()}"

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_new_required_property(self, spec_diff: DeepDiff):
"""Check if a a new property was added to the 'required' field."""
added_required_properties = [
addition for addition in spec_diff.get("iterable_item_added", []) if addition.up.path(output_format="list")[-1] == "required"
]
assert len(added_required_properties) == 0, f"The current spec has a new required property: {spec_diff.pretty()}"

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_field_type_changed(self, spec_diff: DeepDiff):
"""Check if the current spec is changing the types of properties."""

common_error_message = f"The current spec changed the value of a 'type' field: {spec_diff.pretty()}"
# Detect type value change in case type field is declared as a string (e.g "str" -> "int"):
type_values_changed = [change for change in spec_diff.get("values_changed", []) if change.path(output_format="list")[-1] == "type"]

# Detect type value change in case type field is declared as a single item list (e.g ["str"] -> ["int"]):
type_values_changed_in_list = [
change for change in spec_diff.get("values_changed", []) if change.path(output_format="list")[-2] == "type"
]

assert len(type_values_changed_in_list) == 0 and len(type_values_changed) == 0, common_error_message

# Detect type value added to type list if new type value is not None (e.g ["str"] -> ["str", "int"]):
# It works because we compute the diff with 'ignore_order=True'
new_values_in_type_list = [ # noqa: F841
change
for change in spec_diff.get("iterable_item_added", [])
if change.path(output_format="list")[-2] == "type"
if change.t2 != "null"
]
# enable the assertion below if we want to disallow type expansion:
# assert len(new_values_in_type_list) == 0, common_error_message

# Detect the change of type of a type field
# e.g:
# - "str" -> ["str"] VALID
# - "str" -> ["str", "null"] VALID
# - "str" -> ["str", "int"] VALID
# - "str" -> 1 INVALID
# - ["str"] -> "str" VALID
# - ["str"] -> "int" INVALID
# - ["str"] -> 1 INVALID
type_changes = [change for change in spec_diff.get("type_changes", []) if change.path(output_format="list")[-1] == "type"]
for change in type_changes:
# We only accept change on the type field if the new type for this field is list or string
# This might be something already guaranteed by JSON schema validation.
if isinstance(change.t1, str):
assert isinstance(
change.t2, list
), f"The current spec change a type field from string to an invalid value: {spec_diff.pretty()}"
assert (
0 < len(change.t2) <= 2
), f"The current spec change a type field from string to an invalid value. The type list length should not be empty and have a maximum of two items {spec_diff.pretty()}."
# If the new type field is a list we want to make sure it only has the original type (t1) and null: e.g. "str" -> ["str", "null"]
# We want to raise an error otherwise.
t2_not_null_types = [_type for _type in change.t2 if _type != "null"]
assert (
len(t2_not_null_types) == 1 and t2_not_null_types[0] == change.t1
), "The current spec change a type field to a list with multiple invalid values."
if isinstance(change.t1, list):
assert isinstance(
change.t2, str
), f"The current spec change a type field from list to an invalid value: {spec_diff.pretty()}"
assert len(change.t1) == 1 and change.t2 == change.t1[0], f"The current spec narrowed a field type: {spec_diff.pretty()}"

# Detect when field was made not nullable but is still a list: e.g ["string", "null"] -> ["string"]
removed_nullable = [
change for change in spec_diff.get("iterable_item_removed", []) if change.path(output_format="list")[-2] == "type"
]
assert len(removed_nullable) == 0, f"The current spec narrowed a field type: {spec_diff.pretty()}"

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_enum_field_has_narrowed(self, spec_diff: DeepDiff):
"""Check if the list of values in a enum was shortened in a spec."""
removals = [
removal for removal in spec_diff.get("iterable_item_removed", []) if removal.up.path(output_format="list")[-1] == "enum"
]
assert len(removals) == 0, f"The current spec narrowed a field enum: {spec_diff.pretty()}"

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_new_enum_field_declaration(self, spec_diff: DeepDiff):
"""Check if an 'enum' field was added to the spec."""
added_enum_fields = [
addition for addition in spec_diff.get("dictionary_item_added", []) if addition.path(output_format="list")[-1] == "enum"
]
assert len(added_enum_fields) == 0, f"An 'enum' field was declared on an existing property of the spec: {spec_diff.pretty()}"
def test_backward_compatibility(
self,
skip_backward_compatibility_tests: bool,
actual_connector_spec: ConnectorSpecification,
previous_connector_spec: ConnectorSpecification,
number_of_configs_to_generate: int = 100,
):
"""Check if the current spec is backward_compatible:
1. Perform multiple hardcoded syntactic checks with SpecDiffChecker.
2. Validate fake generated previous configs against the actual connector specification with validate_previous_configs.
"""
assert isinstance(actual_connector_spec, ConnectorSpecification) and isinstance(previous_connector_spec, ConnectorSpecification)
spec_diff = self.compute_spec_diff(actual_connector_spec, previous_connector_spec)
checker = SpecDiffChecker(spec_diff)
checker.assert_spec_is_backward_compatible()
validate_previous_configs(previous_connector_spec, actual_connector_spec, number_of_configs_to_generate)

def test_additional_properties_is_true(self, actual_connector_spec: ConnectorSpecification):
"""Check that value of the "additionalProperties" field is always true.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

import jsonschema
from airbyte_cdk.models import ConnectorSpecification
from deepdiff import DeepDiff
from hypothesis import given, settings
from hypothesis_jsonschema import from_schema
from source_acceptance_test.utils import SecretDict


class NonBackwardCompatibleSpecError(Exception):
pass


class SpecDiffChecker:
"""A class to perform multiple backward compatible checks on a spec diff"""

def __init__(self, diff: DeepDiff) -> None:
self._diff = diff

def assert_spec_is_backward_compatible(self):
self.check_if_declared_new_required_field()
self.check_if_added_a_new_required_property()
self.check_if_value_of_type_field_changed()
# self.check_if_new_type_was_added() We want to allow type expansion atm
self.check_if_type_of_type_field_changed()
self.check_if_field_was_made_not_nullable()
self.check_if_enum_was_narrowed()
self.check_if_declared_new_enum_field()

def _raise_error(self, message: str):
raise NonBackwardCompatibleSpecError(f"{message}: {self._diff.pretty()}")

def check_if_declared_new_required_field(self):
"""Check if the new spec declared a 'required' field."""
added_required_fields = [
addition for addition in self._diff.get("dictionary_item_added", []) if addition.path(output_format="list")[-1] == "required"
]
if added_required_fields:
self._raise_error("The current spec declared a new 'required' field")

def check_if_added_a_new_required_property(self):
"""Check if the new spec added a property to the 'required' list."""
added_required_properties = [
addition for addition in self._diff.get("iterable_item_added", []) if addition.up.path(output_format="list")[-1] == "required"
]
if added_required_properties:
self._raise_error("A new property was added to 'required'")

def check_if_value_of_type_field_changed(self):
"""Check if a type was changed"""
# Detect type value change in case type field is declared as a string (e.g "str" -> "int"):
type_values_changed = [change for change in self._diff.get("values_changed", []) if change.path(output_format="list")[-1] == "type"]

# Detect type value change in case type field is declared as a single item list (e.g ["str"] -> ["int"]):
type_values_changed_in_list = [
change for change in self._diff.get("values_changed", []) if change.path(output_format="list")[-2] == "type"
]
if type_values_changed or type_values_changed_in_list:
self._raise_error("The current spec changed the value of a 'type' field")

def check_if_new_type_was_added(self):
"""Detect type value added to type list if new type value is not None (e.g ["str"] -> ["str", "int"])"""
new_values_in_type_list = [
change
for change in self._diff.get("iterable_item_added", [])
if change.path(output_format="list")[-2] == "type"
if change.t2 != "null"
]
if new_values_in_type_list:
self._raise_error("The current spec changed the value of a 'type' field")

def check_if_type_of_type_field_changed(self):
"""
Detect the change of type of a type field
e.g:
- "str" -> ["str"] VALID
- "str" -> ["str", "null"] VALID
- "str" -> ["str", "int"] VALID
- "str" -> 1 INVALID
- ["str"] -> "str" VALID
- ["str"] -> "int" INVALID
- ["str"] -> 1 INVALID
"""
type_changes = [change for change in self._diff.get("type_changes", []) if change.path(output_format="list")[-1] == "type"]
for change in type_changes:
# We only accept change on the type field if the new type for this field is list or string
# This might be something already guaranteed by JSON schema validation.
if isinstance(change.t1, str):
if not isinstance(change.t2, list):
self._raise_error("The current spec change a type field from string to an invalid value.")
if not 0 < len(change.t2) <= 2:
self._raise_error(
"The current spec change a type field from string to an invalid value. The type list length should not be empty and have a maximum of two items."
)
# If the new type field is a list we want to make sure it only has the original type (t1) and null: e.g. "str" -> ["str", "null"]
# We want to raise an error otherwise.
t2_not_null_types = [_type for _type in change.t2 if _type != "null"]
if not (len(t2_not_null_types) == 1 and t2_not_null_types[0] == change.t1):
self._raise_error("The current spec change a type field to a list with multiple invalid values.")
if isinstance(change.t1, list):
if not isinstance(change.t2, str):
self._raise_error("The current spec change a type field from list to an invalid value.")
if not (len(change.t1) == 1 and change.t2 == change.t1[0]):
self._raise_error("The current spec narrowed a field type.")

def check_if_field_was_made_not_nullable(self):
"""Detect when field was made not nullable but is still a list: e.g ["string", "null"] -> ["string"]"""
removed_nullable = [
change for change in self._diff.get("iterable_item_removed", []) if change.path(output_format="list")[-2] == "type"
]
if removed_nullable:
self._raise_error("The current spec narrowed a field type or made a field not nullable.")

def check_if_enum_was_narrowed(self):
"""Check if the list of values in a enum was shortened in a spec."""
enum_removals = [
enum_removal
for enum_removal in self._diff.get("iterable_item_removed", [])
if enum_removal.up.path(output_format="list")[-1] == "enum"
]
if enum_removals:
self._raise_error("The current spec narrowed an enum field.")

def check_if_declared_new_enum_field(self):
"""Check if an 'enum' field was added to the spec."""
enum_additions = [
enum_addition
for enum_addition in self._diff.get("dictionary_item_added", [])
if enum_addition.path(output_format="list")[-1] == "enum"
]
if enum_additions:
self._raise_error("An 'enum' field was declared on an existing property of the spec.")


def validate_previous_configs(
previous_connector_spec: ConnectorSpecification, actual_connector_spec: ConnectorSpecification, number_of_configs_to_generate=100
):
"""Use hypothesis and hypothesis-jsonschema to run property based testing:
1. Generate fake previous config with the previous connector specification json schema.
2. Validate a fake previous config against the actual connector specification json schema."""

@given(from_schema(previous_connector_spec.dict()["connectionSpecification"]))
@settings(max_examples=number_of_configs_to_generate)
def check_fake_previous_config_against_actual_spec(fake_previous_config):
fake_previous_config = SecretDict(fake_previous_config)
filtered_fake_previous_config = {key: value for key, value in fake_previous_config.data.items() if not key.startswith("_")}
try:
jsonschema.validate(instance=filtered_fake_previous_config, schema=actual_connector_spec.connectionSpecification)
except jsonschema.exceptions.ValidationError as err:
raise NonBackwardCompatibleSpecError(err)

check_fake_previous_config_against_actual_spec()
Loading

0 comments on commit 62303a8

Please sign in to comment.