def _get_response_body(self, response: requests.Response) -> Optional[JsonType]:
    """Return a printable form of an HTTP response body.

    The body is rendered with the first strategy that succeeds:

    1. ``str(response.json())`` when the body parses as JSON.
    2. ``response.content`` decoded as UTF-8 when JSON parsing fails.
    3. A fixed placeholder message when the content cannot be decoded.

    Args:
        response: The HTTP response whose body should be rendered.

    Returns:
        A string in every case. ``None`` is never returned, even though the
        return annotation (left unchanged for compatibility) permits it.
    """
    try:
        return str(response.json())
    except ValueError:
        # `requests.exceptions.JSONDecodeError`, the stdlib/simplejson decode
        # errors raised by older `requests` releases, and `UnicodeDecodeError`
        # all derive from `ValueError`. Catching the base class works on every
        # `requests` version and keeps this error-reporting helper from raising.
        pass

    try:
        return response.content.decode("utf-8")
    except Exception:
        # Deliberate best effort: failing to render the body must never mask
        # the HTTP error that is being reported.
        return "The Content of the Response couldn't be decoded."
@pytest.mark.parametrize("http_code", [400, 401, 403])
def test_error_codes_http_stream_error_resolution_with_response_secrets_filtered(mocker, http_code):
    """Check that registered secrets are masked in the error raised for a failed request.

    A response carrying secret values in both its JSON body and its headers is
    returned from the patched `requests.Session.send`; the raised
    `MessageRepresentationAirbyteTracedErrors` must contain the masked
    (`****`) forms of those values.
    """
    stream = StubCustomBackoffHttpStream()

    # Fragments expected in the error text once the secrets have been masked.
    expected_header_secret_replaced = "'authorisation_header': '__****__'"
    expected_content_str_secret_replaced = "this str contains **** secret"

    # Build the failing response by hand so no network access is needed.
    res = requests.Response()
    res.status_code = http_code
    res._content = (
        b'{"error": "test error message", "secret_info": "this str contains SECRET_VALUE secret"}'
    )
    res.headers = {
        # simple non-secret header
        "regular_header": "some_header_value",
        # secret header
        "authorisation_header": "__SECRET_X_VALUE__",
    }

    # Register the values that must be filtered out of the error message.
    update_secrets(["SECRET_X_VALUE", "SECRET_VALUE"])
    try:
        # Every request sent through a `requests.Session` now yields `res`.
        mocker.patch.object(requests.Session, "send", return_value=res)

        with pytest.raises(MessageRepresentationAirbyteTracedErrors) as err:
            list(stream.read_records(SyncMode.full_refresh))
    finally:
        # NOTE(review): `update_secrets` presumably stores process-wide state;
        # reset it so later tests are not filtered by this test's secrets.
        # Confirm against `airbyte_secrets_utils`.
        update_secrets([])

    # Use pytest's public `ExceptionInfo.value` rather than the private
    # `_excinfo` tuple. `repr` matches how the exception was rendered when the
    # whole tuple was stringified.
    raised = repr(err.value)

    # we expect the header secrets are obscured
    assert expected_header_secret_replaced in raised
    # we expect the response body values (any of them) are obscured
    assert expected_content_str_secret_replaced in raised