diff --git a/sentry_sdk/integrations/boto3.py b/sentry_sdk/integrations/boto3.py index d8e505b593..a21772fc1a 100644 --- a/sentry_sdk/integrations/boto3.py +++ b/sentry_sdk/integrations/boto3.py @@ -7,7 +7,7 @@ from sentry_sdk._functools import partial from sentry_sdk._types import TYPE_CHECKING -from sentry_sdk.utils import parse_url, parse_version +from sentry_sdk.utils import capture_internal_exceptions, parse_url, parse_version if TYPE_CHECKING: from typing import Any @@ -71,13 +71,14 @@ def _sentry_request_created(service_id, request, operation_name, **kwargs): description=description, ) - parsed_url = parse_url(request.url, sanitize=False) + with capture_internal_exceptions(): + parsed_url = parse_url(request.url, sanitize=False) + span.set_data("aws.request.url", parsed_url.url) + span.set_data(SPANDATA.HTTP_QUERY, parsed_url.query) + span.set_data(SPANDATA.HTTP_FRAGMENT, parsed_url.fragment) span.set_tag("aws.service_id", service_id) span.set_tag("aws.operation_name", operation_name) - span.set_data("aws.request.url", parsed_url.url) - span.set_data(SPANDATA.HTTP_QUERY, parsed_url.query) - span.set_data(SPANDATA.HTTP_FRAGMENT, parsed_url.fragment) span.set_data(SPANDATA.HTTP_METHOD, request.method) # We do it in order for subsequent http calls/retries be diff --git a/sentry_sdk/integrations/httpx.py b/sentry_sdk/integrations/httpx.py index 358562f791..e84a28d165 100644 --- a/sentry_sdk/integrations/httpx.py +++ b/sentry_sdk/integrations/httpx.py @@ -2,7 +2,12 @@ from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import Integration, DidNotEnable from sentry_sdk.tracing_utils import should_propagate_trace -from sentry_sdk.utils import logger, parse_url +from sentry_sdk.utils import ( + SENSITIVE_DATA_SUBSTITUTE, + capture_internal_exceptions, + logger, + parse_url, +) from sentry_sdk._types import TYPE_CHECKING @@ -42,16 +47,23 @@ def send(self, request, **kwargs): if hub.get_integration(HttpxIntegration) is None: return real_send(self, request, **kwargs) - parsed_url = parse_url(str(request.url), sanitize=False) + parsed_url = None + with capture_internal_exceptions(): + parsed_url = parse_url(str(request.url), sanitize=False) with hub.start_span( op=OP.HTTP_CLIENT, - description="%s %s" % (request.method, parsed_url.url), + description="%s %s" + % ( + request.method, + parsed_url.url if parsed_url else SENSITIVE_DATA_SUBSTITUTE, + ), ) as span: span.set_data(SPANDATA.HTTP_METHOD, request.method) - span.set_data("url", parsed_url.url) - span.set_data(SPANDATA.HTTP_QUERY, parsed_url.query) - span.set_data(SPANDATA.HTTP_FRAGMENT, parsed_url.fragment) + if parsed_url is not None: + span.set_data("url", parsed_url.url) + span.set_data(SPANDATA.HTTP_QUERY, parsed_url.query) + span.set_data(SPANDATA.HTTP_FRAGMENT, parsed_url.fragment) if should_propagate_trace(hub, str(request.url)): for key, value in hub.iter_trace_propagation_headers(): @@ -82,16 +94,23 @@ async def send(self, request, **kwargs): if hub.get_integration(HttpxIntegration) is None: return await real_send(self, request, **kwargs) - parsed_url = parse_url(str(request.url), sanitize=False) + parsed_url = None + with capture_internal_exceptions(): + parsed_url = parse_url(str(request.url), sanitize=False) with hub.start_span( op=OP.HTTP_CLIENT, - description="%s %s" % (request.method, parsed_url.url), + description="%s %s" + % ( + request.method, + parsed_url.url if parsed_url else SENSITIVE_DATA_SUBSTITUTE, + ), ) as span: span.set_data(SPANDATA.HTTP_METHOD, request.method) - span.set_data("url", parsed_url.url) - span.set_data(SPANDATA.HTTP_QUERY, parsed_url.query) - span.set_data(SPANDATA.HTTP_FRAGMENT, parsed_url.fragment) + if parsed_url is not None: + span.set_data("url", parsed_url.url) + span.set_data(SPANDATA.HTTP_QUERY, parsed_url.query) + span.set_data(SPANDATA.HTTP_FRAGMENT, parsed_url.fragment) if should_propagate_trace(hub, str(request.url)): for key, value in hub.iter_trace_propagation_headers(): diff --git a/sentry_sdk/integrations/stdlib.py b/sentry_sdk/integrations/stdlib.py index 0add046bf8..be02779d88 100644 --- a/sentry_sdk/integrations/stdlib.py +++ b/sentry_sdk/integrations/stdlib.py @@ -9,6 +9,7 @@ from sentry_sdk.scope import add_global_event_processor from sentry_sdk.tracing_utils import EnvironHeaders, should_propagate_trace from sentry_sdk.utils import ( + SENSITIVE_DATA_SUBSTITUTE, capture_internal_exceptions, logger, safe_repr, @@ -84,17 +85,21 @@ def putrequest(self, method, url, *args, **kwargs): url, ) - parsed_url = parse_url(real_url, sanitize=False) + parsed_url = None + with capture_internal_exceptions(): + parsed_url = parse_url(real_url, sanitize=False) span = hub.start_span( op=OP.HTTP_CLIENT, - description="%s %s" % (method, parsed_url.url), + description="%s %s" + % (method, parsed_url.url if parsed_url else SENSITIVE_DATA_SUBSTITUTE), ) span.set_data(SPANDATA.HTTP_METHOD, method) - span.set_data("url", parsed_url.url) - span.set_data(SPANDATA.HTTP_QUERY, parsed_url.query) - span.set_data(SPANDATA.HTTP_FRAGMENT, parsed_url.fragment) + if parsed_url is not None: + span.set_data("url", parsed_url.url) + span.set_data(SPANDATA.HTTP_QUERY, parsed_url.query) + span.set_data(SPANDATA.HTTP_FRAGMENT, parsed_url.fragment) rv = real_putrequest(self, method, url, *args, **kwargs) diff --git a/tests/conftest.py b/tests/conftest.py index af1a40c37e..d9d88067dc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -69,7 +69,7 @@ def _capture_internal_exception(self, exc_info): @request.addfinalizer def _(): - # rerasise the errors so that this just acts as a pass-through (that + # reraise the errors so that this just acts as a pass-through (that # happens to keep track of the errors which pass through it) for e in errors: reraise(*e) diff --git a/tests/integrations/boto3/test_s3.py b/tests/integrations/boto3/test_s3.py index 7f02d422a0..5812c2c1bb 100644 --- a/tests/integrations/boto3/test_s3.py +++ b/tests/integrations/boto3/test_s3.py @@ -1,9 +1,17 @@ +import pytest + +import boto3 + from sentry_sdk import Hub from sentry_sdk.integrations.boto3 import Boto3Integration from tests.integrations.boto3.aws_mock import MockResponse from tests.integrations.boto3 import read_fixture -import boto3 +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 + session = boto3.Session( aws_access_key_id="-", @@ -53,9 +61,17 @@ def test_streaming(sentry_init, capture_events): (event,) = events assert event["type"] == "transaction" assert len(event["spans"]) == 2 + span1 = event["spans"][0] assert span1["op"] == "http.client" assert span1["description"] == "aws.s3.GetObject" + assert span1["data"] == { + "http.method": "GET", + "aws.request.url": "https://bucket.s3.amazonaws.com/foo.pdf", + "http.fragment": "", + "http.query": "", + } + span2 = event["spans"][1] assert span2["op"] == "http.client.stream" assert span2["description"] == "aws.s3.GetObject" @@ -83,3 +99,31 @@ def test_streaming_close(sentry_init, capture_events): assert span1["op"] == "http.client" span2 = event["spans"][1] assert span2["op"] == "http.client.stream" + + +@pytest.mark.tests_internal_exceptions +def test_omit_url_data_if_parsing_fails(sentry_init, capture_events): + sentry_init(traces_sample_rate=1.0, integrations=[Boto3Integration()]) + events = capture_events() + + s3 = session.resource("s3") + + with mock.patch( + "sentry_sdk.integrations.boto3.parse_url", + side_effect=ValueError, + ): + with Hub.current.start_transaction() as transaction, MockResponse( + s3.meta.client, 200, {}, read_fixture("s3_list.xml") + ): + bucket = s3.Bucket("bucket") + items = [obj for obj in bucket.objects.all()] + assert len(items) == 2 + assert items[0].key == "foo.txt" + assert items[1].key == "bar.txt" + transaction.finish() + + (event,) = events + assert event["spans"][0]["data"] == { + "http.method": "GET", + # no url data + } diff --git a/tests/integrations/httpx/test_httpx.py b/tests/integrations/httpx/test_httpx.py index c948901588..72188a23e3 100644 --- a/tests/integrations/httpx/test_httpx.py +++ b/tests/integrations/httpx/test_httpx.py @@ -8,6 +8,11 @@ from sentry_sdk.consts import MATCH_ALL, SPANDATA from sentry_sdk.integrations.httpx import HttpxIntegration +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 + @pytest.mark.parametrize( "httpx_client", @@ -225,3 +230,30 @@ def test_option_trace_propagation_targets( assert "sentry-trace" in request_headers else: assert "sentry-trace" not in request_headers + + +@pytest.mark.tests_internal_exceptions +def test_omit_url_data_if_parsing_fails(sentry_init, capture_events): + sentry_init(integrations=[HttpxIntegration()]) + + httpx_client = httpx.Client() + url = "http://example.com" + responses.add(responses.GET, url, status=200) + + events = capture_events() + with mock.patch( + "sentry_sdk.integrations.httpx.parse_url", + side_effect=ValueError, + ): + response = httpx_client.get(url) + + assert response.status_code == 200 + capture_message("Testing!") + + (event,) = events + assert event["breadcrumbs"]["values"][0]["data"] == { + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + # no url related data + } diff --git a/tests/integrations/requests/test_requests.py b/tests/integrations/requests/test_requests.py index 9c77b290d1..aecf64762d 100644 --- a/tests/integrations/requests/test_requests.py +++ b/tests/integrations/requests/test_requests.py @@ -7,6 +7,11 @@ from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.stdlib import StdlibIntegration +try: + from unittest import mock # python 3.3 and above +except ImportError: + import mock # python < 3.3 + def test_crumb_capture(sentry_init, capture_events): sentry_init(integrations=[StdlibIntegration()]) @@ -31,3 +36,29 @@ def test_crumb_capture(sentry_init, capture_events): SPANDATA.HTTP_STATUS_CODE: response.status_code, "reason": response.reason, } + + +@pytest.mark.tests_internal_exceptions +def test_omit_url_data_if_parsing_fails(sentry_init, capture_events): + sentry_init(integrations=[StdlibIntegration()]) + + url = "https://example.com" + responses.add(responses.GET, url, status=200) + + events = capture_events() + + with mock.patch( + "sentry_sdk.integrations.stdlib.parse_url", + side_effect=ValueError, + ): + response = requests.get(url) + + capture_message("Testing!") + + (event,) = events + assert event["breadcrumbs"]["values"][0]["data"] == { + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: response.status_code, + "reason": response.reason, + # no url related data + } diff --git a/tests/test_utils.py b/tests/test_utils.py index 4a028d70b3..47460d39b0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,6 +3,7 @@ import sys from sentry_sdk.utils import ( + Components, is_valid_sample_rate, logger, match_regex_list, @@ -69,22 +70,126 @@ def test_sanitize_url(url, expected_result): assert parts == expected_parts -def test_sanitize_url_and_split(): - parts = sanitize_url( - "https://username:password@example.com?token=abc&sessionid=123&save=true", - split=True, - ) - - expected_query = sorted( - "token=[Filtered]&sessionid=[Filtered]&save=[Filtered]".split("&") - ) - query = sorted(parts.query.split("&")) +@pytest.mark.parametrize( + ("url", "expected_result"), + [ + ( + "http://localhost:8000", + Components( + scheme="http", netloc="localhost:8000", path="", query="", fragment="" + ), + ), + ( + "http://example.com", + Components( + scheme="http", netloc="example.com", path="", query="", fragment="" + ), + ), + ( + "https://example.com", + Components( + scheme="https", netloc="example.com", path="", query="", fragment="" + ), + ), + ( + "example.com?token=abc&sessionid=123&save=true", + Components( + scheme="", + netloc="", + path="example.com", + query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", + fragment="", + ), + ), + ( + "http://example.com?token=abc&sessionid=123&save=true", + Components( + scheme="http", + netloc="example.com", + path="", + query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", + fragment="", + ), + ), + ( + "https://example.com?token=abc&sessionid=123&save=true", + Components( + scheme="https", + netloc="example.com", + path="", + query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", + fragment="", + ), + ), + ( + "http://localhost:8000/?token=abc&sessionid=123&save=true", + Components( + scheme="http", + netloc="localhost:8000", + path="/", + query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", + fragment="", + ), + ), + ( + "ftp://username:password@ftp.example.com:9876/bla/blub#foo", + Components( + scheme="ftp", + netloc="[Filtered]:[Filtered]@ftp.example.com:9876", + path="/bla/blub", + query="", + fragment="foo", + ), + ), + ( + "https://username:password@example.com/bla/blub?token=abc&sessionid=123&save=true#fragment", + Components( + scheme="https", + netloc="[Filtered]:[Filtered]@example.com", + path="/bla/blub", + query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", + fragment="fragment", + ), + ), + ( + "bla/blub/foo", + Components( + scheme="", netloc="", path="bla/blub/foo", query="", fragment="" + ), + ), + ( + "bla/blub/foo?token=abc&sessionid=123&save=true", + Components( + scheme="", + netloc="", + path="bla/blub/foo", + query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", + fragment="", + ), + ), + ( + "/bla/blub/foo/?token=abc&sessionid=123&save=true", + Components( + scheme="", + netloc="", + path="/bla/blub/foo/", + query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", + fragment="", + ), + ), + ], +) +def test_sanitize_url_and_split(url, expected_result): + sanitized_url = sanitize_url(url, split=True) + # sort query because old Python versions (<3.6) don't preserve order + query = sorted(sanitized_url.query.split("&")) + expected_query = sorted(expected_result.query.split("&")) - assert parts.scheme == "https" - assert parts.netloc == "[Filtered]:[Filtered]@example.com" + assert sanitized_url.scheme == expected_result.scheme + assert sanitized_url.netloc == expected_result.netloc assert query == expected_query - assert parts.path == "" - assert parts.fragment == "" + assert sanitized_url.path == expected_result.path + assert sanitized_url.fragment == expected_result.fragment @pytest.mark.parametrize(