diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 22255e80f0..efa62fdd7f 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -29,6 +29,7 @@ from sentry_sdk.sessions import SessionFlusher from sentry_sdk.envelope import Envelope from sentry_sdk.profiler import has_profiling_enabled, setup_profiler +from sentry_sdk.scrubber import EventScrubber from sentry_sdk._types import TYPE_CHECKING @@ -111,6 +112,9 @@ def _get_options(*args, **kwargs): if rv["enable_tracing"] is True and rv["traces_sample_rate"] is None: rv["traces_sample_rate"] = 1.0 + if rv["event_scrubber"] is None: + rv["event_scrubber"] = EventScrubber() + return rv @@ -249,6 +253,11 @@ def _prepare_event( self.options["project_root"], ) + if event is not None: + event_scrubber = self.options["event_scrubber"] + if event_scrubber and not self.options["send_default_pii"]: + event_scrubber.scrub_event(event) + # Postprocess the event here so that annotated types do # generally not surface in before_send if event is not None: diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index fea3036624..fff6cb2a6e 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -133,6 +133,7 @@ def __init__( trace_propagation_targets=[ # noqa: B006 MATCH_ALL ], # type: Optional[Sequence[str]] + event_scrubber=None, # type: Optional[sentry_sdk.scrubber.EventScrubber] ): # type: (...) 
# Key names (lowercase) whose values are considered sensitive by default.
DEFAULT_DENYLIST = [
    # stolen from relay
    "password",
    "passwd",
    "secret",
    "api_key",
    "apikey",
    "auth",
    "credentials",
    "mysql_pwd",
    "privatekey",
    "private_key",
    "token",
    "ip_address",
    "session",
    # django
    "csrftoken",
    "sessionid",
    # wsgi
    "remote_addr",
    "x_csrftoken",
    "x_forwarded_for",
    "set_cookie",
    "cookie",
    "authorization",
    "x_api_key",
    "x_real_ip",
]


class EventScrubber(object):
    """Overwrite values stored under sensitive keys of an event, in place.

    A key is treated as sensitive when it is a string whose lowercased form
    is an entry of ``self.denylist``. Its value is replaced with
    ``AnnotatedValue.substituted_because_contains_sensitive_data()``.
    Only the top level of each inspected dict is checked; nested dicts are
    not walked.
    """

    def __init__(self, denylist=None):
        # type: (Optional[List[str]]) -> None
        """
        :param denylist: Key names to scrub. ``DEFAULT_DENYLIST`` is used
            when omitted. Entries are matched case-insensitively.
        """
        source = DEFAULT_DENYLIST if denylist is None else denylist
        # Build a fresh list instead of aliasing the argument or the module
        # default: mutating ``scrubber.denylist`` must not leak into
        # DEFAULT_DENYLIST (and thereby into every other scrubber).
        # Entries are lowercased because scrub_dict() compares lowercased keys.
        self.denylist = [entry.lower() for entry in source]

    def scrub_dict(self, d):
        # type: (Dict[str, Any]) -> None
        """Replace the values of denylisted string keys of ``d`` in place.

        Anything that is not a ``dict`` is silently ignored.
        """
        if not isinstance(d, dict):
            return

        # Only values of existing keys are reassigned, so the dict does not
        # change size while it is being iterated.
        for k in d:
            if isinstance(k, string_types) and k.lower() in self.denylist:
                d[k] = AnnotatedValue.substituted_because_contains_sensitive_data()

    def scrub_request(self, event):
        # type: (Event) -> None
        """Scrub ``headers``, ``cookies`` and ``data`` of ``event["request"]``."""
        with capture_internal_exceptions():
            if "request" in event:
                if "headers" in event["request"]:
                    self.scrub_dict(event["request"]["headers"])
                if "cookies" in event["request"]:
                    self.scrub_dict(event["request"]["cookies"])
                if "data" in event["request"]:
                    self.scrub_dict(event["request"]["data"])

    def scrub_extra(self, event):
        # type: (Event) -> None
        """Scrub ``event["extra"]``."""
        with capture_internal_exceptions():
            if "extra" in event:
                self.scrub_dict(event["extra"])

    def scrub_user(self, event):
        # type: (Event) -> None
        """Scrub ``event["user"]``."""
        with capture_internal_exceptions():
            if "user" in event:
                self.scrub_dict(event["user"])

    def scrub_breadcrumbs(self, event):
        # type: (Event) -> None
        """Scrub the ``data`` dict of every entry in ``event["breadcrumbs"]["values"]``."""
        with capture_internal_exceptions():
            if "breadcrumbs" in event:
                if "values" in event["breadcrumbs"]:
                    for value in event["breadcrumbs"]["values"]:
                        if "data" in value:
                            self.scrub_dict(value["data"])

    def scrub_frames(self, event):
        # type: (Event) -> None
        """Scrub the ``vars`` dict of every frame yielded by ``iter_event_frames``."""
        with capture_internal_exceptions():
            for frame in iter_event_frames(event):
                if "vars" in frame:
                    self.scrub_dict(frame["vars"])

    def scrub_spans(self, event):
        # type: (Event) -> None
        """Scrub the ``data`` dict of every span in ``event["spans"]``."""
        with capture_internal_exceptions():
            if "spans" in event:
                for span in event["spans"]:
                    if "data" in span:
                        self.scrub_dict(span["data"])

    def scrub_event(self, event):
        # type: (Event) -> None
        """Run every scrubbing pass over ``event``, mutating it in place."""
        self.scrub_request(event)
        self.scrub_extra(event)
        self.scrub_user(event)
        self.scrub_breadcrumbs(event)
        self.scrub_frames(event)
        self.scrub_spans(event)
# Value the tests expect in place of every scrubbed entry.
_FILTERED = "[Filtered]"

# ``_meta`` annotation the tests expect next to every scrubbed entry.
_FILTERED_META = {"": {"rem": [["!config", "s"]]}}


def test_request_scrubbing(sentry_init, capture_events):
    """Denylisted keys in request headers, cookies and data are filtered."""
    sentry_init()
    captured = capture_events()

    try:
        1 / 0
    except ZeroDivisionError:
        payload, _unused_hint = event_from_exception(sys.exc_info())

    request_headers = {
        "COOKIE": "secret",
        "authorization": "Bearer bla",
        "ORIGIN": "google.com",
    }
    request_cookies = {"sessionid": "secret", "foo": "bar"}
    request_data = {"token": "secret", "foo": "bar"}
    payload["request"] = {
        "headers": request_headers,
        "cookies": request_cookies,
        "data": request_data,
    }

    capture_event(payload)

    [event] = captured

    # Key matching is case-insensitive ("COOKIE"); unlisted keys survive.
    expected_request = {
        "headers": {
            "COOKIE": _FILTERED,
            "authorization": _FILTERED,
            "ORIGIN": "google.com",
        },
        "cookies": {"sessionid": _FILTERED, "foo": "bar"},
        "data": {"token": _FILTERED, "foo": "bar"},
    }
    assert event["request"] == expected_request

    expected_meta = {
        "headers": {
            "COOKIE": _FILTERED_META,
            "authorization": _FILTERED_META,
        },
        "cookies": {"sessionid": _FILTERED_META},
        "data": {"token": _FILTERED_META},
    }
    assert event["_meta"]["request"] == expected_meta


def test_stack_var_scrubbing(sentry_init, capture_events):
    """Denylisted local variables in the captured stack frame are filtered."""
    sentry_init()
    events = capture_events()

    # The locals below are what the test inspects in the captured frame, so
    # their names and values must stay exactly as they are.
    try:
        password = "supersecret"  # noqa
        api_key = "1231231231"  # noqa
        safe = "keepthis"  # noqa
        1 / 0
    except ZeroDivisionError:
        capture_exception()

    [event] = events

    [frame] = event["exception"]["values"][0]["stacktrace"]["frames"]
    frame_vars = frame["vars"]
    assert frame_vars["password"] == _FILTERED
    assert frame_vars["api_key"] == _FILTERED
    assert frame_vars["safe"] == "'keepthis'"

    exception_meta = event["_meta"]["exception"]["values"]["0"]
    vars_meta = exception_meta["stacktrace"]["frames"]["0"]["vars"]
    assert vars_meta == {
        "password": _FILTERED_META,
        "api_key": _FILTERED_META,
    }


def test_breadcrumb_extra_scrubbing(sentry_init, capture_events):
    """Denylisted keys in breadcrumb data and in event extra are filtered."""
    sentry_init()
    captured = capture_events()

    logger.info("bread", extra={"foo": 42, "password": "secret"})
    logger.critical("whoops", extra={"bar": 69, "auth": "secret"})

    [event] = captured

    extra = event["extra"]
    assert extra["bar"] == 69
    assert extra["auth"] == _FILTERED

    first_crumb = event["breadcrumbs"]["values"][0]
    assert first_crumb["data"] == {"foo": 42, "password": _FILTERED}

    # The whole ``_meta`` tree is compared: nothing else may be annotated.
    expected_meta = {
        "extra": {"auth": _FILTERED_META},
        "breadcrumbs": {"values": {"0": {"data": {"password": _FILTERED_META}}}},
    }
    assert event["_meta"] == expected_meta


def test_span_data_scrubbing(sentry_init, capture_events):
    """Denylisted keys in span data are filtered."""
    sentry_init(traces_sample_rate=1.0)
    captured = capture_events()

    with start_transaction(name="hi"), start_span(
        op="foo", description="bar"
    ) as span:
        span.set_data("password", "secret")
        span.set_data("datafoo", "databar")

    [event] = captured
    span_data = event["spans"][0]["data"]
    assert span_data == {"password": _FILTERED, "datafoo": "databar"}
    assert event["_meta"] == {"spans": {"0": {"data": {"password": _FILTERED_META}}}}


def test_custom_denylist(sentry_init, capture_events):
    """A user-supplied denylist decides which frame variables are filtered."""
    scrubber = EventScrubber(denylist=["my_sensitive_var"])
    sentry_init(event_scrubber=scrubber)
    events = capture_events()

    # The locals inside the try block are inspected in the captured frame, so
    # their names and values must stay exactly as they are.
    try:
        my_sensitive_var = "secret"  # noqa
        safe = "keepthis"  # noqa
        1 / 0
    except ZeroDivisionError:
        capture_exception()

    [event] = events

    [frame] = event["exception"]["values"][0]["stacktrace"]["frames"]
    frame_vars = frame["vars"]
    assert frame_vars["my_sensitive_var"] == _FILTERED
    assert frame_vars["safe"] == "'keepthis'"

    exception_meta = event["_meta"]["exception"]["values"]["0"]
    vars_meta = exception_meta["stacktrace"]["frames"]["0"]["vars"]
    assert vars_meta == {
        "my_sensitive_var": _FILTERED_META,
    }