# `get_headers` comes from `werkzeug.datastructures.EnvironHeaders`
#
# We need this function because Django does not give us a "pure" http header
# dict. So we might as well use it for all WSGI integrations.
def get_headers(environ):
    """
    Yields ``(name, value)`` pairs for the HTTP headers in a WSGI environ.

    ``HTTP_*`` keys lose their prefix and are rewritten with dashes and title
    case (``HTTP_X_FOO`` -> ``X-Foo``). ``CONTENT_TYPE`` and ``CONTENT_LENGTH``
    are yielded as ``Content-Type`` / ``Content-Length``; their
    ``HTTP_``-prefixed variants are skipped. Every other key is ignored.
    """
    for key, value in environ.items():
        key = str(key)
        if key.startswith("HTTP_") and key not in (
            "HTTP_CONTENT_TYPE",
            "HTTP_CONTENT_LENGTH",
        ):
            yield key[5:].replace("_", "-").title(), value
        elif key in ("CONTENT_TYPE", "CONTENT_LENGTH"):
            yield key.replace("_", "-").title(), value


class RequestExtractor(object):
    """
    Base class that copies data from a framework request onto a scope.

    Subclasses provide the framework-specific accessors (``url``, ``env``,
    ``cookies``, ``raw_data``, ``form``, ``files`` and ``size_of_file``), which
    raise ``NotImplementedError`` here. The remaining properties are derived
    from ``env``.
    """

    def __init__(self, request):
        self.request = request

    def extract_into_scope(self, scope):
        """
        Store a description of the request in ``scope.request``.

        The dict always receives ``url``, ``query_string``, ``method``,
        ``headers``, ``env`` and ``cookies``. When the request has a body,
        ``data`` and ``data_info`` (``{"ct": ..., "repr": ...}``) are added.
        """
        # Attach the dict before filling it: if the code below fails halfway
        # through we at least have some data.
        scope.request = request_info = {}

        request_info["url"] = self.url
        request_info["query_string"] = self.query_string
        request_info["method"] = self.method
        request_info["headers"] = dict(self.headers)
        request_info["env"] = dict(get_environ(self.env))
        request_info["cookies"] = dict(self.cookies)

        body = self._extract_body()
        if body is None:
            return

        data, content_type, data_repr = body
        request_info["data"] = data
        request_info["data_info"] = {"ct": content_type, "repr": data_repr}

    def _extract_body(self):
        """
        Returns ``(data, ct, repr)`` for the request body, or ``None`` when
        there is no body to report.

        Form/file data wins over JSON, which wins over the raw payload.
        """
        form = self.form
        files = self.files
        if form or files:
            data = dict(form.items())
            for name, uploaded in files.items():
                # File contents are not captured; an empty placeholder is
                # stored together with the reported size of the upload.
                data[name] = AnnotatedValue(
                    "",
                    {
                        "len": self.size_of_file(uploaded),
                        "rem": [["!filecontent", "x", 0, 0]],
                    },
                )

            if files or self.form_is_multipart:
                return data, "multipart", "structured"
            return data, "urlencoded", "structured"

        # Read the property once; each access parses the body again.
        parsed = self.json
        if parsed is not None:
            return parsed, "json", "structured"

        raw = self.raw_data
        if not raw:
            return None

        if not isinstance(raw, bytes):
            # Already text, nothing to decode.
            return raw, "plain", "other"

        try:
            return raw.decode("utf-8"), "plain", "other"
        except UnicodeDecodeError:
            # Not valid UTF-8: ship the payload as base64-encoded ASCII text.
            return base64.b64encode(raw).decode("ascii"), "bytes", "base64"

    @property
    def url(self):
        raise NotImplementedError()

    @property
    def query_string(self):
        """``QUERY_STRING`` from the environ, or ``None`` if it is not set."""
        return self.env.get("QUERY_STRING")

    @property
    def method(self):
        """``REQUEST_METHOD`` from the environ, or ``None`` if it is not set."""
        return self.env.get("REQUEST_METHOD")

    @property
    def headers(self):
        """Iterator of ``(name, value)`` header pairs built from the environ."""
        return get_headers(self.env)

    @property
    def env(self):
        raise NotImplementedError()

    @property
    def cookies(self):
        raise NotImplementedError()

    @property
    def raw_data(self):
        raise NotImplementedError()

    @property
    def form(self):
        raise NotImplementedError()

    @property
    def form_is_multipart(self):
        """Whether ``CONTENT_TYPE`` starts with ``multipart/form-data``."""
        # CONTENT_TYPE can be missing (or None) in the environ; treat that as
        # "not multipart" instead of failing on None.startswith.
        content_type = self.env.get("CONTENT_TYPE") or ""
        return content_type.startswith("multipart/form-data")

    @property
    def is_json(self):
        """
        Whether the media type (the part of ``CONTENT_TYPE`` before the first
        ``;``) is ``application/json`` or ``application/*+json``.
        """
        mt = (self.env.get("CONTENT_TYPE") or "").split(";", 1)[0]
        return mt == "application/json" or (
            mt.startswith("application/") and mt.endswith("+json")
        )

    @property
    def json(self):
        """
        The parsed JSON body, or ``None`` when the request is not declared as
        JSON, has no body, or the body cannot be decoded/parsed.
        """
        if not self.is_json:
            return None

        raw = self.raw_data
        if not raw:
            return None

        try:
            # raw_data may already be text; only bytes need decoding.
            if isinstance(raw, bytes):
                raw = raw.decode("utf-8")
            return json.loads(raw)
        except ValueError:
            # Covers invalid JSON as well as invalid UTF-8
            # (UnicodeDecodeError is a ValueError subclass).
            return None

    @property
    def files(self):
        raise NotImplementedError()

    def size_of_file(self, file):
        raise NotImplementedError()
class DjangoRequestExtractor(RequestExtractor):
    """Extractor that reads its data from the request object passed to the
    Django middleware (``self.request``)."""

    @property
    def url(self):
        """Absolute URI built from the request path."""
        req = self.request
        return req.build_absolute_uri(req.path)

    @property
    def env(self):
        """The request's ``META`` mapping."""
        req = self.request
        return req.META

    @property
    def cookies(self):
        """The request's ``COOKIES`` mapping."""
        req = self.request
        return req.COOKIES

    @property
    def raw_data(self):
        """The request's ``body`` attribute."""
        req = self.request
        return req.body

    @property
    def form(self):
        """The request's ``POST`` data."""
        req = self.request
        return req.POST

    @property
    def files(self):
        """The request's ``FILES`` mapping."""
        req = self.request
        return req.FILES

    def size_of_file(self, file):
        """Return the ``size`` attribute of an uploaded file object."""
        uploaded = file
        return uploaded.size
class FlaskRequestExtractor(RequestExtractor):
    """Extractor that reads its data from the Flask request object it was
    constructed with (``self.request``)."""

    @property
    def url(self):
        """``scheme://host/path`` of the request, without the query string."""
        return "%s://%s%s" % (self.request.scheme, self.request.host, self.request.path)

    @property
    def env(self):
        """The request's WSGI ``environ``."""
        return self.request.environ

    @property
    def cookies(self):
        """The request's ``cookies`` mapping."""
        return self.request.cookies

    @property
    def raw_data(self):
        """The request's ``data`` attribute."""
        return self.request.data

    @property
    def form(self):
        """The request's ``form`` mapping."""
        return self.request.form

    @property
    def files(self):
        """The request's ``files`` mapping."""
        # Read from the wrapped request like every other accessor, not from a
        # module-level ``request`` object.
        return self.request.files

    def size_of_file(self, file):
        """Return the ``content_length`` attribute of an uploaded file object."""
        # NOTE(review): content_length presumably reflects the upload's own
        # header and may be 0 when none was sent - confirm this is the
        # intended size.
        return file.content_length
def test_get_dsn(client):
    """The ``get_dsn`` view responds with the expected body."""
    resp = client.get(reverse("get_dsn"))
    assert resp.content == b"LOL!"


def test_request_captured(client, capture_events):
    """A GET to the ``message`` view produces one event carrying request info."""
    resp = client.get(reverse("message"))
    assert resp.content == b"ok"

    expected_request = {
        "cookies": {},
        "env": {
            "REMOTE_ADDR": "127.0.0.1",
            "SERVER_NAME": "testserver",
            "SERVER_PORT": "80",
        },
        "headers": {"Cookie": ""},
        "method": "GET",
        "query_string": "",
        "url": "http://testserver/message",
    }

    (event,) = capture_events
    assert event["request"] == expected_request


def test_flask_large_json_request(capture_events, app):
    """A 2000-char string inside a JSON body is cut to 512 chars and annotated."""
    payload = {"foo": {"bar": "a" * 2000}}

    @app.route("/", methods=["POST"])
    def index():
        assert request.json == payload
        assert request.data == json.dumps(payload).encode("ascii")
        assert not request.form
        capture_message("hi")
        return "ok"

    resp = app.test_client().post(
        "/", content_type="application/json", data=json.dumps(payload)
    )
    assert resp.status_code == 200

    (event,) = capture_events
    annotations = event[""]["request"]["data"]
    assert annotations["foo"]["bar"] == {
        "": {"len": 2000, "rem": [["!len", "x", 509, 512]]}
    }
    body = event["request"]["data"]
    assert len(body["foo"]["bar"]) == 512
    assert event["request"]["data_info"] == {"ct": "json", "repr": "structured"}


def test_flask_large_formdata_request(capture_events, app):
    """A 2000-char urlencoded form field is cut to 512 chars and annotated."""
    fields = {"foo": "a" * 2000}

    @app.route("/", methods=["POST"])
    def index():
        assert request.form["foo"] == fields["foo"]
        assert not request.data
        assert not request.json
        capture_message("hi")
        return "ok"

    resp = app.test_client().post("/", data=fields)
    assert resp.status_code == 200

    (event,) = capture_events
    annotations = event[""]["request"]["data"]
    assert annotations["foo"] == {
        "": {"len": 2000, "rem": [["!len", "x", 509, 512]]}
    }
    body = event["request"]["data"]
    assert len(body["foo"]) == 512
    assert event["request"]["data_info"] == {"ct": "urlencoded", "repr": "structured"}


@pytest.mark.parametrize("input_char", [u"a", b"a"])
def test_flask_large_text_request(input_char, capture_events, app):
    """A 2000-char text body (sent as text or bytes) is reported as plain text."""
    payload = input_char * 2000

    @app.route("/", methods=["POST"])
    def index():
        assert not request.form
        # The view always sees bytes, whichever type the client was given.
        if not isinstance(payload, bytes):
            assert request.data == payload.encode("ascii")
        else:
            assert request.data == payload
        assert not request.json
        capture_message("hi")
        return "ok"

    resp = app.test_client().post("/", data=payload)
    assert resp.status_code == 200

    (event,) = capture_events
    assert event[""]["request"]["data"] == {
        "": {"len": 2000, "rem": [["!len", "x", 509, 512]]}
    }
    assert len(event["request"]["data"]) == 512
    assert event["request"]["data_info"] == {"ct": "plain", "repr": "other"}


def test_flask_large_bytes_request(capture_events, app):
    """A body that is not valid UTF-8 is reported base64-encoded."""
    payload = b"\xc3" * 2000

    @app.route("/", methods=["POST"])
    def index():
        assert not request.form
        assert request.data == payload
        assert not request.json
        capture_message("hi")
        return "ok"

    resp = app.test_client().post("/", data=payload)
    assert resp.status_code == 200

    (event,) = capture_events
    # 2668 is the length of the base64 text for 2000 input bytes.
    assert event[""]["request"]["data"] == {
        "": {"len": 2668, "rem": [["!len", "x", 509, 512]]}
    }
    assert len(event["request"]["data"]) == 512
    assert event["request"]["data_info"] == {"ct": "bytes", "repr": "base64"}


def test_flask_files_and_form(capture_events, app):
    """Form fields are captured; uploaded files appear as empty placeholders."""
    fields = {"foo": "a" * 2000, "file": (BytesIO(b"hello"), "hello.txt")}

    @app.route("/", methods=["POST"])
    def index():
        assert list(request.form) == ["foo"]
        assert list(request.files) == ["file"]
        assert not request.json
        capture_message("hi")
        return "ok"

    resp = app.test_client().post("/", data=fields)
    assert resp.status_code == 200

    (event,) = capture_events
    annotations = event[""]["request"]["data"]
    assert annotations["foo"] == {
        "": {"len": 2000, "rem": [["!len", "x", 509, 512]]}
    }
    assert len(event["request"]["data"]["foo"]) == 512

    assert event[""]["request"]["data"]["file"] == {
        "": {"len": 0, "rem": [["!filecontent", "x", 0, 0]]}
    }
    assert not event["request"]["data"]["file"]