getsentry · untitaker · Sep 2, 2020 · Aug 31, 2020 · Sep 1, 2020 · untitaker
@@ -2,14 +2,14 @@
 
 import ast
 
-from sentry_sdk import Hub
+from sentry_sdk import Hub, serializer
 from sentry_sdk._types import MYPY
 from sentry_sdk.integrations import Integration, DidNotEnable
 from sentry_sdk.scope import add_global_event_processor
 from sentry_sdk.utils import walk_exception_chain, iter_stacks
 
 if MYPY:
-    from typing import Optional, Dict, Any
+    from typing import Optional, Dict, Any, Tuple, List
     from types import FrameType
 
     from sentry_sdk._types import Event, Hint
@@ -75,7 +75,9 @@ def add_executing_info(event, hint):
                     continue
 
                 for sentry_frame, tb in zip(sentry_frames, tbs):
-                    sentry_frame["vars"].update(pure_eval_frame(tb.tb_frame))
+                    sentry_frame["vars"] = (
+                        pure_eval_frame(tb.tb_frame) or sentry_frame["vars"]
+                    )
             return event
 
 
@@ -89,16 +91,42 @@ def pure_eval_frame(frame):
     if not statements:
         return {}
 
-    stmt = list(statements)[0]
+    scope = stmt = list(statements)[0]
     while True:
         # Get the parent first in case the original statement is already
         # a function definition, e.g. if we're calling a decorator
         # In that case we still want the surrounding scope, not that function
-        stmt = stmt.parent
-        if isinstance(stmt, (ast.FunctionDef, ast.ClassDef, ast.Module)):
+        scope = scope.parent
+        if isinstance(scope, (ast.FunctionDef, ast.ClassDef, ast.Module)):
             break
 
     evaluator = pure_eval.Evaluator.from_frame(frame)
-    expressions = evaluator.interesting_expressions_grouped(stmt)
+    expressions = evaluator.interesting_expressions_grouped(scope)
+
+    def closeness(expression):
+        # type: (Tuple[List[Any], Any]) -> int
+        # Sort key: prioritise expressions with a node closer to the executed
+        # statement (`stmt`) that does not start after that statement's end.
+        # A higher return value is better - the expression will appear
+        # earlier in the list of values and is less likely to be trimmed
+        nodes, _value = expression
+        nodes_before_stmt = [
+            node for node in nodes if node.first_token.startpos < stmt.last_token.endpos
+        ]
+        if nodes_before_stmt:
+            # Start position of the last node beginning before the statement ends
+            return max(node.first_token.startpos for node in nodes_before_stmt)
+        else:
+            # No node starts before the statement ends: use the earliest start
+            # Negative means it's always lower priority than nodes that come before
+            # Less negative means closer to the statement and higher priority
+            return -min(node.first_token.startpos for node in nodes)
+
+    # This adds the first_token and last_token attributes to nodes
     atok = source.asttokens()
-    return {atok.get_text(nodes[0]): value for nodes, value in expressions}
+
+    expressions.sort(key=closeness, reverse=True)
+    return {
+        atok.get_text(nodes[0]): value
+        for nodes, value in expressions[: serializer.MAX_DATABAG_BREADTH]
+    }
@@ -1,6 +1,9 @@
+import sys
+from types import SimpleNamespace
+
 import pytest
 
-from sentry_sdk import capture_exception
+from sentry_sdk import capture_exception, serializer
 from sentry_sdk.integrations.pure_eval import PureEvalIntegration
 
 
@@ -10,8 +13,27 @@ def test_with_locals_enabled(sentry_init, capture_events, integrations):
     events = capture_events()
 
     def foo():
-        foo.d = {1: 2}
-        print(foo.d[1] / 0)
+        namespace = SimpleNamespace()
+        q = 1
+        w = 2
+        e = 3
+        r = 4
+        t = 5
+        y = 6
+        u = 7
+        i = 8
+        o = 9
+        p = 10
+        a = 11
+        s = 12
+        str((q, w, e, r, t, y, u, i, o, p, a, s))  # use variables for linter
+        namespace.d = {1: 2}
+        print(namespace.d[1] / 0)
+
+        # Uses of variables after the failing statement don't affect the order
+        print(q)
+        print(s)
+        print(events)
 
     try:
         foo()
@@ -28,8 +50,43 @@ def foo():
     frame_vars = event["exception"]["values"][0]["stacktrace"]["frames"][-1]["vars"]
 
     if integrations:
-        assert sorted(frame_vars.keys()) == ["foo", "foo.d", "foo.d[1]"]
-        assert frame_vars["foo.d"] == {"1": "2"}
-        assert frame_vars["foo.d[1]"] == "2"
+        # Values closest to the exception line appear first
+        # Test this order if possible given the Python version and dict order
+        expected_keys = [
+            "namespace",
+            "namespace.d",
+            "namespace.d[1]",
+            "s",
+            "a",
+            "p",
+            "o",
+            "i",
+            "u",
+            "y",
+        ]
+        if sys.version_info[:2] == (3, 5):
+            assert frame_vars.keys() == set(expected_keys)
+        else:
+            assert list(frame_vars.keys()) == expected_keys
+        assert frame_vars["namespace.d"] == {"1": "2"}
+        assert frame_vars["namespace.d[1]"] == "2"
     else:
-        assert sorted(frame_vars.keys()) == ["foo"]
+        # Without pure_eval, the variables are unpredictable.
+        # In later versions, those at the top appear first and are thus included
+        assert frame_vars.keys() <= {
+            "namespace",
+            "q",
+            "w",
+            "e",
+            "r",
+            "t",
+            "y",
+            "u",
+            "i",
+            "o",
+            "p",
+            "a",
+            "s",
+            "events",
+        }
+        assert len(frame_vars) == serializer.MAX_DATABAG_BREADTH