getsentry · Zylphrex · Oct 21, 2022 · Oct 19, 2022 · Oct 20, 2022 · Oct 20, 2022
diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
@@ -45,7 +45,7 @@
     from typing_extensions import TypedDict
     import sentry_sdk.tracing
 
-    RawSampleData = Tuple[int, Sequence[Tuple[int, Sequence[RawFrameData]]]]
+    RawSampleData = Tuple[int, Sequence[Tuple[str, Sequence[RawFrameData]]]]
 
     ProcessedStack = Tuple[int, ...]
 
@@ -162,14 +162,14 @@ def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH):
         stack.append(frame)
         frame = frame.f_back
 
-    return [
+    return tuple(
         RawFrameData(
             function=get_frame_name(frame),
             abs_path=frame.f_code.co_filename,
             lineno=frame.f_lineno,
         )
         for frame in stack
-    ]
+    )
 
 
 def get_frame_name(frame):
@@ -324,7 +324,7 @@ def write(self, sample):
     def slice_profile(self, start_ns, stop_ns):
         # type: (int, int) -> ProcessedProfile
         samples = []  # type: List[ProcessedSample]
-        stacks = dict()  # type: Dict[ProcessedStack, int]
+        stacks = dict()  # type: Dict[int, int]
         stacks_list = list()  # type: List[ProcessedStack]
         frames = dict()  # type: Dict[RawFrameData, int]
         frames_list = list()  # type: List[ProcessedFrame]
@@ -334,39 +334,44 @@ def slice_profile(self, start_ns, stop_ns):
         #
         # Is it safe to assume that the samples are always in
         # chronological order and binary search the buffer?
-        for raw_sample in self.buffer:
-            if raw_sample is None:
-                continue
-
-            ts = raw_sample[0]
+        for ts, sample in filter(None, self.buffer):
             if start_ns > ts or ts > stop_ns:
                 continue
 
-            for tid, stack in raw_sample[1]:
-                current_stack = []
-
-                for frame in stack:
-                    if frame not in frames:
-                        frames[frame] = len(frames)
-                        frames_list.append(
-                            {
-                                "function": frame.function,
-                                "filename": frame.abs_path,
-                                "lineno": frame.lineno,
-                            }
-                        )
-                    current_stack.append(frames[frame])
-
-                current_stack = tuple(current_stack)
-                if current_stack not in stacks:
-                    stacks[current_stack] = len(stacks)
-                    stacks_list.append(current_stack)
+            elapsed_since_start_ns = str(ts - start_ns)
+
+            for tid, stack in sample:
+                # Instead of mapping the stack into frame ids and hashing
+                # that as a tuple, we can directly hash the stack.
+                # This saves us from having to generate yet another list.
+                # Additionally, using the stack as the key directly is
+                # costly because the stack can be large, so we pre-hash
+                # the stack, and use the hash as the key as this will be
+                # needed a few times to improve performance.
+                hashed_stack = hash(stack)
+
+                # Check if the stack is indexed first, this lets us skip
+                # indexing frames if it's not necessary
+                if hashed_stack not in stacks:
+                    for frame in stack:
+                        if frame not in frames:
+                            frames[frame] = len(frames)
+                            frames_list.append(
+                                {
+                                    "function": frame.function,
+                                    "filename": frame.abs_path,
+                                    "lineno": frame.lineno,
+                                }
+                            )
+
+                    stacks[hashed_stack] = len(stacks)
+                    stacks_list.append(tuple(frames[frame] for frame in stack))
 
                 samples.append(
                     {
-                        "elapsed_since_start_ns": str(ts - start_ns),
-                        "thread_id": str(tid),
-                        "stack_id": stacks[current_stack],
+                        "elapsed_since_start_ns": elapsed_since_start_ns,
+                        "thread_id": tid,
+                        "stack_id": stacks[hashed_stack],
                     }
                 )
 
@@ -375,7 +380,7 @@ def slice_profile(self, start_ns, stop_ns):
         # will not have any metadata associated with it.
         thread_metadata = {
             str(thread.ident): {
-                "name": thread.name,
+                "name": str(thread.name),
             }
             for thread in threading.enumerate()
         }  # type: Dict[str, ProcessedThreadMetadata]
@@ -401,7 +406,7 @@ def _sample_stack(*args, **kwargs):
                 (
                     nanosecond_time(),
                     [
-                        (tid, extract_stack(frame))
+                        (str(tid), extract_stack(frame))
                         for tid, frame in sys._current_frames().items()
                     ],
                 )

diff --git a/tests/test_profiler.py b/tests/test_profiler.py
@@ -211,7 +211,7 @@ def _sample_stack(*args, **kwargs):
 )
 def test_thread_scheduler_takes_first_samples(scheduler_class):
     sample_buffer = DummySampleBuffer(
-        capacity=1, sample_data=[(0, [(0, [RawFrameData("name", "file", 1)])])]
+        capacity=1, sample_data=[(0, [(0, (RawFrameData("name", "file", 1),))])]
     )
     scheduler = scheduler_class(sample_buffer=sample_buffer, frequency=1000)
     assert scheduler.start_profiling()
@@ -237,7 +237,7 @@ def test_thread_scheduler_takes_first_samples(scheduler_class):
 def test_thread_scheduler_takes_more_samples(scheduler_class):
     sample_buffer = DummySampleBuffer(
         capacity=10,
-        sample_data=[(i, [(0, [RawFrameData("name", "file", 1)])]) for i in range(3)],
+        sample_data=[(i, [(0, (RawFrameData("name", "file", 1),))]) for i in range(3)],
     )
     scheduler = scheduler_class(sample_buffer=sample_buffer, frequency=1000)
     assert scheduler.start_profiling()
@@ -305,7 +305,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
 current_thread = threading.current_thread()
 thread_metadata = {
     str(current_thread.ident): {
-        "name": current_thread.name,
+        "name": str(current_thread.name),
     },
 }
 
@@ -330,7 +330,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             10,
             0,
             1,
-            [(2, [(1, [RawFrameData("name", "file", 1)])])],
+            [(2, [("1", (RawFrameData("name", "file", 1),))])],
             {
                 "frames": [],
                 "samples": [],
@@ -343,7 +343,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             10,
             0,
             1,
-            [(0, [(1, [RawFrameData("name", "file", 1)])])],
+            [(0, [("1", (RawFrameData("name", "file", 1),))])],
             {
                 "frames": [
                     {
@@ -369,8 +369,8 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             0,
             1,
             [
-                (0, [(1, [RawFrameData("name", "file", 1)])]),
-                (1, [(1, [RawFrameData("name", "file", 1)])]),
+                (0, [("1", (RawFrameData("name", "file", 1),))]),
+                (1, [("1", (RawFrameData("name", "file", 1),))]),
             ],
             {
                 "frames": [
@@ -402,16 +402,16 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             0,
             1,
             [
-                (0, [(1, [RawFrameData("name1", "file", 1)])]),
+                (0, [("1", (RawFrameData("name1", "file", 1),))]),
                 (
                     1,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name1", "file", 1),
                                 RawFrameData("name2", "file", 2),
-                            ],
+                            ),
                         )
                     ],
                 ),
@@ -455,23 +455,23 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
                     0,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name1", "file", 1),
                                 RawFrameData("name2", "file", 2),
-                            ],
+                            ),
                         )
                     ],
                 ),
                 (
                     1,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name3", "file", 3),
                                 RawFrameData("name4", "file", 4),
-                            ],
+                            ),
                         )
                     ],
                 ),
@@ -521,16 +521,16 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             0,
             1,
             [
-                (0, [(1, [RawFrameData("name1", "file", 1)])]),
+                (0, [("1", (RawFrameData("name1", "file", 1),))]),
                 (
                     1,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name2", "file", 2),
                                 RawFrameData("name3", "file", 3),
-                            ],
+                            ),
                         )
                     ],
                 ),