diff --git a/src/sentry/seer/explorer/utils.py b/src/sentry/seer/explorer/utils.py index e221c5913c084a..42bc649cd97d0a 100644 --- a/src/sentry/seer/explorer/utils.py +++ b/src/sentry/seer/explorer/utils.py @@ -48,8 +48,9 @@ def normalize_description(description: str) -> str: def _convert_profile_to_execution_tree(profile_data: dict) -> list[dict]: """ - Converts profile data into a hierarchical representation of code execution, - including only items from the MainThread and app frames. + Converts profile data into a hierarchical representation of code execution. + Selects the thread with the most in_app frames, or falls back to MainThread if no + in_app frames exist (showing all frames including system frames). Calculates accurate durations for all nodes based on call stack transitions. """ profile = profile_data.get( @@ -65,15 +66,47 @@ def _convert_profile_to_execution_tree(profile_data: dict) -> list[dict]: frames = profile.get("frames") stacks = profile.get("stacks") samples = profile.get("samples") + thread_metadata = profile.get("thread_metadata", {}) if not all([frames, stacks, samples]): return [] - # Use the thread ID from the first sample as this should be the one with the most application logic - if samples: - main_thread_id = str(samples[0]["thread_id"]) + # Count in_app frames per thread + thread_in_app_counts: dict[str, int] = {} + for sample in samples: + thread_id = str(sample["thread_id"]) + thread_in_app_counts.setdefault(thread_id, 0) + + stack_index = sample.get("stack_id") + if stack_index is not None and stack_index < len(stacks): + for idx in stacks[stack_index]: + if idx < len(frames) and frames[idx].get("in_app", False): + thread_in_app_counts[thread_id] += 1 + + # Select thread with most in_app frames + selected_thread_id: str | None = None + max_in_app_count = max(thread_in_app_counts.values()) if thread_in_app_counts else 0 + + if max_in_app_count > 0: + # Find thread with most in_app frames + selected_thread_id = max(thread_in_app_counts.items(), key=lambda x: x[1])[0] + show_all_frames = False else: - # No samples - can't determine thread - main_thread_id = None + # No in_app frames found, try to find MainThread + main_thread_id_from_metadata = next( + ( + str(thread_id) + for thread_id, metadata in thread_metadata.items() + if metadata.get("name") == "MainThread" + ), + None, + ) + + selected_thread_id = main_thread_id_from_metadata or ( + str(samples[0]["thread_id"]) if samples else None + ) + show_all_frames = ( + True # Show all frames including system frames when no in_app frames exist + ) def _get_elapsed_since_start_ns( sample: dict[str, Any], all_samples: list[dict[str, Any]] @@ -139,35 +172,50 @@ def find_or_create_child(parent: dict[str, Any], frame_data: dict[str, Any]) -> parent["children"].append(frame_data) return frame_data - def process_stack(stack_index): - """Extract app frames from a stack trace""" + def is_valid_frame(frame: dict[str, Any], include_all_frames: bool) -> bool: + """Check if a frame should be included in the execution tree.""" + filename = frame.get("filename", "") + is_generated = filename.startswith("<") and filename.endswith(">") + if is_generated: + return False + return include_all_frames or frame.get("in_app", False) + + def process_stack(stack_index: int, include_all_frames: bool = False) -> list[dict[str, Any]]: + """Extract frames from a stack trace. + + Args: + stack_index: Index into the stacks array + include_all_frames: If True, include all frames (including system frames). + If False, only include in_app frames. + """ frame_indices = stacks[stack_index] if not frame_indices: return [] - # Create nodes for app frames only, maintaining order (bottom to top) + # Create nodes for frames, maintaining order (bottom to top) nodes = [] for idx in reversed(frame_indices): - frame = frames[idx] - if frame.get("in_app", False) and not ( - frame.get("filename", "").startswith("<") - and frame.get("filename", "").endswith(">") - ): + if idx >= len(frames): + continue + if is_valid_frame(frames[idx], include_all_frames): nodes.append(create_frame_node(idx)) return nodes + if not selected_thread_id: + return [] + # Build the execution tree and track call stacks execution_tree: list[dict[str, Any]] = [] call_stack_history: list[tuple[list[str], int]] = [] # [(node_ids, timestamp), ...] node_registry: dict[str, dict[str, Any]] = {} # {node_id: node_reference} for sample in sorted_samples: - if str(sample["thread_id"]) != str(main_thread_id): + if str(sample["thread_id"]) != selected_thread_id: continue timestamp_ns = _get_elapsed_since_start_ns(sample, sorted_samples) - stack_frames = process_stack(sample["stack_id"]) + stack_frames = process_stack(sample["stack_id"], include_all_frames=show_all_frames) if not stack_frames: continue @@ -175,19 +223,23 @@ def process_stack(stack_index): current_stack_ids = [] # Find or create root node - root = None - for existing_root in execution_tree: - if ( - existing_root["function"] == stack_frames[0]["function"] - and existing_root["module"] == stack_frames[0]["module"] - and existing_root["filename"] == stack_frames[0]["filename"] - and existing_root["lineno"] == stack_frames[0]["lineno"] - ): - root = existing_root - break + root_frame = stack_frames[0] + root = next( + ( + existing_root + for existing_root in execution_tree + if ( + existing_root["function"] == root_frame["function"] + and existing_root["module"] == root_frame["module"] + and existing_root["filename"] == root_frame["filename"] + and existing_root["lineno"] == root_frame["lineno"] + ) + ), + None, + ) if root is None: - root = stack_frames[0] + root = root_frame execution_tree.append(root) # Process root node @@ -302,8 +354,9 @@ def apply_durations(node): def convert_profile_to_execution_tree(profile_data: dict) -> list[ExecutionTreeNode]: """ - Converts profile data into a hierarchical representation of code execution, - including only items from the MainThread and app frames. + Converts profile data into a hierarchical representation of code execution. + Selects the thread with the most in_app frames, or falls back to MainThread if no + in_app frames exist (showing all frames including system frames). Calculates accurate durations for all nodes based on call stack transitions. """ dict_tree = _convert_profile_to_execution_tree(profile_data) diff --git a/tests/sentry/seer/autofix/test_autofix.py b/tests/sentry/seer/autofix/test_autofix.py index 663f348702a1e0..d4c59fed1e9d00 100644 --- a/tests/sentry/seer/autofix/test_autofix.py +++ b/tests/sentry/seer/autofix/test_autofix.py @@ -64,7 +64,7 @@ def test_convert_profile_to_execution_tree(self) -> None: execution_tree = _convert_profile_to_execution_tree(profile_data) - # Should only include in_app frames from MainThread + # Should only include in_app frames from the selected thread (MainThread in this case) assert len(execution_tree) == 1 # One root node root = execution_tree[0] assert root["function"] == "main" @@ -81,7 +81,7 @@ def test_convert_profile_to_execution_tree(self) -> None: assert len(child["children"]) == 0 # No children for the last in_app frame def test_convert_profile_to_execution_tree_non_main_thread(self) -> None: - """Test that the first sample's thread is used (even if not MainThread)""" + """Test that the thread with in_app frames is selected (even if not MainThread)""" profile_data = { "profile": { "frames": [ @@ -101,7 +101,7 @@ def test_convert_profile_to_execution_tree_non_main_thread(self) -> None: execution_tree = _convert_profile_to_execution_tree(profile_data) - # Should include the worker thread since it's the first sample's thread + # Should include the worker thread since it has in_app frames assert len(execution_tree) == 1 assert execution_tree[0]["function"] == "worker" assert execution_tree[0]["filename"] == "worker.py" diff --git a/tests/sentry/seer/explorer/test_explorer_utils.py b/tests/sentry/seer/explorer/test_explorer_utils.py index 9042f55760add7..40c1ae6cea509a 100644 --- a/tests/sentry/seer/explorer/test_explorer_utils.py +++ b/tests/sentry/seer/explorer/test_explorer_utils.py @@ -362,7 +362,7 @@ def test_convert_profile_filters_generated_frames(self) -> None: assert len(root.children) == 0 def test_convert_profile_single_thread_fallback(self) -> None: - """Test fallback to single thread when no MainThread is found.""" + """Test fallback to first sample's thread when no MainThread is found.""" profile_data: dict[str, Any] = { "profile": { "frames": [ @@ -433,7 +433,7 @@ def test_convert_profile_ignores_other_threads(self) -> None: result = convert_profile_to_execution_tree(profile_data) assert len(result) == 1 - # Should only contain the main function from MainThread + # Should only contain the main function from MainThread (selected because it has in_app frames) root = result[0] assert root.function == "main_function" @@ -554,3 +554,157 @@ def test_convert_profile_duration_calculation_accuracy(self) -> None: # Verify sample counts assert short_func.sample_count == 1 assert long_func.sample_count == 3 + + def test_convert_profile_selects_thread_with_most_in_app_frames(self) -> None: + """Test that the thread with the most in_app frames is selected.""" + profile_data: dict[str, Any] = { + "profile": { + "frames": [ + { + "function": "main_function", + "module": "app", + "filename": "main.py", + "lineno": 10, + "in_app": True, + }, + { + "function": "worker_function_1", + "module": "app", + "filename": "worker.py", + "lineno": 20, + "in_app": True, + }, + { + "function": "worker_function_2", + "module": "app", + "filename": "worker.py", + "lineno": 30, + "in_app": True, + }, + { + "function": "worker_function_3", + "module": "app", + "filename": "worker.py", + "lineno": 40, + "in_app": True, + }, + ], + "stacks": [[0], [1, 2, 3]], # MainThread has 1 frame, WorkerThread has 3 frames + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "1", # MainThread - 1 in_app frame + "stack_id": 0, + }, + { + "elapsed_since_start_ns": 1000000, + "thread_id": "2", # WorkerThread - 3 in_app frames (should be selected) + "stack_id": 1, + }, + ], + "thread_metadata": { + "1": {"name": "MainThread"}, + "2": {"name": "WorkerThread"}, + }, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + + # Should contain worker_function_3 from WorkerThread (thread with most in_app frames) + root = result[0] + assert root.function == "worker_function_3" + assert len(root.children) == 1 + assert root.children[0].function == "worker_function_2" + assert len(root.children[0].children) == 1 + assert root.children[0].children[0].function == "worker_function_1" + + def test_convert_profile_fallback_to_mainthread_when_no_in_app_frames(self) -> None: + """Test fallback to MainThread when no threads have in_app frames.""" + profile_data: dict[str, Any] = { + "profile": { + "frames": [ + { + "function": "stdlib_function", + "module": "stdlib", + "filename": "/usr/lib/stdlib.py", + "lineno": 100, + "in_app": False, + }, + { + "function": "another_stdlib_function", + "module": "stdlib", + "filename": "/usr/lib/stdlib.py", + "lineno": 200, + "in_app": False, + }, + ], + "stacks": [[0, 1]], + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "1", # MainThread + "stack_id": 0, + }, + { + "elapsed_since_start_ns": 1000000, + "thread_id": "2", # WorkerThread + "stack_id": 0, + }, + ], + "thread_metadata": { + "1": {"name": "MainThread"}, + "2": {"name": "WorkerThread"}, + }, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + + # Should contain all frames from MainThread (including system frames) + root = result[0] + assert root.function == "another_stdlib_function" + assert len(root.children) == 1 + assert root.children[0].function == "stdlib_function" + + def test_convert_profile_fallback_to_first_sample_when_no_mainthread_no_in_app(self) -> None: + """Test fallback to first sample's thread when no MainThread and no in_app frames.""" + profile_data: dict[str, Any] = { + "profile": { + "frames": [ + { + "function": "stdlib_function", + "module": "stdlib", + "filename": "/usr/lib/stdlib.py", + "lineno": 100, + "in_app": False, + }, + ], + "stacks": [[0]], + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "worker1", # First sample - should be selected + "stack_id": 0, + }, + { + "elapsed_since_start_ns": 1000000, + "thread_id": "worker2", + "stack_id": 0, + }, + ], + "thread_metadata": { + "worker1": {"name": "WorkerThread1"}, + "worker2": {"name": "WorkerThread2"}, + }, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + + # Should contain frames from worker1 (first sample's thread) + root = result[0] + assert root.function == "stdlib_function"