Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 83 additions & 30 deletions src/sentry/seer/explorer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ def normalize_description(description: str) -> str:

def _convert_profile_to_execution_tree(profile_data: dict) -> list[dict]:
"""
Converts profile data into a hierarchical representation of code execution,
including only items from the MainThread and app frames.
Converts profile data into a hierarchical representation of code execution.
Selects the thread with the most in_app frames, or falls back to MainThread if no
in_app frames exist (showing all frames including system frames).
Calculates accurate durations for all nodes based on call stack transitions.
"""
profile = profile_data.get(
Expand All @@ -65,15 +66,47 @@ def _convert_profile_to_execution_tree(profile_data: dict) -> list[dict]:
frames = profile.get("frames")
stacks = profile.get("stacks")
samples = profile.get("samples")
thread_metadata = profile.get("thread_metadata", {})
if not all([frames, stacks, samples]):
return []

# Use the thread ID from the first sample as this should be the one with the most application logic
if samples:
main_thread_id = str(samples[0]["thread_id"])
# Count in_app frames per thread
thread_in_app_counts: dict[str, int] = {}
for sample in samples:
thread_id = str(sample["thread_id"])
thread_in_app_counts.setdefault(thread_id, 0)

stack_index = sample.get("stack_id")
if stack_index is not None and stack_index < len(stacks):
for idx in stacks[stack_index]:
if idx < len(frames) and frames[idx].get("in_app", False):
thread_in_app_counts[thread_id] += 1

# Select thread with most in_app frames
selected_thread_id: str | None = None
max_in_app_count = max(thread_in_app_counts.values()) if thread_in_app_counts else 0

if max_in_app_count > 0:
# Find thread with most in_app frames
selected_thread_id = max(thread_in_app_counts.items(), key=lambda x: x[1])[0]
show_all_frames = False
else:
# No samples - can't determine thread
main_thread_id = None
# No in_app frames found, try to find MainThread
main_thread_id_from_metadata = next(
(
str(thread_id)
for thread_id, metadata in thread_metadata.items()
if metadata.get("name") == "MainThread"
),
None,
)

selected_thread_id = main_thread_id_from_metadata or (
str(samples[0]["thread_id"]) if samples else None
)
show_all_frames = (
True # Show all frames including system frames when no in_app frames exist
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: MainThread selection ignores whether thread has samples

When no threads have in_app frames, the code attempts to select MainThread from metadata without verifying it actually contains any samples. If MainThread exists in metadata but never appears in the samples data, selected_thread_id will be set to a thread that has no profiling data, causing all samples to be skipped and an empty execution tree to be returned instead of falling back to the first sample's thread.

Fix in Cursor Fix in Web


def _get_elapsed_since_start_ns(
sample: dict[str, Any], all_samples: list[dict[str, Any]]
Expand Down Expand Up @@ -139,55 +172,74 @@ def find_or_create_child(parent: dict[str, Any], frame_data: dict[str, Any]) ->
parent["children"].append(frame_data)
return frame_data

def process_stack(stack_index):
"""Extract app frames from a stack trace"""
def is_valid_frame(frame: dict[str, Any], include_all_frames: bool) -> bool:
"""Check if a frame should be included in the execution tree."""
filename = frame.get("filename", "")
is_generated = filename.startswith("<") and filename.endswith(">")
if is_generated:
return False
return include_all_frames or frame.get("in_app", False)

def process_stack(stack_index: int, include_all_frames: bool = False) -> list[dict[str, Any]]:
"""Extract frames from a stack trace.

Args:
stack_index: Index into the stacks array
include_all_frames: If True, include all frames (including system frames).
If False, only include in_app frames.
"""
frame_indices = stacks[stack_index]
if not frame_indices:
return []

# Create nodes for app frames only, maintaining order (bottom to top)
# Create nodes for frames, maintaining order (bottom to top)
nodes = []
for idx in reversed(frame_indices):
frame = frames[idx]
if frame.get("in_app", False) and not (
frame.get("filename", "").startswith("<")
and frame.get("filename", "").endswith(">")
):
if idx >= len(frames):
continue
if is_valid_frame(frames[idx], include_all_frames):
nodes.append(create_frame_node(idx))

return nodes

if not selected_thread_id:
return []

# Build the execution tree and track call stacks
execution_tree: list[dict[str, Any]] = []
call_stack_history: list[tuple[list[str], int]] = [] # [(node_ids, timestamp), ...]
node_registry: dict[str, dict[str, Any]] = {} # {node_id: node_reference}

for sample in sorted_samples:
if str(sample["thread_id"]) != str(main_thread_id):
if str(sample["thread_id"]) != selected_thread_id:
continue

timestamp_ns = _get_elapsed_since_start_ns(sample, sorted_samples)
stack_frames = process_stack(sample["stack_id"])
stack_frames = process_stack(sample["stack_id"], include_all_frames=show_all_frames)
if not stack_frames:
continue

# Process this stack sample
current_stack_ids = []

# Find or create root node
root = None
for existing_root in execution_tree:
if (
existing_root["function"] == stack_frames[0]["function"]
and existing_root["module"] == stack_frames[0]["module"]
and existing_root["filename"] == stack_frames[0]["filename"]
and existing_root["lineno"] == stack_frames[0]["lineno"]
):
root = existing_root
break
root_frame = stack_frames[0]
root = next(
(
existing_root
for existing_root in execution_tree
if (
existing_root["function"] == root_frame["function"]
and existing_root["module"] == root_frame["module"]
and existing_root["filename"] == root_frame["filename"]
and existing_root["lineno"] == root_frame["lineno"]
)
),
None,
)

if root is None:
root = stack_frames[0]
root = root_frame
execution_tree.append(root)

# Process root node
Expand Down Expand Up @@ -302,8 +354,9 @@ def apply_durations(node):

def convert_profile_to_execution_tree(profile_data: dict) -> list[ExecutionTreeNode]:
"""
Converts profile data into a hierarchical representation of code execution,
including only items from the MainThread and app frames.
Converts profile data into a hierarchical representation of code execution.
Selects the thread with the most in_app frames, or falls back to MainThread if no
in_app frames exist (showing all frames including system frames).
Calculates accurate durations for all nodes based on call stack transitions.
"""
dict_tree = _convert_profile_to_execution_tree(profile_data)
Expand Down
6 changes: 3 additions & 3 deletions tests/sentry/seer/autofix/test_autofix.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_convert_profile_to_execution_tree(self) -> None:

execution_tree = _convert_profile_to_execution_tree(profile_data)

# Should only include in_app frames from MainThread
# Should only include in_app frames from the selected thread (MainThread in this case)
assert len(execution_tree) == 1 # One root node
root = execution_tree[0]
assert root["function"] == "main"
Expand All @@ -81,7 +81,7 @@ def test_convert_profile_to_execution_tree(self) -> None:
assert len(child["children"]) == 0 # No children for the last in_app frame

def test_convert_profile_to_execution_tree_non_main_thread(self) -> None:
"""Test that the first sample's thread is used (even if not MainThread)"""
"""Test that the thread with in_app frames is selected (even if not MainThread)"""
profile_data = {
"profile": {
"frames": [
Expand All @@ -101,7 +101,7 @@ def test_convert_profile_to_execution_tree_non_main_thread(self) -> None:

execution_tree = _convert_profile_to_execution_tree(profile_data)

# Should include the worker thread since it's the first sample's thread
# Should include the worker thread since it has in_app frames
assert len(execution_tree) == 1
assert execution_tree[0]["function"] == "worker"
assert execution_tree[0]["filename"] == "worker.py"
Expand Down
158 changes: 156 additions & 2 deletions tests/sentry/seer/explorer/test_explorer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def test_convert_profile_filters_generated_frames(self) -> None:
assert len(root.children) == 0

def test_convert_profile_single_thread_fallback(self) -> None:
"""Test fallback to single thread when no MainThread is found."""
"""Test fallback to first sample's thread when no MainThread is found."""
profile_data: dict[str, Any] = {
"profile": {
"frames": [
Expand Down Expand Up @@ -433,7 +433,7 @@ def test_convert_profile_ignores_other_threads(self) -> None:
result = convert_profile_to_execution_tree(profile_data)
assert len(result) == 1

# Should only contain the main function from MainThread
# Should only contain the main function from MainThread (selected because it has in_app frames)
root = result[0]
assert root.function == "main_function"

Expand Down Expand Up @@ -554,3 +554,157 @@ def test_convert_profile_duration_calculation_accuracy(self) -> None:
# Verify sample counts
assert short_func.sample_count == 1
assert long_func.sample_count == 3

def test_convert_profile_selects_thread_with_most_in_app_frames(self) -> None:
"""Test that the thread with the most in_app frames is selected."""
profile_data: dict[str, Any] = {
"profile": {
"frames": [
{
"function": "main_function",
"module": "app",
"filename": "main.py",
"lineno": 10,
"in_app": True,
},
{
"function": "worker_function_1",
"module": "app",
"filename": "worker.py",
"lineno": 20,
"in_app": True,
},
{
"function": "worker_function_2",
"module": "app",
"filename": "worker.py",
"lineno": 30,
"in_app": True,
},
{
"function": "worker_function_3",
"module": "app",
"filename": "worker.py",
"lineno": 40,
"in_app": True,
},
],
"stacks": [[0], [1, 2, 3]], # MainThread has 1 frame, WorkerThread has 3 frames
"samples": [
{
"elapsed_since_start_ns": 1000000,
"thread_id": "1", # MainThread - 1 in_app frame
"stack_id": 0,
},
{
"elapsed_since_start_ns": 1000000,
"thread_id": "2", # WorkerThread - 3 in_app frames (should be selected)
"stack_id": 1,
},
],
"thread_metadata": {
"1": {"name": "MainThread"},
"2": {"name": "WorkerThread"},
},
}
}

result = convert_profile_to_execution_tree(profile_data)
assert len(result) == 1

# Should contain worker_function_3 from WorkerThread (thread with most in_app frames)
root = result[0]
assert root.function == "worker_function_3"
assert len(root.children) == 1
assert root.children[0].function == "worker_function_2"
assert len(root.children[0].children) == 1
assert root.children[0].children[0].function == "worker_function_1"

def test_convert_profile_fallback_to_mainthread_when_no_in_app_frames(self) -> None:
"""Test fallback to MainThread when no threads have in_app frames."""
profile_data: dict[str, Any] = {
"profile": {
"frames": [
{
"function": "stdlib_function",
"module": "stdlib",
"filename": "/usr/lib/stdlib.py",
"lineno": 100,
"in_app": False,
},
{
"function": "another_stdlib_function",
"module": "stdlib",
"filename": "/usr/lib/stdlib.py",
"lineno": 200,
"in_app": False,
},
],
"stacks": [[0, 1]],
"samples": [
{
"elapsed_since_start_ns": 1000000,
"thread_id": "1", # MainThread
"stack_id": 0,
},
{
"elapsed_since_start_ns": 1000000,
"thread_id": "2", # WorkerThread
"stack_id": 0,
},
],
"thread_metadata": {
"1": {"name": "MainThread"},
"2": {"name": "WorkerThread"},
},
}
}

result = convert_profile_to_execution_tree(profile_data)
assert len(result) == 1

# Should contain all frames from MainThread (including system frames)
root = result[0]
assert root.function == "another_stdlib_function"
assert len(root.children) == 1
assert root.children[0].function == "stdlib_function"

def test_convert_profile_fallback_to_first_sample_when_no_mainthread_no_in_app(self) -> None:
"""Test fallback to first sample's thread when no MainThread and no in_app frames."""
profile_data: dict[str, Any] = {
"profile": {
"frames": [
{
"function": "stdlib_function",
"module": "stdlib",
"filename": "/usr/lib/stdlib.py",
"lineno": 100,
"in_app": False,
},
],
"stacks": [[0]],
"samples": [
{
"elapsed_since_start_ns": 1000000,
"thread_id": "worker1", # First sample - should be selected
"stack_id": 0,
},
{
"elapsed_since_start_ns": 1000000,
"thread_id": "worker2",
"stack_id": 0,
},
],
"thread_metadata": {
"worker1": {"name": "WorkerThread1"},
"worker2": {"name": "WorkerThread2"},
},
}
}

result = convert_profile_to_execution_tree(profile_data)
assert len(result) == 1

# Should contain frames from worker1 (first sample's thread)
root = result[0]
assert root.function == "stdlib_function"
Loading