Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/sentry/seer/explorer/index_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_transactions_for_project(
project_id: int, limit: int = 500, start_time_delta: dict[str, int] | None = None
) -> list[Transaction]:
"""
Get a list of transactions for a project using EAP, sorted by volume/traffic.
Get a list of transactions for a project using EAP, sorted by total time spent.

Args:
project_id: The ID of the project to fetch transactions for
Expand Down Expand Up @@ -75,15 +75,15 @@ def get_transactions_for_project(
auto_fields=True,
)

# Query EAP for transactions with volume metrics
# Query EAP for most important transactions (highest total time spent)
result = Spans.run_table_query(
params=snuba_params,
query_string=f"is_transaction:true project.id:{project_id}",
query_string="is_transaction:true",
selected_columns=[
"transaction",
"count()",
"sum(span.duration)",
],
orderby=["-count()"], # Sort by count descending (highest volume first)
orderby=["-sum(span.duration)"],
offset=0,
limit=limit,
Comment on lines 79 to 88
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: get_transactions_for_project() lacks an explicit project.id filter in its query_string, potentially returning data from all projects.
Severity: CRITICAL | Confidence: 0.95

🔍 Detailed Analysis

The get_transactions_for_project() function constructs a query_string="is_transaction:true" without explicitly including project.id:{project_id}. Despite snuba_params containing projects=[project], this metadata is not automatically applied as a server-side filter in the RPC layer, as indicated by similar filtering requirements in SearchResolver.resolve_query(). This results in the function returning transactions from all projects in the organization, rather than being scoped to the specified project_id.

💡 Suggested Fix

Modify the query_string in get_transactions_for_project() to explicitly include project.id:{project_id} to ensure results are correctly scoped.

🤖 Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/sentry/seer/explorer/index_data.py#L79-L88

Potential issue: The `get_transactions_for_project()` function constructs a
`query_string="is_transaction:true"` without explicitly including
`project.id:{project_id}`. Despite `snuba_params` containing `projects=[project]`, this
metadata is not automatically applied as a server-side filter in the RPC layer, as
indicated by similar filtering requirements in `SearchResolver.resolve_query()`. This
results in the function returning transactions from all projects in the organization,
rather than being scoped to the specified `project_id`.

Did we get this right? 👍 / 👎 to inform future reviews.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i believe this is wrong because the snuba params has the project filter

referrer=Referrer.SEER_RPC,
Expand Down
23 changes: 13 additions & 10 deletions tests/sentry/seer/explorer/test_index_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,24 @@ def setUp(self) -> None:

def test_get_transactions_for_project(self) -> None:
"""Test the full end-to-end happy path for get_transactions_for_project."""
# Create spans for different transactions with varying volumes
# Create spans for different transactions with varying total time spent
# Format: (transaction_name, count, avg_duration_ms)
transactions_data = [
("api/users/profile", 5), # High volume
("api/posts/create", 3), # Medium volume
("api/health", 1), # Low volume
("api/users/profile", 5, 100.0), # 5 * 100 = 500ms total (highest)
("api/posts/create", 3, 150.0), # 3 * 150 = 450ms total (middle)
("api/health", 10, 10.0), # 10 * 10 = 100ms total (lowest, despite high count)
]

# Store transaction spans with different volumes
# Store transaction spans with different volumes and durations
spans = []
for transaction_name, count in transactions_data:
for transaction_name, count, duration_ms in transactions_data:
for i in range(count):
span = self.create_span(
{
"description": f"transaction-span-{i}",
"sentry_tags": {"transaction": transaction_name},
"is_segment": True, # This marks it as a transaction span
"duration_ms": duration_ms,
},
start_ts=self.ten_mins_ago + timedelta(minutes=i),
)
Expand All @@ -51,6 +53,7 @@ def test_get_transactions_for_project(self) -> None:
"description": f"regular-span-{i}",
"sentry_tags": {"transaction": transaction_name},
"is_segment": False, # This marks it as a regular span
"duration_ms": 50.0,
},
start_ts=self.ten_mins_ago + timedelta(minutes=i, seconds=30),
)
Expand All @@ -64,11 +67,11 @@ def test_get_transactions_for_project(self) -> None:
# Verify basic structure and data
assert len(result) == 3

# Should be sorted by volume (count) descending - only transaction spans count
# Should be sorted by total time spent (sum of duration) descending
transaction_names = [t.name for t in result]
assert transaction_names[0] == "api/users/profile" # Highest count (5 transaction spans)
assert transaction_names[1] == "api/posts/create" # Medium count (3 transaction spans)
assert transaction_names[2] == "api/health" # Lowest count (1 transaction span)
assert transaction_names[0] == "api/users/profile" # 500ms total (highest)
assert transaction_names[1] == "api/posts/create" # 450ms total (middle)
assert transaction_names[2] == "api/health" # 100ms total (lowest despite high count)

# Verify all transactions have correct project_id and structure
for transaction in result:
Expand Down
Loading