132 changes: 132 additions & 0 deletions src/sentry/seer/assisted_query/traces_tools.py
@@ -0,0 +1,132 @@
import logging

from sentry.api import client
from sentry.constants import ALL_ACCESS_PROJECT_ID
from sentry.models.apikey import ApiKey
from sentry.models.organization import Organization

logger = logging.getLogger(__name__)

API_KEY_SCOPES = ["org:read", "project:read", "event:read"]


def get_attribute_names(
*, org_id: int, project_ids: list[int], stats_period: str, item_type: str = "spans"
) -> dict:
"""
Get attribute names for trace items by calling the public API endpoint.
This ensures all queryable built-in fields (like span.op, span.description, etc.)
are included in the response, unlike the Snuba RPC which may exclude certain
standard fields.
Args:
org_id: Organization ID
project_ids: List of project IDs to query
stats_period: Time period string (e.g., "7d", "24h", "30d")
item_type: Type of trace item (default: "spans", can be "spans", "logs", etc.)
Returns:
Dictionary with attributes:
{
"fields": {
"string": ["span.op", "span.description", ...],
"number": ["span.duration", ...]
}
}
"""
organization = Organization.objects.get(id=org_id)
api_key = ApiKey(organization_id=org_id, scope_list=API_KEY_SCOPES)

Bug: client.get() uses user=None, leading to AnonymousUser() and permission failures for organization access.
Severity: CRITICAL | Confidence: High

🔍 Detailed Analysis

The client.get() method is invoked with user=None, which causes src/sentry/api/client.py to default to AnonymousUser(). This AnonymousUser() lacks organization membership, leading to permission checks failing within OrganizationEventsV2EndpointBase (specifically in get_snuba_params()) when the endpoint /organizations/{organization.slug}/trace-items/attributes/ is called. Consequently, the API calls will consistently fail with permission errors, rendering the feature non-functional.

💡 Suggested Fix

Provide an authenticated user with organization permissions to the user parameter of client.get().


Member Author

this is what we do everywhere with client.get
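For context, the fallback the bot describes amounts to roughly the following (a hypothetical simplification; the real logic lives in src/sentry/api/client.py and may differ):

```python
from django.contrib.auth.models import AnonymousUser

def resolve_request_user(user):
    # Hypothetical sketch: when no user is passed, the internal client
    # attributes the request to AnonymousUser(), which has no organization
    # membership, so endpoint permission checks then rely entirely on the
    # ApiKey's scopes.
    return user if user is not None else AnonymousUser()
```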


fields: dict[str, list[str]] = {"string": [], "number": []}

# Fetch both string and number attributes from the public API
for attr_type in ["string", "number"]:
query_params = {
"attributeType": attr_type,
"itemType": item_type,
"statsPeriod": stats_period,
"project": project_ids or [ALL_ACCESS_PROJECT_ID],
}

# API returns: [{"key": "...", "name": "span.op", "attributeSource": {...}}, ...]
resp = client.get(
auth=api_key,
user=None,
path=f"/organizations/{organization.slug}/trace-items/attributes/",
params=query_params,
)

fields[attr_type] = [item["name"] for item in resp.data]

return {"fields": fields}
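For illustration, a call might look like this (hypothetical IDs; the output shape follows the docstring above):

```python
# Hypothetical usage: org_id=1 and project 42 are illustrative, not real.
names = get_attribute_names(org_id=1, project_ids=[42], stats_period="24h")
# Expected shape, per the docstring:
# {"fields": {"string": ["span.op", "span.description", ...],
#             "number": ["span.duration", ...]}}
```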


def get_attribute_values_with_substring(
*,
org_id: int,
project_ids: list[int],
fields_with_substrings: list[dict[str, str]],
stats_period: str = "7d",
limit: int = 100,
item_type: str = "spans",
) -> dict:
"""
Get attribute values for specific fields, optionally filtered by substring. Only string attributes are supported.
Args:
org_id: Organization ID
project_ids: List of project IDs to query
fields_with_substrings: List of dicts with "field" and optional "substring" keys
Example: [{"field": "span.status", "substring": "error"}]
stats_period: Time period string (e.g., "7d", "24h", "30d")
limit: Maximum number of values to return per field (API default is 1000)
item_type: Type of trace item (default: "spans")
Returns:
Dictionary with values:
{
"values": {
"span.status": ["ok", "error", ...],
"transaction": ["checkout", ...]
}
}
"""
if not fields_with_substrings:
return {"values": {}}

organization = Organization.objects.get(id=org_id)
api_key = ApiKey(organization_id=org_id, scope_list=API_KEY_SCOPES)

values: dict[str, set[str]] = {}

for field_with_substring in fields_with_substrings:
field = field_with_substring["field"]
substring = field_with_substring.get("substring", "")

query_params = {
"itemType": item_type,
"attributeType": "string",
Member

there can be attrs w other types right? should we pass in the field's type, maybe return it in the _names rpc?

Member Author
@roaga Nov 24, 2025

the old RPC this is replacing only supported strings, which makes sense because this is about searching values by substring

didn't want to make any breaking changes in this PR either way

Member

ok, if we reuse this for metrics/logs (I think we should!) can do a refactor then

Member Author

it's already re-usable by changing itemType, that's the intention, and the same thing applies where we can only do substring search on string fields

Member

as more context, it only makes sense to search for values of string types: booleans are also stored as strings, and numerical search (int/float/datetimes) I believe isn't supported because of insane cardinality, which is why even in sentry search it defaults to a few suggested values.

"statsPeriod": stats_period,
"project": project_ids or [ALL_ACCESS_PROJECT_ID],
}
if substring:
query_params["substringMatch"] = substring

# API returns: [{"value": "ok", "count": 123, ...}, ...]
resp = client.get(
auth=api_key,
user=None,
path=f"/organizations/{organization.slug}/trace-items/attributes/{field}/values/",
params=query_params,
)

# Extract "value" from each item, filter out None/empty, and respect limit
field_values_list = [item["value"] for item in resp.data if item.get("value")]
# Merge with existing values if field already exists (multiple substrings for same field)
values.setdefault(field, set()).update(field_values_list[:limit])
Contributor

Bug: Redundant limiting causes inefficient memory usage

The function applies [:limit] twice: once when adding values to the set (line 127) and again when returning the sorted result (line 131). When the same field is queried with multiple substrings, the set accumulates up to limit values from each query, potentially growing to several times the limit before being trimmed again. This wastes memory and processing by storing and sorting unnecessary values. The slicing on line 127 should be removed to collect all API responses, then apply the limit once on the final sorted output.


Member Author

multi limit lets us balance the results between the two fields somewhat which i kinda like


# Convert sets to sorted lists for JSON serialization
return {
"values": {field: sorted(field_values)[:limit] for field, field_values in values.items()}
}
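A usage sketch (hypothetical IDs and values) that also shows the per-field merge discussed above, where two entries for the same field accumulate into one set before the final sort and trim:

```python
# Hypothetical call: IDs and attribute values are illustrative.
result = get_attribute_values_with_substring(
    org_id=1,
    project_ids=[42],
    fields_with_substrings=[
        {"field": "span.status", "substring": "err"},
        {"field": "span.status"},  # no substring: fetch unfiltered values
    ],
    limit=100,
)
# Both entries target span.status, so up to `limit` values from each query
# are merged into one set, then sorted and trimmed to `limit` once more:
# {"values": {"span.status": ["error", "internal_error", "ok", ...]}}
```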
186 changes: 4 additions & 182 deletions src/sentry/seer/endpoints/seer_rpc.py
@@ -4,7 +4,6 @@
import logging
import uuid
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, TypedDict

import sentry_sdk
@@ -26,10 +25,6 @@
from rest_framework.request import Request
from rest_framework.response import Response
from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig
from sentry_protos.snuba.v1.endpoint_trace_item_attributes_pb2 import (
TraceItemAttributeNamesRequest,
TraceItemAttributeValuesRequest,
)
from sentry_protos.snuba.v1.endpoint_trace_item_details_pb2 import TraceItemDetailsRequest
from sentry_protos.snuba.v1.endpoint_trace_item_stats_pb2 import (
AttributeDistributionsRequest,
@@ -46,7 +41,6 @@
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.authentication import AuthenticationSiloLimit, StandardAuthentication
from sentry.api.base import Endpoint, region_silo_endpoint
from sentry.api.endpoints.organization_trace_item_attributes import as_attribute_key
from sentry.api.endpoints.project_trace_item_details import convert_rpc_attribute_to_json
from sentry.constants import (
ENABLE_PR_REVIEW_TEST_GENERATION_DEFAULT,
@@ -66,7 +60,6 @@
from sentry.search.eap.resolver import SearchResolver
from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS
from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType
from sentry.search.eap.utils import can_expose_attribute
from sentry.search.events.types import SnubaParams
from sentry.seer.assisted_query.discover_tools import (
get_event_filter_key_values,
@@ -78,6 +71,10 @@
get_issue_filter_keys,
get_issues_stats,
)
from sentry.seer.assisted_query.traces_tools import (
get_attribute_names,
get_attribute_values_with_substring,
)
from sentry.seer.autofix.autofix_tools import get_error_event_details, get_profile_details
from sentry.seer.autofix.coding_agent import launch_coding_agents_for_run
from sentry.seer.autofix.utils import AutofixTriggerSource
@@ -370,181 +367,6 @@ def get_organization_seer_consent_by_org_name(
return {"consent": False, "consent_url": consent_url}


def get_attribute_names(*, org_id: int, project_ids: list[int], stats_period: str) -> dict:
Member

think we'd need to clean these up in a follow up, to avoid breaking current query agent

Member Author

this PR (at least what I intended, double-check me if I'm wrong) just moves those functions into a new file and keeps the same input and output signatures, and I just change the internal logic, so nothing should break

type_mapping = {
AttributeKey.Type.TYPE_STRING: "string",
AttributeKey.Type.TYPE_DOUBLE: "number",
}

period = parse_stats_period(stats_period)
if period is None:
period = datetime.timedelta(days=7)

end = datetime.datetime.now()
start = end - period

start_time_proto = ProtobufTimestamp()
start_time_proto.FromDatetime(start)
end_time_proto = ProtobufTimestamp()
end_time_proto.FromDatetime(end)

fields: dict[str, list[str]] = {type_str: [] for type_str in type_mapping.values()}

for attr_type, type_str in type_mapping.items():
req = TraceItemAttributeNamesRequest(
meta=RequestMeta(
organization_id=org_id,
cogs_category="events_analytics_platform",
referrer=Referrer.SEER_RPC.value,
project_ids=project_ids,
start_timestamp=start_time_proto,
end_timestamp=end_time_proto,
trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN,
),
type=attr_type,
limit=1000,
)

fields_resp = snuba_rpc.attribute_names_rpc(req)

parsed_fields = [
as_attribute_key(
attr.name,
"string" if attr_type == AttributeKey.Type.TYPE_STRING else "number",
SupportedTraceItemType.SPANS,
)["name"]
for attr in fields_resp.attributes
if attr.name
and can_expose_attribute(
attr.name, SupportedTraceItemType.SPANS, include_internal=False
)
]

fields[type_str].extend(parsed_fields)

return {"fields": fields}


def get_attribute_values_with_substring(
*,
org_id: int,
project_ids: list[int],
fields_with_substrings: list[dict[str, str]],
stats_period: str = "48h",
limit: int = 100,
sampled: bool = True,
) -> dict:
"""
Get attribute values with substring.
Note: The RPC is guaranteed to not return duplicate values for the same field.
ie: if span.description is requested with both null and "payment" substrings,
the RPC will return the set of values for span.description to avoid duplicates.

TODO: Replace with batch attribute values RPC once available
"""
values: dict[str, set[str]] = {}

if not fields_with_substrings:
return {"values": values}

period = parse_stats_period(stats_period)
if period is None:
period = datetime.timedelta(days=7)

end = datetime.datetime.now()
start = end - period

start_time_proto = ProtobufTimestamp()
start_time_proto.FromDatetime(start)
end_time_proto = ProtobufTimestamp()
end_time_proto.FromDatetime(end)

sampling_mode = (
DownsampledStorageConfig.MODE_NORMAL
if sampled
else DownsampledStorageConfig.MODE_HIGHEST_ACCURACY
)

resolver = SearchResolver(
params=SnubaParams(
start=start,
end=end,
),
config=SearchResolverConfig(),
definitions=SPAN_DEFINITIONS,
)

def process_field_with_substring(
field_with_substring: dict[str, str],
) -> tuple[str, set[str]] | None:
"""Helper function to process a single field_with_substring request."""
field = field_with_substring["field"]
substring = field_with_substring["substring"]

resolved_field, _ = resolver.resolve_attribute(field)
if resolved_field.proto_definition.type == AttributeKey.Type.TYPE_STRING:
req = TraceItemAttributeValuesRequest(
meta=RequestMeta(
organization_id=org_id,
cogs_category="events_analytics_platform",
referrer=Referrer.SEER_RPC.value,
project_ids=project_ids,
start_timestamp=start_time_proto,
end_timestamp=end_time_proto,
trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN,
downsampled_storage_config=DownsampledStorageConfig(mode=sampling_mode),
),
key=resolved_field.proto_definition,
limit=limit,
value_substring_match=substring,
)

values_response = snuba_rpc.attribute_values_rpc(req)
return field, {value for value in values_response.values if value}
return None

timeout_seconds = 1.0

with ThreadPoolExecutor(max_workers=min(len(fields_with_substrings), 10)) as executor:
future_to_field = {
executor.submit(
process_field_with_substring, field_with_substring
): field_with_substring
for field_with_substring in fields_with_substrings
}

try:
for future in as_completed(future_to_field, timeout=timeout_seconds):
field_with_substring = future_to_field[future]

try:
result = future.result()
if result is not None:
field, field_values = result
if field in values:
values[field].update(field_values)
else:
values[field] = field_values
except TimeoutError:
logger.warning(
"RPC call timed out after %s seconds for field %s, skipping",
timeout_seconds,
field_with_substring.get("field", "unknown"),
)
except Exception as e:
logger.warning(
"RPC call failed for field %s: %s",
field_with_substring.get("field", "unknown"),
str(e),
)
except TimeoutError:
for future in future_to_field:
future.cancel()
logger.warning("Overall timeout exceeded, cancelled remaining RPC calls")

return {"values": values}
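In isolation, the timeout pattern the removed code used looks roughly like this (a minimal sketch with a stand-in fetch function; on Python 3.11+, concurrent.futures.TimeoutError is the builtin TimeoutError):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_values(field: str) -> tuple[str, set[str]]:
    # Stand-in for the per-field RPC call in the removed code.
    return field, {"ok", "error"}

fields = ["span.status", "transaction"]
results: dict[str, set[str]] = {}
with ThreadPoolExecutor(max_workers=min(len(fields), 10)) as executor:
    futures = {executor.submit(fetch_values, f): f for f in fields}
    try:
        # as_completed raises TimeoutError if the deadline passes before
        # every future finishes; futures that completed in time are still
        # yielded and consumed below.
        for future in as_completed(futures, timeout=1.0):
            field, vals = future.result()
            results.setdefault(field, set()).update(vals)
    except TimeoutError:
        # Best effort: cancel() only stops futures that haven't started.
        for future in futures:
            future.cancel()
```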


def get_attributes_and_values(
*,
org_id: int,