-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
feat(assisted-query): include built-in fields in span attrs rpc and support logs and metrics #103874
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(assisted-query): include built-in fields in span attrs rpc and support logs and metrics #103874
Changes from all commits
5f2b833
f83a2ec
2fa9a58
ea9c9cc
f0834d0
3d577f6
1168e6f
a17a389
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,132 @@ | ||
| import logging | ||
|
|
||
| from sentry.api import client | ||
| from sentry.constants import ALL_ACCESS_PROJECT_ID | ||
| from sentry.models.apikey import ApiKey | ||
| from sentry.models.organization import Organization | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
| API_KEY_SCOPES = ["org:read", "project:read", "event:read"] | ||
|
|
||
|
|
||
def get_attribute_names(
    *, org_id: int, project_ids: list[int], stats_period: str, item_type: str = "spans"
) -> dict:
    """
    Fetch queryable attribute names for trace items via the public API endpoint.

    Going through the public endpoint (rather than the Snuba RPC) ensures that
    queryable built-in fields such as ``span.op`` and ``span.description`` are
    included in the response; the RPC may exclude certain standard fields.

    Args:
        org_id: Organization ID.
        project_ids: Project IDs to query; an empty list falls back to all projects.
        stats_period: Relative time window string (e.g. "7d", "24h", "30d").
        item_type: Trace item type (default "spans"; may be "spans", "logs", etc.).

    Returns:
        Dictionary of attribute names grouped by type::

            {
                "fields": {
                    "string": ["span.op", "span.description", ...],
                    "number": ["span.duration", ...],
                }
            }
    """
    organization = Organization.objects.get(id=org_id)
    # Ephemeral (unsaved) key carrying only the read scopes the endpoint checks.
    api_key = ApiKey(organization_id=org_id, scope_list=API_KEY_SCOPES)
    attributes_path = f"/organizations/{organization.slug}/trace-items/attributes/"

    fields: dict[str, list[str]] = {}
    # One request per attribute type; the endpoint returns
    # [{"key": "...", "name": "span.op", "attributeSource": {...}}, ...]
    for attr_type in ("string", "number"):
        response = client.get(
            auth=api_key,
            user=None,
            path=attributes_path,
            params={
                "attributeType": attr_type,
                "itemType": item_type,
                "statsPeriod": stats_period,
                "project": project_ids or [ALL_ACCESS_PROJECT_ID],
            },
        )
        fields[attr_type] = [entry["name"] for entry in response.data]

    return {"fields": fields}
|
|
||
|
|
||
def get_attribute_values_with_substring(
    *,
    org_id: int,
    project_ids: list[int],
    fields_with_substrings: list[dict[str, str]],
    stats_period: str = "7d",
    limit: int = 100,
    item_type: str = "spans",
) -> dict:
    """
    Fetch attribute values for specific fields, optionally filtered by substring.

    Only string attributes are supported: booleans are stored as strings, and
    numeric value search is not offered by the endpoint.

    Args:
        org_id: Organization ID.
        project_ids: Project IDs to query; an empty list falls back to all projects.
        fields_with_substrings: Dicts with a "field" key and an optional
            "substring" key, e.g. ``[{"field": "span.status", "substring": "error"}]``.
        stats_period: Relative time window string (e.g. "7d", "24h", "30d").
        limit: Maximum number of values returned per field (the API's own
            default cap is 1000).
        item_type: Trace item type (default "spans").

    Returns:
        Dictionary mapping each field to its sorted values::

            {
                "values": {
                    "span.status": ["error", "ok", ...],
                    "transaction": ["checkout", ...],
                }
            }
    """
    if not fields_with_substrings:
        return {"values": {}}

    organization = Organization.objects.get(id=org_id)
    # Ephemeral (unsaved) key carrying only the read scopes the endpoint checks.
    api_key = ApiKey(organization_id=org_id, scope_list=API_KEY_SCOPES)
    base_path = f"/organizations/{organization.slug}/trace-items/attributes"

    collected: dict[str, set[str]] = {}
    for request_spec in fields_with_substrings:
        field = request_spec["field"]
        substring = request_spec.get("substring", "")

        params: dict = {
            "itemType": item_type,
            # Value search is only meaningful for string attributes.
            "attributeType": "string",
            "statsPeriod": stats_period,
            "project": project_ids or [ALL_ACCESS_PROJECT_ID],
        }
        if substring:
            params["substringMatch"] = substring

        # Endpoint payload: [{"value": "ok", "count": 123, ...}, ...]
        response = client.get(
            auth=api_key,
            user=None,
            path=f"{base_path}/{field}/values/",
            params=params,
        )

        # Keep non-empty values, cap each request's contribution, and merge
        # into a set (the same field may appear with several substrings).
        non_empty = [row["value"] for row in response.data if row.get("value")]
        collected.setdefault(field, set()).update(non_empty[:limit])

    # Sets -> sorted lists for JSON serialization; re-apply the cap because
    # merging multiple substring requests can exceed it.
    return {
        "values": {name: sorted(vals)[:limit] for name, vals in collected.items()}
    }
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,6 @@ | |
| import logging | ||
| import uuid | ||
| from collections.abc import Callable | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from typing import Any, TypedDict | ||
|
|
||
| import sentry_sdk | ||
|
|
@@ -26,10 +25,6 @@ | |
| from rest_framework.request import Request | ||
| from rest_framework.response import Response | ||
| from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig | ||
| from sentry_protos.snuba.v1.endpoint_trace_item_attributes_pb2 import ( | ||
| TraceItemAttributeNamesRequest, | ||
| TraceItemAttributeValuesRequest, | ||
| ) | ||
| from sentry_protos.snuba.v1.endpoint_trace_item_details_pb2 import TraceItemDetailsRequest | ||
| from sentry_protos.snuba.v1.endpoint_trace_item_stats_pb2 import ( | ||
| AttributeDistributionsRequest, | ||
|
|
@@ -46,7 +41,6 @@ | |
| from sentry.api.api_publish_status import ApiPublishStatus | ||
| from sentry.api.authentication import AuthenticationSiloLimit, StandardAuthentication | ||
| from sentry.api.base import Endpoint, region_silo_endpoint | ||
| from sentry.api.endpoints.organization_trace_item_attributes import as_attribute_key | ||
| from sentry.api.endpoints.project_trace_item_details import convert_rpc_attribute_to_json | ||
| from sentry.constants import ( | ||
| ENABLE_PR_REVIEW_TEST_GENERATION_DEFAULT, | ||
|
|
@@ -66,7 +60,6 @@ | |
| from sentry.search.eap.resolver import SearchResolver | ||
| from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS | ||
| from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType | ||
| from sentry.search.eap.utils import can_expose_attribute | ||
| from sentry.search.events.types import SnubaParams | ||
| from sentry.seer.assisted_query.discover_tools import ( | ||
| get_event_filter_key_values, | ||
|
|
@@ -78,6 +71,10 @@ | |
| get_issue_filter_keys, | ||
| get_issues_stats, | ||
| ) | ||
| from sentry.seer.assisted_query.traces_tools import ( | ||
| get_attribute_names, | ||
| get_attribute_values_with_substring, | ||
| ) | ||
| from sentry.seer.autofix.autofix_tools import get_error_event_details, get_profile_details | ||
| from sentry.seer.autofix.coding_agent import launch_coding_agents_for_run | ||
| from sentry.seer.autofix.utils import AutofixTriggerSource | ||
|
|
@@ -370,181 +367,6 @@ def get_organization_seer_consent_by_org_name( | |
| return {"consent": False, "consent_url": consent_url} | ||
|
|
||
|
|
||
| def get_attribute_names(*, org_id: int, project_ids: list[int], stats_period: str) -> dict: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. think we'd need to clean these up in a follow up, to avoid breaking current query agent
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this PR (at least what I intended, double check if me if wrong) just moves those functions into a new file and keeps the same input and output signatures, and I just change the internal logic, so nothing should break |
||
| type_mapping = { | ||
| AttributeKey.Type.TYPE_STRING: "string", | ||
| AttributeKey.Type.TYPE_DOUBLE: "number", | ||
| } | ||
|
|
||
| period = parse_stats_period(stats_period) | ||
| if period is None: | ||
| period = datetime.timedelta(days=7) | ||
|
|
||
| end = datetime.datetime.now() | ||
| start = end - period | ||
|
|
||
| start_time_proto = ProtobufTimestamp() | ||
| start_time_proto.FromDatetime(start) | ||
| end_time_proto = ProtobufTimestamp() | ||
| end_time_proto.FromDatetime(end) | ||
|
|
||
| fields: dict[str, list[str]] = {type_str: [] for type_str in type_mapping.values()} | ||
|
|
||
| for attr_type, type_str in type_mapping.items(): | ||
| req = TraceItemAttributeNamesRequest( | ||
| meta=RequestMeta( | ||
| organization_id=org_id, | ||
| cogs_category="events_analytics_platform", | ||
| referrer=Referrer.SEER_RPC.value, | ||
| project_ids=project_ids, | ||
| start_timestamp=start_time_proto, | ||
| end_timestamp=end_time_proto, | ||
| trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN, | ||
| ), | ||
| type=attr_type, | ||
| limit=1000, | ||
| ) | ||
|
|
||
| fields_resp = snuba_rpc.attribute_names_rpc(req) | ||
|
|
||
| parsed_fields = [ | ||
| as_attribute_key( | ||
| attr.name, | ||
| "string" if attr_type == AttributeKey.Type.TYPE_STRING else "number", | ||
| SupportedTraceItemType.SPANS, | ||
| )["name"] | ||
| for attr in fields_resp.attributes | ||
| if attr.name | ||
| and can_expose_attribute( | ||
| attr.name, SupportedTraceItemType.SPANS, include_internal=False | ||
| ) | ||
| ] | ||
|
|
||
| fields[type_str].extend(parsed_fields) | ||
|
|
||
| return {"fields": fields} | ||
|
|
||
|
|
||
| def get_attribute_values_with_substring( | ||
| *, | ||
| org_id: int, | ||
| project_ids: list[int], | ||
| fields_with_substrings: list[dict[str, str]], | ||
| stats_period: str = "48h", | ||
| limit: int = 100, | ||
| sampled: bool = True, | ||
| ) -> dict: | ||
| """ | ||
| Get attribute values with substring. | ||
| Note: The RPC is guaranteed to not return duplicate values for the same field. | ||
| ie: if span.description is requested with both null and "payment" substrings, | ||
| the RPC will return the set of values for span.description to avoid duplicates. | ||
|
|
||
| TODO: Replace with batch attribute values RPC once available | ||
| """ | ||
| values: dict[str, set[str]] = {} | ||
|
|
||
| if not fields_with_substrings: | ||
| return {"values": values} | ||
|
|
||
| period = parse_stats_period(stats_period) | ||
| if period is None: | ||
| period = datetime.timedelta(days=7) | ||
|
|
||
| end = datetime.datetime.now() | ||
| start = end - period | ||
|
|
||
| start_time_proto = ProtobufTimestamp() | ||
| start_time_proto.FromDatetime(start) | ||
| end_time_proto = ProtobufTimestamp() | ||
| end_time_proto.FromDatetime(end) | ||
|
|
||
| sampling_mode = ( | ||
| DownsampledStorageConfig.MODE_NORMAL | ||
| if sampled | ||
| else DownsampledStorageConfig.MODE_HIGHEST_ACCURACY | ||
| ) | ||
|
|
||
| resolver = SearchResolver( | ||
| params=SnubaParams( | ||
| start=start, | ||
| end=end, | ||
| ), | ||
| config=SearchResolverConfig(), | ||
| definitions=SPAN_DEFINITIONS, | ||
| ) | ||
|
|
||
| def process_field_with_substring( | ||
| field_with_substring: dict[str, str], | ||
| ) -> tuple[str, set[str]] | None: | ||
| """Helper function to process a single field_with_substring request.""" | ||
| field = field_with_substring["field"] | ||
| substring = field_with_substring["substring"] | ||
|
|
||
| resolved_field, _ = resolver.resolve_attribute(field) | ||
| if resolved_field.proto_definition.type == AttributeKey.Type.TYPE_STRING: | ||
| req = TraceItemAttributeValuesRequest( | ||
| meta=RequestMeta( | ||
| organization_id=org_id, | ||
| cogs_category="events_analytics_platform", | ||
| referrer=Referrer.SEER_RPC.value, | ||
| project_ids=project_ids, | ||
| start_timestamp=start_time_proto, | ||
| end_timestamp=end_time_proto, | ||
| trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN, | ||
| downsampled_storage_config=DownsampledStorageConfig(mode=sampling_mode), | ||
| ), | ||
| key=resolved_field.proto_definition, | ||
| limit=limit, | ||
| value_substring_match=substring, | ||
| ) | ||
|
|
||
| values_response = snuba_rpc.attribute_values_rpc(req) | ||
| return field, {value for value in values_response.values if value} | ||
| return None | ||
|
|
||
| timeout_seconds = 1.0 | ||
|
|
||
| with ThreadPoolExecutor(max_workers=min(len(fields_with_substrings), 10)) as executor: | ||
| future_to_field = { | ||
| executor.submit( | ||
| process_field_with_substring, field_with_substring | ||
| ): field_with_substring | ||
| for field_with_substring in fields_with_substrings | ||
| } | ||
|
|
||
| try: | ||
| for future in as_completed(future_to_field, timeout=timeout_seconds): | ||
| field_with_substring = future_to_field[future] | ||
|
|
||
| try: | ||
| result = future.result() | ||
| if result is not None: | ||
| field, field_values = result | ||
| if field in values: | ||
| values[field].update(field_values) | ||
| else: | ||
| values[field] = field_values | ||
| except TimeoutError: | ||
| logger.warning( | ||
| "RPC call timed out after %s seconds for field %s, skipping", | ||
| timeout_seconds, | ||
| field_with_substring.get("field", "unknown"), | ||
| ) | ||
| except Exception as e: | ||
| logger.warning( | ||
| "RPC call failed for field %s: %s", | ||
| field_with_substring.get("field", "unknown"), | ||
| str(e), | ||
| ) | ||
| except TimeoutError: | ||
| for future in future_to_field: | ||
| future.cancel() | ||
| logger.warning("Overall timeout exceeded, cancelled remaining RPC calls") | ||
|
|
||
| return {"values": values} | ||
|
|
||
|
|
||
| def get_attributes_and_values( | ||
| *, | ||
| org_id: int, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bug:
client.get()usesuser=None, leading toAnonymousUser()and permission failures for organization access.Severity: CRITICAL | Confidence: High
🔍 Detailed Analysis
The
client.get()method is invoked withuser=None, which causessrc/sentry/api/client.pyto default toAnonymousUser(). ThisAnonymousUser()lacks organization membership, leading to permission checks failing withinOrganizationEventsV2EndpointBase(specifically inget_snuba_params()) when the endpoint/organizations/{organization.slug}/trace-items/attributes/is called. Consequently, the API calls will consistently fail with permission errors, rendering the feature non-functional.💡 Suggested Fix
Provide an authenticated user with organization permissions to the
userparameter ofclient.get().🤖 Prompt for AI Agent
Did we get this right? 👍 / 👎 to inform future reviews.
Reference ID:
3254778There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is what we do everywhere with client.get