From 31df43ba81d7b7187b69f10823bd0d82ad9f2339 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Thu, 21 Nov 2024 11:52:46 -0500 Subject: [PATCH 1/5] chore(metrics-summaries): Remove metrics summaries queries and endpoints --- .../endpoints/organization_metrics_samples.py | 107 --- src/sentry/api/urls.py | 6 - .../events/builder/metrics_summaries.py | 38 -- .../sentry_metrics/querying/samples_list.py | 220 +----- src/sentry/snuba/dataset.py | 7 - src/sentry/snuba/metrics_summaries.py | 60 -- src/sentry/snuba/referrer.py | 3 - src/sentry/testutils/cases.py | 49 -- src/sentry/utils/snuba.py | 49 +- .../test_organization_metrics_samples.py | 638 ------------------ .../api/endpoints/test_organization_traces.py | 39 +- 11 files changed, 49 insertions(+), 1167 deletions(-) delete mode 100644 src/sentry/api/endpoints/organization_metrics_samples.py delete mode 100644 src/sentry/search/events/builder/metrics_summaries.py delete mode 100644 src/sentry/snuba/metrics_summaries.py delete mode 100644 tests/sentry/api/endpoints/test_organization_metrics_samples.py diff --git a/src/sentry/api/endpoints/organization_metrics_samples.py b/src/sentry/api/endpoints/organization_metrics_samples.py deleted file mode 100644 index f81916c0159c8d..00000000000000 --- a/src/sentry/api/endpoints/organization_metrics_samples.py +++ /dev/null @@ -1,107 +0,0 @@ -import sentry_sdk -from rest_framework import serializers -from rest_framework.exceptions import ParseError -from rest_framework.request import Request -from rest_framework.response import Response - -from sentry.api.api_owners import ApiOwner -from sentry.api.api_publish_status import ApiPublishStatus -from sentry.api.base import region_silo_endpoint -from sentry.api.bases import NoProjects, OrganizationEventsV2EndpointBase -from sentry.api.paginator import GenericOffsetPaginator -from sentry.api.utils import handle_query_errors -from sentry.exceptions import InvalidSearchQuery -from sentry.models.organization import Organization -from sentry.sentry_metrics.querying.samples_list import get_sample_list_executor_cls -from sentry.snuba.metrics.naming_layer.mri import is_mri -from sentry.snuba.referrer import Referrer -from sentry.utils.dates import get_rollup_from_request -from sentry.utils.snuba import SnubaError - - -class MetricsSamplesSerializer(serializers.Serializer): - mri = serializers.CharField(required=True) - field = serializers.ListField(required=True, allow_empty=False, child=serializers.CharField()) - max = serializers.FloatField(required=False) - min = serializers.FloatField(required=False) - operation = serializers.CharField(required=False) - query = serializers.CharField(required=False) - referrer = serializers.CharField(required=False) - sort = serializers.CharField(required=False) - - def validate_mri(self, mri: str) -> str: - if not is_mri(mri): - raise serializers.ValidationError(f"Invalid MRI: {mri}") - - return mri - - -@region_silo_endpoint -class OrganizationMetricsSamplesEndpoint(OrganizationEventsV2EndpointBase): - publish_status = { - "GET": ApiPublishStatus.EXPERIMENTAL, - } - owner = ApiOwner.TELEMETRY_EXPERIENCE - snuba_methods = ["GET"] - - def get(self, request: Request, organization: Organization) -> Response: - try: - snuba_params = self.get_snuba_params(request, organization) - except NoProjects: - return Response(status=404) - - try: - rollup = get_rollup_from_request( - request, - snuba_params.end_date - snuba_params.start_date, - default_interval=None, - error=InvalidSearchQuery(), - ) - except InvalidSearchQuery: - rollup = 3600 # use a default of 1 hour - - serializer = MetricsSamplesSerializer(data=request.GET) - if not serializer.is_valid(): - return Response(serializer.errors, status=400) - - serialized = serializer.validated_data - - executor_cls = get_sample_list_executor_cls(serialized["mri"]) - if not executor_cls: - raise ParseError(f"Unsupported MRI: {serialized['mri']}") - - sort = serialized.get("sort") - if sort is not None: - column = sort[1:] if sort.startswith("-") else sort - if not executor_cls.supports_sort(column): - raise ParseError(f"Unsupported sort: {sort} for MRI") - - executor = executor_cls( - mri=serialized["mri"], - snuba_params=snuba_params, - fields=serialized["field"], - operation=serialized.get("operation"), - query=serialized.get("query", ""), - min=serialized.get("min"), - max=serialized.get("max"), - sort=serialized.get("sort"), - rollup=rollup, - referrer=Referrer.API_ORGANIZATION_METRICS_SAMPLES, - ) - - with handle_query_errors(): - try: - return self.paginate( - request=request, - paginator=GenericOffsetPaginator(data_fn=executor.get_matching_spans), - on_results=lambda results: self.handle_results_with_meta( - request, - organization, - snuba_params.project_ids, - results, - standard_meta=True, - ), - ) - except SnubaError as exc: - sentry_sdk.capture_exception(exc) - raise diff --git a/src/sentry/api/urls.py b/src/sentry/api/urls.py index a79f58f9bcd77a..de1e0776fcae2a 100644 --- a/src/sentry/api/urls.py +++ b/src/sentry/api/urls.py @@ -506,7 +506,6 @@ OrganizationMetricsCompatibilitySums, ) from .endpoints.organization_metrics_query import OrganizationMetricsQueryEndpoint -from .endpoints.organization_metrics_samples import OrganizationMetricsSamplesEndpoint from .endpoints.organization_metrics_tag_details import OrganizationMetricsTagDetailsEndpoint from .endpoints.organization_metrics_tags import OrganizationMetricsTagsEndpoint from .endpoints.organization_on_demand_metrics_estimation_stats import ( @@ -2136,11 +2135,6 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]: OrganizationMetricsQueryEndpoint.as_view(), name="sentry-api-0-organization-metrics-query", ), - re_path( - r"^(?P[^/]+)/metrics/samples/$", - OrganizationMetricsSamplesEndpoint.as_view(), - name="sentry-api-0-organization-metrics-samples", - ), re_path( r"^(?P[^/]+)/metrics/tags/$", OrganizationMetricsTagsEndpoint.as_view(), diff --git a/src/sentry/search/events/builder/metrics_summaries.py b/src/sentry/search/events/builder/metrics_summaries.py deleted file mode 100644 index 5d2968ab2ba104..00000000000000 --- a/src/sentry/search/events/builder/metrics_summaries.py +++ /dev/null @@ -1,38 +0,0 @@ -from snuba_sdk import Entity, Flags, Query, Request - -from sentry.search.events.builder.base import BaseQueryBuilder -from sentry.search.events.datasets.metrics_summaries import MetricsSummariesDatasetConfig -from sentry.snuba.dataset import Dataset - - -class MetricsSummariesQueryBuilder(BaseQueryBuilder): - requires_organization_condition = False - config_class = MetricsSummariesDatasetConfig - - def get_field_type(self, field: str) -> str | None: - if field in ["min_metric", "max_metric", "sum_metric", "count_metric"]: - return "number" - return None - - def get_snql_query(self) -> Request: - self.validate_having_clause() - - return Request( - # the metrics summaries entity exists within the spans indexed dataset - dataset=Dataset.SpansIndexed.value, - app_id="default", - query=Query( - match=Entity(self.dataset.value, sample=self.sample_rate), - select=self.columns, - array_join=self.array_join, - where=self.where, - having=self.having, - groupby=self.groupby, - orderby=self.orderby, - limit=self.limit, - offset=self.offset, - limitby=self.limitby, - ), - flags=Flags(turbo=self.turbo), - tenant_ids=self.tenant_ids, - ) diff --git a/src/sentry/sentry_metrics/querying/samples_list.py b/src/sentry/sentry_metrics/querying/samples_list.py index ef305548c71a71..f7b0fb8f175ec1 100644 --- a/src/sentry/sentry_metrics/querying/samples_list.py +++ b/src/sentry/sentry_metrics/querying/samples_list.py @@ -11,7 +11,6 @@ from sentry.api.event_search import SearchFilter, SearchKey, SearchValue from sentry.search.events.builder.base import BaseQueryBuilder from sentry.search.events.builder.discover import DiscoverQueryBuilder -from sentry.search.events.builder.metrics_summaries import MetricsSummariesQueryBuilder from sentry.search.events.builder.spans_indexed import SpansIndexedQueryBuilder from sentry.search.events.types import QueryBuilderConfig, SelectType, SnubaParams from sentry.snuba.dataset import Dataset @@ -141,7 +140,9 @@ def get_spans_by_key( [ Condition(builder.column("span.group"), Op.EQ, key.group), Condition( - builder.column("timestamp"), Op.EQ, datetime.fromisoformat(key.timestamp) + builder.column("timestamp"), + Op.EQ, + datetime.fromisoformat(key.timestamp), ), ] ) @@ -910,74 +911,6 @@ def supports_mri(cls, mri: str) -> bool: return True return False - def get_matching_traces(self, limit: int) -> tuple[list[str], list[datetime]]: - builder = MetricsSummariesQueryBuilder( - Dataset.MetricsSummaries, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=["trace", "timestamp"], - # The orderby is intentionally `None` here as this query is much faster - # if we let Clickhouse decide which order to return the results in. - # This also means we cannot order by any columns or paginate. - orderby=None, - limit=limit, - limitby=("trace", 1), - ) - - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder) - builder.add_conditions([*additional_conditions, *min_max_conditions]) - - query_results = builder.run_query(self.referrer.value) - results = builder.process_results(query_results) - - trace_ids = [row["trace"] for row in results["data"]] - timestamps = [datetime.fromisoformat(row["timestamp"]) for row in results["data"]] - return trace_ids, timestamps - - def get_matching_spans_from_traces( - self, - trace_ids: list[str], - max_spans_per_trace: int, - ) -> list[SpanKey]: - builder = MetricsSummariesQueryBuilder( - Dataset.MetricsSummaries, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=["span.group", "timestamp", "id"], - # The orderby is intentionally `None` here as this query is much faster - # if we let Clickhouse decide which order to return the results in. - # This also means we cannot order by any columns or paginate. - orderby=None, - limit=len(trace_ids) * max_spans_per_trace, - limitby=("trace", max_spans_per_trace), - ) - - trace_id_condition = Condition(Column("trace_id"), Op.IN, trace_ids) - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder) - builder.add_conditions( - [ - trace_id_condition, - *additional_conditions, - *min_max_conditions, - ] - ) - - query_results = builder.run_query(self.referrer.value) - results = builder.process_results(query_results) - - return [ - SpanKey( - group=row["span.group"], - timestamp=row["timestamp"], - span_id=row["id"], - ) - for row in results["data"] - ] - def _get_spans( self, span_keys: list[SpanKey], @@ -999,153 +932,6 @@ def _get_spans( return result - def get_matching_spans_sorted(self, offset, limit): - span_keys, summaries = self.get_sorted_span_keys(offset, limit) - return self._get_spans(span_keys, summaries) - - def get_sorted_span_keys( - self, - offset: int, - limit: int, - ) -> tuple[list[SpanKey], dict[str, Summary]]: - assert self.sort - sort = self.convert_sort(self.sort, self.operation) - assert sort is not None - direction, sort_column = sort - - fields = [ - "id", - "timestamp", - "span.group", - "min_metric", - "max_metric", - "sum_metric", - "count_metric", - ] - if sort_column not in fields: - fields.append(sort_column) - - builder = MetricsSummariesQueryBuilder( - Dataset.MetricsSummaries, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=fields, - orderby=f"{direction}{sort_column}", - limit=limit, - offset=offset, - # This table has a poor SAMPLE BY so DO NOT use it for now - # sample_rate=options.get("metrics.sample-list.sample-rate"), - config=QueryBuilderConfig(functions_acl=["rounded_timestamp", "example"]), - ) - - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder) - builder.add_conditions([*additional_conditions, *min_max_conditions]) - - query_results = builder.run_query(self.referrer.value) - result = builder.process_results(query_results) - - span_keys = [ - SpanKey( - group=row["span.group"], - timestamp=row["timestamp"], - span_id=row["id"], - ) - for row in result["data"] - ] - - """ - The indexed spans dataset does not contain any metric related - data. To propagate these values, we read it from the metric - summaries table, and copy them to the results in the next step. - """ - summaries = { - cast(str, row["id"]): cast( - Summary, - { - "min": row["min_metric"], - "max": row["max_metric"], - "sum": row["sum_metric"], - "count": row["count_metric"], - }, - ) - for row in result["data"] - } - - return span_keys, summaries - - def get_matching_spans_unsorted(self, offset, limit): - span_keys, summaries = self.get_unsorted_span_keys(offset, limit) - return self._get_spans(span_keys, summaries) - - def get_unsorted_span_keys( - self, - offset: int, - limit: int, - ) -> tuple[list[SpanKey], dict[str, Summary]]: - builder = MetricsSummariesQueryBuilder( - Dataset.MetricsSummaries, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=[ - f"rounded_timestamp({self.rollup})", - f"examples({self.num_samples}) AS examples", - ], - limit=limit, - offset=offset, - # This table has a poor SAMPLE BY so DO NOT use it for now - # sample_rate=options.get("metrics.sample-list.sample-rate"), - config=QueryBuilderConfig(functions_acl=["rounded_timestamp", "examples"]), - ) - - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder) - builder.add_conditions([*additional_conditions, *min_max_conditions]) - - query_results = builder.run_query(self.referrer.value) - result = builder.process_results(query_results) - - # 7 here refers to the avg value which is the default - # if the operaton doesn't have metric it should sort by - index = self.EXAMPLES_SORT_KEY.get(self.operation or "", 7) # sort by metric - metric_key = lambda example: example[index] - - for row in result["data"]: - row["examples"] = pick_samples(row["examples"], metric_key=metric_key) - - span_keys = [ - SpanKey( - group=example[0], - timestamp=example[1], - span_id=example[2], - ) - for row in result["data"] - for example in row["examples"] - ][:limit] - - """ - The indexed spans dataset does not contain any metric related - data. To propagate these values, we read it from the metric - summaries table, and copy them to the results in the next step. - """ - summaries = { - cast(str, example[2]): cast( - Summary, - { - "min": example[3], - "max": example[4], - "sum": example[5], - "count": example[6], - }, - ) - for row in result["data"] - for example in row["examples"] - } - - return span_keys, summaries - def get_additional_conditions(self, builder: BaseQueryBuilder) -> list[Condition]: return [ builder.convert_search_filter_to_condition( diff --git a/src/sentry/snuba/dataset.py b/src/sentry/snuba/dataset.py index 799029ea0e7b19..e2ab7d47fd87f5 100644 --- a/src/sentry/snuba/dataset.py +++ b/src/sentry/snuba/dataset.py @@ -54,12 +54,6 @@ class Dataset(Enum): EventsAnalyticsPlatform = "events_analytics_platform" - MetricsSummaries = "metrics_summaries" - """ - Summaries of all metrics within a span. Used to correlate indexed - spans to a metric. - """ - @unique class EntityKey(Enum): @@ -79,7 +73,6 @@ class EntityKey(Enum): GenericOrgMetricsCounters = "generic_org_metrics_counters" IssuePlatform = "search_issues" Functions = "functions" - MetricsSummaries = "metrics_summaries" @unique diff --git a/src/sentry/snuba/metrics_summaries.py b/src/sentry/snuba/metrics_summaries.py deleted file mode 100644 index 3abff797de7237..00000000000000 --- a/src/sentry/snuba/metrics_summaries.py +++ /dev/null @@ -1,60 +0,0 @@ -from sentry.search.events.builder.metrics_summaries import MetricsSummariesQueryBuilder -from sentry.search.events.types import QueryBuilderConfig -from sentry.snuba.dataset import Dataset -from sentry.snuba.metrics.extraction import MetricSpecType -from sentry.snuba.query_sources import QuerySource - - -def query( - selected_columns, - query, - params, - snuba_params=None, - equations=None, - orderby=None, - offset=None, - limit=50, - referrer=None, - auto_fields=False, - auto_aggregations=False, - include_equation_fields=False, - allow_metric_aggregates=False, - use_aggregate_conditions=False, - conditions=None, - functions_acl=None, - transform_alias_to_input_format=False, - sample=None, - has_metrics=False, - use_metrics_layer=False, - skip_tag_resolution=False, - extra_columns=None, - on_demand_metrics_enabled=False, - on_demand_metrics_type: MetricSpecType | None = None, - fallback_to_transactions=False, - query_source: QuerySource | None = None, -): - builder = MetricsSummariesQueryBuilder( - Dataset.MetricsSummaries, - params, - snuba_params=snuba_params, - query=query, - selected_columns=selected_columns, - equations=equations, - orderby=orderby, - limit=limit, - offset=offset, - sample_rate=sample, - config=QueryBuilderConfig( - has_metrics=has_metrics, - transform_alias_to_input_format=transform_alias_to_input_format, - skip_tag_resolution=skip_tag_resolution, - equation_config={"auto_add": include_equation_fields}, - auto_fields=auto_fields, - auto_aggregations=auto_aggregations, - use_aggregate_conditions=use_aggregate_conditions, - functions_acl=functions_acl, - ), - ) - - result = builder.process_results(builder.run_query(referrer, query_source=query_source)) - return result diff --git a/src/sentry/snuba/referrer.py b/src/sentry/snuba/referrer.py index dada359f171a7c..33ad6ae43918b8 100644 --- a/src/sentry/snuba/referrer.py +++ b/src/sentry/snuba/referrer.py @@ -165,9 +165,6 @@ class Referrer(Enum): API_ORGANIZATION_METRICS_DATA = "api.organization.metrics-data" API_ORGANIZATION_METRICS_ESTIMATION_STATS = "api.organization-metrics-estimation-stats" API_ORGANIZATION_METRICS_METADATA_FETCH_SPANS = "api.organization.metrics-metadata.fetch-spans" - API_ORGANIZATION_METRICS_METADATA_FETCH_METRICS_SUMMARIES = ( - "api.organization.metrics-metadata.fetch-metrics-summaries" - ) API_ORGANIZATION_METRICS_QUERY = "api.organization.metrics-query" API_ORGANIZATION_METRICS_EAP_QUERY = "api.organization.metrics-eap-query" API_ORGANIZATION_METRICS_SAMPLES = "api.organization.metrics-samples" diff --git a/src/sentry/testutils/cases.py b/src/sentry/testutils/cases.py index 836d38a94a6d15..96624344a96d0b 100644 --- a/src/sentry/testutils/cases.py +++ b/src/sentry/testutils/cases.py @@ -1407,43 +1407,6 @@ def store_issues(self, issues): == 200 ) - def store_metrics_summary(self, span): - common_fields = { - "duration_ms": span["duration_ms"], - "end_timestamp": (span["start_timestamp_ms"] + span["duration_ms"]) / 1000, - "group": span["sentry_tags"].get("group", "0"), - "is_segment": span["is_segment"], - "project_id": span["project_id"], - "received": span["received"], - "retention_days": span["retention_days"], - "segment_id": span.get("segment_id", "0"), - "span_id": span["span_id"], - "trace_id": span["trace_id"], - } - rows = [] - for mri, summaries in span.get("_metrics_summary", {}).items(): - for summary in summaries: - rows.append( - { - **common_fields, - **{ - "count": summary.get("count", 0), - "max": summary.get("max", 0.0), - "mri": mri, - "min": summary.get("min", 0.0), - "sum": summary.get("sum", 0.0), - "tags": summary.get("tags", {}), - }, - } - ) - assert ( - requests.post( - settings.SENTRY_SNUBA + "/tests/entities/metrics_summaries/insert", - data=json.dumps(rows), - ).status_code - == 200 - ) - def to_snuba_time_format(self, datetime_value): date_format = "%Y-%m-%d %H:%M:%S%z" return datetime_value.strftime(date_format) @@ -1531,7 +1494,6 @@ def store_segment( tags: Mapping[str, Any] | None = None, measurements: Mapping[str, int | float] | None = None, timestamp: datetime | None = None, - store_metrics_summary: Mapping[str, Sequence[Mapping[str, Any]]] | None = None, sdk_name: str | None = None, op: str | None = None, status: str | None = None, @@ -1570,8 +1532,6 @@ def store_segment( payload["measurements"] = { measurement: {"value": value} for measurement, value in measurements.items() } - if store_metrics_summary: - payload["_metrics_summary"] = store_metrics_summary if parent_span_id: payload["parent_span_id"] = parent_span_id if sdk_name is not None: @@ -1583,9 +1543,6 @@ def store_segment( self.store_span(payload, is_eap=is_eap) - if "_metrics_summary" in payload: - self.store_metrics_summary(payload) - def store_indexed_span( self, project_id: int, @@ -1602,7 +1559,6 @@ def store_indexed_span( measurements: Mapping[str, int | float] | None = None, timestamp: datetime | None = None, store_only_summary: bool = False, - store_metrics_summary: Mapping[str, Sequence[Mapping[str, Any]]] | None = None, group: str = "00", category: str | None = None, organization_id: int = 1, @@ -1644,8 +1600,6 @@ def store_indexed_span( payload["segment_id"] = transaction_id[:16] if profile_id: payload["profile_id"] = profile_id - if store_metrics_summary: - payload["_metrics_summary"] = store_metrics_summary if parent_span_id: payload["parent_span_id"] = parent_span_id if category is not None: @@ -1656,9 +1610,6 @@ def store_indexed_span( if not store_only_summary: self.store_span(payload, is_eap=is_eap) - if "_metrics_summary" in payload: - self.store_metrics_summary(payload) - class BaseMetricsTestCase(SnubaTestCase): ENTITY_SHORTHANDS = { diff --git a/src/sentry/utils/snuba.py b/src/sentry/utils/snuba.py index 0b7bfb116ec057..9dc053c6b02289 100644 --- a/src/sentry/utils/snuba.py +++ b/src/sentry/utils/snuba.py @@ -232,22 +232,6 @@ def log_snuba_info(content): "user.geo.country_code": "attr_str[sentry.user.geo.country_code]", } -METRICS_SUMMARIES_COLUMN_MAP = { - "project": "project_id", - "project.id": "project_id", - "id": "span_id", - "trace": "trace_id", - "metric": "metric_mri", - "timestamp": "end_timestamp", - "segment.id": "segment_id", - "span.duration": "duration_ms", - "span.group": "group", - "min_metric": "min", - "max_metric": "max", - "sum_metric": "sum", - "count_metric": "count", -} - SPAN_COLUMN_MAP.update( {col.value.alias: col.value.spans_name for col in Columns if col.value.spans_name is not None} ) @@ -298,7 +282,6 @@ def log_snuba_info(content): Dataset.Discover: DISCOVER_COLUMN_MAP, Dataset.Sessions: SESSIONS_SNUBA_MAP, Dataset.Metrics: METRICS_COLUMN_MAP, - Dataset.MetricsSummaries: METRICS_SUMMARIES_COLUMN_MAP, Dataset.PerformanceMetrics: METRICS_COLUMN_MAP, Dataset.SpansIndexed: SPAN_COLUMN_MAP, Dataset.EventsAnalyticsPlatform: SPAN_EAP_COLUMN_MAP, @@ -317,7 +300,6 @@ def log_snuba_info(content): Dataset.IssuePlatform: list(ISSUE_PLATFORM_MAP.values()), Dataset.SpansIndexed: list(SPAN_COLUMN_MAP.values()), Dataset.EventsAnalyticsPlatform: list(SPAN_EAP_COLUMN_MAP.values()), - Dataset.MetricsSummaries: list(METRICS_SUMMARIES_COLUMN_MAP.values()), } SNUBA_OR = "or" @@ -497,7 +479,13 @@ class RetrySkipTimeout(urllib3.Retry): """ def increment( - self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None + self, + method=None, + url=None, + response=None, + error=None, + _pool=None, + _stacktrace=None, ): """ Just rely on the parent class unless we have a read timeout. In that case @@ -636,7 +624,9 @@ def get_organization_id_from_project_ids(project_ids: Sequence[int]) -> int: return organization_id -def infer_project_ids_from_related_models(filter_keys: Mapping[str, Sequence[int]]) -> list[int]: +def infer_project_ids_from_related_models( + filter_keys: Mapping[str, Sequence[int]], +) -> list[int]: ids = [set(get_related_project_ids(k, filter_keys[k])) for k in filter_keys] return list(set.union(*ids)) @@ -956,7 +946,10 @@ def raw_snql_query( # other functions do here. It does not add any automatic conditions, format # results, nothing. Use at your own risk. return bulk_snuba_queries( - requests=[request], referrer=referrer, use_cache=use_cache, query_source=query_source + requests=[request], + referrer=referrer, + use_cache=use_cache, + query_source=query_source, )[0] @@ -1095,7 +1088,9 @@ def _apply_cache_and_build_results( for result, (query_pos, _, opt_cache_key) in zip(query_results, to_query): if opt_cache_key: cache.set( - opt_cache_key, json.dumps(result), settings.SENTRY_SNUBA_CACHE_TTL_SECONDS + opt_cache_key, + json.dumps(result), + settings.SENTRY_SNUBA_CACHE_TTL_SECONDS, ) results.append((query_pos, result)) @@ -1164,7 +1159,8 @@ def _bulk_snuba_query(snuba_requests: Sequence[SnubaRequest]) -> ResultSet: except ValueError: if response.status != 200: logger.exception( - "snuba.query.invalid-json", extra={"response.data": response.data} + "snuba.query.invalid-json", + extra={"response.data": response.data}, ) raise SnubaError("Failed to parse snuba error response") raise UnexpectedResponseError(f"Could not decode JSON response: {response.data!r}") @@ -1441,7 +1437,6 @@ def _resolve_column(col): # Some dataset specific logic: if dataset == Dataset.Discover: - if isinstance(col, (list, tuple)) or col in ("project_id", "group_id"): return col elif dataset == Dataset.EventsAnalyticsPlatform: @@ -1821,7 +1816,11 @@ def replace(d, key, val): reverse = compose( reverse, lambda row: ( - replace(row, "bucketed_end", int(parse_datetime(row["bucketed_end"]).timestamp())) + replace( + row, + "bucketed_end", + int(parse_datetime(row["bucketed_end"]).timestamp()), + ) if "bucketed_end" in row else row ), diff --git a/tests/sentry/api/endpoints/test_organization_metrics_samples.py b/tests/sentry/api/endpoints/test_organization_metrics_samples.py deleted file mode 100644 index 5a80374344e384..00000000000000 --- a/tests/sentry/api/endpoints/test_organization_metrics_samples.py +++ /dev/null @@ -1,638 +0,0 @@ -from datetime import timedelta -from uuid import uuid4 - -from django.urls import reverse -from rest_framework.exceptions import ErrorDetail - -from sentry.testutils.cases import APITestCase, BaseSpansTestCase -from sentry.testutils.helpers.datetime import before_now -from sentry.utils.samples import load_data - - -class OrganizationMetricsSamplesEndpointTest(BaseSpansTestCase, APITestCase): - view = "sentry-api-0-organization-metrics-samples" - - def setUp(self): - super().setUp() - self.login_as(user=self.user) - - def do_request(self, query, **kwargs): - return self.client.get( - reverse(self.view, kwargs={"organization_id_or_slug": self.organization.slug}), - query, - format="json", - **kwargs, - ) - - def test_no_project(self): - query = { - "mri": "d:spans/exclusive_time@millisecond", - "field": ["id"], - "project": [], - } - - response = self.do_request(query) - assert response.status_code == 404, response.data - - def test_bad_params(self): - query = { - "mri": "foo", - "field": [], - "project": [self.project.id], - } - - response = self.do_request(query) - assert response.status_code == 400, response.data - assert response.data == { - "mri": [ErrorDetail(string="Invalid MRI: foo", code="invalid")], - "field": [ErrorDetail(string="This field is required.", code="required")], - } - - def test_unsupported_mri(self): - query = { - "mri": "d:spans/made_up@none", - "field": ["id"], - "project": [self.project.id], - } - - response = self.do_request(query) - assert response.status_code == 400, response.data - assert response.data == { - "detail": ErrorDetail( - string="Unsupported MRI: d:spans/made_up@none", code="parse_error" - ) - } - - def test_unsupported_sort(self): - query = { - "mri": "d:spans/exclusive_time@millisecond", - "field": ["id"], - "project": [self.project.id], - "sort": "id", - } - - response = self.do_request(query) - assert response.status_code == 400, response.data - assert response.data == { - "detail": ErrorDetail(string="Unsupported sort: id for MRI", code="parse_error") - } - - def test_span_exclusive_time_samples_zero_group(self): - durations = [100, 200, 300] - span_ids = [uuid4().hex[:16] for _ in durations] - good_span_id = span_ids[1] - - for i, (span_id, duration) in enumerate(zip(span_ids, durations)): - ts = before_now(days=i, minutes=10).replace(microsecond=0) - - self.store_indexed_span( - self.project.id, - uuid4().hex, - uuid4().hex, - span_id=span_id, - duration=duration, - exclusive_time=duration, - timestamp=ts, - ) - - query = { - "mri": "d:spans/exclusive_time@millisecond", - "field": ["id"], - "project": [self.project.id], - "statsPeriod": "14d", - "min": 150, - "max": 250, - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(good_span_id, 16)} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected - - for row in response.data["data"]: - assert row["summary"] == { - "min": 200.0, - "max": 200.0, - "sum": 200.0, - "count": 1, - } - - query = { - "mri": "d:spans/duration@millisecond", - "field": ["id", "span.self_time"], - "project": [self.project.id], - "statsPeriod": "14d", - "sort": "-summary", - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(span_id, 16) for span_id in span_ids} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected - - for duration, row in zip(reversed(durations), response.data["data"]): - assert row["summary"] == { - "min": duration, - "max": duration, - "sum": duration, - "count": 1, - } - - def test_span_measurement_samples(self): - durations = [100, 200, 300] - span_ids = [uuid4().hex[:16] for _ in durations] - good_span_id = span_ids[1] - - for i, (span_id, duration) in enumerate(zip(span_ids, durations)): - ts = before_now(days=i, minutes=10).replace(microsecond=0) - - self.store_indexed_span( - self.project.id, - uuid4().hex, - uuid4().hex, - span_id=span_id, - duration=duration, - timestamp=ts, - measurements={ - measurement: duration + j + 1 - for j, measurement in enumerate( - [ - "frames.slow", - "score.total", - "score.inp", - "score.weight.inp", - "http.response_content_length", - "http.decoded_response_content_length", - "http.response_transfer_size", - ] - ) - }, - ) - - self.store_indexed_span( - self.project.id, - uuid4().hex, - uuid4().hex, - span_id=uuid4().hex[:16], - timestamp=ts, - ) - - for i, mri in enumerate( - [ - "g:spans/mobile.slow_frames@none", - "d:spans/webvital.score.total@ratio", - "d:spans/webvital.score.inp@ratio", - "d:spans/webvital.score.weight.inp@ratio", - "d:spans/http.response_content_length@byte", - "d:spans/http.decoded_response_content_length@byte", - "d:spans/http.response_transfer_size@byte", - ] - ): - query = { - "mri": mri, - "field": ["id"], - "project": [self.project.id], - "statsPeriod": "14d", - "min": 150.0, - "max": 250.0, - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(good_span_id, 16)} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected, mri - - for row in response.data["data"]: - assert row["summary"] == { - "min": 201 + i, - "max": 201 + i, - "sum": 201 + i, - "count": 1, - }, mri - - query = { - "mri": mri, - "field": ["id", "span.duration"], - "project": [self.project.id], - "statsPeriod": "14d", - "sort": "-summary", - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(span_id, 16) for span_id in span_ids} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected, mri - - for duration, row in zip(reversed(durations), response.data["data"]): - assert row["summary"] == { - "min": duration + i + 1, - "max": duration + i + 1, - "sum": duration + i + 1, - "count": 1, - }, mri - - def test_transaction_duration_samples(self): - durations = [100, 200, 300] - span_ids = [uuid4().hex[:16] for _ in durations] - good_span_id = span_ids[1] - - for i, (span_id, duration) in enumerate(zip(span_ids, durations)): - ts = before_now(days=i, minutes=10).replace(microsecond=0) - start_ts = ts - timedelta(microseconds=duration * 1000) - - # first write to the transactions dataset - data = load_data("transaction", start_timestamp=start_ts, timestamp=ts) - data["contexts"]["trace"]["span_id"] = span_id - self.store_event( - data=data, - project_id=self.project.id, - ) - - # next write to the spans dataset - self.store_segment( - self.project.id, - uuid4().hex, - uuid4().hex, - duration=duration, - span_id=span_id, - timestamp=ts, - ) - - query = { - "mri": "d:transactions/duration@millisecond", - "field": ["id"], - "project": [self.project.id], - "statsPeriod": "14d", - "min": 150, - "max": 250, - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(good_span_id, 16)} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected - - for row in response.data["data"]: - assert row["summary"] == { - "min": 200, - "max": 200, - "sum": 200, - "count": 1, - } - - query = { - "mri": "d:transactions/duration@millisecond", - "field": ["id", "span.duration"], - "project": [self.project.id], - "statsPeriod": "14d", - "sort": "-summary", - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(span_id, 16) for span_id in span_ids} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected - - for duration, row in zip(reversed(durations), response.data["data"]): - assert row["summary"] == { - "min": duration, - "max": duration, - "sum": duration, - "count": 1, - } - - def test_transaction_measurement_samples(self): - durations = [100, 200, 300] - span_ids = [uuid4().hex[:16] for _ in durations] - good_span_id = span_ids[1] - - for i, (span_id, duration) in enumerate(zip(span_ids, durations)): - ts = before_now(days=i, minutes=10).replace(microsecond=0) - start_ts = ts - timedelta(microseconds=duration * 1000) - - # first write to the transactions dataset - data = load_data("transaction", start_timestamp=start_ts, timestamp=ts) - # good span ids will have the measurement - data["measurements"] = {"lcp": {"value": duration}} - data["contexts"]["trace"]["span_id"] = span_id - self.store_event( - data=data, - project_id=self.project.id, - ) - - # next write to the spans dataset - self.store_segment( - self.project.id, - uuid4().hex, - uuid4().hex, - duration=duration, - span_id=span_id, - timestamp=ts, - ) - - span_id = uuid4().hex[:16] - ts = before_now(days=10, minutes=10).replace(microsecond=0) - - # first write to the transactions dataset - data = load_data("transaction", timestamp=ts) - # bad span ids will not have the measurement - data["measurements"] = {} - data["contexts"]["trace"]["span_id"] = span_id - self.store_event( - data=data, - project_id=self.project.id, - ) - - # next write to the spans dataset - self.store_segment( - self.project.id, - uuid4().hex, - uuid4().hex, - span_id=span_id, - timestamp=ts, - ) - - query = { - "mri": "d:transactions/measurements.lcp@millisecond", - "field": ["id"], - "project": [self.project.id], - "statsPeriod": "14d", - "min": 150.0, - "max": 250.0, - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(good_span_id, 16)} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected - - for row in response.data["data"]: - assert row["summary"] == { - "min": 200.0, - "max": 200.0, - "sum": 200.0, - "count": 1, - } - - query = { - "mri": "d:transactions/measurements.lcp@millisecond", - "field": ["id", "span.duration"], - "project": [self.project.id], - "statsPeriod": "14d", - "sort": "-summary", - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(span_id, 16) for span_id in span_ids} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected - - for duration, row in zip(reversed(durations), response.data["data"]): - assert row["summary"] == { - "min": duration, - "max": duration, - "sum": duration, - "count": 1, - } - - def test_custom_samples(self): - mri = "d:custom/value@millisecond" - values = [100, 200, 300] - span_ids = [uuid4().hex[:16] for _ in values] - good_span_id = span_ids[1] - - # 10 is below the min - # 20 is within bounds - # 30 is above the max - for i, (span_id, val) in enumerate(zip(span_ids, values)): - ts = before_now(days=i, minutes=10).replace(microsecond=0) - self.store_indexed_span( - self.project.id, - uuid4().hex, - uuid4().hex, - span_id=span_id, - duration=val, - timestamp=ts, - store_metrics_summary={ - mri: [ - { - "min": val - 1, - "max": val + 1, - "sum": val * (i + 1) * 2, - "count": (i + 1) * 2, - "tags": {}, - } - ] - }, - ) - - self.store_indexed_span( - self.project.id, - uuid4().hex, - uuid4().hex, - span_id=uuid4().hex[:16], - timestamp=before_now(days=10, minutes=10).replace(microsecond=0), - store_metrics_summary={ - "d:custom/other@millisecond": [ - { - "min": 210.0, - "max": 210.0, - "sum": 210.0, - "count": 1, - "tags": {}, - } - ] - }, - ) - - for operation, min_bound, max_bound in [ - ("avg", 150.0, 250.0), - ("min", 150.0, 250.0), - ("max", 150.0, 250.0), - ("count", 3, 5), - ]: - query = { - "mri": mri, - "field": ["id"], - "project": [self.project.id], - "statsPeriod": "14d", - "min": min_bound, - "max": max_bound, - "operation": operation, - } - response = self.do_request(query) - assert response.status_code == 200, (operation, response.data) - expected = {int(good_span_id, 16)} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected, operation - - for row in response.data["data"]: - assert row["summary"] == { - "min": 199.0, - "max": 201.0, - "sum": 800.0, - "count": 4, - }, operation - - for operation in ["avg", "min", "max", "count"]: - query = { - "mri": mri, - "field": ["id", "span.duration"], - "project": [self.project.id], - "statsPeriod": "14d", - "sort": "-summary", - "operation": operation, - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(span_id, 16) for span_id in span_ids} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected - - for i, (val, row) in enumerate(zip(reversed(values), response.data["data"])): - assert row["summary"] == { - "min": val - 1, - "max": val + 1, - "sum": val * (len(values) - i) * 2, - "count": (len(values) - i) * 2, - } - - def test_multiple_span_sample_per_time_bucket(self): - custom_mri = "d:custom/value@millisecond" - values = [100, 200, 300, 400, 500] - span_ids = [uuid4().hex[:16] for _ in values] - ts = before_now(days=0, minutes=10).replace(microsecond=0) - - for span_id, value in zip(span_ids, values): - self.store_indexed_span( - self.project.id, - uuid4().hex, - uuid4().hex, - span_id=span_id, - duration=value, - exclusive_time=value, - timestamp=ts, - measurements={"score.total": value}, - store_metrics_summary={ - custom_mri: [ - { - "min": value - 1, - "max": value + 1, - "sum": value * 2, - "count": 2, - "tags": {}, - } - ] - }, - ) - - for mri in [ - "d:spans/exclusive_time@millisecond", - "d:spans/webvital.score.total@ratio", - custom_mri, - ]: - query = { - "mri": mri, - "field": ["id"], - "project": [self.project.id], - "statsPeriod": "24h", - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(span_ids[i], 16) for i in [2, 3, 4]} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected - - def test_multiple_transaction_sample_per_time_bucket(self): - values = [100, 200, 300, 400, 500] - span_ids = [uuid4().hex[:16] for _ in values] - ts = before_now(days=0, minutes=10).replace(microsecond=0) - - for span_id, value in zip(span_ids, values): - start_ts = ts - timedelta(microseconds=value * 1000) - - # first write to the transactions dataset - data = load_data("transaction", start_timestamp=start_ts, timestamp=ts) - # good span ids will have the measurement - data["measurements"] = {"lcp": {"value": value}} - data["contexts"]["trace"]["span_id"] = span_id - self.store_event( - data=data, - project_id=self.project.id, - ) - - # next write to the spans dataset - self.store_segment( - self.project.id, - uuid4().hex, - uuid4().hex, - duration=value, - span_id=span_id, - timestamp=ts, - ) - - for mri in [ - "d:transactions/duration@millisecond", - "d:transactions/measurements.lcp@millisecond", - ]: - query = { - "mri": mri, - "field": ["id"], - "project": [self.project.id], - "statsPeriod": "24h", - } - response = self.do_request(query) - assert response.status_code == 200, response.data - expected = {int(span_ids[i], 16) for i in [2, 3, 4]} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected - - def test_filtering_custom_metrics_by_project(self): - custom_mri = "d:custom/value@millisecond" - value = 100 - span_id = uuid4().hex[:16] - first_project = self.create_project(name="My New Project") - ts = before_now(days=0, minutes=10).replace(microsecond=0) - - self.store_indexed_span( - first_project.id, - uuid4().hex, - uuid4().hex, - span_id=span_id, - duration=value, - exclusive_time=value, - timestamp=ts, - measurements={"score.total": value}, - store_metrics_summary={ - custom_mri: [ - { - "min": value - 1, - "max": value + 2, - "sum": value * 2, - "count": 2, - "tags": {}, - } - ] - }, - ) - - responses = [] - for query in (f"project:{first_project.slug}", f"project_id:{first_project.id}"): - request = { - "mri": custom_mri, - "field": ["id"], - "project": [first_project.id], - "query": query, - "statsPeriod": "24h", - } - response = self.do_request(request) - assert response.status_code == 200 - assert response.data - - expected = {int(span_id, 16)} - actual = {int(row["id"], 16) for row in response.data["data"]} - assert actual == expected, f"failed for query {query}" - - responses.append(response) - - assert len({str(r.data) for r in responses}) == 1 diff --git a/tests/sentry/api/endpoints/test_organization_traces.py b/tests/sentry/api/endpoints/test_organization_traces.py index 75e492d45287dc..e146be6da313a6 100644 --- a/tests/sentry/api/endpoints/test_organization_traces.py +++ b/tests/sentry/api/endpoints/test_organization_traces.py @@ -209,17 +209,6 @@ def create_mock_traces(self): ] ) }, - store_metrics_summary={ - "d:custom/value@millisecond": [ - { - "min": 40_000, - "max": 40_000, - "sum": 40_000, - "count": 1, - "tags": {"foo": "qux"}, - } - ] - }, sdk_name="sentry.javascript.remix", ) @@ -278,7 +267,10 @@ class OrganizationTracesEndpointTest(OrganizationTracesEndpointTestBase): def do_request(self, query, features=None, **kwargs): if features is None: - features = ["organizations:performance-trace-explorer", "organizations:global-views"] + features = [ + "organizations:performance-trace-explorer", + "organizations:global-views", + ] if self.is_eap: if query is None: @@ -287,7 +279,10 @@ def do_request(self, query, features=None, **kwargs): with self.feature(features): return self.client.get( - reverse(self.view, kwargs={"organization_id_or_slug": self.organization.slug}), + reverse( + self.view, + kwargs={"organization_id_or_slug": self.organization.slug}, + ), query, format="json", **kwargs, @@ -434,7 +429,8 @@ def test_process_breakdown_error(self, mock_process_breakdowns, mock_capture_exc ] mock_capture_exception.assert_called_with( - exception, contexts={"bad_traces": {"traces": list(sorted([trace_id_1, trace_id_2]))}} + exception, + contexts={"bad_traces": {"traces": list(sorted([trace_id_1, trace_id_2]))}}, ) def test_use_first_span_for_name(self): @@ -939,7 +935,10 @@ class OrganizationTraceSpansEndpointTest(OrganizationTracesEndpointTestBase): def do_request(self, trace_id, query, features=None, **kwargs): if features is None: - features = ["organizations:performance-trace-explorer", "organizations:global-views"] + features = [ + "organizations:performance-trace-explorer", + "organizations:global-views", + ] if self.is_eap: if query is None: @@ -1117,7 +1116,10 @@ class OrganizationTracesStatsEndpointTest(OrganizationTracesEndpointTestBase): def do_request(self, query, features=None, **kwargs): if features is None: - features = ["organizations:performance-trace-explorer", "organizations:global-views"] + features = [ + "organizations:performance-trace-explorer", + "organizations:global-views", + ] if self.is_eap: if query is None: @@ -1126,7 +1128,10 @@ def do_request(self, query, features=None, **kwargs): with self.feature(features): return self.client.get( - reverse(self.view, kwargs={"organization_id_or_slug": self.organization.slug}), + reverse( + self.view, + kwargs={"organization_id_or_slug": self.organization.slug}, + ), query, format="json", **kwargs, From 79c54e2dc0f9ee77732b06ed1b488585a9ca74f0 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Fri, 22 Nov 2024 11:00:30 -0500 Subject: [PATCH 2/5] Remove more irrelevant code --- .../api/endpoints/organization_traces.py | 208 +--- .../sentry_metrics/querying/samples_list.py | 1011 ----------------- .../api/endpoints/test_organization_traces.py | 184 --- 3 files changed, 36 insertions(+), 1367 deletions(-) delete mode 100644 src/sentry/sentry_metrics/querying/samples_list.py diff --git a/src/sentry/api/endpoints/organization_traces.py b/src/sentry/api/endpoints/organization_traces.py index 50eff297976adc..19810a78e67b7d 100644 --- a/src/sentry/api/endpoints/organization_traces.py +++ b/src/sentry/api/endpoints/organization_traces.py @@ -12,7 +12,7 @@ from rest_framework.exceptions import ParseError, ValidationError from rest_framework.request import Request from rest_framework.response import Response -from snuba_sdk import And, BooleanCondition, BooleanOp, Column, Condition, Function, Op, Or +from snuba_sdk import BooleanCondition, BooleanOp, Column, Condition, Function, Op from urllib3.exceptions import ReadTimeoutError from sentry import features, options @@ -35,7 +35,6 @@ ) from sentry.search.events.constants import TIMEOUT_SPAN_ERROR_MESSAGE from sentry.search.events.types import QueryBuilderConfig, SnubaParams, WhereType -from sentry.sentry_metrics.querying.samples_list import SpanKey, get_sample_list_executor_cls from sentry.snuba import discover, spans_indexed from sentry.snuba.dataset import Dataset from sentry.snuba.referrer import Referrer @@ -310,7 +309,9 @@ def get(self, request: Request, organization: Organization) -> Response: zerofill = not ( request.GET.get("withoutZerofill") == "1" and features.get( - "organizations:performance-chart-interpolation", organization, actor=request.user + "organizations:performance-chart-interpolation", + organization, + actor=request.user, ) ) @@ -470,76 +471,8 @@ def get_traces_matching_conditions( self, snuba_params: SnubaParams, ) -> tuple[datetime, datetime, list[str]]: - if self.mri is not None: - sentry_sdk.set_tag("mri", self.mri) - return self.get_traces_matching_metric_conditions(snuba_params) - return self.get_traces_matching_span_conditions(snuba_params) - def get_traces_matching_metric_conditions( - self, - snuba_params: SnubaParams, - ) -> tuple[datetime, datetime, list[str]]: - assert self.mri is not None - - executor_cls = get_sample_list_executor_cls(self.mri) - if executor_cls is None: - raise ParseError(detail=f"Unsupported MRI: {self.mri}") - - executor = executor_cls( - mri=self.mri, - snuba_params=snuba_params, - fields=["trace"], - max=self.metrics_max, - min=self.metrics_min, - operation=self.metrics_operation, - query=self.metrics_query, - referrer=Referrer.API_TRACE_EXPLORER_METRICS_SPANS_LIST, - ) - - trace_ids, timestamps = executor.get_matching_traces(MAX_SNUBA_RESULTS) - - min_timestamp = snuba_params.end - max_timestamp = snuba_params.start - assert min_timestamp is not None - assert max_timestamp is not None - - for timestamp in timestamps: - min_timestamp = min(min_timestamp, timestamp) - max_timestamp = max(max_timestamp, timestamp) - - if not trace_ids or min_timestamp > max_timestamp: - return min_timestamp, max_timestamp, [] - - self.refine_params(min_timestamp, max_timestamp) - - if self.user_queries: - # If there are user queries, further refine the trace ids by applying them - # leaving us with only traces where the metric exists and matches the user - # queries. - ( - min_timestamp, - max_timestamp, - trace_ids, - ) = self.get_traces_matching_span_conditions_in_traces(snuba_params, trace_ids) - - if not trace_ids: - return min_timestamp, max_timestamp, [] - else: - # No user queries so take the first N trace ids as our list - min_timestamp = snuba_params.end - max_timestamp = snuba_params.start - assert min_timestamp is not None - assert max_timestamp is not None - - trace_ids = trace_ids[: self.limit] - timestamps = timestamps[: self.limit] - for timestamp in timestamps: - min_timestamp = min(min_timestamp, timestamp) - max_timestamp = max(max_timestamp, timestamp) - - return min_timestamp, max_timestamp, trace_ids - def get_traces_matching_span_conditions( self, snuba_params: SnubaParams, @@ -867,14 +800,20 @@ def process_final_results( # The underlying column is a Nullable(UInt64) but we write a default of 0 to it. # So make sure to handle both in case something changes. if not row["parent_span"] or int(row["parent_span"], 16) == 0: - traces_primary_names[row["trace"]] = (row["project"], row["transaction"]) + traces_primary_names[row["trace"]] = ( + row["project"], + row["transaction"], + ) if row["trace"] in traces_fallback_names: continue # This span is a good candidate for the trace name so use it. if row["trace"] not in traces_fallback_names and is_trace_name_candidate(row): - traces_fallback_names[row["trace"]] = (row["project"], row["transaction"]) + traces_fallback_names[row["trace"]] = ( + row["project"], + row["transaction"], + ) if row["trace"] in traces_default_names: continue @@ -1203,54 +1142,20 @@ def __init__( def execute(self, offset: int, limit: int): with handle_span_query_errors(): - span_keys = self.get_metrics_span_keys() - - with handle_span_query_errors(): - spans = self.get_user_spans( + return self.get_user_spans( self.snuba_params, - span_keys, offset=offset, limit=limit, ) - return spans - - def get_metrics_span_keys(self) -> list[SpanKey] | None: - if self.mri is None: - return None - - executor_cls = get_sample_list_executor_cls(self.mri) - if executor_cls is None: - raise ParseError(detail=f"Unsupported MRI: {self.mri}") - - executor = executor_cls( - mri=self.mri, - snuba_params=self.snuba_params, - fields=["trace"], - max=self.metrics_max, - min=self.metrics_min, - operation=self.metrics_operation, - query=self.metrics_query, - referrer=Referrer.API_TRACE_EXPLORER_METRICS_SPANS_LIST, - ) - - span_keys = executor.get_matching_spans_from_traces( - [self.trace_id], - MAX_SNUBA_RESULTS, - ) - - return span_keys - def get_user_spans( self, snuba_params: SnubaParams, - span_keys: list[SpanKey] | None, limit: int, offset: int, ): user_spans_query = self.get_user_spans_query( snuba_params, - span_keys, limit=limit, offset=offset, ) @@ -1268,7 +1173,6 @@ def get_user_spans( def get_user_spans_query( self, snuba_params: SnubaParams, - span_keys: list[SpanKey] | None, limit: int, offset: int, ) -> BaseQueryBuilder: @@ -1276,7 +1180,7 @@ def get_user_spans_query( # span_keys is not supported in EAP mode because that's a legacy # code path to support metrics that no longer exists return self.get_user_spans_query_eap(snuba_params, limit, offset) - return self.get_user_spans_query_indexed(snuba_params, span_keys, limit, offset) + return self.get_user_spans_query_indexed(snuba_params, limit, offset) def get_user_spans_query_eap( self, @@ -1338,7 +1242,6 @@ def get_user_spans_query_eap( def get_user_spans_query_indexed( self, snuba_params: SnubaParams, - span_keys: list[SpanKey] | None, limit: int, offset: int, ) -> BaseQueryBuilder: @@ -1366,69 +1269,30 @@ def get_user_spans_query_indexed( conditions = [] - if span_keys is None: - # Next we have to turn the user queries into the appropriate conditions in - # the SnQL that we produce. - - # There are multiple sets of user conditions that needs to be satisfied - # and if a span satisfy any of them, it should be considered. - # - # To handle this use case, we want to OR all the user specified - # conditions together in this query. - for where in user_conditions: - if len(where) > 1: - conditions.append(BooleanCondition(op=BooleanOp.AND, conditions=where)) - elif len(where) == 1: - conditions.append(where[0]) - - if len(conditions) > 1: - # More than 1 set of conditions were specified, we want to show - # spans that match any 1 of them so join the conditions with `OR`s. - user_spans_query.add_conditions( - [BooleanCondition(op=BooleanOp.OR, conditions=conditions)] - ) - elif len(conditions) == 1: - # Only 1 set of user conditions were specified, simply insert them into - # the final query. - user_spans_query.add_conditions([conditions[0]]) - else: - # Next if there are known span_keys, we only try to fetch those spans - # This are the additional conditions to better take advantage of the ORDER BY - # on the spans table. This creates a list of conditions to be `OR`ed together - # that can will be used by ClickHouse to narrow down the granules. - # - # The span ids are not in this condition because they are more effective when - # specified within the `PREWHERE` clause. So, it's in a separate condition. - conditions = [ - And( - [ - Condition(user_spans_query.column("span.group"), Op.EQ, key.group), - Condition( - user_spans_query.column("timestamp"), - Op.EQ, - datetime.fromisoformat(key.timestamp), - ), - ] - ) - for key in span_keys - ] + # Next we have to turn the user queries into the appropriate conditions in + # the SnQL that we produce. - if len(conditions) == 1: - order_by_condition = conditions[0] - else: - order_by_condition = Or(conditions) - - # Using `IN` combined with putting the list in a SnQL "tuple" triggers an optimizer - # in snuba where it - # 1. moves the condition into the `PREWHERE` clause - # 2. maps the ids to the underlying UInt64 and uses the bloom filter index - span_id_condition = Condition( - user_spans_query.column("id"), - Op.IN, - Function("tuple", [key.span_id for key in span_keys]), - ) + # There are multiple sets of user conditions that needs to be satisfied + # and if a span satisfy any of them, it should be considered. + # + # To handle this use case, we want to OR all the user specified + # conditions together in this query. + for where in user_conditions: + if len(where) > 1: + conditions.append(BooleanCondition(op=BooleanOp.AND, conditions=where)) + elif len(where) == 1: + conditions.append(where[0]) - user_spans_query.add_conditions([order_by_condition, span_id_condition]) + if len(conditions) > 1: + # More than 1 set of conditions were specified, we want to show + # spans that match any 1 of them so join the conditions with `OR`s. + user_spans_query.add_conditions( + [BooleanCondition(op=BooleanOp.OR, conditions=conditions)] + ) + elif len(conditions) == 1: + # Only 1 set of user conditions were specified, simply insert them into + # the final query. + user_spans_query.add_conditions([conditions[0]]) return user_spans_query diff --git a/src/sentry/sentry_metrics/querying/samples_list.py b/src/sentry/sentry_metrics/querying/samples_list.py deleted file mode 100644 index f7b0fb8f175ec1..00000000000000 --- a/src/sentry/sentry_metrics/querying/samples_list.py +++ /dev/null @@ -1,1011 +0,0 @@ -from abc import ABC, abstractmethod -from bisect import bisect -from collections.abc import Callable -from dataclasses import dataclass -from datetime import datetime -from typing import Any, Literal, TypedDict, cast - -from snuba_sdk import And, Column, Condition, Function, Op, Or - -from sentry import options -from sentry.api.event_search import SearchFilter, SearchKey, SearchValue -from sentry.search.events.builder.base import BaseQueryBuilder -from sentry.search.events.builder.discover import DiscoverQueryBuilder -from sentry.search.events.builder.spans_indexed import SpansIndexedQueryBuilder -from sentry.search.events.types import QueryBuilderConfig, SelectType, SnubaParams -from sentry.snuba.dataset import Dataset -from sentry.snuba.metrics.naming_layer.mri import ( - SpanMRI, - TransactionMRI, - is_custom_metric, - is_measurement, - parse_mri, -) -from sentry.snuba.referrer import Referrer -from sentry.utils.numbers import clip - - -@dataclass(frozen=True) -class SpanKey: - group: str - timestamp: str - span_id: str - - -class Summary(TypedDict): - min: float - max: float - sum: float - count: int - - -class AbstractSamplesListExecutor(ABC): - # picking 30 samples gives a decent chance to surface a few samples from the higher percentiles - num_samples = 30 - - sortable_columns: set[str] - - def __init__( - self, - *, - mri: str, - snuba_params: SnubaParams, - referrer: Referrer, - fields: list[str], - operation: str | None = None, - query: str | None = None, - min: float | None = None, - max: float | None = None, - sort: str | None = None, - rollup: int | None = None, - ): - self.mri = mri - self.snuba_params = snuba_params - self.fields = fields - self.operation = operation - self.query = query - self.min = min - self.max = max - self.sort = sort - self.rollup = rollup - self.referrer = referrer - - @classmethod - @abstractmethod - def supports_mri(cls, mri: str) -> bool: - raise NotImplementedError - - @classmethod - def supports_sort(cls, column: str) -> bool: - return column in cls.sortable_columns - - @abstractmethod - def get_matching_traces(self, limit: int) -> tuple[list[str], list[datetime]]: - raise NotImplementedError - - @abstractmethod - def get_matching_spans_from_traces( - self, - trace_ids: list[str], - max_spans_per_trace: int, - ) -> list[SpanKey]: - raise NotImplementedError - - def get_matching_spans(self, offset, limit): - assert self.rollup is not None - - if self.sort is None: - execute_fn = self.get_matching_spans_unsorted - else: - execute_fn = self.get_matching_spans_sorted - return execute_fn(offset, limit) - - @abstractmethod - def get_matching_spans_sorted(self, offset, limit): - raise NotImplementedError - - @abstractmethod - def get_matching_spans_unsorted(self, offset, limit): - raise NotImplementedError - - def get_spans_by_key( - self, - span_keys: list[SpanKey], - additional_fields: list[str] | None = None, - ): - if not span_keys: - return {"data": []} - - fields = self.fields[:] - if additional_fields is not None: - fields.extend(additional_fields) - - builder = SpansIndexedQueryBuilder( - Dataset.SpansIndexed, - params={}, - snuba_params=self.snuba_params, - selected_columns=fields, - limit=len(span_keys), - offset=0, - ) - - # This are the additional conditions to better take advantage of the ORDER BY - # on the spans table. This creates a list of conditions to be `OR`ed together - # that can will be used by ClickHouse to narrow down the granules. - # - # The span ids are not in this condition because they are more effective when - # specified within the `PREWHERE` clause. So, it's in a separate condition. - conditions = [ - And( - [ - Condition(builder.column("span.group"), Op.EQ, key.group), - Condition( - builder.column("timestamp"), - Op.EQ, - datetime.fromisoformat(key.timestamp), - ), - ] - ) - for key in span_keys - ] - - if len(conditions) == 1: - order_by_condition = conditions[0] - else: - order_by_condition = Or(conditions) - - # Using `IN` combined with putting the list in a SnQL "tuple" triggers an optimizer - # in snuba where it - # 1. moves the condition into the `PREWHERE` clause - # 2. maps the ids to the underlying UInt64 and uses the bloom filter index - # - # NOTE: the "tuple" here is critical as without it, snuba will not correctly - # rewrite the condition and keep it in the WHERE and as a hexidecimal. - span_id_condition = Condition( - builder.column("id"), - Op.IN, - Function("tuple", [key.span_id for key in span_keys]), - ) - - builder.add_conditions([order_by_condition, span_id_condition]) - - query_results = builder.run_query(self.referrer.value) - return builder.process_results(query_results) - - -class SegmentsSamplesListExecutor(AbstractSamplesListExecutor): - sortable_columns = {"timestamp", "span.duration", "summary"} - - SORT_MAPPING = { - "span.duration": "transaction.duration", - "timestamp": "timestamp", - } - - @classmethod - @abstractmethod - def mri_to_column(cls, mri: str) -> str | None: - raise NotImplementedError - - @classmethod - def convert_sort(cls, sort: str, mri: str) -> tuple[Literal["", "-"], str] | None: - direction: Literal["", "-"] = "" - - if sort.startswith("-"): - direction = "-" - sort = sort[1:] - - if sort in cls.SORT_MAPPING: - return direction, cls.SORT_MAPPING[sort] - - if sort == "summary": - column = cls.mri_to_column(mri) - if column is not None: - return direction, column - - return None - - @classmethod - def supports_mri(cls, mri: str) -> bool: - return cls.mri_to_column(mri) is not None - - def get_matching_traces(self, limit: int) -> tuple[list[str], list[datetime]]: - column = self.mri_to_column(self.mri) - assert column - - builder = SpansIndexedQueryBuilder( - Dataset.Transactions, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=["trace", "timestamp"], - # The orderby is intentionally `None` here as this query is much faster - # if we let Clickhouse decide which order to return the results in. - # This also means we cannot order by any columns or paginate. - orderby=None, - limit=limit, - limitby=("trace", 1), - ) - - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder.resolve_column(column)) - builder.add_conditions([*additional_conditions, *min_max_conditions]) - - query_results = builder.run_query(self.referrer.value) - results = builder.process_results(query_results) - - trace_ids = [row["trace"] for row in results["data"]] - timestamps = [datetime.fromisoformat(row["timestamp"]) for row in results["data"]] - return trace_ids, timestamps - - def get_matching_spans_from_traces( - self, - trace_ids: list[str], - max_spans_per_trace: int, - ) -> list[SpanKey]: - column = self.mri_to_column(self.mri) - assert column is not None - - builder = SpansIndexedQueryBuilder( - Dataset.Transactions, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=["timestamp", "span_id"], - # The orderby is intentionally `None` here as this query is much faster - # if we let Clickhouse decide which order to return the results in. - # This also means we cannot order by any columns or paginate. - orderby=None, - limit=len(trace_ids) * max_spans_per_trace, - limitby=("trace", max_spans_per_trace), - ) - - trace_id_condition = Condition(Column("trace_id"), Op.IN, trace_ids) - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder.resolve_column(column)) - builder.add_conditions( - [ - trace_id_condition, - *additional_conditions, - *min_max_conditions, - ] - ) - - query_results = builder.run_query(self.referrer.value) - results = builder.process_results(query_results) - - return [ - SpanKey( - group="00", # all segments have a group of `00` currently - timestamp=row["timestamp"], - span_id=row["span_id"], - ) - for row in results["data"] - ] - - def _get_spans( - self, - span_keys: list[SpanKey], - summaries: dict[str, Summary], - ): - result = self.get_spans_by_key( - span_keys, - # force `id` to be one of the fields - additional_fields=["id"], - ) - - # if there is a sort, we want to preserve the result in the same - # order as the span keys which we can do by checking the span ids - if self.sort: - order = {key.span_id: i for i, key in enumerate(span_keys)} - result["data"].sort(key=lambda row: order[row["id"]]) - - # if `id` wasn't initially there, we should remove it - should_pop_id = "id" not in self.fields - - for row in result["data"]: - span_id = row.pop("id") if should_pop_id else row["id"] - row["summary"] = summaries[span_id] - - return result - - def get_matching_spans_sorted(self, offset, limit): - span_keys, summaries = self.get_sorted_span_keys(offset, limit) - return self._get_spans(span_keys, summaries) - - def get_sorted_span_keys( - self, - offset: int, - limit: int, - ) -> tuple[list[SpanKey], dict[str, Summary]]: - """ - When getting examples for a segment, it's actually much faster to read it - from the transactions dataset compared to the spans dataset as it's a much - smaller dataset. - - One consideration here is that there is an one to one mapping between a - transaction to a segment today. If this relationship changes, we'll have to - rethink how to fetch segment samples a little as the transactions dataset - may not contain all the necessary data. - """ - assert self.sort - sort = self.convert_sort(self.sort, self.mri) - assert sort is not None - direction, sort_column = sort - - mri_column = self.mri_to_column(self.mri) - assert mri_column is not None - - fields = ["span_id", "timestamp"] - if sort_column not in fields: - fields.append(sort_column) - if mri_column not in fields: - fields.append(mri_column) - - builder = DiscoverQueryBuilder( - Dataset.Transactions, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=fields, - orderby=f"{direction}{sort_column}", - limit=limit, - offset=offset, - ) - - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder.column(mri_column)) - builder.add_conditions([*additional_conditions, *min_max_conditions]) - - query_results = builder.run_query(self.referrer.value) - result = builder.process_results(query_results) - - span_keys = [ - SpanKey( - group="00", # all segments have a group of `00` currently - timestamp=row["timestamp"], - span_id=row["span_id"], - ) - for row in result["data"] - ] - - """ - Because transaction level measurements currently do not get - propagated to the spans dataset, we have to query them here, - generate the summary for it here, and propagate it to the - results of the next stage. - - Once we start writing transaction level measurements to the - indexed spans dataset, we can stop doing this and read the - value directly from the indexed spans dataset. - - For simplicity, all transaction based metrics use this approach. - """ - summaries = { - cast(str, row["span_id"]): cast( - Summary, - { - "min": row[mri_column], - "max": row[mri_column], - "sum": row[mri_column], - "count": 1, - }, - ) - for row in result["data"] - } - - return span_keys, summaries - - def get_matching_spans_unsorted(self, offset, limit): - span_keys, summaries = self.get_unsorted_span_keys(offset, limit) - return self._get_spans(span_keys, summaries) - - def get_unsorted_span_keys( - self, - offset: int, - limit: int, - ) -> tuple[list[SpanKey], dict[str, Summary]]: - """ - When getting examples for a segment, it's actually much faster to read it - from the transactions dataset compared to the spans dataset as it's a much - smaller dataset. - - One consideration here is that there is an one to one mapping between a - transaction to a segment today. If this relationship changes, we'll have to - rethink how to fetch segment samples a little as the transactions dataset - may not contain all the necessary data. - """ - column = self.mri_to_column(self.mri) - assert column is not None - - builder = DiscoverQueryBuilder( - Dataset.Transactions, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=[ - f"rounded_timestamp({self.rollup})", - f"examples({column}, {self.num_samples}) AS examples", - ], - limit=limit, - offset=offset, - sample_rate=options.get("metrics.sample-list.sample-rate"), - config=QueryBuilderConfig(functions_acl=["rounded_timestamp", "examples"]), - ) - - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder.column(column)) - builder.add_conditions([*additional_conditions, *min_max_conditions]) - - query_results = builder.run_query(self.referrer.value) - result = builder.process_results(query_results) - - metric_key = lambda example: example[2] # sort by metric - for row in result["data"]: - row["examples"] = pick_samples(row["examples"], metric_key=metric_key) - - span_keys = [ - SpanKey( - group="00", # all segments have a group of `00` currently - timestamp=example[0], - span_id=example[1], - ) - for row in result["data"] - for example in row["examples"] - ][:limit] - - """ - Because transaction level measurements currently do not get - propagated to the spans dataset, we have to query them here, - generate the summary for it here, and propagate it to the - results of the next stage. - - Once we start writing transaction level measurements to the - indexed spans dataset, we can stop doing this and read the - value directly from the indexed spans dataset. - - For simplicity, all transaction based metrics use this approach. - """ - summaries = { - cast(str, example[1]): cast( - Summary, - { - "min": example[2], - "max": example[2], - "sum": example[2], - "count": 1, - }, - ) - for row in result["data"] - for example in row["examples"] - } - - return span_keys, summaries - - def get_additional_conditions(self, builder: BaseQueryBuilder) -> list[Condition]: - raise NotImplementedError - - def get_min_max_conditions(self, column: Column) -> list[Condition]: - conditions = [] - - if self.min is not None: - conditions.append(Condition(column, Op.GTE, self.min)) - if self.max is not None: - conditions.append(Condition(column, Op.LTE, self.max)) - - return conditions - - -class TransactionDurationSamplesListExecutor(SegmentsSamplesListExecutor): - @classmethod - def mri_to_column(cls, mri: str) -> str | None: - if mri == TransactionMRI.DURATION.value: - # Because we read this from the transactions dataset, - # we use the name for the transactions dataset instead. - return "transaction.duration" - return None - - def get_additional_conditions(self, builder: BaseQueryBuilder) -> list[Condition]: - return [] - - -class TransactionMeasurementsSamplesListExecutor(SegmentsSamplesListExecutor): - @classmethod - def mri_to_column(cls, mri) -> str | None: - name = cls.mri_to_measurement_name(mri) - if name is not None: - return f"measurements.{name}" - - return None - - @classmethod - def mri_to_measurement_name(cls, mri) -> str | None: - parsed_mri = parse_mri(mri) - if parsed_mri is not None and is_measurement(parsed_mri): - return parsed_mri.name[len("measurements:") :] - return None - - def get_additional_conditions(self, builder: BaseQueryBuilder) -> list[Condition]: - name = self.mri_to_measurement_name(self.mri) - return [Condition(Function("has", [Column("measurements.key"), name]), Op.EQ, 1)] - - -class SpansSamplesListExecutor(AbstractSamplesListExecutor): - sortable_columns = {"timestamp", "span.duration", "span.self_time", "summary"} - - @classmethod - @abstractmethod - def mri_to_column(cls, mri) -> str | None: - raise NotImplementedError - - @classmethod - def convert_sort(cls, sort: str, mri: str) -> tuple[Literal["", "-"], str] | None: - direction: Literal["", "-"] = "" - - if sort.startswith("-"): - direction = "-" - sort = sort[1:] - - if sort == "summary": - column = cls.mri_to_column(mri) - if column is not None: - return direction, column - - if sort in cls.sortable_columns: - return direction, sort - - return None - - @classmethod - def supports_mri(cls, mri: str) -> bool: - return cls.mri_to_column(mri) is not None - - def get_matching_traces(self, limit: int) -> tuple[list[str], list[datetime]]: - column = self.mri_to_column(self.mri) - assert column is not None - - builder = SpansIndexedQueryBuilder( - Dataset.SpansIndexed, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=["trace", "timestamp"], - # The orderby is intentionally `None` here as this query is much faster - # if we let Clickhouse decide which order to return the results in. - # This also means we cannot order by any columns or paginate. - orderby=None, - limit=limit, - limitby=("trace", 1), - ) - - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder.resolve_column(column)) - builder.add_conditions([*additional_conditions, *min_max_conditions]) - - query_results = builder.run_query(self.referrer.value) - results = builder.process_results(query_results) - - trace_ids = [row["trace"] for row in results["data"]] - timestamps = [datetime.fromisoformat(row["timestamp"]) for row in results["data"]] - return trace_ids, timestamps - - def get_matching_spans_from_traces( - self, - trace_ids: list[str], - max_spans_per_trace: int, - ) -> list[SpanKey]: - column = self.mri_to_column(self.mri) - assert column is not None - - builder = SpansIndexedQueryBuilder( - Dataset.SpansIndexed, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=["span.group", "timestamp", "id"], - # The orderby is intentionally `None` here as this query is much faster - # if we let Clickhouse decide which order to return the results in. - # This also means we cannot order by any columns or paginate. - orderby=None, - limit=len(trace_ids) * max_spans_per_trace, - limitby=("trace", max_spans_per_trace), - ) - - trace_id_condition = Condition(Column("trace_id"), Op.IN, trace_ids) - additional_conditions = self.get_additional_conditions(builder) - min_max_conditions = self.get_min_max_conditions(builder.resolve_column(column)) - builder.add_conditions( - [ - trace_id_condition, - *additional_conditions, - *min_max_conditions, - ] - ) - - query_results = builder.run_query(self.referrer.value) - results = builder.process_results(query_results) - - return [ - SpanKey( - group=row["span.group"], - timestamp=row["timestamp"], - span_id=row["id"], - ) - for row in results["data"] - ] - - def get_matching_spans_sorted(self, offset, limit): - """ - Since we're already querying the spans table sorted on some column, - there's no reason to split this into 2 queries. We can go ahead and - just do it all in a single query. - """ - assert self.sort - sort = self.convert_sort(self.sort, self.mri) - assert sort is not None - direction, sort_column = sort - - fields = self.fields[:] - if sort_column not in fields: - fields.append(sort_column) - - column = self.mri_to_column(self.mri) - assert column is not None - if column not in fields: - fields.append(column) - - builder = SpansIndexedQueryBuilder( - Dataset.SpansIndexed, - params={}, - snuba_params=self.snuba_params, - selected_columns=fields, - orderby=f"{direction}{sort_column}", - limit=limit, - offset=0, - ) - - additional_conditions = self.get_additional_conditions(builder) - - min_max_conditions = self.get_min_max_conditions(builder.resolve_column(column)) - - builder.add_conditions([*additional_conditions, *min_max_conditions]) - - query_results = builder.run_query(self.referrer.value) - result = builder.process_results(query_results) - - should_pop_column = column not in self.fields - - for row in result["data"]: - value = row.pop(column) if should_pop_column else row[column] - row["summary"] = { - "min": value, - "max": value, - "sum": value, - "count": 1, - } - - return result - - def get_matching_spans_unsorted(self, offset, limit): - span_keys = self.get_unsorted_span_keys(offset, limit) - - column = self.mri_to_column(self.mri) - assert column is not None # should always resolve to a column here - - result = self.get_spans_by_key(span_keys, additional_fields=[column]) - - should_pop_column = column not in self.fields - - for row in result["data"]: - value = row.pop(column) if should_pop_column else row[column] - row["summary"] = { - "min": value, - "max": value, - "sum": value, - "count": 1, - } - - return result - - def get_unsorted_span_keys(self, offset: int, limit: int) -> list[SpanKey]: - column = self.mri_to_column(self.mri) - - for dataset_segmentation_condition_fn in self.dataset_segmentation_conditions(): - builder = SpansIndexedQueryBuilder( - Dataset.SpansIndexed, - params={}, - snuba_params=self.snuba_params, - query=self.query, - selected_columns=[ - f"rounded_timestamp({self.rollup})", - f"examples({column}, {self.num_samples}) AS examples", - ], - limit=limit, - offset=offset, - sample_rate=options.get("metrics.sample-list.sample-rate"), - config=QueryBuilderConfig(functions_acl=["rounded_timestamp", "examples"]), - ) - - segmentation_conditions = dataset_segmentation_condition_fn(builder) - - additional_conditions = self.get_additional_conditions(builder) - - assert column is not None - min_max_conditions = self.get_min_max_conditions(builder.resolve_column(column)) - - builder.add_conditions( - [ - *segmentation_conditions, - *additional_conditions, - *min_max_conditions, - ] - ) - - query_results = builder.run_query(self.referrer.value) - result = builder.process_results(query_results) - - if not result["data"]: - continue - - metric_key = lambda example: example[3] # sort by metric - for row in result["data"]: - row["examples"] = pick_samples(row["examples"], metric_key=metric_key) - - return [ - SpanKey( - group=example[0], - timestamp=example[1], - span_id=example[2], - ) - for row in result["data"] - for example in row["examples"] - ][:limit] - - return [] - - @abstractmethod - def get_additional_conditions(self, builder: BaseQueryBuilder) -> list[Condition]: - raise NotImplementedError - - def dataset_segmentation_conditions( - self, - ) -> list[Callable[[BaseQueryBuilder], list[Condition]]]: - return [lambda builder: []] - - def get_min_max_conditions(self, column: SelectType) -> list[Condition]: - conditions = [] - - if self.min is not None: - conditions.append(Condition(column, Op.GTE, self.min)) - if self.max is not None: - conditions.append(Condition(column, Op.LTE, self.max)) - - return conditions - - -class SpansTimingsSamplesListExecutor(SpansSamplesListExecutor): - MRI_MAPPING = { - SpanMRI.DURATION.value: "span.duration", - SpanMRI.SELF_TIME.value: "span.self_time", - } - - @classmethod - def mri_to_column(cls, mri) -> str | None: - return cls.MRI_MAPPING.get(mri) - - def get_additional_conditions(self, builder: BaseQueryBuilder) -> list[Condition]: - return [] - - def dataset_segmentation_conditions( - self, - ) -> list[Callable[[BaseQueryBuilder], list[Condition]]]: - return [ - # This grouping makes the assumption that spans are divided into 2 groups right now. - # Those that are classified with a non zero group, and those that are unclassified - # with a zero group. - # - # In the future, if all span groups are classified, this segmentation should change - # to reflect that. - lambda builder: [ - # The `00` group is used for spans not used within the - # new starfish experience. It's effectively the group - # for other. It is a massive group, so we've chosen - # to exclude it here. - Condition(builder.column("span.group"), Op.NEQ, "00"), - ], - lambda builder: [ - # If the previous query contained no results, we'll - # have to search the `00` group which is slower but - # unfortunately necessary here. - Condition(builder.column("span.group"), Op.EQ, "00"), - ], - ] - - -class SpansMeasurementsSamplesListExecutor(SpansSamplesListExecutor): - # These are some hard coded metrics in the spans name space that can be - # queried in the measurements of the indexed spans dataset - MRI_MAPPING = { - SpanMRI.RESPONSE_CONTENT_LENGTH.value: "http.response_content_length", - SpanMRI.DECODED_RESPONSE_CONTENT_LENGTH.value: "http.decoded_response_content_length", - SpanMRI.RESPONSE_TRANSFER_SIZE.value: "http.response_transfer_size", - SpanMRI.AI_TOTAL_TOKENS.value: "ai_total_tokens_used", - SpanMRI.AI_TOTAL_COST.value: "ai_total_cost", - SpanMRI.CACHE_ITEM_SIZE.value: "cache.item_size", - SpanMRI.MOBILE_SLOW_FRAMES.value: "frames.slow", - SpanMRI.MOBILE_FROZEN_FRAMES.value: "frames.frozen", - SpanMRI.MOBILE_TOTAL_FRAMES.value: "frames.total", - SpanMRI.MOBILE_FRAMES_DELAY.value: "frames.delay", - SpanMRI.MESSAGE_RECEIVE_LATENCY.value: "messaging.message.receive.latency", - } - - @classmethod - def mri_to_column(cls, mri) -> str | None: - name = cls.mri_measurement_name(mri) - if name is not None: - return f"measurements.{name}" - - return None - - @classmethod - def mri_measurement_name(cls, mri) -> str | None: - if name := cls.MRI_MAPPING.get(mri): - return name - - # some web vitals exist on spans - parsed_mri = parse_mri(mri) - if ( - parsed_mri is not None - and parsed_mri.namespace == "spans" - and parsed_mri.name.startswith("webvital.") - ): - return parsed_mri.name[len("webvital:") :] - - return None - - def get_additional_conditions(self, builder: BaseQueryBuilder) -> list[Condition]: - name = self.mri_measurement_name(self.mri) - return [Condition(Function("has", [Column("measurements.key"), name]), Op.EQ, 1)] - - -class CustomSamplesListExecutor(AbstractSamplesListExecutor): - sortable_columns = {"timestamp", "span.duration", "summary"} - - SORT_MAPPING = { - "span.duration": "span.duration", - "timestamp": "timestamp", - } - - OPERATION_COLUMN_MAPPING = { - "min": "min_metric", - "max": "max_metric", - "count": "count_metric", - } - - # refer to the definition of `examples()` in the metrics summary dataset - EXAMPLES_SORT_KEY = { - "min": 3, - "max": 4, - "count": 6, - } - - @classmethod - def convert_sort(cls, sort: str, operation: str | None) -> tuple[Literal["", "-"], str] | None: - direction: Literal["", "-"] = "" - - if sort.startswith("-"): - direction = "-" - sort = sort[1:] - - if sort in cls.SORT_MAPPING: - return direction, cls.SORT_MAPPING[sort] - - if sort == "summary": - return direction, cls.OPERATION_COLUMN_MAPPING.get(operation or "", "avg_metric") - - return None - - @classmethod - def supports_mri(cls, mri: str) -> bool: - parsed_mri = parse_mri(mri) - if parsed_mri is not None and is_custom_metric(parsed_mri): - return True - return False - - def _get_spans( - self, - span_keys: list[SpanKey], - summaries: dict[str, Summary], - ): - result = self.get_spans_by_key(span_keys, additional_fields=["id"]) - - # if there is a sort, we want to preserve the result in the same - # order as the span keys which we can do by checking the span ids - if self.sort: - order = {key.span_id: i for i, key in enumerate(span_keys)} - result["data"].sort(key=lambda row: order[row["id"]]) - - should_pop_id = "id" not in self.fields - - for row in result["data"]: - span_id = row.pop("id") if should_pop_id else row["id"] - row["summary"] = summaries[span_id] - - return result - - def get_additional_conditions(self, builder: BaseQueryBuilder) -> list[Condition]: - return [ - builder.convert_search_filter_to_condition( - SearchFilter(SearchKey("metric"), "=", SearchValue(self.mri)), - ) - ] - - def get_min_max_conditions(self, builder: BaseQueryBuilder) -> list[Condition]: - conditions = [] - - column = builder.resolve_column( - self.OPERATION_COLUMN_MAPPING.get(self.operation or "", "avg_metric") - ) - - if self.min is not None: - conditions.append(Condition(column, Op.GTE, self.min)) - if self.max is not None: - conditions.append(Condition(column, Op.LTE, self.max)) - - return conditions - - -SAMPLE_LIST_EXECUTORS = [ - TransactionDurationSamplesListExecutor, - TransactionMeasurementsSamplesListExecutor, - SpansTimingsSamplesListExecutor, - SpansMeasurementsSamplesListExecutor, - CustomSamplesListExecutor, -] - - -def get_sample_list_executor_cls(mri) -> type[AbstractSamplesListExecutor] | None: - for executor_cls in SAMPLE_LIST_EXECUTORS: - if executor_cls.supports_mri(mri): - return executor_cls - return None - - -def pick_samples( - samples: list[Any], - metric_key: Callable[[Any], float], -) -> list[Any]: - # if there are at most 3 samples, there's no picking needed - # as we want to return at most 3 from the list provided - if len(samples) <= 3: - return samples - - samples.sort(key=metric_key) - - keys = [metric_key(sample) for sample in samples] - - # first element is the one near the average - # but must not be the first or last element - avg_m = sum(keys) / len(keys) - idx_m = bisect(keys, avg_m) - # ensure there is at least 1 element on both sides - # of the middle element we just picked - # i.e. should not pick index 0 and len(keys) - 1 - idx_m = clip(idx_m, 1, len(keys) - 2) - - # second element is near the average of first - # split, but must not be the split element - avg_l = sum(keys[:idx_m]) / idx_m - idx_l = bisect(keys, avg_l, hi=idx_m - 1) - idx_l += 1 # push it closer to the middle - # ensure this is not the same as middle element - idx_l = clip(idx_l, 0, idx_m - 1) - - # third element is near the average of second - # split, but must not be the split element - avg_r = sum(keys[idx_m + 1 :]) / (len(keys) - idx_m - 1) - idx_r = bisect(keys, avg_r, lo=idx_m + 1) - idx_r -= 1 # push it closer to the middle - # ensure this is not the same as middle element - idx_r = clip(idx_r, idx_m + 1, len(keys) - 1) - - return [samples[idx_m], samples[idx_l], samples[idx_r]] diff --git a/tests/sentry/api/endpoints/test_organization_traces.py b/tests/sentry/api/endpoints/test_organization_traces.py index e146be6da313a6..7b7821d506a4df 100644 --- a/tests/sentry/api/endpoints/test_organization_traces.py +++ b/tests/sentry/api/endpoints/test_organization_traces.py @@ -7,7 +7,6 @@ from rest_framework.exceptions import ErrorDetail from sentry.api.endpoints.organization_traces import process_breakdowns -from sentry.snuba.metrics.naming_layer.mri import SpanMRI, TransactionMRI from sentry.testutils.cases import APITestCase, BaseSpansTestCase from sentry.testutils.helpers import parse_link_header from sentry.testutils.helpers.datetime import before_now @@ -805,130 +804,6 @@ def test_matching_tag(self): }, ] - def test_matching_tag_metrics(self): - ( - project_1, - _, - _, - _, - trace_id_3, - timestamps, - span_ids, - ) = self.create_mock_traces() - - for mri, op in [ - (TransactionMRI.DURATION.value, "count"), - ("d:transactions/measurements.lcp@millisecond", "max"), - (SpanMRI.DURATION.value, "min"), - (SpanMRI.SELF_TIME.value, "avg"), - ("d:spans/webvital.score.total@ratio", "count"), - ("d:spans/webvital.score.inp@ratio", "max"), - ("d:spans/webvital.score.weight.inp@ratio", "min"), - ("d:spans/http.response_content_length@byte", "avg"), - ("d:spans/http.decoded_response_content_length@byte", "count"), - ("d:spans/http.response_transfer_size@byte", "max"), - ("d:custom/value@millisecond", "min"), - ]: - for user_query in ["foo:qux", None]: - query = { - "mri": mri, - "metricsMin": 30_000, - "metricsMax": 50_000, - "metricsOp": op, - "metricsQuery": ["foo:qux"], - "project": [], - "field": ["id", "parent_span", "span.duration"], - "maxSpansPerTrace": 3, - "per_page": 1, - } - if user_query: - query["query"] = user_query - - response = self.do_request(query) - assert response.status_code == 200, (mri, response.data) - - result_data = sorted(response.data["data"], key=lambda trace: trace["trace"]) - - assert result_data == [ - { - "trace": trace_id_3, - "numErrors": 0, - "numOccurrences": 0, - "numSpans": 2, - "matchingSpans": 1 if user_query else 2, - "project": project_1.slug, - "name": "qux", - "duration": 40_000, - "start": timestamps[10], - "end": timestamps[10] + 40_000, - "breakdowns": [ - { - "project": project_1.slug, - "sdkName": "sentry.javascript.remix", - "isRoot": False, - "start": timestamps[10], - "end": timestamps[10] + 40_000, - "sliceStart": 0, - "sliceEnd": 40, - "sliceWidth": 40, - "kind": "project", - "duration": 40_000, - }, - { - "project": project_1.slug, - "sdkName": "sentry.javascript.node", - "isRoot": False, - "start": timestamps[11], - "end": timestamps[11] + 10_000, - "sliceStart": 10, - "sliceEnd": 20, - "sliceWidth": 10, - "kind": "project", - "duration": 10_000, - }, - ], - }, - ], (mri, user_query) - - def test_matching_tag_metrics_but_no_matching_spans(self): - for mri in [ - TransactionMRI.DURATION.value, - "d:transactions/measurements.lcp@millisecond", - SpanMRI.DURATION.value, - SpanMRI.SELF_TIME.value, - "d:spans/webvital.score.total@ratio", - "d:spans/webvital.score.inp@ratio", - "d:spans/webvital.score.weight.inp@ratio", - "d:spans/http.response_content_length@byte", - "d:spans/http.decoded_response_content_length@byte", - "d:spans/http.response_transfer_size@byte", - "d:custom/value@millisecond", - ]: - for user_query in ["foo:qux", None]: - query = { - "mri": mri, - "metricsQuery": ["foo:qux"], - "project": [self.project.id], - "field": ["id", "parent_span", "span.duration"], - "query": "foo:foobar", - "maxSpansPerTrace": 3, - } - - response = self.do_request(query) - assert response.status_code == 200, (mri, response.data) - assert response.data == { - "data": [], - "meta": { - "dataset": "unknown", - "datasetReason": "unchanged", - "fields": {}, - "isMetricsData": False, - "isMetricsExtractedData": False, - "tips": {}, - "units": {}, - }, - } - class OrganizationTraceSpansEndpointTest(OrganizationTracesEndpointTestBase): view = "sentry-api-0-organization-trace-spans" @@ -1055,61 +930,6 @@ def test_get_spans_for_trace_matching_tags(self): } assert response.data["data"] == [{"id": span_id} for span_id in sorted(span_ids[1:4])] - def test_get_spans_for_trace_matching_tags_metrics(self): - ( - project_1, - project_2, - _, - _, - trace_id, - timestamps, - span_ids, - ) = self.create_mock_traces() - - for mri, op in [ - (TransactionMRI.DURATION.value, "count"), - ("d:transactions/measurements.lcp@millisecond", "max"), - (SpanMRI.DURATION.value, "min"), - (SpanMRI.SELF_TIME.value, "avg"), - ("d:spans/webvital.score.total@ratio", "count"), - ("d:spans/webvital.score.inp@ratio", "max"), - ("d:spans/webvital.score.weight.inp@ratio", "min"), - ("d:spans/http.response_content_length@byte", "avg"), - ("d:spans/http.decoded_response_content_length@byte", "count"), - ("d:spans/http.response_transfer_size@byte", "max"), - ("d:custom/value@millisecond", "min"), - ]: - for user_query in ["foo:qux", None]: - query = { - "mri": mri, - "metricsMin": 30_000, - "metricsMax": 50_000, - "metricsOp": op, - "metricsQuery": ["foo:qux"], - "project": [], - "field": ["id"], - "sort": "id", - } - if user_query: - query["query"] = user_query - - response = self.do_request(trace_id, query) - assert response.status_code == 200, response.data - assert response.data["meta"] == { - "dataset": "unknown", - "datasetReason": "unchanged", - "fields": { - "id": "string", - }, - "isMetricsData": False, - "isMetricsExtractedData": False, - "tips": {}, - "units": { - "id": None, - }, - } - assert response.data["data"] == [{"id": span_ids[10]}] - class OrganizationTracesStatsEndpointTest(OrganizationTracesEndpointTestBase): view = "sentry-api-0-organization-traces-stats" @@ -2566,10 +2386,6 @@ def test_build_breakdown_error(mock_new_trace_interval, mock_capture_exception): class OrganizationTracesEAPEndpointTest(OrganizationTracesEndpointTest): is_eap: bool = True - @pytest.mark.skip(reason="no support for metrics so not back porting this feature") - def test_matching_tag_metrics(self): - pass - def test_invalid_sort(self): for sort in ["foo", "-foo"]: query = { From 2c82b8c0dd1f34358d3fe2b54143da3f2704531c Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:06:55 +0000 Subject: [PATCH 3/5] :hammer_and_wrench: apply pre-commit fixes --- tests/sentry/api/endpoints/test_organization_traces.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sentry/api/endpoints/test_organization_traces.py b/tests/sentry/api/endpoints/test_organization_traces.py index 91d8f3ee3e34c5..b88f52f4b00dc8 100644 --- a/tests/sentry/api/endpoints/test_organization_traces.py +++ b/tests/sentry/api/endpoints/test_organization_traces.py @@ -819,7 +819,7 @@ def test_matching_tag(self): ], }, ] - + class OrganizationTraceSpansEndpointTest(OrganizationTracesEndpointTestBase): view = "sentry-api-0-organization-trace-spans" From 6cc5cb77d56bfebb05835b12ae2d0a37a90f4a18 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Fri, 22 Nov 2024 11:07:26 -0500 Subject: [PATCH 4/5] Remove more irrelevant metrics samples related code --- src/sentry/features/temporary.py | 2 -- src/sentry/snuba/referrer.py | 1 - tests/sentry/api/endpoints/test_organization_metrics.py | 5 ++--- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index 3ca21391020541..b9d3d605627acd 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -203,8 +203,6 @@ def register_temporary_features(manager: FeatureManager): manager.add("organizations:messaging-integration-onboarding-project-creation", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) # Enable threshold period in metric alert rule builder manager.add("organizations:metric-alert-threshold-period", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) - # Enables the search bar for metrics samples list - manager.add("organizations:metrics-samples-list-search", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) # Migrate Orgs to new Azure DevOps Integration manager.add("organizations:migrate-azure-devops-integration", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) # Enable Session Stats down to a minute resolution diff --git a/src/sentry/snuba/referrer.py b/src/sentry/snuba/referrer.py index 7ace324f5c9257..f1888419aab37b 100644 --- a/src/sentry/snuba/referrer.py +++ b/src/sentry/snuba/referrer.py @@ -167,7 +167,6 @@ class Referrer(Enum): API_ORGANIZATION_METRICS_METADATA_FETCH_SPANS = "api.organization.metrics-metadata.fetch-spans" API_ORGANIZATION_METRICS_QUERY = "api.organization.metrics-query" API_ORGANIZATION_METRICS_EAP_QUERY = "api.organization.metrics-eap-query" - API_ORGANIZATION_METRICS_SAMPLES = "api.organization.metrics-samples" API_ORGANIZATION_ISSUE_REPLAY_COUNT = "api.organization-issue-replay-count" API_ORGANIZATION_SDK_UPDATES = "api.organization-sdk-updates" API_ORGANIZATION_SPANS_HISTOGRAM_MIN_MAX = "api.organization-spans-histogram-min-max" diff --git a/tests/sentry/api/endpoints/test_organization_metrics.py b/tests/sentry/api/endpoints/test_organization_metrics.py index ae9a95f8815831..bfda3dbdc1d6d4 100644 --- a/tests/sentry/api/endpoints/test_organization_metrics.py +++ b/tests/sentry/api/endpoints/test_organization_metrics.py @@ -32,7 +32,8 @@ ], unit="percentage", snql=lambda crashed_count, errored_set, entity, metric_ids, alias=None: complement( - division_float(crashed_count, errored_set, alias=alias), alias="crash_free_fake" + division_float(crashed_count, errored_set, alias=alias), + alias="crash_free_fake", ), ) } @@ -54,7 +55,6 @@ def indexer_record(use_case_id: UseCaseID, org_id: int, string: str) -> int: class OrganizationMetricsPermissionTest(APITestCase): - endpoints = ( ( "get", @@ -73,7 +73,6 @@ class OrganizationMetricsPermissionTest(APITestCase): "post", "sentry-api-0-organization-metrics-query", ), - ("get", "sentry-api-0-organization-metrics-samples"), ) def setUp(self): From 1be01f2a480655b616790db383a510f3e1615b16 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Fri, 22 Nov 2024 11:20:14 -0500 Subject: [PATCH 5/5] Remove another mri related test --- .../api/endpoints/test_organization_traces.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/sentry/api/endpoints/test_organization_traces.py b/tests/sentry/api/endpoints/test_organization_traces.py index b88f52f4b00dc8..1dbb69c357bef0 100644 --- a/tests/sentry/api/endpoints/test_organization_traces.py +++ b/tests/sentry/api/endpoints/test_organization_traces.py @@ -319,22 +319,6 @@ def test_bad_params_too_many_per_page(self): ), } - def test_unsupported_mri(self): - query = { - "project": [self.project.id], - "field": ["id"], - "maxSpansPerTrace": 1, - "mri": "d:spans/made_up@none", - } - - response = self.do_request(query) - assert response.status_code == 400, response.data - assert response.data == { - "detail": ErrorDetail( - string="Unsupported MRI: d:spans/made_up@none", code="parse_error" - ), - } - def test_no_traces(self): query = { "project": [self.project.id],