From 8701ed3306b14462b49fdd446e435dba4e9fc9b8 Mon Sep 17 00:00:00 2001 From: William Mak Date: Mon, 17 Nov 2025 15:55:03 -0500 Subject: [PATCH 1/3] fix(timeseries): Handle orderbys not in groupby - If a field is in the orderby but not in the groupbys we currently error in snuba, this updates the resolver to add the orderby field to the groupbys automatically for both table/topEvents - Also fixes a bug where timeseries could be passed to the orderby, but we don't want that --- .../organization_events_timeseries.py | 4 + src/sentry/snuba/rpc_dataset_common.py | 8 +- ...st_organization_events_timeseries_spans.py | 116 ++++++++++++++++++ 3 files changed, 127 insertions(+), 1 deletion(-) diff --git a/src/sentry/api/endpoints/organization_events_timeseries.py b/src/sentry/api/endpoints/organization_events_timeseries.py index 5c0c9eca9de9c2..6b7e9f9d8f6575 100644 --- a/src/sentry/api/endpoints/organization_events_timeseries.py +++ b/src/sentry/api/endpoints/organization_events_timeseries.py @@ -248,10 +248,14 @@ def get_rpc_config(): if top_events > 0: raw_groupby = self.get_field_list(organization, request, param_name="groupBy") + raw_orderby = self.get_orderby(request) if len(raw_groupby) == 0: raise ParseError("groupBy is a required parameter when doing topEvents") if "timestamp" in raw_groupby: raise ParseError("Cannot group by timestamp") + if raw_orderby: + if "timestamp" in [col.strip("-") for col in raw_orderby]: + raise ParseError("Cannot order by timestamp") if dataset in RPC_DATASETS: return dataset.run_top_events_timeseries_query( params=snuba_params, diff --git a/src/sentry/snuba/rpc_dataset_common.py b/src/sentry/snuba/rpc_dataset_common.py index 3ed2c43b7a1794..3a462f4fe3b284 100644 --- a/src/sentry/snuba/rpc_dataset_common.py +++ b/src/sentry/snuba/rpc_dataset_common.py @@ -259,13 +259,19 @@ def get_table_rpc_request(cls, query: TableQuery) -> TableRequest: if stripped_orderby in orderby_aliases: resolved_column = orderby_aliases[stripped_orderby] else: - resolved_column = resolver.resolve_column(stripped_orderby)[0] + resolved_column, resolved_context = resolver.resolve_column(stripped_orderby) + # If the orderby isn't in the selected columns, instead of erroring out add the column to the query + if stripped_orderby not in query.selected_columns: + columns.append(resolved_column) + column_contexts.append(resolved_context) resolved_orderby.append( TraceItemTableRequest.OrderBy( column=cls.categorize_column(resolved_column), descending=orderby_column.startswith("-"), ) ) + # need to reset all columns cause we may have added some while resolving orderby + all_columns = columns + equations has_aggregations = any(col for col in columns if col.is_aggregate) or any( col for col in equations if col.is_aggregate diff --git a/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py b/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py index 3654af08139eb1..2d76fa12732014 100644 --- a/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py +++ b/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py @@ -525,6 +525,122 @@ def test_top_events(self) -> None: "order": 2, } + def test_top_events_orderby_not_in_groupby(self) -> None: + self.store_spans( + [ + self.create_span( + {"sentry_tags": {"transaction": "foo", "status": "success"}}, + start_ts=self.start + timedelta(minutes=1), + duration=2001, + ), + self.create_span( + {"sentry_tags": {"transaction": "bar", "status": "success"}}, + start_ts=self.start + timedelta(minutes=1), + duration=2000, + ), + self.create_span( + {"sentry_tags": {"transaction": "baz", "status": "success"}}, + start_ts=self.start + timedelta(minutes=1), + duration=1999, + ), + self.create_span( + {"sentry_tags": {"transaction": "qux", "status": "success"}}, + start_ts=self.start + timedelta(minutes=1), + duration=1998, + ), + ], + is_eap=True, + ) + + self.end = self.start + timedelta(minutes=6) + response = self._do_request( + data={ + "start": self.start, + "end": self.end, + "interval": "1m", + "yAxis": "count()", + "groupBy": ["transaction"], + "orderby": ["-sum(span.self_time)"], + "project": self.project.id, + "dataset": "spans", + "excludeOther": 0, + "topEvents": 2, + }, + ) + assert response.status_code == 200, response.content + + assert response.data["meta"] == { + "dataset": "spans", + "start": self.start.timestamp() * 1000, + "end": self.end.timestamp() * 1000, + } + assert len(response.data["timeSeries"]) == 3 + + timeseries = response.data["timeSeries"][0] + assert len(timeseries["values"]) == 6 + assert timeseries["yAxis"] == "count()" + assert timeseries["values"] == build_expected_timeseries( + self.start, 60_000, [0, 1, 0, 0, 0, 0], ignore_accuracy=True + ) + assert timeseries["groupBy"] == [{"key": "transaction", "value": "foo"}] + assert timeseries["meta"] == { + "dataScanned": "full", + "valueType": "integer", + "valueUnit": None, + "interval": 60_000, + "isOther": False, + "order": 0, + } + + timeseries = response.data["timeSeries"][1] + assert len(timeseries["values"]) == 6 + assert timeseries["yAxis"] == "count()" + assert timeseries["values"] == build_expected_timeseries( + self.start, 60_000, [0, 1, 0, 0, 0, 0], ignore_accuracy=True + ) + assert timeseries["groupBy"] == [{"key": "transaction", "value": "bar"}] + assert timeseries["meta"] == { + "dataScanned": "full", + "valueType": "integer", + "valueUnit": None, + "interval": 60_000, + "isOther": False, + "order": 1, + } + + timeseries = response.data["timeSeries"][2] + assert len(timeseries["values"]) == 6 + assert timeseries["yAxis"] == "count()" + assert timeseries["values"] == build_expected_timeseries( + self.start, 60_000, [0, 2, 0, 0, 0, 0], ignore_accuracy=True + ) + assert timeseries["groupBy"] is None + assert timeseries["meta"] == { + "dataScanned": "full", + "valueType": "integer", + "valueUnit": None, + "interval": 60_000, + "isOther": True, + "order": 2, + } + + def test_top_events_orderby_is_timestamp(self) -> None: + response = self._do_request( + data={ + "start": self.start, + "end": self.end, + "interval": "1m", + "yAxis": "count()", + "groupBy": ["transaction"], + "orderby": ["timestamp"], + "project": self.project.id, + "dataset": "spans", + "excludeOther": 0, + "topEvents": 2, + }, + ) + assert response.status_code == 400, response.content + def test_top_events_with_none_as_groupby(self) -> None: self.store_spans( [ From 08927b30b9c9b0a453ced7df48c14856bca36452 Mon Sep 17 00:00:00 2001 From: William Mak Date: Tue, 18 Nov 2025 14:51:30 -0500 Subject: [PATCH 2/3] ref: Change implementation, error instead of auto adding --- src/sentry/snuba/rpc_dataset_common.py | 10 +--- ...st_organization_events_timeseries_spans.py | 59 +------------------ 2 files changed, 6 insertions(+), 63 deletions(-) diff --git a/src/sentry/snuba/rpc_dataset_common.py b/src/sentry/snuba/rpc_dataset_common.py index 3a462f4fe3b284..ee0812e7bbf4d4 100644 --- a/src/sentry/snuba/rpc_dataset_common.py +++ b/src/sentry/snuba/rpc_dataset_common.py @@ -256,22 +256,18 @@ def get_table_rpc_request(cls, query: TableQuery) -> TableRequest: orderby_columns = query.orderby if query.orderby is not None else [] for orderby_column in orderby_columns: stripped_orderby = orderby_column.lstrip("-") + if stripped_orderby not in query.selected_columns: + raise InvalidSearchQuery("OrderBy must also be in the selected columns or groupby") if stripped_orderby in orderby_aliases: resolved_column = orderby_aliases[stripped_orderby] else: - resolved_column, resolved_context = resolver.resolve_column(stripped_orderby) - # If the orderby isn't in the selected columns, instead of erroring out add the column to the query - if stripped_orderby not in query.selected_columns: - columns.append(resolved_column) - column_contexts.append(resolved_context) + resolved_column = resolver.resolve_column(stripped_orderby)[0] resolved_orderby.append( TraceItemTableRequest.OrderBy( column=cls.categorize_column(resolved_column), descending=orderby_column.startswith("-"), ) ) - # need to reset all columns cause we may have added some while resolving orderby - all_columns = columns + equations has_aggregations = any(col for col in columns if col.is_aggregate) or any( col for col in equations if col.is_aggregate diff --git a/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py b/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py index 2d76fa12732014..1a3471be2da7f2 100644 --- a/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py +++ b/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py @@ -567,62 +567,8 @@ def test_top_events_orderby_not_in_groupby(self) -> None: "topEvents": 2, }, ) - assert response.status_code == 200, response.content - - assert response.data["meta"] == { - "dataset": "spans", - "start": self.start.timestamp() * 1000, - "end": self.end.timestamp() * 1000, - } - assert len(response.data["timeSeries"]) == 3 - - timeseries = response.data["timeSeries"][0] - assert len(timeseries["values"]) == 6 - assert timeseries["yAxis"] == "count()" - assert timeseries["values"] == build_expected_timeseries( - self.start, 60_000, [0, 1, 0, 0, 0, 0], ignore_accuracy=True - ) - assert timeseries["groupBy"] == [{"key": "transaction", "value": "foo"}] - assert timeseries["meta"] == { - "dataScanned": "full", - "valueType": "integer", - "valueUnit": None, - "interval": 60_000, - "isOther": False, - "order": 0, - } - - timeseries = response.data["timeSeries"][1] - assert len(timeseries["values"]) == 6 - assert timeseries["yAxis"] == "count()" - assert timeseries["values"] == build_expected_timeseries( - self.start, 60_000, [0, 1, 0, 0, 0, 0], ignore_accuracy=True - ) - assert timeseries["groupBy"] == [{"key": "transaction", "value": "bar"}] - assert timeseries["meta"] == { - "dataScanned": "full", - "valueType": "integer", - "valueUnit": None, - "interval": 60_000, - "isOther": False, - "order": 1, - } - - timeseries = response.data["timeSeries"][2] - assert len(timeseries["values"]) == 6 - assert timeseries["yAxis"] == "count()" - assert timeseries["values"] == build_expected_timeseries( - self.start, 60_000, [0, 2, 0, 0, 0, 0], ignore_accuracy=True - ) - assert timeseries["groupBy"] is None - assert timeseries["meta"] == { - "dataScanned": "full", - "valueType": "integer", - "valueUnit": None, - "interval": 60_000, - "isOther": True, - "order": 2, - } + assert response.status_code == 400, response.content + assert "OrderBy must also be in the selected columns or groupby" == response.data["detail"] def test_top_events_orderby_is_timestamp(self) -> None: response = self._do_request( @@ -640,6 +586,7 @@ def test_top_events_orderby_is_timestamp(self) -> None: }, ) assert response.status_code == 400, response.content + assert "Cannot order by timestamp" == response.data["detail"] def test_top_events_with_none_as_groupby(self) -> None: self.store_spans( From 121cf3ca956a3d33f0436300c9bb4b36bf65d101 Mon Sep 17 00:00:00 2001 From: William Mak Date: Wed, 19 Nov 2025 15:35:32 -0500 Subject: [PATCH 3/3] fix tests and casing --- src/sentry/snuba/rpc_dataset_common.py | 5 +++-- .../endpoints/test_organization_events_timeseries_spans.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sentry/snuba/rpc_dataset_common.py b/src/sentry/snuba/rpc_dataset_common.py index d7b3a191172778..1286af420dbee6 100644 --- a/src/sentry/snuba/rpc_dataset_common.py +++ b/src/sentry/snuba/rpc_dataset_common.py @@ -261,10 +261,11 @@ def get_table_rpc_request(cls, query: TableQuery) -> TableRequest: orderby_columns = query.orderby if query.orderby is not None else [] for orderby_column in orderby_columns: stripped_orderby = orderby_column.lstrip("-") - if stripped_orderby not in query.selected_columns: - raise InvalidSearchQuery("OrderBy must also be in the selected columns or groupby") if stripped_orderby in orderby_aliases: resolved_column = orderby_aliases[stripped_orderby] + # If this orderby isn't in the aliases, check if its a selected column + elif stripped_orderby not in query.selected_columns: + raise InvalidSearchQuery("orderby must also be in the selected columns or groupby") else: resolved_column = resolver.resolve_column(stripped_orderby)[0] resolved_orderby.append( diff --git a/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py b/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py index 0b8367c3bae213..f159d73aaa2236 100644 --- a/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py +++ b/tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py @@ -570,7 +570,7 @@ def test_top_events_orderby_not_in_groupby(self) -> None: }, ) assert response.status_code == 400, response.content - assert "OrderBy must also be in the selected columns or groupby" == response.data["detail"] + assert "orderby must also be in the selected columns or groupby" == response.data["detail"] def test_top_events_orderby_is_timestamp(self) -> None: response = self._do_request(