From 5933ec83210d278293e51fc6b3d9a5dc07c72e2c Mon Sep 17 00:00:00 2001 From: William Mak Date: Wed, 27 Oct 2021 17:32:44 -0400 Subject: [PATCH 1/3] fix(discover): Add snql aggregations - This is far easier in snql since we include the entire condition in the having clause now, so there's no need to select the extra data anymore. - Instead we only need to check there's at least 1 selected aggregate to avoid sudden grouping --- src/sentry/search/events/builder.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sentry/search/events/builder.py b/src/sentry/search/events/builder.py index a28bb76ba34d83..27fb031248b564 100644 --- a/src/sentry/search/events/builder.py +++ b/src/sentry/search/events/builder.py @@ -36,7 +36,6 @@ def __init__( ): super().__init__(dataset, params, auto_fields, functions_acl) - # TODO: implement this in `resolve_select` self.auto_aggregations = auto_aggregations self.limit = None if limit is None else Limit(limit) @@ -102,7 +101,10 @@ def validate_aggregate_arguments(self): f"A single field cannot be used both inside and outside a function in the same query. To use {column.alias} you must first remove the function(s): {function_msg}" ) - def validate_having_clause(self): + def validate_having_clause(self) -> None: + # No need to validate if auto aggregations and there's at least one aggregate + if self.auto_aggregations and self.aggregates: + return error_extra = ", and could not be automatically added" if self.auto_aggregations else "" for condition in self.having: lhs = condition.lhs From a597fdf43cbe8513a2fcfdc39d93a8ab8f5c7ac5 Mon Sep 17 00:00:00 2001 From: William Mak Date: Wed, 27 Oct 2021 17:59:19 -0400 Subject: [PATCH 2/3] ref: Adding tests and fixing a found bug - Also updating some docstrings --- src/sentry/search/events/builder.py | 6 ++- src/sentry/search/events/fields.py | 15 +++++-- src/sentry/search/events/filter.py | 2 +- .../endpoints/test_organization_events_v2.py | 39 +++++++++++++++++++ 4 files changed, 56 insertions(+), 6 deletions(-) diff --git a/src/sentry/search/events/builder.py b/src/sentry/search/events/builder.py index 27fb031248b564..fc70f41b638241 100644 --- a/src/sentry/search/events/builder.py +++ b/src/sentry/search/events/builder.py @@ -102,7 +102,11 @@ def validate_aggregate_arguments(self): ) def validate_having_clause(self) -> None: - # No need to validate if auto aggregations and there's at least one aggregate + """Validate that the functions in having are selected columns + + Skipped if auto_aggregations are enabled, and at least one other aggregate is selected + This is so we don't change grouping suddenly + """ if self.auto_aggregations and self.aggregates: return error_extra = ", and could not be automatically added" if self.auto_aggregations else "" diff --git a/src/sentry/search/events/fields.py b/src/sentry/search/events/fields.py index f993f5ce9203de..c01e6f8c76ea9f 100644 --- a/src/sentry/search/events/fields.py +++ b/src/sentry/search/events/fields.py @@ -2985,9 +2985,15 @@ def is_function(self, function: str) -> bool: """ "Given a public field, check if it's a supported function""" return function in self.function_converter - def resolve_function(self, function: str, match: Optional[Match[str]] = None) -> SelectType: - """Given a public function, resolve to the corresponding Snql - function + def resolve_function( + self, function: str, match: Optional[Match[str]] = None, resolve_only=False + ) -> SelectType: + """Given a public function, resolve to the corresponding Snql function + + + :param function: the public alias for a function eg. "p50(transaction.duration)" + :param match: the Match so we don't have to run the regex twice + :param resolve_only: whether we should add the aggregate to self.aggregates """ if match is None: match = is_function(function) @@ -3028,7 +3034,8 @@ def resolve_function(self, function: str, match: Optional[Match[str]] = None) -> raise InvalidSearchQuery("Invalid combinator: Arguments passed were incompatible") if snql_function.snql_aggregate is not None: - self.aggregates.append(snql_function.snql_aggregate(arguments, alias)) + if not resolve_only: + self.aggregates.append(snql_function.snql_aggregate(arguments, alias)) return snql_function.snql_aggregate(arguments, alias) return snql_function.snql_column(arguments, alias) diff --git a/src/sentry/search/events/filter.py b/src/sentry/search/events/filter.py index 68bbd113f73f34..6f587cf89ce3fa 100644 --- a/src/sentry/search/events/filter.py +++ b/src/sentry/search/events/filter.py @@ -1307,7 +1307,7 @@ def convert_aggregate_filter_to_condition( operator = Op.IS_NULL if aggregate_filter.operator == "=" else Op.IS_NOT_NULL return Condition(name, operator) - function = self.resolve_function(name) + function = self.resolve_function(name, resolve_only=True) return Condition(function, Op(aggregate_filter.operator), value) diff --git a/tests/snuba/api/endpoints/test_organization_events_v2.py b/tests/snuba/api/endpoints/test_organization_events_v2.py index c8cbe0d2f55c86..758d2308bb3603 100644 --- a/tests/snuba/api/endpoints/test_organization_events_v2.py +++ b/tests/snuba/api/endpoints/test_organization_events_v2.py @@ -1777,6 +1777,45 @@ def test_aggregation_alias_comparison(self): assert data[0]["transaction"] == event.transaction assert data[0]["p95"] == 3000 + def test_auto_aggregations(self): + project = self.create_project() + data = load_data( + "transaction", + timestamp=before_now(minutes=1), + start_timestamp=before_now(minutes=1, seconds=5), + ) + data["transaction"] = "/aggregates/1" + self.store_event(data, project_id=project.id) + + data = load_data( + "transaction", + timestamp=before_now(minutes=1), + start_timestamp=before_now(minutes=1, seconds=3), + ) + data["transaction"] = "/aggregates/2" + event = self.store_event(data, project_id=project.id) + + query = { + "field": ["transaction", "p75()"], + "query": "event.type:transaction p95():<4000", + "orderby": ["transaction"], + } + response = self.do_request(query) + + assert response.status_code == 200, response.content + assert len(response.data["data"]) == 1 + data = response.data["data"] + assert data[0]["transaction"] == event.transaction + + query = { + "field": ["transaction"], + "query": "event.type:transaction p95():<4000", + "orderby": ["transaction"], + } + response = self.do_request(query) + + assert response.status_code == 400, response.content + def test_aggregation_comparison_with_conditions(self): project = self.create_project() self.store_event( From 40e71893bfc0b0d4ce406ffff6132de02c1ed788 Mon Sep 17 00:00:00 2001 From: William Mak Date: Thu, 28 Oct 2021 13:37:25 -0400 Subject: [PATCH 3/3] ref: Add comments --- src/sentry/search/events/base.py | 1 + src/sentry/search/events/filter.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/sentry/search/events/base.py b/src/sentry/search/events/base.py index 1e0d3ebc399d24..85756aad34e1a6 100644 --- a/src/sentry/search/events/base.py +++ b/src/sentry/search/events/base.py @@ -28,6 +28,7 @@ def __init__( # Function is a subclass of CurriedFunction self.where: List[WhereType] = [] + # The list of aggregates to be selected self.aggregates: List[CurriedFunction] = [] self.columns: List[SelectType] = [] self.orderby: List[OrderBy] = [] diff --git a/src/sentry/search/events/filter.py b/src/sentry/search/events/filter.py index 6f587cf89ce3fa..5e84f32493f485 100644 --- a/src/sentry/search/events/filter.py +++ b/src/sentry/search/events/filter.py @@ -1307,6 +1307,7 @@ def convert_aggregate_filter_to_condition( operator = Op.IS_NULL if aggregate_filter.operator == "=" else Op.IS_NOT_NULL return Condition(name, operator) + # When resolving functions in conditions we don't want to add them to the list of aggregates function = self.resolve_function(name, resolve_only=True) return Condition(function, Op(aggregate_filter.operator), value)