From ec58f6654142d920e58cdced883f7349cbdc7405 Mon Sep 17 00:00:00 2001 From: William Mak Date: Mon, 18 Oct 2021 18:44:20 -0400 Subject: [PATCH 1/3] feat(discover) Add equation support to SnQL - This allows the snql query builder to accept equations - Had to introduce some wonky importing here to avoid circular imports - Otherwise the idea was to pass the parsed equations to the query builder which can then resolve the functions and columns as needed --- .../serializers/rest_framework/dashboard.py | 2 +- .../data_export/endpoints/data_export.py | 2 +- src/sentry/discover/arithmetic.py | 35 ++++++++++++------- src/sentry/search/events/builder.py | 9 +++-- src/sentry/search/events/fields.py | 35 ++++++++++++++++++- src/sentry/snuba/discover.py | 5 +-- 6 files changed, 69 insertions(+), 19 deletions(-) diff --git a/src/sentry/api/serializers/rest_framework/dashboard.py b/src/sentry/api/serializers/rest_framework/dashboard.py index c4b3122abb99c1..b43a28bbb7af61 100644 --- a/src/sentry/api/serializers/rest_framework/dashboard.py +++ b/src/sentry/api/serializers/rest_framework/dashboard.py @@ -71,7 +71,7 @@ def validate(self, data): if equations is not None: try: - resolved_equations, _ = resolve_equation_list(equations, fields) + resolved_equations, _, _ = resolve_equation_list(equations, fields) except (InvalidSearchQuery, ArithmeticError) as err: raise serializers.ValidationError({"fields": f"Invalid fields: {err}"}) else: diff --git a/src/sentry/data_export/endpoints/data_export.py b/src/sentry/data_export/endpoints/data_export.py index 0891282548a898..4b3e608332f644 100644 --- a/src/sentry/data_export/endpoints/data_export.py +++ b/src/sentry/data_export/endpoints/data_export.py @@ -93,7 +93,7 @@ def validate(self, data): try: snuba_filter = get_filter(query_info["query"], processor.params) if len(equations) > 0: - resolved_equations, _ = resolve_equation_list(equations, fields) + resolved_equations, _, _ = resolve_equation_list(equations, fields) else: resolved_equations = [] resolve_field_list( diff --git a/src/sentry/discover/arithmetic.py b/src/sentry/discover/arithmetic.py index 7079ba8d1d2ca0..a10ad0af05c306 100644 --- a/src/sentry/discover/arithmetic.py +++ b/src/sentry/discover/arithmetic.py @@ -4,7 +4,6 @@ from parsimonious.grammar import Grammar, NodeVisitor from sentry.exceptions import InvalidSearchQuery -from sentry.search.events.fields import get_function_alias # prefix on fields so we know they're equations EQUATION_PREFIX = "equation|" @@ -174,13 +173,14 @@ class ArithmeticVisitor(NodeVisitor): "count_miserable", } - def __init__(self, max_operators): + def __init__(self, max_operators: int, use_snql: bool): super().__init__() self.operators: int = 0 self.terms: int = 0 self.max_operators = max_operators if max_operators else self.DEFAULT_MAX_OPERATORS self.fields: set[str] = set() self.functions: set[str] = set() + self.use_snql = use_snql def visit_term(self, _, children): maybe_factor, remaining_adds = children @@ -264,15 +264,21 @@ def visit_function_value(self, node, children): if function_name not in self.function_allowlist: raise ArithmeticValidationError(f"{function_name} not allowed in arithmetic") self.functions.add(field) - # use the alias to reference the function in arithmetic - return get_function_alias(field) + if self.use_snql: + return field + else: + # use the alias to reference the function in arithmetic + # TODO(snql): once fully on snql no longer need the alias + from sentry.search.events.fields import get_function_alias + + return get_function_alias(field) def generic_visit(self, node, children): return children or node def parse_arithmetic( - equation: str, max_operators: Optional[int] = None + equation: str, max_operators: Optional[int] = None, use_snql: Optional[bool] = False ) -> Tuple[Operation, List[str], List[str]]: """Given a string equation try to parse it into a set of Operations""" try: @@ -281,7 +287,7 @@ def parse_arithmetic( raise ArithmeticParseError( "Unable to parse your equation, make sure it is well formed arithmetic" ) - visitor = ArithmeticVisitor(max_operators) + visitor = ArithmeticVisitor(max_operators, use_snql) result = visitor.visit(tree) if len(visitor.fields) > 0 and len(visitor.functions) > 0: raise ArithmeticValidationError("Cannot mix functions and fields in arithmetic") @@ -296,7 +302,8 @@ def resolve_equation_list( aggregates_only: Optional[bool] = False, auto_add: Optional[bool] = False, plain_math: Optional[bool] = False, -) -> Tuple[List[JsonQueryType], List[str]]: + use_snql: Optional[bool] = False, +) -> Tuple[List[JsonQueryType], List[str], List[Operation]]: """Given a list of equation strings, resolve them to their equivalent snuba json query formats :param equations: list of equations strings that haven't been parsed yet :param selected_columns: list of public aliases from the endpoint, can be a mix of fields and aggregates @@ -305,11 +312,13 @@ def resolve_equation_list( :param: auto_add: Optional parameter that will take any fields in the equation that's missing in the selected_columns and return a new list with them added :param plain_math: Allow equations that don't include any fields or functions, disabled by default + :param use_snql: Whether we're resolving for snql or not """ - resolved_equations = [] - resolved_columns = selected_columns[:] + resolved_equations: List[JsonQueryType] = [] + parsed_equations: List[Operation] = [] + resolved_columns: List[str] = selected_columns[:] for index, equation in enumerate(equations): - parsed_equation, fields, functions = parse_arithmetic(equation) + parsed_equation, fields, functions = parse_arithmetic(equation, use_snql=use_snql) if (len(fields) == 0 and len(functions) == 0) and not plain_math: raise InvalidSearchQuery("Equations need to include a field or function") @@ -335,9 +344,11 @@ def resolve_equation_list( # We just jam everything into resolved_equations because the json format can't take arithmetic in the aggregates # field, but can do the aliases in the selected_columns field - # TODO(snql): we can do better + # TODO: currently returning "resolved_equations" for the json syntax + # once we're converted to SnQL this should only return parsed_equations + parsed_equations.append(parsed_equation) resolved_equations.append(parsed_equation.to_snuba_json(f"equation[{index}]")) - return resolved_equations, resolved_columns + return resolved_equations, resolved_columns, parsed_equations def is_equation(field: str) -> bool: diff --git a/src/sentry/search/events/builder.py b/src/sentry/search/events/builder.py index c0bd9efb2ce5b1..824e281cabdefa 100644 --- a/src/sentry/search/events/builder.py +++ b/src/sentry/search/events/builder.py @@ -22,6 +22,7 @@ def __init__( params: ParamsType, query: Optional[str] = None, selected_columns: Optional[List[str]] = None, + equations: Optional[List[str]] = None, orderby: Optional[List[str]] = None, auto_fields: bool = False, auto_aggregations: bool = False, @@ -48,7 +49,7 @@ def __init__( # params depends on parse_query, and conditions being resolved first since there may be projects in conditions self.where += self.resolve_params() - self.columns = self.resolve_select(selected_columns) + self.columns = self.resolve_select(selected_columns, equations) self.orderby = self.resolve_orderby(orderby) @property @@ -73,7 +74,11 @@ def resolve_limitby(self, limitby: Optional[Tuple[str, int]]) -> Optional[LimitB def groupby(self) -> Optional[List[SelectType]]: if self.aggregates: self.validate_aggregate_arguments() - return [c for c in self.columns if c not in self.aggregates] + return [ + c + for c in self.columns + if c not in self.aggregates and not self.is_equation_column(c) + ] else: return [] diff --git a/src/sentry/search/events/fields.py b/src/sentry/search/events/fields.py index 3a7d2ffad713db..3805746c5c54f8 100644 --- a/src/sentry/search/events/fields.py +++ b/src/sentry/search/events/fields.py @@ -11,6 +11,7 @@ from snuba_sdk.function import CurriedFunction, Function from snuba_sdk.orderby import Direction, OrderBy +from sentry.discover.arithmetic import Operation, OperationSideType, resolve_equation_list from sentry.discover.models import TeamKeyTransaction from sentry.exceptions import InvalidSearchQuery from sentry.models import Project, ProjectTeam, ProjectTransactionThreshold @@ -2785,16 +2786,30 @@ def __init__( for alias, name in FUNCTION_ALIASES.items(): self.function_converter[alias] = self.function_converter[name].alias_as(alias) - def resolve_select(self, selected_columns: Optional[List[str]]) -> List[SelectType]: + def resolve_select( + self, selected_columns: Optional[List[str]], equations: Optional[List[str]] + ) -> List[SelectType]: """Given a public list of discover fields, construct the corresponding list of Snql Columns or Functions. Duplicate columns are ignored """ + if selected_columns is None: return [] resolved_columns = [] stripped_columns = [column.strip() for column in selected_columns] + if equations: + _, _, parsed_equations = resolve_equation_list( + equations, stripped_columns, use_snql=True + ) + resolved_columns.extend( + [ + self.resolve_equation(equation, f"equation[{index}]") + for index, equation in enumerate(parsed_equations) + ] + ) + # Add threshold config alias if there's a function that depends on it # TODO: this should be replaced with an explicit request for the project_threshold_config as a column for column in { @@ -2860,6 +2875,24 @@ def resolve_field(self, raw_field: str, alias: bool = False) -> Column: else: raise InvalidSearchQuery(f"Invalid characters in field {field}") + def resolve_equation(self, equation: Operation, alias: Optional[str] = None) -> SelectType: + """Convert this tree of Operations to the equivalent snql functions""" + lhs = self._resolve_equation_side(equation.lhs) + rhs = self._resolve_equation_side(equation.rhs) + result = Function(equation.operator, [lhs, rhs], alias) + return result + + def _resolve_equation_side(self, side: OperationSideType) -> Union[SelectType, float]: + if isinstance(side, Operation): + return self.resolve_equation(side) + elif isinstance(side, float): + return side + else: + return self.resolve_column(side) + + def is_equation_column(self, column: SelectType) -> bool: + return isinstance(column, CurriedFunction) and column.alias.startswith("equation[") + def resolve_orderby(self, orderby: Optional[Union[List[str], str]]) -> List[OrderBy]: """Given a list of public aliases, optionally prefixed by a `-` to represent direction, construct a list of Snql Orderbys diff --git a/src/sentry/snuba/discover.py b/src/sentry/snuba/discover.py index d69a11353cca09..6d30ee214f9b57 100644 --- a/src/sentry/snuba/discover.py +++ b/src/sentry/snuba/discover.py @@ -240,6 +240,7 @@ def query( params, query=query, selected_columns=selected_columns, + equations=equations, orderby=orderby, auto_fields=auto_fields, auto_aggregations=auto_aggregations, @@ -330,7 +331,7 @@ def prepare_discover_query( with sentry_sdk.start_span(op="discover.discover", description="query.field_translations"): if equations is not None: - resolved_equations, _ = resolve_equation_list(equations, selected_columns) + resolved_equations, _, _ = resolve_equation_list(equations, selected_columns) else: resolved_equations = [] @@ -408,7 +409,7 @@ def get_timeseries_snuba_filter(selected_columns, query, params): equations, columns = categorize_columns(selected_columns) if len(equations) > 0: - resolved_equations, updated_columns = resolve_equation_list( + resolved_equations, updated_columns, _ = resolve_equation_list( equations, columns, aggregates_only=True, auto_add=True ) else: From f92f56bb3a628118fbcb94e37d22b54a5f97ea75 Mon Sep 17 00:00:00 2001 From: William Mak Date: Mon, 18 Oct 2021 19:11:46 -0400 Subject: [PATCH 2/3] fix: tests --- tests/sentry/search/events/test_fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sentry/search/events/test_fields.py b/tests/sentry/search/events/test_fields.py index 92ca61f3c9ee33..a500c0a9d31c5b 100644 --- a/tests/sentry/search/events/test_fields.py +++ b/tests/sentry/search/events/test_fields.py @@ -1632,7 +1632,7 @@ def test_invalid_count_if_fields(self): def resolve_snql_fieldlist(fields): - return QueryFields(Dataset.Discover, {}).resolve_select(fields) + return QueryFields(Dataset.Discover, {}).resolve_select(fields, []) @pytest.mark.parametrize( From 88109d1351d0a3deb256eb1380d90b2200ad93b5 Mon Sep 17 00:00:00 2001 From: William Mak Date: Tue, 19 Oct 2021 15:01:22 -0400 Subject: [PATCH 3/3] ref: Updating comment and reordering to make comments more readable --- src/sentry/discover/arithmetic.py | 2 +- src/sentry/search/events/fields.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/sentry/discover/arithmetic.py b/src/sentry/discover/arithmetic.py index a10ad0af05c306..49db33772b08b2 100644 --- a/src/sentry/discover/arithmetic.py +++ b/src/sentry/discover/arithmetic.py @@ -344,10 +344,10 @@ def resolve_equation_list( # We just jam everything into resolved_equations because the json format can't take arithmetic in the aggregates # field, but can do the aliases in the selected_columns field + resolved_equations.append(parsed_equation.to_snuba_json(f"equation[{index}]")) # TODO: currently returning "resolved_equations" for the json syntax # once we're converted to SnQL this should only return parsed_equations parsed_equations.append(parsed_equation) - resolved_equations.append(parsed_equation.to_snuba_json(f"equation[{index}]")) return resolved_equations, resolved_columns, parsed_equations diff --git a/src/sentry/search/events/fields.py b/src/sentry/search/events/fields.py index 3805746c5c54f8..6a23016f9d40fe 100644 --- a/src/sentry/search/events/fields.py +++ b/src/sentry/search/events/fields.py @@ -2891,6 +2891,9 @@ def _resolve_equation_side(self, side: OperationSideType) -> Union[SelectType, f return self.resolve_column(side) def is_equation_column(self, column: SelectType) -> bool: + """Equations are only ever functions, and shouldn't be literals so we + need to check that the column is a Function + """ return isinstance(column, CurriedFunction) and column.alias.startswith("equation[") def resolve_orderby(self, orderby: Optional[Union[List[str], str]]) -> List[OrderBy]: