From 4541a95766609d1c900d7bdd2c94224c00178daf Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 29 Sep 2022 11:42:22 +0200 Subject: [PATCH 1/6] feat(metrics): Add support for 'tuple' operation --- src/sentry/snuba/metrics/query_builder.py | 10 +- .../test_metrics_enhanced_performance.py | 163 +++++++++++++++++- 2 files changed, 170 insertions(+), 3 deletions(-) diff --git a/src/sentry/snuba/metrics/query_builder.py b/src/sentry/snuba/metrics/query_builder.py index a5ed49dc5e8aaa..1de2e6517ccbc1 100644 --- a/src/sentry/snuba/metrics/query_builder.py +++ b/src/sentry/snuba/metrics/query_builder.py @@ -88,7 +88,7 @@ def parse_field(field: str) -> MetricField: # These are only allowed because the parser in metrics_sessions_v2 # generates them. Long term we should not allow any functions, but rather # a limited expression language with only AND, OR, IN and NOT IN -FUNCTION_ALLOWLIST = ("and", "or", "equals", "in") +FUNCTION_ALLOWLIST = ("and", "or", "equals", "in", "tuple") def resolve_tags( @@ -104,7 +104,13 @@ def resolve_tags( elements = [resolve_tags(use_case_id, org_id, item, is_tag_value=True) for item in input_] # Lists are either arguments to IN or NOT IN. In both cases, we can # drop unknown strings: - return [x for x in elements if x != STRING_NOT_FOUND] + filtered_elements = [x for x in elements if x != STRING_NOT_FOUND] + # We check whether it is a list or tuple in order to know which type to return. This is needed + # because in the "tuple" function the parameters must be a list of tuples and not a list of lists. + if isinstance(input_, list): + return filtered_elements + elif isinstance(input_, tuple): + return tuple(filtered_elements) if isinstance(input_, Function): if input_.function == "ifNull": # This was wrapped automatically by QueryBuilder, remove wrapper diff --git a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py index 8c955c3aca4230..402d9e65f41156 100644 --- a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py +++ b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py @@ -10,7 +10,7 @@ from django.utils import timezone from django.utils.datastructures import MultiValueDict from freezegun import freeze_time -from snuba_sdk import Direction, Granularity, Limit, Offset +from snuba_sdk import Column, Condition, Direction, Function, Granularity, Limit, Offset, Op from sentry.api.utils import InvalidParams from sentry.sentry_metrics import indexer @@ -349,6 +349,167 @@ def test_custom_measurement_query_with_invalid_mri(self): use_case_id=UseCaseKey.PERFORMANCE, ) + @freeze_time("2022-09-29 11:30:00") + def test_query_with_unary_tuple_condition(self): + now = timezone.now() + + for value, transaction in ((10, "/foo"), (20, "/bar"), (30, "/lorem")): + self.store_metric( + org_id=self.organization.id, + project_id=self.project.id, + type="distribution", + name=TransactionMRI.DURATION.value, + tags={"transaction": transaction}, + timestamp=(now - timedelta(seconds=1)).timestamp(), + value=value, + use_case_id=UseCaseKey.PERFORMANCE, + ) + + metrics_query = MetricsQuery( + org_id=self.organization.id, + project_ids=[self.project.id], + select=[ + MetricField( + op="count", + metric_mri=TransactionMRI.DURATION.value, + ), + ], + start=now - timedelta(minutes=1), + end=now, + groupby=[], + where=[ + Condition( + lhs=Function( + function="tuple", + parameters=[ + Column( + name="tags[transaction]", + ) + ], + ), + op=Op.IN, + rhs=Function( + function="tuple", + parameters=[("/foo",), ("/bar",)], + ), + ) + ], + granularity=Granularity(granularity=60), + limit=Limit(limit=3), + offset=Offset(offset=0), + include_series=False, + ) + + data = get_series( + [self.project], + metrics_query=metrics_query, + include_meta=True, + use_case_id=UseCaseKey.PERFORMANCE, + ) + + groups = data["groups"] + assert len(groups) == 1 + + expected_count = 2 + expected_alias = "count(transaction.duration)" + assert groups[0]["totals"] == { + expected_alias: expected_count, + } + assert data["meta"] == sorted( + [ + {"name": expected_alias, "type": "UInt64"}, + ], + key=lambda elem: elem["name"], + ) + + @freeze_time("2022-09-29 11:30:00") + def test_query_with_binary_tuple_condition(self): + now = timezone.now() + + for value, transaction, platform in ( + ( + 10, + "/foo", + "android", + ), + ( + 20, + "/bar", + "ios", + ), + (30, "/lorem", "windows"), + ): + self.store_metric( + org_id=self.organization.id, + project_id=self.project.id, + type="distribution", + name=TransactionMRI.DURATION.value, + tags={"transaction": transaction, "platform": platform}, + timestamp=(now - timedelta(seconds=1)).timestamp(), + value=value, + use_case_id=UseCaseKey.PERFORMANCE, + ) + + metrics_query = MetricsQuery( + org_id=self.organization.id, + project_ids=[self.project.id], + select=[ + MetricField( + op="count", + metric_mri=TransactionMRI.DURATION.value, + ), + ], + start=now - timedelta(minutes=1), + end=now, + groupby=[], + where=[ + Condition( + lhs=Function( + function="tuple", + parameters=[ + Column( + name="tags[transaction]", + ), + Column( + name="tags[platform]", + ), + ], + ), + op=Op.IN, + rhs=Function( + function="tuple", + parameters=[("/foo", "android"), ("/bar", "ios")], + ), + ) + ], + granularity=Granularity(granularity=60), + limit=Limit(limit=3), + offset=Offset(offset=0), + include_series=False, + ) + + data = get_series( + [self.project], + metrics_query=metrics_query, + include_meta=True, + use_case_id=UseCaseKey.PERFORMANCE, + ) + + groups = data["groups"] + assert len(groups) == 1 + + expected_count = 2 + expected_alias = "count(transaction.duration)" + assert groups[0]["totals"] == { + expected_alias: expected_count, + } + assert data["meta"] == sorted( + [ + {"name": expected_alias, "type": "UInt64"}, + ], + key=lambda elem: elem["name"], + ) + @freeze_time("2022-09-22 10:01:09") def test_count_transaction_with_valid_condition(self): now = timezone.now() From b6d3e6222e90316b25eaea52b7a4b8e03d32cd74 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 29 Sep 2022 13:55:32 +0200 Subject: [PATCH 2/6] Add test for resolve_tags --- src/sentry/snuba/metrics/query_builder.py | 2 +- .../snuba/metrics/test_query_builder.py | 48 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/sentry/snuba/metrics/query_builder.py b/src/sentry/snuba/metrics/query_builder.py index 1de2e6517ccbc1..b23494502be9c5 100644 --- a/src/sentry/snuba/metrics/query_builder.py +++ b/src/sentry/snuba/metrics/query_builder.py @@ -103,7 +103,7 @@ def resolve_tags( if isinstance(input_, (list, tuple)): elements = [resolve_tags(use_case_id, org_id, item, is_tag_value=True) for item in input_] # Lists are either arguments to IN or NOT IN. In both cases, we can - # drop unknown strings: + # drop unknown strings. filtered_elements = [x for x in elements if x != STRING_NOT_FOUND] # We check whether it is a list or tuple in order to know which type to return. This is needed # because in the "tuple" function the parameters must be a list of tuples and not a list of lists. diff --git a/tests/sentry/snuba/metrics/test_query_builder.py b/tests/sentry/snuba/metrics/test_query_builder.py index 95437b0bd8cbf7..fc0fb6be38def9 100644 --- a/tests/sentry/snuba/metrics/test_query_builder.py +++ b/tests/sentry/snuba/metrics/test_query_builder.py @@ -238,6 +238,54 @@ def test_parse_query(query_string, expected): assert parsed == expected() +def test_resolve_tags_with_tuple(): + org_id = ORG_ID + use_case_id = UseCaseKey.PERFORMANCE + + transactions = ["/foo", "/bar"] + for transaction in transactions: + indexer.record(use_case_id=use_case_id, org_id=org_id, string=transaction) + + resolved_query = resolve_tags( + use_case_id, + org_id, + Condition( + lhs=Function( + function="tuple", + parameters=[ + Column( + name="tags[transaction]", + ) + ], + ), + op=Op.IN, + rhs=Function( + function="tuple", + parameters=[(transaction,) for transaction in transactions], + ), + ), + ) + + assert resolved_query == Condition( + lhs=Function( + function="tuple", + parameters=[ + Column( + name=resolve_tag_key(use_case_id, org_id, "transaction"), + ) + ], + ), + op=Op.IN, + rhs=Function( + function="tuple", + parameters=[ + (resolve_tag_value(use_case_id, org_id, transaction),) + for transaction in transactions + ], + ), + ) + + @freeze_time("2018-12-11 03:21:00") def test_round_range(): start, end, interval = get_date_range({"statsPeriod": "2d"}) From 42de6c0becb8c89e5308c10177e210371bef00cf Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 29 Sep 2022 14:14:29 +0200 Subject: [PATCH 3/6] Change limit --- .../test_metrics_layer/test_metrics_enhanced_performance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py index 402d9e65f41156..603bd9df4a22b9 100644 --- a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py +++ b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py @@ -395,7 +395,7 @@ def test_query_with_unary_tuple_condition(self): ) ], granularity=Granularity(granularity=60), - limit=Limit(limit=3), + limit=Limit(limit=1), offset=Offset(offset=0), include_series=False, ) From fc11b12cdb6bf38e29f7d9b899fe20245d927076 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 29 Sep 2022 15:15:21 +0200 Subject: [PATCH 4/6] Refactor tests --- .../test_metrics_enhanced_performance.py | 90 +--------- .../snuba/metrics/test_query_builder.py | 159 ++++++++++++------ 2 files changed, 112 insertions(+), 137 deletions(-) diff --git a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py index 603bd9df4a22b9..cb4d85b661e1a8 100644 --- a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py +++ b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py @@ -350,7 +350,7 @@ def test_custom_measurement_query_with_invalid_mri(self): ) @freeze_time("2022-09-29 11:30:00") - def test_query_with_unary_tuple_condition(self): + def test_query_with_tuple_condition(self): now = timezone.now() for value, transaction in ((10, "/foo"), (20, "/bar"), (30, "/lorem")): @@ -422,94 +422,6 @@ def test_query_with_unary_tuple_condition(self): key=lambda elem: elem["name"], ) - @freeze_time("2022-09-29 11:30:00") - def test_query_with_binary_tuple_condition(self): - now = timezone.now() - - for value, transaction, platform in ( - ( - 10, - "/foo", - "android", - ), - ( - 20, - "/bar", - "ios", - ), - (30, "/lorem", "windows"), - ): - self.store_metric( - org_id=self.organization.id, - project_id=self.project.id, - type="distribution", - name=TransactionMRI.DURATION.value, - tags={"transaction": transaction, "platform": platform}, - timestamp=(now - timedelta(seconds=1)).timestamp(), - value=value, - use_case_id=UseCaseKey.PERFORMANCE, - ) - - metrics_query = MetricsQuery( - org_id=self.organization.id, - project_ids=[self.project.id], - select=[ - MetricField( - op="count", - metric_mri=TransactionMRI.DURATION.value, - ), - ], - start=now - timedelta(minutes=1), - end=now, - groupby=[], - where=[ - Condition( - lhs=Function( - function="tuple", - parameters=[ - Column( - name="tags[transaction]", - ), - Column( - name="tags[platform]", - ), - ], - ), - op=Op.IN, - rhs=Function( - function="tuple", - parameters=[("/foo", "android"), ("/bar", "ios")], - ), - ) - ], - granularity=Granularity(granularity=60), - limit=Limit(limit=3), - offset=Offset(offset=0), - include_series=False, - ) - - data = get_series( - [self.project], - metrics_query=metrics_query, - include_meta=True, - use_case_id=UseCaseKey.PERFORMANCE, - ) - - groups = data["groups"] - assert len(groups) == 1 - - expected_count = 2 - expected_alias = "count(transaction.duration)" - assert groups[0]["totals"] == { - expected_alias: expected_count, - } - assert data["meta"] == sorted( - [ - {"name": expected_alias, "type": "UInt64"}, - ], - key=lambda elem: elem["name"], - ) - @freeze_time("2022-09-22 10:01:09") def test_count_transaction_with_valid_condition(self): now = timezone.now() diff --git a/tests/sentry/snuba/metrics/test_query_builder.py b/tests/sentry/snuba/metrics/test_query_builder.py index fc0fb6be38def9..dd382e1f153826 100644 --- a/tests/sentry/snuba/metrics/test_query_builder.py +++ b/tests/sentry/snuba/metrics/test_query_builder.py @@ -238,54 +238,6 @@ def test_parse_query(query_string, expected): assert parsed == expected() -def test_resolve_tags_with_tuple(): - org_id = ORG_ID - use_case_id = UseCaseKey.PERFORMANCE - - transactions = ["/foo", "/bar"] - for transaction in transactions: - indexer.record(use_case_id=use_case_id, org_id=org_id, string=transaction) - - resolved_query = resolve_tags( - use_case_id, - org_id, - Condition( - lhs=Function( - function="tuple", - parameters=[ - Column( - name="tags[transaction]", - ) - ], - ), - op=Op.IN, - rhs=Function( - function="tuple", - parameters=[(transaction,) for transaction in transactions], - ), - ), - ) - - assert resolved_query == Condition( - lhs=Function( - function="tuple", - parameters=[ - Column( - name=resolve_tag_key(use_case_id, org_id, "transaction"), - ) - ], - ), - op=Op.IN, - rhs=Function( - function="tuple", - parameters=[ - (resolve_tag_value(use_case_id, org_id, transaction),) - for transaction in transactions - ], - ), - ) - - @freeze_time("2018-12-11 03:21:00") def test_round_range(): start, end, interval = get_date_range({"statsPeriod": "2d"}) @@ -1084,3 +1036,114 @@ def test_valid_latest_release_alias_filter(self): rhs=["bar"], ) ] + + +class ResolveTagsTestCase(TestCase): + def setUp(self): + self.org_id = ORG_ID + self.use_case_id = UseCaseKey.PERFORMANCE + + def test_resolve_tags_with_unary_tuple(self): + transactions = ["/foo", "/bar"] + + indexer.record(self.use_case_id, self.org_id, string="transaction") + + for transaction in transactions: + indexer.record(use_case_id=self.use_case_id, org_id=self.org_id, string=transaction) + + resolved_query = resolve_tags( + self.use_case_id, + self.org_id, + Condition( + lhs=Function( + function="tuple", + parameters=[ + Column( + name="tags[transaction]", + ) + ], + ), + op=Op.IN, + rhs=Function( + function="tuple", + parameters=[(transaction,) for transaction in transactions], + ), + ), + ) + + assert resolved_query == Condition( + lhs=Function( + function="tuple", + parameters=[ + Column( + name=resolve_tag_key(self.use_case_id, self.org_id, "transaction"), + ) + ], + ), + op=Op.IN, + rhs=Function( + function="tuple", + parameters=[ + (resolve_tag_value(self.use_case_id, self.org_id, transaction),) + for transaction in transactions + ], + ), + ) + + def test_resolve_tags_with_binary_tuple(self): + tags = [("/foo", "ios"), ("/bar", "android")] + + indexer.record(self.use_case_id, self.org_id, string="transaction") + indexer.record(self.use_case_id, self.org_id, string="platform") + + for transaction, platform in tags: + indexer.record(use_case_id=self.use_case_id, org_id=self.org_id, string=transaction) + indexer.record(use_case_id=self.use_case_id, org_id=self.org_id, string=platform) + + resolved_query = resolve_tags( + self.use_case_id, + self.org_id, + Condition( + lhs=Function( + function="tuple", + parameters=[ + Column( + name="tags[transaction]", + ), + Column( + name="tags[platform]", + ), + ], + ), + op=Op.IN, + rhs=Function( + function="tuple", + parameters=[(transaction, platform) for transaction, platform in tags], + ), + ), + ) + + assert resolved_query == Condition( + lhs=Function( + function="tuple", + parameters=[ + Column( + name=resolve_tag_key(self.use_case_id, self.org_id, "transaction"), + ), + Column( + name=resolve_tag_key(self.use_case_id, self.org_id, "platform"), + ), + ], + ), + op=Op.IN, + rhs=Function( + function="tuple", + parameters=[ + ( + resolve_tag_value(self.use_case_id, self.org_id, transaction), + resolve_tag_value(self.use_case_id, self.org_id, platform), + ) + for transaction, platform in tags + ], + ), + ) From bec0cc01bd93965513a99365274f867da494215f Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 29 Sep 2022 15:35:01 +0200 Subject: [PATCH 5/6] Fix PR comments --- src/sentry/snuba/metrics/query_builder.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/sentry/snuba/metrics/query_builder.py b/src/sentry/snuba/metrics/query_builder.py index b23494502be9c5..74520e194b87cd 100644 --- a/src/sentry/snuba/metrics/query_builder.py +++ b/src/sentry/snuba/metrics/query_builder.py @@ -107,10 +107,7 @@ def resolve_tags( filtered_elements = [x for x in elements if x != STRING_NOT_FOUND] # We check whether it is a list or tuple in order to know which type to return. This is needed # because in the "tuple" function the parameters must be a list of tuples and not a list of lists. - if isinstance(input_, list): - return filtered_elements - elif isinstance(input_, tuple): - return tuple(filtered_elements) + return filtered_elements if isinstance(input_, list) else tuple(filtered_elements) if isinstance(input_, Function): if input_.function == "ifNull": # This was wrapped automatically by QueryBuilder, remove wrapper From 8e2c50809dbbde4a397ea3286a17ce417fb7da84 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 29 Sep 2022 15:39:03 +0200 Subject: [PATCH 6/6] Fix PR comments --- tests/sentry/snuba/metrics/test_query_builder.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/sentry/snuba/metrics/test_query_builder.py b/tests/sentry/snuba/metrics/test_query_builder.py index dd382e1f153826..3fffbae2f5f4dd 100644 --- a/tests/sentry/snuba/metrics/test_query_builder.py +++ b/tests/sentry/snuba/metrics/test_query_builder.py @@ -1046,9 +1046,7 @@ def setUp(self): def test_resolve_tags_with_unary_tuple(self): transactions = ["/foo", "/bar"] - indexer.record(self.use_case_id, self.org_id, string="transaction") - - for transaction in transactions: + for transaction in ["transaction"] + transactions: indexer.record(use_case_id=self.use_case_id, org_id=self.org_id, string=transaction) resolved_query = resolve_tags( @@ -1093,10 +1091,7 @@ def test_resolve_tags_with_unary_tuple(self): def test_resolve_tags_with_binary_tuple(self): tags = [("/foo", "ios"), ("/bar", "android")] - indexer.record(self.use_case_id, self.org_id, string="transaction") - indexer.record(self.use_case_id, self.org_id, string="platform") - - for transaction, platform in tags: + for transaction, platform in [("transaction", "platform")] + tags: indexer.record(use_case_id=self.use_case_id, org_id=self.org_id, string=transaction) indexer.record(use_case_id=self.use_case_id, org_id=self.org_id, string=platform)