Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion src/sentry/api/serializers/models/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,12 @@ def _seen_stats_performance(
) -> Mapping[Group, SeenStats]:
pass

@abstractmethod
def _seen_stats_generic(
    self, generic_issue_list: Sequence[Group], user
) -> Mapping[Group, SeenStats]:
    """Return SeenStats keyed by Group for generic (issue-platform) groups.

    Counterpart of _seen_stats_error/_seen_stats_performance for groups
    whose category is neither ERROR nor PERFORMANCE; concrete serializer
    subclasses supply the backing query.
    """
    pass

def _expand(self, key) -> bool:
if self.expand is None:
return False
Expand Down Expand Up @@ -460,11 +466,20 @@ def _get_seen_stats(
perf_issues = [
group for group in item_list if GroupCategory.PERFORMANCE == group.issue_category
]
generic_issues = [
group
for group in item_list
if group.issue_category
and group.issue_category not in (GroupCategory.ERROR, GroupCategory.PERFORMANCE)
]

# bulk query for the seen_stats by type
error_stats = (self._seen_stats_error(error_issues, user) if error_issues else {}) or {}
perf_stats = (self._seen_stats_performance(perf_issues, user) if perf_issues else {}) or {}
agg_stats = {**error_stats, **perf_stats}
generic_stats = (
self._seen_stats_generic(generic_issues, user) if generic_issues else {}
) or {}
agg_stats = {**error_stats, **perf_stats, **generic_stats}
# combine results back
return {group: agg_stats.get(group, {}) for group in item_list}

Expand Down Expand Up @@ -747,6 +762,15 @@ def _seen_stats_performance(
tagstore.get_perf_group_list_tag_value,
)

def _seen_stats_generic(
    self, generic_issue_list: Sequence[Group], user
) -> Mapping[Group, SeenStats]:
    """Seen-stats for generic (issue-platform) groups, backed by tagstore.

    Reuses the shared __seen_stats_impl, swapping in the generic-group
    tagstore endpoints for user counts and per-group tag values.
    """
    return self.__seen_stats_impl(
        generic_issue_list,
        tagstore.get_generic_groups_user_counts,
        tagstore.get_generic_group_list_tag_value,
    )

def __seen_stats_impl(
self,
issue_list: Sequence[Group],
Expand Down Expand Up @@ -918,6 +942,22 @@ def _seen_stats_performance(
self.environment_ids,
)

def _seen_stats_generic(
    self, generic_issue_list: Sequence[Group], user
) -> Mapping[Group, SeenStats]:
    """Seen-stats for generic (issue-platform) groups via a direct Snuba query."""
    return self._parse_seen_stats_results(
        self._execute_generic_seen_stats_query(
            item_list=generic_issue_list,
            start=self.start,
            end=self.end,
            conditions=self.conditions,
            environment_ids=self.environment_ids,
        ),
        generic_issue_list,
        # when any time bound or extra condition narrows the query, take
        # first_seen/times_seen from the query result instead of the model
        # (matches _parse_seen_stats_results' use_result_first_seen_times_seen)
        bool(self.start or self.end or self.conditions),
        self.environment_ids,
    )

@staticmethod
def _execute_error_seen_stats_query(
item_list, start=None, end=None, conditions=None, environment_ids=None
Expand Down Expand Up @@ -974,6 +1014,32 @@ def _execute_perf_seen_stats_query(
referrer="serializers.GroupSerializerSnuba._execute_perf_seen_stats_query",
)

@staticmethod
def _execute_generic_seen_stats_query(
    item_list, start=None, end=None, conditions=None, environment_ids=None
):
    """Query the IssuePlatform dataset for per-group seen stats.

    Aggregates times_seen, first_seen, last_seen and distinct-user ``count``
    grouped by group_id for the groups in ``item_list``, optionally bounded
    by ``start``/``end``, extra legacy-format ``conditions`` and
    ``environment_ids``.  Results are consumed by _parse_seen_stats_results.
    """
    # NOTE(review): still the legacy query format; porting to SnQL is
    # tracked separately.
    project_ids = list({item.project_id for item in item_list})
    group_ids = [item.id for item in item_list]
    aggregations = [
        ["count()", "", "times_seen"],
        ["min", "timestamp", "first_seen"],
        ["max", "timestamp", "last_seen"],
        ["uniq", "tags[sentry:user]", "count"],
    ]
    filters = {"project_id": project_ids, "group_id": group_ids}
    if environment_ids:
        filters["environment"] = environment_ids
    return aliased_query(
        dataset=Dataset.IssuePlatform,
        start=start,
        end=end,
        groupby=["group_id"],
        conditions=conditions,
        filter_keys=filters,
        aggregations=aggregations,
        referrer="serializers.GroupSerializerSnuba._execute_generic_seen_stats_query",
    )

@staticmethod
def _parse_seen_stats_results(
result, item_list, use_result_first_seen_times_seen, environment_ids=None
Expand Down
12 changes: 12 additions & 0 deletions src/sentry/tagstore/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ class TagStorage(Service):
"get_group_tag_values",
"get_group_list_tag_value",
"get_perf_group_list_tag_value",
"get_generic_group_list_tag_value",
"get_tag_keys_for_projects",
"get_groups_user_counts",
"get_perf_groups_user_counts",
"get_generic_groups_user_counts",
"get_group_event_filter",
"get_group_tag_value_count",
"get_top_group_tag_values",
Expand Down Expand Up @@ -192,6 +194,11 @@ def get_perf_group_list_tag_value(
):
raise NotImplementedError

def get_generic_group_list_tag_value(
    self, project_ids, group_id_list, environment_ids, key, value
):
    """Per-group stats for generic (issue-platform) groups matching tag
    ``key`` == ``value``.

    Abstract hook in the TagStorage service interface; concrete backends
    (e.g. the snuba backend) must override this.
    """
    raise NotImplementedError

def get_group_event_filter(self, project_id, group_id, environment_ids, tags, start, end):
"""
>>> get_group_event_filter(1, 2, 3, {'key1': 'value1', 'key2': 'value2'})
Expand Down Expand Up @@ -258,6 +265,11 @@ def get_perf_groups_user_counts(
):
raise NotImplementedError

def get_generic_groups_user_counts(
    self, project_ids, group_ids, environment_ids, start=None, end=None
):
    """Distinct-user counts per generic (issue-platform) group.

    Abstract hook in the TagStorage service interface; concrete backends
    must override this.
    """
    raise NotImplementedError

def get_group_tag_value_count(self, group, environment_id, key):
"""
>>> get_group_tag_value_count(group, 3, 'key1')
Expand Down
97 changes: 87 additions & 10 deletions src/sentry/tagstore/snuba/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,25 +440,35 @@ def get_group_tag_values(self, group, environment_id, key):
)
return set(key.top_values)

def get_group_list_tag_value(self, project_ids, group_id_list, environment_ids, key, value):
tag = f"tags[{key}]"
def __get_group_list_tag_value(
self,
project_ids,
group_id_list,
environment_ids,
key,
value,
dataset,
extra_conditions,
extra_aggregations,
referrer,
):
filters = {"project_id": project_ids, "group_id": group_id_list}
if environment_ids:
filters["environment"] = environment_ids
conditions = [[tag, "=", value], DEFAULT_TYPE_CONDITION]
aggregations = [
conditions = (extra_conditions if extra_conditions else []) + [[f"tags[{key}]", "=", value]]
aggregations = (extra_aggregations if extra_aggregations else []) + [
["count()", "", "times_seen"],
["min", SEEN_COLUMN, "first_seen"],
["max", SEEN_COLUMN, "last_seen"],
]

result = snuba.query(
dataset=Dataset.Events,
dataset=dataset,
groupby=["group_id"],
conditions=conditions,
filter_keys=filters,
aggregations=aggregations,
referrer="tagstore.get_group_list_tag_value",
referrer=referrer,
)

return {
Expand All @@ -468,6 +478,19 @@ def get_group_list_tag_value(self, project_ids, group_id_list, environment_ids,
for group_id, data in result.items()
}

def get_group_list_tag_value(self, project_ids, group_id_list, environment_ids, key, value):
    """Per-group seen stats for error groups matching tag ``key`` == ``value``.

    Delegates to the shared private implementation against the Events
    dataset, keeping the legacy error-type condition.
    """
    return self.__get_group_list_tag_value(
        project_ids,
        group_id_list,
        environment_ids,
        key,
        value,
        Dataset.Events,
        [DEFAULT_TYPE_CONDITION],
        [],
        "tagstore.get_group_list_tag_value",
    )

def get_perf_group_list_tag_value(
self, project_ids, group_id_list, environment_ids, key, value
):
Expand Down Expand Up @@ -497,6 +520,21 @@ def get_perf_group_list_tag_value(
for group_id, data in result.items()
}

def get_generic_group_list_tag_value(
    self, project_ids, group_id_list, environment_ids, key, value
):
    """Per-group seen stats for generic (issue-platform) groups matching
    tag ``key`` == ``value``.

    Same shared implementation as get_group_list_tag_value, but against
    the IssuePlatform dataset and without the error-type condition.
    """
    return self.__get_group_list_tag_value(
        project_ids,
        group_id_list,
        environment_ids,
        key,
        value,
        Dataset.IssuePlatform,
        [],
        [],
        "tagstore.get_generic_group_list_tag_value",
    )

def get_group_seen_values_for_environments(
self, project_ids, group_id_list, environment_ids, start=None, end=None
):
Expand Down Expand Up @@ -724,26 +762,51 @@ def get_group_tag_values_for_users(self, event_users, limit=100):
)
return values

def get_groups_user_counts(self, project_ids, group_ids, environment_ids, start=None, end=None):
def __get_groups_user_counts(
    self,
    project_ids,
    group_ids,
    environment_ids,
    start=None,
    end=None,
    dataset=Dataset.Events,
    extra_aggregations=None,
    referrer="tagstore.__get_groups_user_counts",
):
    """Shared implementation for per-group distinct-user counts.

    Counts unique ``tags[sentry:user]`` values grouped by group_id on the
    given dataset, optionally restricted to ``environment_ids`` and the
    ``start``/``end`` window.  Returns a defaultdict(int) mapping group_id
    to its non-zero user count.
    """
    filter_keys = {"project_id": project_ids, "group_id": group_ids}
    if environment_ids:
        filter_keys["environment"] = environment_ids

    # caller-supplied aggregations come first, then the user count itself
    aggregations = list(extra_aggregations) if extra_aggregations else []
    aggregations.append(["uniq", "tags[sentry:user]", "count"])

    user_counts = snuba.query(
        dataset=dataset,
        start=start,
        end=end,
        groupby=["group_id"],
        conditions=None,
        filter_keys=filter_keys,
        aggregations=aggregations,
        orderby="-count",
        referrer=referrer,
    )

    # drop falsy counts; missing groups default to 0 for callers
    return defaultdict(int, {gid: cnt for gid, cnt in user_counts.items() if cnt})

def get_groups_user_counts(self, project_ids, group_ids, environment_ids, start=None, end=None):
    """Distinct-user counts per error group, queried from the Events dataset."""
    return self.__get_groups_user_counts(
        project_ids,
        group_ids,
        environment_ids,
        start,
        end,
        Dataset.Events,
        [],
        "tagstore.get_groups_user_counts",
    )

def get_perf_groups_user_counts(
self, project_ids, group_ids, environment_ids, start=None, end=None
):
Expand Down Expand Up @@ -774,6 +837,20 @@ def get_perf_groups_user_counts(
},
)

def get_generic_groups_user_counts(
    self, project_ids, group_ids, environment_ids, start=None, end=None
):
    """Distinct-user counts per generic (issue-platform) group.

    Identical to get_groups_user_counts except that it queries the
    IssuePlatform dataset via the shared private implementation.
    """
    return self.__get_groups_user_counts(
        project_ids,
        group_ids,
        environment_ids,
        start,
        end,
        Dataset.IssuePlatform,
        [],
        "tagstore.get_generic_groups_user_counts",
    )

def get_tag_value_paginator(
self,
project_id,
Expand Down
50 changes: 50 additions & 0 deletions tests/snuba/api/serializers/test_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from sentry.testutils.silo import region_silo_test
from sentry.types.integrations import ExternalProviders
from sentry.types.issues import GroupType
from tests.sentry.issues.test_utils import SearchIssueTestMixin


@region_silo_test
Expand Down Expand Up @@ -469,3 +470,52 @@ def test_perf_seen_stats(self):
assert iso_format(result["lastSeen"]) == iso_format(timestamp + timedelta(minutes=2))
assert iso_format(result["firstSeen"]) == iso_format(timestamp + timedelta(minutes=1))
assert result["count"] == str(times + 1)


@region_silo_test
class ProfilingGroupSerializerSnubaTest(
    APITestCase,
    SnubaTestCase,
    SearchIssueTestMixin,
):
    """Seen-stats serialization for profiling (generic/issue-platform) groups."""

    def test_profiling_seen_stats(self):
        """Six events from six distinct users, all on one fingerprint, must
        serialize to userCount=6, count=6, and first/last seen spanning the
        first and sixth event timestamps."""
        proj = self.create_project()
        environment = self.create_environment(project=proj)

        first_group_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group1"
        timestamp = timezone.now().replace(hour=0, minute=0, second=0)
        times = 5
        for incr in range(0, times):
            # user_0 - user_4: one event each for first_group, one minute apart
            self.store_search_issue(
                proj.id,
                incr,
                [first_group_fingerprint],
                environment.name,
                timestamp + timedelta(minutes=incr),
            )

        # user_5: a sixth event for the SAME group (same fingerprint)
        event, issue_occurrence, group_info = self.store_search_issue(
            proj.id,
            5,
            [first_group_fingerprint],
            environment.name,
            timestamp + timedelta(minutes=5),
        )

        first_group = group_info.group

        # serialize with environment + time-window filters so the generic
        # seen-stats query path (not the model fields) supplies the stats
        result = serialize(
            first_group,
            serializer=GroupSerializerSnuba(
                environment_ids=[environment.id],
                start=timestamp - timedelta(days=1),
                end=timestamp + timedelta(days=1),
            ),
        )

        assert result["userCount"] == 6
        assert iso_format(result["lastSeen"]) == iso_format(timestamp + timedelta(minutes=5))
        assert iso_format(result["firstSeen"]) == iso_format(timestamp)
        assert result["count"] == str(times + 1)
Loading