-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
Feat(metrics): Crash free breakdown on metrics backend [INGEST-249] #28945
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6e376cd
ab07ea4
3437397
3c0a8d7
1293d88
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
|
|
||
| from sentry.models.project import Project | ||
| from sentry.release_health.base import ( | ||
| CrashFreeBreakdown, | ||
| CurrentAndPreviousCrashFreeRates, | ||
| EnvironmentName, | ||
| OrganizationId, | ||
|
|
@@ -22,13 +23,17 @@ | |
| ) | ||
| from sentry.sentry_metrics import indexer | ||
| from sentry.snuba.dataset import Dataset, EntityKey | ||
| from sentry.utils.snuba import raw_snql_query | ||
| from sentry.utils.snuba import QueryOutsideRetentionError, raw_snql_query | ||
|
|
||
|
|
||
| class MetricIndexNotFound(Exception): | ||
| pass | ||
|
|
||
|
|
||
| def get_tag_values_list(org_id: int, values: Sequence[str]) -> Sequence[int]: | ||
| return [x for x in [try_get_string_index(org_id, x) for x in values] if x is not None] | ||
|
|
||
|
|
||
| def metric_id(org_id: int, name: str) -> int: | ||
| index = indexer.resolve(org_id, name) # type: ignore | ||
| if index is None: | ||
|
|
@@ -50,7 +55,7 @@ def tag_value(org_id: int, name: str) -> int: | |
| return index # type: ignore | ||
|
|
||
|
|
||
| def try_get_tag_value(org_id: int, name: str) -> Optional[int]: | ||
| def try_get_string_index(org_id: int, name: str) -> Optional[int]: | ||
| return indexer.resolve(org_id, name) # type: ignore | ||
|
|
||
|
|
||
|
|
@@ -382,13 +387,7 @@ def get_release_sessions_time_bounds( | |
| ] | ||
|
|
||
| if environments is not None: | ||
| env_filter = [ | ||
| x | ||
| for x in [ | ||
| try_get_tag_value(org_id, environment) for environment in environments | ||
| ] | ||
| if x is not None | ||
| ] | ||
| env_filter = get_tag_values_list(org_id, environments) | ||
| if not env_filter: | ||
| raise MetricIndexNotFound() | ||
|
|
||
|
|
@@ -524,11 +523,7 @@ def check_has_health_data( | |
| if includes_releases: | ||
| releases = [x[1] for x in projects_list] # type: ignore | ||
| release_column_name = tag_key(org_id, "release") | ||
| releases_ids = [ | ||
| release_id | ||
| for release_id in [try_get_tag_value(org_id, release) for release in releases] | ||
| if release_id is not None | ||
| ] | ||
| releases_ids = get_tag_values_list(org_id, releases) | ||
| where_clause.append(Condition(Column(release_column_name), Op.IN, releases_ids)) | ||
| column_names = ["project_id", release_column_name] | ||
|
|
||
|
|
@@ -578,7 +573,7 @@ def check_releases_have_health_data( | |
| releases_ids = [ | ||
| release_id | ||
| for release_id in [ | ||
| try_get_tag_value(organization_id, release) for release in release_versions | ||
| try_get_string_index(organization_id, release) for release in release_versions | ||
| ] | ||
| if release_id is not None | ||
| ] | ||
|
|
@@ -608,6 +603,138 @@ def extract_row_info(row: Mapping[str, Union[OrganizationId, str]]) -> ReleaseNa | |
|
|
||
| return {extract_row_info(row) for row in result["data"]} | ||
|
|
||
| def _get_crash_free_breakdown_fn( | ||
| self, | ||
| org_id: int, | ||
| project_id: ProjectId, | ||
| release: ReleaseName, | ||
| start: datetime, | ||
| environments: Optional[Sequence[EnvironmentName]] = None, | ||
| ) -> Callable[[datetime], CrashFreeBreakdown]: | ||
| def generate_defaults(end: datetime) -> CrashFreeBreakdown: | ||
| """Function to use if querying snuba is not necessary""" | ||
| return { | ||
| "crash_free_sessions": None, | ||
| "crash_free_users": None, | ||
| "date": end, | ||
| "total_sessions": 0, | ||
| "total_users": 0, | ||
| } | ||
|
|
||
| # 1) Get required string indexes | ||
| try: | ||
| release_key = tag_key(org_id, "release") | ||
| release_value = tag_value(org_id, release) | ||
| environment_key = tag_key(org_id, "environment") | ||
| status_key = tag_key(org_id, "session.status") | ||
| except MetricIndexNotFound: | ||
| # No need to query snuba if any of these is missing | ||
| return generate_defaults | ||
|
|
||
| environment_values = None | ||
| if environments is not None: | ||
| environment_values = get_tag_values_list(org_id, environments) | ||
|
|
||
| if environment_values == []: | ||
| # No need to query snuba with an empty list | ||
| return generate_defaults | ||
|
|
||
| status_init = tag_value(org_id, "init") | ||
| status_crashed = tag_value(org_id, "crashed") | ||
|
|
||
| conditions = [ | ||
| Condition(Column("org_id"), Op.EQ, org_id), | ||
| Condition(Column("project_id"), Op.EQ, project_id), | ||
| Condition(Column(release_key), Op.EQ, release_value), | ||
| Condition(Column("timestamp"), Op.GTE, start), | ||
| Condition(Column(status_key), Op.IN, [status_init, status_crashed]), | ||
| ] | ||
| if environment_values is not None: | ||
| conditions.append(Condition(Column(environment_key), Op.IN, environment_values)) | ||
|
|
||
| def query_stats(end: datetime) -> CrashFreeBreakdown: | ||
jjbayer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| def _get_data(entity_key: EntityKey, metric_name: str) -> Tuple[int, int]: | ||
| total = 0 | ||
| crashed = 0 | ||
| metric_id = try_get_string_index(org_id, metric_name) | ||
| if metric_id is not None: | ||
| where = conditions + [ | ||
| Condition(Column("metric_id"), Op.EQ, metric_id), | ||
| Condition(Column("timestamp"), Op.LT, end), | ||
| ] | ||
| data = raw_snql_query( | ||
jjbayer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Query( | ||
| dataset=Dataset.Metrics.value, | ||
| match=Entity(entity_key.value), | ||
| select=[Column("value")], | ||
| where=where, | ||
| groupby=[Column(status_key)], | ||
jjbayer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ), | ||
| referrer="release_health.metrics.crash-free-breakdown.session", | ||
| )["data"] | ||
| for row in data: | ||
| if row[status_key] == status_init: | ||
| total = int(row["value"]) | ||
| elif row[status_key] == status_crashed: | ||
| crashed = int(row["value"]) | ||
|
|
||
| return total, crashed | ||
|
|
||
| sessions_total, sessions_crashed = _get_data(EntityKey.MetricsCounters, "session") | ||
| users_total, users_crashed = _get_data(EntityKey.MetricsSets, "user") | ||
|
|
||
| return { | ||
| "date": end, | ||
| "total_users": users_total, | ||
| "crash_free_users": 100 - users_crashed / float(users_total) * 100 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would unify how this is calculated everywhere, because if I recall correctly @untitaker might have been doing this differently?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, I did exactly this... we're just porting the formulas from sessions.py, where it was also duplicated but also consistent. Not opposed to deduping in any case.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's dedup when we do https://getsentry.atlassian.net/browse/INGEST-456. |
||
| if users_total | ||
| else None, | ||
| "total_sessions": sessions_total, | ||
| "crash_free_sessions": 100 - sessions_crashed / float(sessions_total) * 100 | ||
| if sessions_total | ||
| else None, | ||
| } | ||
|
|
||
| return query_stats | ||
|
|
||
| def get_crash_free_breakdown( | ||
| self, | ||
| project_id: ProjectId, | ||
| release: ReleaseName, | ||
| start: datetime, | ||
| environments: Optional[Sequence[EnvironmentName]] = None, | ||
| ) -> Sequence[CrashFreeBreakdown]: | ||
|
|
||
| org_id = self._get_org_id([project_id]) | ||
|
|
||
| now = datetime.now(pytz.utc) | ||
| query_fn = self._get_crash_free_breakdown_fn( | ||
| org_id, project_id, release, start, environments | ||
| ) | ||
|
|
||
| last: Optional[datetime] = None | ||
| rv = [] | ||
| for offset in ( | ||
| timedelta(days=1), | ||
| timedelta(days=2), | ||
| timedelta(days=7), | ||
| timedelta(days=14), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tuples are cheaper to construct
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure but how expensive really is it to have a list of 4 items?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I get the argument that lists are supposed to be the homogenous container and tuples the heterogenous but I don't buy it. Not a lot of code works this way and it seems like a style guideline that was invented long after tuples and lists were. For all practical purposes tuples are immutable lists as long as mypy doesn't get in your way about it.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW I deliberately did not touch this code but simply copied it over from |
||
| timedelta(days=30), | ||
| ): | ||
| try: | ||
| end = start + offset | ||
| if end > now: | ||
| if last is None or (end - last).days > 1: | ||
| rv.append(query_fn(now)) | ||
| break | ||
| rv.append(query_fn(end)) | ||
| last = end | ||
| except QueryOutsideRetentionError: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. instead of having dummy_fn, what about letting the tag indexer exceptions bubble up until here?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to do the whole "does tag exist" dance once instead of in every loop iteration.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, makes sense. I think we may want an in-process cache around indexer anyway, because we have some endpoints that just call one method on the releasehealth service after the other.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the in-memory cache should be part of the indexer service IMO. I'm writing all my code under the assumption that lookups will be optimized there. |
||
| # cannot query for these | ||
| pass | ||
|
|
||
| return rv | ||
|
|
||
| def get_changed_project_release_model_adoptions( | ||
| self, | ||
| project_ids: Sequence[ProjectId], | ||
|
|
@@ -667,7 +794,7 @@ def get_oldest_health_data_for_releases( | |
| releases = [x[1] for x in project_releases] | ||
| releases_ids = [ | ||
| release_id | ||
| for release_id in [try_get_tag_value(org_id, release) for release in releases] | ||
| for release_id in [try_get_string_index(org_id, release) for release in releases] | ||
| if release_id is not None | ||
| ] | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| from datetime import datetime | ||
|
|
||
| from django.urls import reverse | ||
|
|
||
| from sentry.models import Release | ||
| from sentry.testutils import APITestCase | ||
|
|
||
|
|
||
| class ProjectReleaseStatsTest(APITestCase): | ||
| def test_simple(self): | ||
| """Minimal test to ensure code coverage of the endpoint""" | ||
| self.login_as(user=self.user) | ||
|
|
||
| project = self.create_project(name="foo") | ||
| release = Release.objects.create( | ||
| organization_id=project.organization_id, | ||
| version="1", | ||
| date_added=datetime(2013, 8, 13, 3, 8, 24, 880386), | ||
| ) | ||
| release.add_project(project) | ||
|
|
||
| url = reverse( | ||
| "sentry-api-0-project-release-stats", | ||
| kwargs={ | ||
| "organization_slug": project.organization.slug, | ||
| "project_slug": project.slug, | ||
| "version": "1", | ||
| }, | ||
| ) | ||
| response = self.client.get(url, format="json") | ||
|
|
||
| assert response.status_code == 200, response.content |
Uh oh!
There was an error while loading. Please reload this page.