diff --git a/src/sentry/api/endpoints/organization_sessions.py b/src/sentry/api/endpoints/organization_sessions.py new file mode 100644 index 00000000000000..640eea68951517 --- /dev/null +++ b/src/sentry/api/endpoints/organization_sessions.py @@ -0,0 +1,50 @@ +from __future__ import absolute_import + +from contextlib import contextmanager + +from rest_framework.response import Response +from rest_framework.exceptions import ParseError + +import six +import sentry_sdk + +from sentry.api.bases import OrganizationEventsEndpointBase +from sentry.snuba.sessions_v2 import ( + InvalidField, + QueryDefinition, + run_sessions_query, + massage_sessions_result, +) + + +# NOTE: this currently extends `OrganizationEventsEndpointBase` for `handle_query_errors` only, which should ideally be decoupled from the base class. +class OrganizationSessionsEndpoint(OrganizationEventsEndpointBase): + def get(self, request, organization): + with self.handle_query_errors(): + with sentry_sdk.start_span(op="sessions.endpoint", description="build_sessions_query"): + query = self.build_sessions_query(request, organization) + + with sentry_sdk.start_span(op="sessions.endpoint", description="run_sessions_query"): + result_totals, result_timeseries = run_sessions_query(query) + + with sentry_sdk.start_span( + op="sessions.endpoint", description="massage_sessions_result" + ): + result = massage_sessions_result(query, result_totals, result_timeseries) + return Response(result, status=200) + + def build_sessions_query(self, request, organization): + # validate and default all `project` params. + projects = self.get_projects(request, organization) + project_ids = [p.id for p in projects] + + return QueryDefinition(request.GET, project_ids) + + @contextmanager + def handle_query_errors(self): + try: + # TODO: this context manager should be decoupled from `OrganizationEventsEndpointBase`? 
+ with super(OrganizationSessionsEndpoint, self).handle_query_errors(): + yield + except InvalidField as error: + raise ParseError(detail=six.text_type(error)) diff --git a/src/sentry/api/urls.py b/src/sentry/api/urls.py index 0f86821449e24b..fbff26aebab54a 100644 --- a/src/sentry/api/urls.py +++ b/src/sentry/api/urls.py @@ -171,6 +171,7 @@ from .endpoints.organization_join_request import OrganizationJoinRequestEndpoint from .endpoints.organization_search_details import OrganizationSearchDetailsEndpoint from .endpoints.organization_searches import OrganizationSearchesEndpoint +from .endpoints.organization_sessions import OrganizationSessionsEndpoint from .endpoints.organization_sentry_apps import OrganizationSentryAppsEndpoint from .endpoints.organization_shortid import ShortIdLookupEndpoint from .endpoints.organization_slugs import SlugsUpdateEndpoint @@ -963,6 +964,11 @@ OrganizationSearchesEndpoint.as_view(), name="sentry-api-0-organization-searches", ), + url( + r"^(?P<organization_slug>[^\/]+)/sessions/$", + OrganizationSessionsEndpoint.as_view(), + name="sentry-api-0-organization-sessions", + ), url( r"^(?P<organization_slug>[^\/]+)/users/issues/$", OrganizationUserIssuesSearchEndpoint.as_view(), diff --git a/src/sentry/api/utils.py b/src/sentry/api/utils.py index 3b6e010b390daf..c2b5d24a0e343f 100644 --- a/src/sentry/api/utils.py +++ b/src/sentry/api/utils.py @@ -2,12 +2,13 @@ from datetime import timedelta +import math import six from django.utils import timezone from sentry.search.utils import parse_datetime_string, InvalidQuery -from sentry.utils.dates import parse_stats_period - +from sentry.utils.dates import parse_stats_period, to_timestamp, to_datetime +from sentry.constants import MAX_ROLLUP_POINTS MAX_STATS_PERIOD = timedelta(days=90) @@ -83,3 +84,43 @@ def get_date_range_from_params(params, optional=False): raise InvalidParams("start must be before end") return start, end + + +def get_date_range_rollup_from_params( + params, + minimum_interval="1h", + default_interval="", + round_range=False, + max_points=MAX_ROLLUP_POINTS, +): + """ + Similar to `get_date_range_from_params`, but this also parses and validates + an `interval`, as `get_rollup_from_request` would do. + + This also optionally rounds the returned range to the given `interval`. + The rounding uses integer arithmetic on unix timestamps, so might yield + unexpected results when the interval is > 1d. + """ + minimum_interval = parse_stats_period(minimum_interval).total_seconds() + interval = parse_stats_period(params.get("interval", default_interval)) + interval = minimum_interval if interval is None else interval.total_seconds() + if interval <= 0: + raise InvalidParams("Interval cannot result in a zero duration.") + + # round the interval up to the minimum + interval = int(minimum_interval * math.ceil(interval / minimum_interval)) + + start, end = get_date_range_from_params(params) + date_range = end - start + if date_range.total_seconds() / interval > max_points: + raise InvalidParams( + "Your interval and date range would create too many results. " + "Use a larger interval, or a smaller date range."
+ ) + + if round_range: + end_ts = int(interval * math.ceil(to_timestamp(end) / interval)) + end = to_datetime(end_ts) + start = end - date_range + + return start, end, interval diff --git a/src/sentry/snuba/sessions_v2.py b/src/sentry/snuba/sessions_v2.py new file mode 100644 index 00000000000000..17dafda0aa0236 --- /dev/null +++ b/src/sentry/snuba/sessions_v2.py @@ -0,0 +1,413 @@ +from __future__ import absolute_import + +from datetime import datetime + +import six +import itertools + +from sentry.api.event_search import get_filter +from sentry.api.utils import get_date_range_rollup_from_params +from sentry.utils.dates import to_timestamp +from sentry.utils.snuba import Dataset, raw_query + +""" +The new Sessions API defines a "metrics"-like interface which can be used in +a similar way to "discover". +See https://www.notion.so/sentry/Session-Stats-API-0016d3713d1a4276be0396a338c7930a + +# "Metrics" + +We have basically 3 "metrics" that we can query: + +- `session` (counter): The number of sessions that occurred +- `user` (set): The set of `distinct_id`s. +- `session.duration` (histogram): The duration of individual sessions + (not available for pre-aggregated sessions) + +# "Operations" on metrics + +Depending on the metric *type*, we can query different things: + +- `counter`: Can only be accessed via the `sum` function. +- `set`: Can only be accessed via the `count_unique` function. +- `histogram`: Can have different quantiles / averages available via: + `avg`, `p50`...`p99`, `max`. + +# Tags / Grouping + +The Session data can be grouped by a set of tags, which can only appear in the +`groupBy` of the query. + +- `project` +- `environment` +- `release`: + TODO: describe specific release filters such as `release.major`, etc + +## "Virtual" tags + +The `session.status` is considered a "virtual" tag, as it does not appear as +such in the current session dataset. Instead, the status is represented as +different columns in the dataset, and it is "exploded" into distinct groups purely +in code, which is the tricky part. + +Essentially, a row like this: +``` +{ + sessions: 123, + sessions_abnormal: 4, + sessions_crashed: 3, + sessions_errored: 23, +} +``` + +Is then "exploded" into something like: + +``` +[{ + by: { "session.status": "healthy" }, + series: { + "sum(session)": [100, ...] <- this is `sessions - sessions_errored` + } +}, { + by: { "session.status": "errored" }, + series: { + "sum(session)": [23, ...] + } +}, +...
+] +``` +""" + + +class SessionsField(object): + def get_snuba_columns(self, raw_groupby): + if "session.status" in raw_groupby: + return ["sessions", "sessions_abnormal", "sessions_crashed", "sessions_errored"] + return ["sessions"] + + def extract_from_row(self, row, group): + if row is None: + return 0 + status = group.get("session.status") + if status is None: + return row["sessions"] + if status == "healthy": + return row["sessions"] - row["sessions_errored"] + if status == "abnormal": + return row["sessions_abnormal"] + if status == "crashed": + return row["sessions_crashed"] + if status == "errored": + return row["sessions_errored"] + return 0 + + +class UsersField(object): + def get_snuba_columns(self, raw_groupby): + if "session.status" in raw_groupby: + return ["users", "users_abnormal", "users_crashed", "users_errored"] + return ["users"] + + def extract_from_row(self, row, group): + if row is None: + return 0 + status = group.get("session.status") + if status is None: + return row["users"] + if status == "healthy": + return row["users"] - row["users_errored"] + if status == "abnormal": + return row["users_abnormal"] + if status == "crashed": + return row["users_crashed"] + if status == "errored": + return row["users_errored"] + return 0 + + +class DurationAverageField(object): + def get_snuba_columns(self, raw_groupby): + return ["duration_avg"] + + def extract_from_row(self, row, group): + if row is None: + return None + status = group.get("session.status") + if status is None or status == "healthy": + return row["duration_avg"] + return None + + +class DurationQuantileField(object): + def __init__(self, quantile_index): + self.quantile_index = quantile_index + + def get_snuba_columns(self, raw_groupby): + return ["duration_quantiles"] + + def extract_from_row(self, row, group): + if row is None: + return None + status = group.get("session.status") + if status is None or status == "healthy": + return row["duration_quantiles"][self.quantile_index] + return None + + +COLUMN_MAP = { + "sum(session)": SessionsField(), + "count_unique(user)": UsersField(), + "avg(session.duration)": DurationAverageField(), + "p50(session.duration)": DurationQuantileField(0), + "p75(session.duration)": DurationQuantileField(1), + "p90(session.duration)": DurationQuantileField(2), + "p95(session.duration)": DurationQuantileField(3), + "p99(session.duration)": DurationQuantileField(4), + "max(session.duration)": DurationQuantileField(5), +} + + +class SimpleGroupBy(object): + def __init__(self, row_name, name=None): + self.row_name = row_name + self.name = name or row_name + + def get_snuba_columns(self): + return [self.row_name] + + def get_snuba_groupby(self): + return [self.row_name] + + def get_keys_for_row(self, row): + return [(self.name, row[self.row_name])] + + +class SessionStatusGroupBy(object): + def get_snuba_columns(self): + return [] + + def get_snuba_groupby(self): + return [] + + def get_keys_for_row(self, row): + return [("session.status", key) for key in ["healthy", "abnormal", "crashed", "errored"]] + + +GROUPBY_MAP = { + "project": SimpleGroupBy("project_id", "project"), + "environment": SimpleGroupBy("environment"), + "release": SimpleGroupBy("release"), + "session.status": SessionStatusGroupBy(), +} + + +class InvalidField(Exception): + pass + + +class QueryDefinition(object): + """ + This is the definition of the query the user wants to execute. 
+ This is constructed out of the request params, and also contains a list of + `fields` and `groupby` definitions as [`ColumnDefinition`] objects. + """ + + def __init__(self, query, project_ids=None): + self.query = query.get("query", "") + raw_fields = query.getlist("field", []) + raw_groupby = query.getlist("groupBy", []) + + if len(raw_fields) == 0: + raise InvalidField(u'Request is missing a "field"') + + self.fields = {} + for key in raw_fields: + if key not in COLUMN_MAP: + raise InvalidField(u'Invalid field: "{}"'.format(key)) + self.fields[key] = COLUMN_MAP[key] + + self.groupby = [] + for key in raw_groupby: + if key not in GROUPBY_MAP: + raise InvalidField(u'Invalid groupBy: "{}"'.format(key)) + self.groupby.append(GROUPBY_MAP[key]) + + start, end, rollup = get_date_range_rollup_from_params(query, "1h", round_range=True) + self.rollup = rollup + self.start = start + self.end = end + + query_columns = set() + for field in self.fields.values(): + query_columns.update(field.get_snuba_columns(raw_groupby)) + for groupby in self.groupby: + query_columns.update(groupby.get_snuba_columns()) + self.query_columns = list(query_columns) + + query_groupby = set() + for groupby in self.groupby: + query_groupby.update(groupby.get_snuba_groupby()) + self.query_groupby = list(query_groupby) + + params = {"project_id": project_ids or []} + snuba_filter = get_filter(self.query, params) + + self.aggregations = snuba_filter.aggregations + self.conditions = snuba_filter.conditions + self.filter_keys = snuba_filter.filter_keys + + +TS_COL = "bucketed_started" + + +def run_sessions_query(query): + """ + Runs the `query` as defined by [`QueryDefinition`] two times, once for the + `totals` and again for the actual time-series data grouped by the requested + interval. + """ + result_totals = raw_query( + dataset=Dataset.Sessions, + selected_columns=query.query_columns, + groupby=query.query_groupby, + aggregations=query.aggregations, + conditions=query.conditions, + filter_keys=query.filter_keys, + start=query.start, + end=query.end, + rollup=query.rollup, + referrer="sessions.totals", + ) + + result_timeseries = raw_query( + dataset=Dataset.Sessions, + selected_columns=[TS_COL] + query.query_columns, + groupby=[TS_COL] + query.query_groupby, + aggregations=query.aggregations, + conditions=query.conditions, + filter_keys=query.filter_keys, + start=query.start, + end=query.end, + rollup=query.rollup, + referrer="sessions.timeseries", + ) + + return result_totals["data"], result_timeseries["data"] + + +def massage_sessions_result(query, result_totals, result_timeseries): + """ + Post-processes the query result. + + Given the `query` as defined by [`QueryDefinition`] and its totals and + timeseries results from snuba, groups and transforms the result into the + expected format. 
+ + For example: + ```json + { + "intervals": [ + "2020-12-16T00:00:00Z", + "2020-12-16T12:00:00Z", + "2020-12-17T00:00:00Z" + ], + "groups": [ + { + "by": { "release": "99b8edc5a3bb49d01d16426d7bb9c511ec41f81e" }, + "series": { "sum(session)": [0, 1, 0] }, + "totals": { "sum(session)": 1 } + }, + { + "by": { "release": "test-example-release" }, + "series": { "sum(session)": [0, 10, 20] }, + "totals": { "sum(session)": 30 } + } + ] + } + ``` + """ + timestamps = _get_timestamps(query) + + total_groups = _split_rows_groupby(result_totals, query.groupby) + timeseries_groups = _split_rows_groupby(result_timeseries, query.groupby) + + def make_timeseries(rows, group): + for row in rows: + row[TS_COL] = row[TS_COL][:19] + "Z" + + rows.sort(key=lambda row: row[TS_COL]) + fields = [(name, field, list()) for name, field in query.fields.items()] + group_index = 0 + + while group_index < len(rows): + row = rows[group_index] + if row[TS_COL] < timestamps[0]: + group_index += 1 + else: + break + + for ts in timestamps: + row = rows[group_index] if group_index < len(rows) else None + if row is not None and row[TS_COL] == ts: + group_index += 1 + else: + row = None + + for (name, field, series) in fields: + series.append(field.extract_from_row(row, group)) + + return {name: series for (name, field, series) in fields} + + def make_totals(totals, group): + return { + name: field.extract_from_row(totals[0], group) for name, field in query.fields.items() + } + + groups = [] + for key, totals in total_groups.items(): + by = dict(key) + + group = { + "by": by, + "totals": make_totals(totals, by), + "series": make_timeseries(timeseries_groups[key], by), + } + + groups.append(group) + + return { + "query": query.query, + "intervals": timestamps, + "groups": groups, + } + + +def _get_timestamps(query): + """ + Generates a list of timestamps according to `query`. + The timestamps are returned as ISO strings for now. 
+ """ + rollup = query.rollup + start = int(to_timestamp(query.start)) + end = int(to_timestamp(query.end)) + return [ + datetime.utcfromtimestamp(ts).isoformat() + "Z" + for ts in six.moves.xrange(start, end, rollup) + ] + + +def _split_rows_groupby(rows, groupby): + groups = {} + for row in rows: + key_parts = (group.get_keys_for_row(row) for group in groupby) + keys = itertools.product(*key_parts) + + for key in keys: + key = frozenset(key) + + if key not in groups: + groups[key] = [] + groups[key].append(row) + + return groups diff --git a/tests/sentry/api/test_utils.py b/tests/sentry/api/test_utils.py index 36b384938f0722..9de0e93aeae0f8 100644 --- a/tests/sentry/api/test_utils.py +++ b/tests/sentry/api/test_utils.py @@ -5,7 +5,12 @@ from django.utils import timezone from freezegun import freeze_time -from sentry.api.utils import get_date_range_from_params, InvalidParams, MAX_STATS_PERIOD +from sentry.api.utils import ( + get_date_range_from_params, + get_date_range_rollup_from_params, + InvalidParams, + MAX_STATS_PERIOD, +) from sentry.testutils import TestCase @@ -54,3 +59,39 @@ def test_relative_date_range_incomplete(self): with self.assertRaises(InvalidParams): start, end = get_date_range_from_params({"statsPeriodStart": "14d"}) + + +class GetDateRangeRollupFromParamsTest(TestCase): + def test_intervals(self): + # defaults to 1h + start, end, interval = get_date_range_rollup_from_params({}) + assert interval == 3600 + + # rounds up to a multiple of the minimum + start, end, interval = get_date_range_rollup_from_params( + {"statsPeriod": "14h", "interval": "8m"}, minimum_interval="5m" + ) + assert interval == 600 + + @freeze_time("2018-12-11 03:21:34") + def test_round_range(self): + start, end, interval = get_date_range_rollup_from_params( + {"statsPeriod": "2d"}, round_range=True + ) + assert start == datetime.datetime(2018, 12, 9, 4, tzinfo=timezone.utc) + assert end == datetime.datetime(2018, 12, 11, 4, tzinfo=timezone.utc) + + start, end, interval = get_date_range_rollup_from_params( + {"statsPeriod": "2d", "interval": "1d"}, round_range=True + ) + assert start == datetime.datetime(2018, 12, 10, tzinfo=timezone.utc) + assert end == datetime.datetime(2018, 12, 12, tzinfo=timezone.utc) + + def test_invalid_interval(self): + with self.assertRaises(InvalidParams): + start, end, interval = get_date_range_rollup_from_params({"interval": "0d"}) + with self.assertRaises(InvalidParams): + # defaults stats period is 90d + start, end, interval = get_date_range_rollup_from_params( + {"interval": "1d"}, max_points=80 + ) diff --git a/tests/snuba/api/endpoints/test_organization_sessions.py b/tests/snuba/api/endpoints/test_organization_sessions.py new file mode 100644 index 00000000000000..f3b12d22bb1d76 --- /dev/null +++ b/tests/snuba/api/endpoints/test_organization_sessions.py @@ -0,0 +1,408 @@ +from __future__ import absolute_import + +import datetime +import pytz + +from uuid import uuid4 +from freezegun import freeze_time + +from django.core.urlresolvers import reverse + +from sentry.testutils import APITestCase, SnubaTestCase +from sentry.utils.dates import to_timestamp + + +def result_sorted(result): + """sort the groups of the results array by the `by` object, ensuring a stable order""" + + def stable_dict(d): + return tuple(sorted(d.items(), key=lambda t: t[0])) + + result["groups"].sort(key=lambda group: stable_dict(group["by"])) + return result + + +class OrganizationSessionsEndpointTest(APITestCase, SnubaTestCase): + def setUp(self): + super(OrganizationSessionsEndpointTest, 
self).setUp() + self.setup_fixture() + + def setup_fixture(self): + self.timestamp = to_timestamp(datetime.datetime(2021, 1, 14, 12, 27, 28, tzinfo=pytz.utc)) + self.received = self.timestamp + self.session_started = self.timestamp // 60 * 60 + + self.organization1 = self.organization + self.organization2 = self.create_organization() + self.project1 = self.project + self.project2 = self.create_project() + self.project3 = self.create_project() + self.project4 = self.create_project(organization=self.organization2) + + template = { + "distinct_id": "00000000-0000-0000-0000-000000000000", + "status": "exited", + "seq": 0, + "release": "foo@1.0.0", + "environment": "production", + "retention_days": 90, + "duration": None, + "errors": 0, + "started": self.session_started, + "received": self.received, + } + + def make_session(project, **kwargs): + return dict( + template, + session_id=uuid4().hex, + org_id=project.organization_id, + project_id=project.id, + **kwargs + ) + + self.store_session(make_session(self.project1)) + self.store_session(make_session(self.project1, release="foo@1.1.0")) + self.store_session(make_session(self.project1, started=self.session_started - 60 * 60)) + self.store_session(make_session(self.project1, started=self.session_started - 12 * 60 * 60)) + self.store_session(make_session(self.project2, status="crashed")) + self.store_session(make_session(self.project2, environment="development")) + self.store_session(make_session(self.project3, errors=1)) + self.store_session( + make_session( + self.project3, + distinct_id="39887d89-13b2-4c84-8c23-5d13d2102664", + started=self.session_started - 60 * 60, + ) + ) + self.store_session( + make_session( + self.project3, distinct_id="39887d89-13b2-4c84-8c23-5d13d2102664", errors=1 + ) + ) + self.store_session(make_session(self.project4)) + + def do_request(self, query): + self.login_as(user=self.user) + url = reverse( + "sentry-api-0-organization-sessions", + kwargs={"organization_slug": self.organization.slug}, + ) + return self.client.get(url, query, format="json") + + def test_empty_request(self): + response = self.do_request({}) + + assert response.status_code == 400, response.content + assert response.data == {"detail": 'Request is missing a "field"'} + + def test_inaccessible_project(self): + response = self.do_request({"project": [self.project4.id]}) + + assert response.status_code == 403, response.content + assert response.data == {"detail": "You do not have permission to perform this action."} + + def test_unknown_field(self): + response = self.do_request({"field": ["summ(sessin)"]}) + + assert response.status_code == 400, response.content + assert response.data == {"detail": 'Invalid field: "summ(sessin)"'} + + def test_unknown_groupby(self): + response = self.do_request({"field": ["sum(session)"], "groupBy": ["envriomnent"]}) + + assert response.status_code == 400, response.content + assert response.data == {"detail": 'Invalid groupBy: "envriomnent"'} + + def test_too_many_points(self): + # TODO: looks like this is well within the range of valid points + return + # default statsPeriod is 90d + response = self.do_request({"field": ["sum(session)"], "interval": "1h"}) + + assert response.status_code == 400, response.content + assert response.data == {} + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_timeseries_interval(self): + response = self.do_request( + {"statsPeriod": "1d", "interval": "1d", "field": ["sum(session)"]} + ) + + assert response.status_code == 200, response.content + assert 
result_sorted(response.data) == { + "query": "", + "intervals": ["2021-01-14T00:00:00Z"], + "groups": [{"by": {}, "series": {"sum(session)": [9]}, "totals": {"sum(session)": 9}}], + } + + response = self.do_request( + {"statsPeriod": "1d", "interval": "6h", "field": ["sum(session)"]} + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data) == { + "query": "", + "intervals": [ + "2021-01-13T18:00:00Z", + "2021-01-14T00:00:00Z", + "2021-01-14T06:00:00Z", + "2021-01-14T12:00:00Z", + ], + "groups": [ + {"by": {}, "series": {"sum(session)": [0, 1, 2, 6]}, "totals": {"sum(session)": 9}} + ], + } + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_minimum_interval(self): + # smallest interval is 1h + response = self.do_request( + {"statsPeriod": "2h", "interval": "5m", "field": ["sum(session)"]} + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data) == { + "query": "", + "intervals": ["2021-01-14T11:00:00Z", "2021-01-14T12:00:00Z"], + "groups": [ + {"by": {}, "series": {"sum(session)": [2, 6]}, "totals": {"sum(session)": 8}} + ], + } + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_filter_projects(self): + response = self.do_request( + { + "statsPeriod": "1d", + "interval": "1d", + "field": ["sum(session)"], + "project": [self.project2.id, self.project3.id], + } + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + {"by": {}, "series": {"sum(session)": [5]}, "totals": {"sum(session)": 5}} + ] + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_filter_environment(self): + response = self.do_request( + { + "statsPeriod": "1d", + "interval": "1d", + "field": ["sum(session)"], + "query": "environment:development", + } + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + {"by": {}, "series": {"sum(session)": [1]}, "totals": {"sum(session)": 1}} + ] + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_filter_release(self): + response = self.do_request( + { + "statsPeriod": "1d", + "interval": "1d", + "field": ["sum(session)"], + "query": "release:foo@1.1.0", + } + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + {"by": {}, "series": {"sum(session)": [1]}, "totals": {"sum(session)": 1}} + ] + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_groupby_project(self): + response = self.do_request( + { + "statsPeriod": "1d", + "interval": "1d", + "field": ["sum(session)"], + "groupBy": ["project"], + } + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + { + "by": {"project": self.project1.id}, + "series": {"sum(session)": [4]}, + "totals": {"sum(session)": 4}, + }, + { + "by": {"project": self.project2.id}, + "series": {"sum(session)": [2]}, + "totals": {"sum(session)": 2}, + }, + { + "by": {"project": self.project3.id}, + "series": {"sum(session)": [3]}, + "totals": {"sum(session)": 3}, + }, + ] + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_groupby_environment(self): + response = self.do_request( + { + "statsPeriod": "1d", + "interval": "1d", + "field": ["sum(session)"], + "groupBy": ["environment"], + } + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + { + "by": {"environment": "development"}, + "series": {"sum(session)": [1]}, + "totals": {"sum(session)": 1}, + }, + { + "by": 
{"environment": "production"}, + "series": {"sum(session)": [8]}, + "totals": {"sum(session)": 8}, + }, + ] + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_groupby_release(self): + response = self.do_request( + { + "statsPeriod": "1d", + "interval": "1d", + "field": ["sum(session)"], + "groupBy": ["release"], + } + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + { + "by": {"release": "foo@1.0.0"}, + "series": {"sum(session)": [8]}, + "totals": {"sum(session)": 8}, + }, + { + "by": {"release": "foo@1.1.0"}, + "series": {"sum(session)": [1]}, + "totals": {"sum(session)": 1}, + }, + ] + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_groupby_status(self): + response = self.do_request( + { + "statsPeriod": "1d", + "interval": "1d", + "field": ["sum(session)"], + "groupBy": ["session.status"], + } + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + { + "by": {"session.status": "abnormal"}, + "series": {"sum(session)": [0]}, + "totals": {"sum(session)": 0}, + }, + { + "by": {"session.status": "crashed"}, + "series": {"sum(session)": [1]}, + "totals": {"sum(session)": 1}, + }, + { + "by": {"session.status": "errored"}, + "series": {"sum(session)": [3]}, + "totals": {"sum(session)": 3}, + }, + { + "by": {"session.status": "healthy"}, + "series": {"sum(session)": [6]}, + "totals": {"sum(session)": 6}, + }, + ] + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_groupby_cross(self): + response = self.do_request( + { + "statsPeriod": "1d", + "interval": "1d", + "field": ["sum(session)"], + "groupBy": ["release", "environment"], + } + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + { + "by": {"environment": "development", "release": "foo@1.0.0"}, + "series": {"sum(session)": [1]}, + "totals": {"sum(session)": 1}, + }, + { + "by": {"environment": "production", "release": "foo@1.0.0"}, + "series": {"sum(session)": [7]}, + "totals": {"sum(session)": 7}, + }, + { + "by": {"environment": "production", "release": "foo@1.1.0"}, + "series": {"sum(session)": [1]}, + "totals": {"sum(session)": 1}, + }, + ] + + @freeze_time("2021-01-14T12:27:28.303Z") + def test_users_groupby(self): + response = self.do_request( + {"statsPeriod": "1d", "interval": "1d", "field": ["count_unique(user)"]} + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + {"by": {}, "series": {"count_unique(user)": [1]}, "totals": {"count_unique(user)": 1}} + ] + + response = self.do_request( + { + "statsPeriod": "1d", + "interval": "1d", + "field": ["count_unique(user)"], + "groupBy": ["session.status"], + } + ) + + assert response.status_code == 200, response.content + assert result_sorted(response.data)["groups"] == [ + { + "by": {"session.status": "abnormal"}, + "series": {"count_unique(user)": [0]}, + "totals": {"count_unique(user)": 0}, + }, + { + "by": {"session.status": "crashed"}, + "series": {"count_unique(user)": [0]}, + "totals": {"count_unique(user)": 0}, + }, + { + "by": {"session.status": "errored"}, + "series": {"count_unique(user)": [1]}, + "totals": {"count_unique(user)": 1}, + }, + { + "by": {"session.status": "healthy"}, + "series": {"count_unique(user)": [0]}, + "totals": {"count_unique(user)": 0}, + }, + ] diff --git a/tests/snuba/sessions/test_sessions_v2.py b/tests/snuba/sessions/test_sessions_v2.py new file mode 100644 index 
00000000000000..995a7fe6f9c9d1 --- /dev/null +++ b/tests/snuba/sessions/test_sessions_v2.py @@ -0,0 +1,240 @@ +from __future__ import absolute_import + +from freezegun import freeze_time +from django.http import QueryDict + +# from sentry.testutils import TestCase +from sentry.snuba.sessions_v2 import ( + QueryDefinition, + massage_sessions_result, + _get_timestamps, +) + + +def _make_query(qs): + return QueryDefinition(QueryDict(qs), {}) + + +def result_sorted(result): + """sort the groups of the results array by the `by` object, ensuring a stable order""" + + def stable_dict(d): + return tuple(sorted(d.items(), key=lambda t: t[0])) + + result["groups"].sort(key=lambda group: stable_dict(group["by"])) + return result + + +@freeze_time("2020-12-18T11:14:17.105Z") +def test_timestamps(): + query = _make_query("statsPeriod=1d&interval=12h&field=sum(session)") + + expected_timestamps = ["2020-12-17T12:00:00Z", "2020-12-18T00:00:00Z"] + actual_timestamps = _get_timestamps(query) + + assert actual_timestamps == expected_timestamps + + +def test_simple_query(): + query = _make_query("statsPeriod=1d&interval=12h&field=sum(session)") + + assert query.query_columns == ["sessions"] + + +def test_groupby_query(): + query = _make_query("statsPeriod=1d&interval=12h&field=sum(session)&groupBy=release") + + assert sorted(query.query_columns) == ["release", "sessions"] + assert query.query_groupby == ["release"] + + +def test_virtual_groupby_query(): + query = _make_query("statsPeriod=1d&interval=12h&field=sum(session)&groupBy=session.status") + + assert sorted(query.query_columns) == [ + "sessions", + "sessions_abnormal", + "sessions_crashed", + "sessions_errored", + ] + assert query.query_groupby == [] + + query = _make_query( + "statsPeriod=1d&interval=12h&field=count_unique(user)&groupBy=session.status" + ) + + assert sorted(query.query_columns) == [ + "users", + "users_abnormal", + "users_crashed", + "users_errored", + ] + assert query.query_groupby == [] + + +@freeze_time("2020-12-18T11:14:17.105Z") +def test_massage_simple_timeseries(): + """A timeseries is filled up when it only receives partial data""" + + query = _make_query("statsPeriod=1d&interval=6h&field=sum(session)") + result_totals = [{"sessions": 4}] + # snuba returns the datetimes as strings for now + result_timeseries = [ + {"sessions": 2, "bucketed_started": "2020-12-17T12:00:00+00:00"}, + {"sessions": 2, "bucketed_started": "2020-12-18T06:00:00+00:00"}, + ] + + expected_result = { + "query": "", + "intervals": [ + "2020-12-17T12:00:00Z", + "2020-12-17T18:00:00Z", + "2020-12-18T00:00:00Z", + "2020-12-18T06:00:00Z", + ], + "groups": [ + {"by": {}, "series": {"sum(session)": [2, 0, 0, 2]}, "totals": {"sum(session)": 4}} + ], + } + + actual_result = result_sorted(massage_sessions_result(query, result_totals, result_timeseries)) + + assert actual_result == expected_result + + +@freeze_time("2020-12-18T11:14:17.105Z") +def test_massage_groupby_timeseries(): + query = _make_query("statsPeriod=1d&interval=6h&field=sum(session)&groupBy=release") + + result_totals = [ + {"release": "test-example-release", "sessions": 4}, + {"release": "test-example-release-2", "sessions": 1}, + ] + # snuba returns the datetimes as strings for now + result_timeseries = [ + { + "release": "test-example-release", + "sessions": 2, + "bucketed_started": "2020-12-17T12:00:00+00:00", + }, + { + "release": "test-example-release", + "sessions": 2, + "bucketed_started": "2020-12-18T06:00:00+00:00", + }, + { + "release": "test-example-release-2", + "sessions": 1, + 
"bucketed_started": "2020-12-18T06:00:00+00:00", + }, + ] + + expected_result = { + "query": "", + "intervals": [ + "2020-12-17T12:00:00Z", + "2020-12-17T18:00:00Z", + "2020-12-18T00:00:00Z", + "2020-12-18T06:00:00Z", + ], + "groups": [ + { + "by": {"release": "test-example-release"}, + "series": {"sum(session)": [2, 0, 0, 2]}, + "totals": {"sum(session)": 4}, + }, + { + "by": {"release": "test-example-release-2"}, + "series": {"sum(session)": [0, 0, 0, 1]}, + "totals": {"sum(session)": 1}, + }, + ], + } + + actual_result = result_sorted(massage_sessions_result(query, result_totals, result_timeseries)) + + assert actual_result == expected_result + + +@freeze_time("2020-12-18T13:25:15.769Z") +def test_massage_virtual_groupby_timeseries(): + query = _make_query( + "statsPeriod=1d&interval=6h&field=sum(session)&field=count_unique(user)&groupBy=session.status" + ) + result_totals = [ + { + "users": 1, + "users_crashed": 1, + "sessions": 6, + "sessions_errored": 1, + "users_errored": 1, + "sessions_abnormal": 0, + "sessions_crashed": 1, + "users_abnormal": 0, + } + ] + # snuba returns the datetimes as strings for now + result_timeseries = [ + { + "sessions_errored": 1, + "users": 1, + "users_crashed": 1, + "sessions_abnormal": 0, + "sessions": 3, + "users_errored": 1, + "users_abnormal": 0, + "sessions_crashed": 1, + "bucketed_started": "2020-12-18T12:00:00+00:00", + }, + { + "sessions_errored": 0, + "users": 1, + "users_crashed": 0, + "sessions_abnormal": 0, + "sessions": 3, + "users_errored": 0, + "users_abnormal": 0, + "sessions_crashed": 0, + "bucketed_started": "2020-12-18T06:00:00+00:00", + }, + ] + + expected_result = { + "query": "", + "intervals": [ + "2020-12-17T18:00:00Z", + "2020-12-18T00:00:00Z", + "2020-12-18T06:00:00Z", + "2020-12-18T12:00:00Z", + ], + "groups": [ + { + "by": {"session.status": "abnormal"}, + "series": {"count_unique(user)": [0, 0, 0, 0], "sum(session)": [0, 0, 0, 0]}, + "totals": {"count_unique(user)": 0, "sum(session)": 0}, + }, + { + "by": {"session.status": "crashed"}, + "series": {"count_unique(user)": [0, 0, 0, 1], "sum(session)": [0, 0, 0, 1]}, + "totals": {"count_unique(user)": 1, "sum(session)": 1}, + }, + { + "by": {"session.status": "errored"}, + "series": {"count_unique(user)": [0, 0, 0, 1], "sum(session)": [0, 0, 0, 1]}, + "totals": {"count_unique(user)": 1, "sum(session)": 1}, + }, + { + "by": {"session.status": "healthy"}, + "series": {"count_unique(user)": [0, 0, 1, 0], "sum(session)": [0, 0, 3, 2]}, + # while in one of the time slots, we have a healthy user, it is + # the *same* user as the one experiencing a crash later on, + # so in the *whole* time window, that one user is not counted as healthy, + # so the `0` here is expected, as thats an example of the `count_unique` behavior. + "totals": {"count_unique(user)": 0, "sum(session)": 5}, + }, + ], + } + + actual_result = result_sorted(massage_sessions_result(query, result_totals, result_timeseries)) + + assert actual_result == expected_result