fix: Batch requests to snuba for TSDB queries (#14379)
Some accounts are not getting their weekly reports.

This happens because, when report generation is scheduled, Sentry makes a call to Snuba with all of the new issue IDs and reopened issue IDs. Snuba has a maximum query size of 500 kB (i.e. the SQL cannot be more than 500 kB). The maximum query size is not a problem in most cases, but it does become a problem for larger organizations that open a lot of issues in one week.

[Here's the sentry error](https://sentry.io/organizations/sentry/issues/871693042/).
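The fix splits the combined set of issue IDs into fixed-size batches and merges the per-batch sums, so no single Snuba query exceeds the size limit. As a rough sketch of the idea (the names batched and sum_event_counts below are illustrative only; the actual change uses sentry.utils.iterators.chunked and tsdb.get_sums, as shown in the diff):

def batched(issue_ids, size):
    # Yield successive fixed-size batches from an iterable of issue IDs.
    ids = list(issue_ids)
    for i in range(0, len(ids), size):
        yield ids[i:i + size]


def sum_event_counts(issue_ids, fetch_sums, batch_size=30000):
    # Run one backend query per batch and merge the {issue_id: count}
    # results, so each generated SQL statement stays under the size limit.
    combined = {}
    for batch in batched(issue_ids, batch_size):
        combined.update(fetch_sums(batch))
    return combined

Patching the batch size down to 1 in tests (as the new test below does with mock.patch) forces every issue ID into its own batch and exercises the merging path.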
Manu committed Aug 17, 2019
1 parent dfafa26 commit 5c03215
Showing 2 changed files with 55 additions and 6 deletions.
18 changes: 14 additions & 4 deletions src/sentry/tasks/reports.py
@@ -29,13 +29,17 @@
from sentry.utils import json, redis
from sentry.utils.dates import floor_to_utc_day, to_datetime, to_timestamp
from sentry.utils.email import MessageBuilder
from sentry.utils.iterators import chunked
from sentry.utils.math import mean
from six.moves import reduce


date_format = functools.partial(dateformat.format, format_string="F jS, Y")

logger = logging.getLogger(__name__)

BATCH_SIZE = 30000


def _get_organization_queryset():
    return Organization.objects.filter(status=OrganizationStatus.VISIBLE)
@@ -211,6 +215,15 @@ def get_aggregate_value(start, stop):
]


def get_event_counts(issue_ids, start, stop, rollup):
    combined = {}

    for chunk in chunked(issue_ids, BATCH_SIZE):
        combined.update(tsdb.get_sums(tsdb.models.group, chunk, start, stop, rollup=rollup))

    return combined


def prepare_project_issue_summaries(interval, project):
    start, stop = interval

@@ -244,10 +257,7 @@ def prepare_project_issue_summaries(interval, project):
    )

    rollup = 60 * 60 * 24

    event_counts = tsdb.get_sums(
        tsdb.models.group, new_issue_ids | reopened_issue_ids, start, stop, rollup=rollup
    )
    event_counts = get_event_counts(new_issue_ids | reopened_issue_ids, start, stop, rollup)

    new_issue_count = sum(event_counts[id] for id in new_issue_ids)
    reopened_issue_count = sum(event_counts[id] for id in reopened_issue_ids)
43 changes: 41 additions & 2 deletions tests/sentry/tasks/test_reports.py
@@ -29,8 +29,9 @@
    prepare_reports,
    safe_add,
    user_subscribed_to_organization_reports,
    prepare_project_issue_summaries,
)
from sentry.testutils.cases import TestCase
from sentry.testutils.cases import TestCase, SnubaTestCase
from sentry.utils.dates import to_datetime, to_timestamp
from six.moves import xrange

@@ -193,7 +194,7 @@ def test_calendar_range():
)


class ReportTestCase(TestCase):
class ReportTestCase(TestCase, SnubaTestCase):
    def test_integration(self):
        Project.objects.all().delete()

@@ -256,3 +257,41 @@ def test_user_subscribed_to_organization_reports(self):

        set_option_value([organization.id])
        assert user_subscribed_to_organization_reports(user, organization) is False

    @mock.patch("sentry.tasks.reports.BATCH_SIZE", 1)
    def test_paginates_and_reassumbles_result(self):
        import copy
        from datetime import timedelta
        from django.utils import timezone

        from sentry.testutils.factories import DEFAULT_EVENT_DATA

        self.login_as(user=self.user)

        now = timezone.now()
        min_ago = (now - timedelta(minutes=1)).isoformat()[:19]
        two_min_ago = now - timedelta(minutes=2)

        self.store_event(
            data={
                "event_id": "a" * 32,
                "message": "message",
                "timestamp": min_ago,
                "stacktrace": copy.deepcopy(DEFAULT_EVENT_DATA["stacktrace"]),
                "fingerprint": ["group-1"],
            },
            project_id=self.project.id,
        )

        self.store_event(
            data={
                "event_id": "b" * 32,
                "message": "message",
                "timestamp": min_ago,
                "stacktrace": copy.deepcopy(DEFAULT_EVENT_DATA["stacktrace"]),
                "fingerprint": ["group-2"],
            },
            project_id=self.project.id,
        )

        assert prepare_project_issue_summaries([two_min_ago, now], self.project) == [2, 0, 0]
