Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(code-mappings): Add new task to find projects with missing code mappings #40271

Merged
merged 11 commits into from Oct 24, 2022
1 change: 1 addition & 0 deletions mypy.ini
Expand Up @@ -113,6 +113,7 @@ files = fixtures/mypy-stubs,
src/sentry/tasks/store.py,
src/sentry/tasks/symbolication.py,
src/sentry/tasks/update_user_reports.py,
src/sentry/tasks/find_missing_codemappings.py,
src/sentry/testutils/modelmanifest.py,
src/sentry/testutils/silo.py,
src/sentry/types/region.py,
Expand Down
1 change: 1 addition & 0 deletions src/sentry/conf/server.py
Expand Up @@ -684,6 +684,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME():
Queue("replays.delete_replay", routing_key="replays.delete_replay"),
Queue("counters-0", routing_key="counters-0"),
Queue("triggers-0", routing_key="triggers-0"),
Queue("find-missing-codemappings", routing_key="find-missing-codemappings"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems we need to register it:

CeleryQueueRegisteredTest.test

AssertionError: Found tasks with queues that are undefined. These must be defined in settings.CELERY_QUEUES.
  Task Info:
   - Task: sentry.tasks.find_missing_codemappings, Queue: find_missing_codemappings.
assert not [' - Task: sentry.tasks.find_missing_codemappings, Queue: find_missing_codemappings']

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I think it has to do with find-missing-codemappings vs find_missing_codemappings.

]

for queue in CELERY_QUEUES:
Expand Down
100 changes: 100 additions & 0 deletions src/sentry/tasks/find_missing_codemappings.py
@@ -0,0 +1,100 @@
import logging
armenzg marked this conversation as resolved.
Show resolved Hide resolved
from datetime import timedelta
from typing import Any, List, Mapping, Optional, Set, Tuple

from django.utils import timezone

from sentry.db.models.fields.node import NodeData
from sentry.models import Project
from sentry.models.group import Group
from sentry.models.organization import Organization, OrganizationStatus
from sentry.tasks.base import instrumented_task
from sentry.utils.safe import get_path

# A project is only analysed if at least one of its groups was last seen
# within this window (see the project filter in find_missing_codemappings).
ACTIVE_PROJECT_THRESHOLD = timedelta(days=7)
# Only groups last seen within this window contribute stacktrace paths
# (see get_all_stacktrace_paths).
GROUP_ANALYSIS_RANGE = timedelta(days=14)

# Module logger; named after the task rather than derived from __name__.
logger = logging.getLogger("sentry.tasks.find_missing_codemappings")


@instrumented_task(  # type: ignore
    name="sentry.tasks.find_missing_codemappings",
    # The queue name must match the entry registered in settings.CELERY_QUEUES
    # character for character ("find-missing-codemappings", with hyphens);
    # otherwise the task is routed to an undefined queue.
    queue="find-missing-codemappings",
    max_retries=0,  # if we don't backfill it this time, we'll get it the next time
)
def find_missing_codemappings(
    organizations: Optional[List[Organization]] = None,
) -> Mapping[str, Mapping[str, List[str]]]:
    """
    Generate a map of projects to stacktrace paths for the specified
    organizations, or for all active organizations if none are specified.

    Only projects that have received a first event and have at least one
    group seen within ``ACTIVE_PROJECT_THRESHOLD`` are included. A project
    for which a non-Python stacktrace is found is still present in the
    result, but mapped to an empty list of paths.

    :param organizations: organizations to inspect; ``None`` means every
        organization whose status is ``OrganizationStatus.ACTIVE``.
    :return: ``{organization slug: {project slug: [stacktrace paths]}}``.
    """
    if organizations is None:
        organizations = Organization.objects.filter(status=OrganizationStatus.ACTIVE)

    # Compute the activity cutoff once so every project is judged against the
    # same point in time instead of a slightly different "now" per query.
    active_since = timezone.now() - ACTIVE_PROJECT_THRESHOLD

    filename_maps = {}
    for org in organizations:
        candidate_projects = Project.objects.filter(
            organization=org, first_event__isnull=False
        )
        active_projects = [
            project
            for project in candidate_projects
            if Group.objects.filter(project=project, last_seen__gte=active_since).exists()
        ]

        filename_maps[org.slug] = {
            project.slug: get_all_stacktrace_paths(project) for project in active_projects
        }
    return filename_maps


def get_all_stacktrace_paths(project: Project) -> List[str]:
    """
    Collect the distinct stacktrace file paths seen in a project's recent groups.

    For every group of ``project`` last seen within ``GROUP_ANALYSIS_RANGE``,
    the latest event is inspected. As soon as one event turns out to have a
    non-Python stacktrace the whole project is discarded and an empty list is
    returned.

    :param project: the project whose groups are analysed.
    :return: de-duplicated stacktrace paths (in no particular order), or an
        empty list if any inspected stacktrace is not a Python one.
    """
    groups = Group.objects.filter(
        project=project, last_seen__gte=timezone.now() - GROUP_ANALYSIS_RANGE
    )

    all_stacktrace_paths: Set[str] = set()
    for group in groups:
        event = group.get_latest_event()
        if event is None:
            # A group may have no retrievable latest event; skip it rather
            # than failing the whole task on `event.data`.
            continue

        is_python_stacktrace, stacktrace_paths = get_stacktrace_paths(event.data)
        if not is_python_stacktrace:
            # One non-Python stacktrace disqualifies the entire project.
            return []
        all_stacktrace_paths.update(stacktrace_paths)

    return list(all_stacktrace_paths)


def get_stacktrace_paths(data: NodeData) -> Tuple[bool, Set[str]]:
    """
    Get the stacktrace paths from the stacktraces of an event.

    Each stacktrace is classified by the filename of its first frame only:
    if that filename ends in ``.py`` all filenames of the stacktrace are
    collected, otherwise the event is reported as non-Python. Stacktraces
    with no frames are skipped. Errors while reading one stacktrace are
    logged and do not abort processing of the remaining ones.

    :param data: the event data to extract stacktraces from.
    :return: ``(is_python, stacktrace_paths)``; ``(False, set())`` as soon as
        a non-Python stacktrace is found, otherwise ``True`` and the set of
        collected filenames (possibly empty).
    """
    stacktraces = get_stacktrace(data)
    stacktrace_paths: Set[str] = set()
    for stacktrace in stacktraces:
        try:
            paths = [frame["filename"] for frame in stacktrace["frames"]]
            if not paths:
                continue
            if not paths[0].endswith(".py"):
                return False, set()  # (is_python, stacktrace_paths)
            stacktrace_paths.update(paths)
        except Exception:
            # Best effort: a malformed stacktrace must not abort the analysis.
            # No project is in scope here, so the message must not pretend to
            # interpolate one.
            logger.exception("Error getting filenames from stacktrace")
    return True, stacktrace_paths  # (is_python, stacktrace_paths)


def get_stacktrace(data: NodeData) -> List[Mapping[str, Any]]:
    """
    Return the stacktraces contained in the given event data.

    Stacktraces attached to the entries under ``exception.values`` take
    precedence: when any such entries exist, only those with a non-empty
    ``frames`` list are returned, and the top-level ``stacktrace`` is not
    consulted (even if none of the entries qualified). Otherwise the
    top-level ``stacktrace`` is returned as a single-element list, provided it
    has frames. An empty list means no usable stacktrace was found.
    """
    exception_values = get_path(data, "exception", "values", filter=True)
    if exception_values:
        with_frames = []
        for exc in exception_values:
            if get_path(exc, "stacktrace", "frames"):
                with_frames.append(exc["stacktrace"])
        return with_frames

    top_level = data.get("stacktrace")
    if not top_level or not top_level.get("frames"):
        return []
    return [top_level]