From c25d0c82f5b7807b9e38d05498468e443626edae Mon Sep 17 00:00:00 2001 From: Snigdha Sharma Date: Wed, 19 Oct 2022 10:28:48 -0700 Subject: [PATCH 01/11] Add new task to find missing codemappings --- src/sentry/tasks/find_missing_codemappings.py | 89 ++++++ .../tasks/test_find_missing_codemappings.py | 295 ++++++++++++++++++ 2 files changed, 384 insertions(+) create mode 100644 src/sentry/tasks/find_missing_codemappings.py create mode 100644 tests/sentry/tasks/test_find_missing_codemappings.py diff --git a/src/sentry/tasks/find_missing_codemappings.py b/src/sentry/tasks/find_missing_codemappings.py new file mode 100644 index 00000000000000..4051044f894deb --- /dev/null +++ b/src/sentry/tasks/find_missing_codemappings.py @@ -0,0 +1,89 @@ +import logging +from ast import Tuple +from datetime import timedelta +from typing import List + +from django.utils import timezone + +from sentry.db.models.fields.node import NodeData +from sentry.models import Project +from sentry.models.group import Group +from sentry.models.organization import Organization, OrganizationStatus +from sentry.tasks.base import instrumented_task +from sentry.utils.safe import get_path + +PREFERRED_GROUP_OWNERS = 1 +PREFERRED_GROUP_OWNER_AGE = timedelta(days=7) + +logger = logging.getLogger("tasks.commit_context") + + +@instrumented_task( + name="sentry.tasks.find_missing_codemappings", + queue="find_missing_codemappings", + max_retries=0, # if we don't backfill it this time, we'll get it the next time +) +def find_missing_codemappings(**kwargs): + organizations = kwargs.get( + "organizations", Organization.objects.filter(status=OrganizationStatus.ACTIVE) + ) + + filename_maps = {} + for org in organizations: + projects = Project.objects.filter(organization=org, first_event__isnull=False) + + projects = [ + project + for project in projects + if Group.objects.filter( + project=project, last_seen__gte=timezone.now() - timedelta(days=7) + ).exists() + ] + + project_file_map = {project.slug: get_all_filenames(project) for project in projects} + filename_maps[org.slug] = project_file_map + return filename_maps + + +def get_all_filenames(project): + groups = Group.objects.filter( + project=project, last_seen__gte=timezone.now() - timedelta(days=14) + ) + + filenames = set() + for group in groups: + event = group.get_latest_event() + is_python_project, fn = get_filenames(project, event.data) + if not is_python_project: + return [] + filenames.update(fn) + + return list(filenames) + + +# Get the filenames from the stacktrace for the latest event for an issue. +def get_filenames(project: Project, data: NodeData) -> Tuple(bool, List[str]): + stacktraces = get_stacktrace(data) + filenames = set() + for st in stacktraces: + try: + fn = [frame["filename"] for frame in st["frames"]] + if fn[0].endswith(".py"): + filenames.update(fn) + else: + return False, [] # (is_python, filenames) + except Exception as e: + logger.log(logging.WARNING, f"Error getting filenames for project {project.slug}: {e}") + return True, filenames # (is_python, filenames) + + +def get_stacktrace(data: NodeData) -> List[str]: + exceptions = get_path(data, "exception", "values", filter=True) + if exceptions: + return [e["stacktrace"] for e in exceptions if get_path(e, "stacktrace", "frames")] + + stacktrace = data.get("stacktrace") + if stacktrace and stacktrace.get("frames"): + return [stacktrace] + + return None diff --git a/tests/sentry/tasks/test_find_missing_codemappings.py b/tests/sentry/tasks/test_find_missing_codemappings.py new file mode 100644 index 00000000000000..aa3792667498e6 --- /dev/null +++ b/tests/sentry/tasks/test_find_missing_codemappings.py @@ -0,0 +1,295 @@ +from sentry.models.organization import OrganizationStatus +from sentry.tasks.find_missing_codemappings import find_missing_codemappings +from sentry.testutils import TestCase +from sentry.testutils.helpers.datetime import before_now, iso_format + + +class TestCommitContext(TestCase): + def setUp(self): + self.organization = self.create_organization(status=OrganizationStatus.ACTIVE) + self.project = self.create_project(organization=self.organization) + + def test_finds_files_single_project(self): + self.store_event( + data={ + "message": "Kaboom!", + "platform": "python", + "timestamp": iso_format(before_now(days=1)), + "stacktrace": { + "frames": [ + { + "function": "handle_set_commits", + "abs_path": "/usr/src/sentry/src/sentry/tasks.py", + "module": "sentry.tasks", + "in_app": False, + "lineno": 30, + "filename": "sentry/tasks.py", + }, + { + "function": "set_commits", + "abs_path": "/usr/src/sentry/src/sentry/models/release.py", + "module": "sentry.models.release", + "in_app": True, + "lineno": 39, + "filename": "sentry/models/release.py", + }, + ] + }, + "fingerprint": ["put-me-in-the-control-group"], + }, + project_id=self.project.id, + ) + + with self.tasks(): + mapping = find_missing_codemappings(organizations=[self.organization]) + assert self.organization.slug in mapping + result = mapping[self.organization.slug] + assert self.project.slug in result + assert sorted(result[self.project.slug]) == [ + "sentry/models/release.py", + "sentry/tasks.py", + ] + + def test_finds_files_multiple_projects(self): + project_1 = self.create_project(organization=self.organization) + project_2 = self.create_project(organization=self.organization) + self.store_event( + data={ + "message": "Kaboom!", + "platform": "python", + "timestamp": iso_format(before_now(days=1)), + "stacktrace": { + "frames": [ + { + "function": "handle_set_commits", + "abs_path": "/usr/src/sentry/src/sentry/tasks.py", + "module": "sentry.tasks", + "in_app": False, + "lineno": 30, + "filename": "sentry/tasks.py", + }, + { + "function": "set_commits", + "abs_path": "/usr/src/sentry/src/sentry/models/release.py", + "module": "sentry.models.release", + "in_app": True, + "lineno": 39, + "filename": "sentry/models/release.py", + }, + ] + }, + "fingerprint": ["put-me-in-the-control-group"], + }, + project_id=project_1.id, + ) + + self.store_event( + data={ + "message": "Kaboom!", + "platform": "python", + "timestamp": iso_format(before_now(days=2)), + "stacktrace": { + "frames": [ + { + "function": "test_fn", + "abs_path": "/usr/src/sentry/src/sentry/test_file.py", + "module": "sentry.tasks", + "in_app": False, + "lineno": 30, + "filename": "sentry/test_file.py", + }, + { + "function": "test_fn_2", + "abs_path": "/usr/src/sentry/src/sentry/models/test_file.py", + "module": "sentry.models.release", + "in_app": True, + "lineno": 39, + "filename": "sentry/models/test_file.py", + }, + ] + }, + "fingerprint": ["put-me-in-the-control-group"], + }, + project_id=project_2.id, + ) + + with self.tasks(): + mapping = find_missing_codemappings(organizations=[self.organization]) + assert self.organization.slug in mapping + result = mapping[self.organization.slug] + assert project_1.slug in result + assert sorted(result[project_1.slug]) == [ + "sentry/models/release.py", + "sentry/tasks.py", + ] + assert project_2.slug in result + assert sorted(result[project_2.slug]) == [ + "sentry/models/test_file.py", + "sentry/test_file.py", + ] + + def test_finds_files_multiple_orgs(self): + new_org = self.create_organization() + new_project = self.create_project(organization=new_org) + self.store_event( + data={ + "message": "Kaboom!", + "platform": "python", + "timestamp": iso_format(before_now(days=1)), + "stacktrace": { + "frames": [ + { + "function": "handle_set_commits", + "abs_path": "/usr/src/sentry/src/sentry/tasks.py", + "module": "sentry.tasks", + "in_app": False, + "lineno": 30, + "filename": "sentry/tasks.py", + }, + { + "function": "set_commits", + "abs_path": "/usr/src/sentry/src/sentry/models/release.py", + "module": "sentry.models.release", + "in_app": True, + "lineno": 39, + "filename": "sentry/models/release.py", + }, + ] + }, + "fingerprint": ["put-me-in-the-control-group"], + }, + project_id=self.project.id, + ) + + self.store_event( + data={ + "message": "Kaboom!", + "platform": "python", + "timestamp": iso_format(before_now(days=2)), + "stacktrace": { + "frames": [ + { + "function": "test_fn", + "abs_path": "/usr/src/sentry/src/sentry/test_file.py", + "module": "sentry.tasks", + "in_app": False, + "lineno": 30, + "filename": "sentry/test_file.py", + }, + { + "function": "test_fn_2", + "abs_path": "/usr/src/sentry/src/sentry/models/test_file.py", + "module": "sentry.models.release", + "in_app": True, + "lineno": 39, + "filename": "sentry/models/test_file.py", + }, + ] + }, + "fingerprint": ["put-me-in-the-control-group"], + }, + project_id=new_project.id, + ) + + with self.tasks(): + mapping = find_missing_codemappings(organizations=[self.organization, new_org]) + assert self.organization.slug in mapping + result_1 = mapping[self.organization.slug] + assert self.project.slug in result_1 + assert sorted(result_1[self.project.slug]) == [ + "sentry/models/release.py", + "sentry/tasks.py", + ] + assert new_org.slug in mapping + result_2 = mapping[new_org.slug] + assert new_project.slug in result_2 + assert sorted(result_2[new_project.slug]) == [ + "sentry/models/test_file.py", + "sentry/test_file.py", + ] + + def test_skips_stale_projects(self): + self.store_event( + data={ + "message": "Kaboom!", + "platform": "python", + "timestamp": iso_format(before_now(days=8)), + "stacktrace": { + "frames": [ + { + "function": "handle_set_commits", + "abs_path": "/usr/src/sentry/src/sentry/tasks.py", + "module": "sentry.tasks", + "in_app": False, + "lineno": 30, + "filename": "sentry/tasks.py", + }, + { + "function": "set_commits", + "abs_path": "/usr/src/sentry/src/sentry/models/release.py", + "module": "sentry.models.release", + "in_app": True, + "lineno": 39, + "filename": "sentry/models/release.py", + }, + ] + }, + "fingerprint": ["put-me-in-the-control-group"], + }, + project_id=self.project.id, + ) + + with self.tasks(): + mapping = find_missing_codemappings() + assert self.organization.slug in mapping + result = mapping[self.organization.slug] + assert self.project.slug not in result + + def test_handles_duplicates(self): + self.store_event( + data={ + "message": "Kaboom!", + "platform": "python", + "timestamp": iso_format(before_now(days=1)), + "stacktrace": { + "frames": [ + { + "function": "handle_set_commits", + "abs_path": "/usr/src/sentry/src/sentry/tasks.py", + "module": "sentry.tasks", + "in_app": False, + "lineno": 30, + "filename": "sentry/tasks.py", + }, + { + "function": "set_commits", + "abs_path": "/usr/src/sentry/src/sentry/models/release.py", + "module": "sentry.models.release", + "in_app": True, + "lineno": 39, + "filename": "sentry/models/release.py", + }, + { + "function": "handle_set_commits_new", + "abs_path": "/usr/src/sentry/src/sentry/tasks.py", + "module": "sentry.tasks", + "in_app": False, + "lineno": 40, + "filename": "sentry/tasks.py", + }, + ] + }, + "fingerprint": ["put-me-in-the-control-group"], + }, + project_id=self.project.id, + ) + + with self.tasks(): + mapping = find_missing_codemappings(organizations=[self.organization]) + assert self.organization.slug in mapping + result = mapping[self.organization.slug] + assert self.project.slug in result + assert sorted(result[self.project.slug]) == [ + "sentry/models/release.py", + "sentry/tasks.py", + ] From bce98dd3d0e2b97455b47c30e355350ff6707b62 Mon Sep 17 00:00:00 2001 From: Snigdha Sharma <16563948+snigdhas@users.noreply.github.com> Date: Thu, 20 Oct 2022 09:28:37 -0700 Subject: [PATCH 02/11] Apply suggestions from code review Co-authored-by: Armen Zambrano G. --- src/sentry/tasks/find_missing_codemappings.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sentry/tasks/find_missing_codemappings.py b/src/sentry/tasks/find_missing_codemappings.py index 4051044f894deb..5ad4bac4d9c2c0 100644 --- a/src/sentry/tasks/find_missing_codemappings.py +++ b/src/sentry/tasks/find_missing_codemappings.py @@ -15,7 +15,7 @@ PREFERRED_GROUP_OWNERS = 1 PREFERRED_GROUP_OWNER_AGE = timedelta(days=7) -logger = logging.getLogger("tasks.commit_context") +logger = logging.getLogger("sentry.tasks.find_missing_codemappings") @instrumented_task( @@ -36,7 +36,7 @@ def find_missing_codemappings(**kwargs): project for project in projects if Group.objects.filter( - project=project, last_seen__gte=timezone.now() - timedelta(days=7) + project=project, last_seen__gte=timezone.now() - timedelta(days=PREFERRED_GROUP_OWNER_AGE) ).exists() ] @@ -45,20 +45,20 @@ def find_missing_codemappings(**kwargs): return filename_maps -def get_all_filenames(project): +def get_all_stacktrace_paths(project): groups = Group.objects.filter( - project=project, last_seen__gte=timezone.now() - timedelta(days=14) + project=project, last_seen__gte=timezone.now() - timedelta(days=GROUP_ANALYSIS_RANGE) ) - filenames = set() + all_stacktrace_paths = set() for group in groups: event = group.get_latest_event() - is_python_project, fn = get_filenames(project, event.data) + is_python_stacktrace, stacktrace_paths = get_stacktrace_paths(project, event.data) if not is_python_project: return [] - filenames.update(fn) + all_stacktrace_paths.update(stacktrace_paths) - return list(filenames) + return list(stacktrace_paths) # Get the filenames from the stacktrace for the latest event for an issue. @@ -73,7 +73,7 @@ def get_filenames(project: Project, data: NodeData) -> Tuple(bool, List[str]): else: return False, [] # (is_python, filenames) except Exception as e: - logger.log(logging.WARNING, f"Error getting filenames for project {project.slug}: {e}") + logger.exception("Error getting filenames for project {project.slug}") return True, filenames # (is_python, filenames) From 86190e802d1c892af25a798ba5e160de614e2f9f Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Thu, 20 Oct 2022 16:29:57 +0000 Subject: [PATCH 03/11] style(lint): Auto commit lint changes --- src/sentry/tasks/find_missing_codemappings.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sentry/tasks/find_missing_codemappings.py b/src/sentry/tasks/find_missing_codemappings.py index 5ad4bac4d9c2c0..002fbca3ea535e 100644 --- a/src/sentry/tasks/find_missing_codemappings.py +++ b/src/sentry/tasks/find_missing_codemappings.py @@ -36,7 +36,8 @@ def find_missing_codemappings(**kwargs): project for project in projects if Group.objects.filter( - project=project, last_seen__gte=timezone.now() - timedelta(days=PREFERRED_GROUP_OWNER_AGE) + project=project, + last_seen__gte=timezone.now() - timedelta(days=PREFERRED_GROUP_OWNER_AGE), ).exists() ] From 708d2616c99355e9f1be1fc215c216410e901863 Mon Sep 17 00:00:00 2001 From: Snigdha Sharma Date: Thu, 20 Oct 2022 09:25:51 -0700 Subject: [PATCH 04/11] Add typing and fix variable names --- mypy.ini | 1 + src/sentry/tasks/find_missing_codemappings.py | 58 ++++++++++--------- .../tasks/test_find_missing_codemappings.py | 14 ++--- 3 files changed, 38 insertions(+), 35 deletions(-) diff --git a/mypy.ini b/mypy.ini index 3a36f06e26b0bc..3284f9fccc3a43 100644 --- a/mypy.ini +++ b/mypy.ini @@ -113,6 +113,7 @@ files = fixtures/mypy-stubs, src/sentry/tasks/store.py, src/sentry/tasks/symbolication.py, src/sentry/tasks/update_user_reports.py, + src/sentry/tasks/find_missing_codemappings.py, src/sentry/testutils/modelmanifest.py, src/sentry/testutils/silo.py, src/sentry/types/region.py, diff --git a/src/sentry/tasks/find_missing_codemappings.py b/src/sentry/tasks/find_missing_codemappings.py index 002fbca3ea535e..a96881a5e9571a 100644 --- a/src/sentry/tasks/find_missing_codemappings.py +++ b/src/sentry/tasks/find_missing_codemappings.py @@ -1,7 +1,6 @@ import logging -from ast import Tuple from datetime import timedelta -from typing import List +from typing import Any, List, Mapping, Optional, Set, Tuple from django.utils import timezone @@ -12,21 +11,22 @@ from sentry.tasks.base import instrumented_task from sentry.utils.safe import get_path -PREFERRED_GROUP_OWNERS = 1 -PREFERRED_GROUP_OWNER_AGE = timedelta(days=7) +ACTIVE_PROJECT_THRESHOLD = timedelta(days=7) +GROUP_ANALYSIS_RANGE = timedelta(days=14) logger = logging.getLogger("sentry.tasks.find_missing_codemappings") -@instrumented_task( +@instrumented_task( # type: ignore name="sentry.tasks.find_missing_codemappings", queue="find_missing_codemappings", max_retries=0, # if we don't backfill it this time, we'll get it the next time ) -def find_missing_codemappings(**kwargs): - organizations = kwargs.get( - "organizations", Organization.objects.filter(status=OrganizationStatus.ACTIVE) - ) +def find_missing_codemappings( + organizations: Optional[List[Organization]] = None, +) -> Mapping[str, Mapping[str, List[str]]]: + if organizations is None: + organizations = Organization.objects.filter(status=OrganizationStatus.ACTIVE) filename_maps = {} for org in organizations: @@ -37,48 +37,50 @@ def find_missing_codemappings(**kwargs): for project in projects if Group.objects.filter( project=project, - last_seen__gte=timezone.now() - timedelta(days=PREFERRED_GROUP_OWNER_AGE), + last_seen__gte=timezone.now() - ACTIVE_PROJECT_THRESHOLD, ).exists() ] - project_file_map = {project.slug: get_all_filenames(project) for project in projects} + project_file_map = {project.slug: get_all_stacktrace_paths(project) for project in projects} filename_maps[org.slug] = project_file_map return filename_maps -def get_all_stacktrace_paths(project): +def get_all_stacktrace_paths(project: Project) -> List[str]: groups = Group.objects.filter( - project=project, last_seen__gte=timezone.now() - timedelta(days=GROUP_ANALYSIS_RANGE) + project=project, last_seen__gte=timezone.now() - GROUP_ANALYSIS_RANGE ) all_stacktrace_paths = set() for group in groups: event = group.get_latest_event() - is_python_stacktrace, stacktrace_paths = get_stacktrace_paths(project, event.data) - if not is_python_project: + is_python_stacktrace, stacktrace_paths = get_stacktrace_paths(event.data) + if not is_python_stacktrace: return [] all_stacktrace_paths.update(stacktrace_paths) - return list(stacktrace_paths) + return list(all_stacktrace_paths) -# Get the filenames from the stacktrace for the latest event for an issue. -def get_filenames(project: Project, data: NodeData) -> Tuple(bool, List[str]): +# Get the stacktrace_paths from the stacktrace for the latest event for an issue. +def get_stacktrace_paths(data: NodeData) -> Tuple[bool, Set[str]]: stacktraces = get_stacktrace(data) - filenames = set() - for st in stacktraces: + stacktrace_paths = set() + for stacktrace in stacktraces: try: - fn = [frame["filename"] for frame in st["frames"]] - if fn[0].endswith(".py"): - filenames.update(fn) + paths = [frame["filename"] for frame in stacktrace["frames"]] + if len(paths) == 0: + continue + if paths[0].endswith(".py"): + stacktrace_paths.update(paths) else: - return False, [] # (is_python, filenames) - except Exception as e: + return False, set() # (is_python, stacktrace_paths) + except Exception: logger.exception("Error getting filenames for project {project.slug}") - return True, filenames # (is_python, filenames) + return True, stacktrace_paths # (is_python, stacktrace_paths) -def get_stacktrace(data: NodeData) -> List[str]: +def get_stacktrace(data: NodeData) -> List[Mapping[str, Any]]: exceptions = get_path(data, "exception", "values", filter=True) if exceptions: return [e["stacktrace"] for e in exceptions if get_path(e, "stacktrace", "frames")] @@ -87,4 +89,4 @@ def get_stacktrace(data: NodeData) -> List[str]: if stacktrace and stacktrace.get("frames"): return [stacktrace] - return None + return [] diff --git a/tests/sentry/tasks/test_find_missing_codemappings.py b/tests/sentry/tasks/test_find_missing_codemappings.py index aa3792667498e6..3caa70cf727a50 100644 --- a/tests/sentry/tasks/test_find_missing_codemappings.py +++ b/tests/sentry/tasks/test_find_missing_codemappings.py @@ -9,7 +9,7 @@ def setUp(self): self.organization = self.create_organization(status=OrganizationStatus.ACTIVE) self.project = self.create_project(organization=self.organization) - def test_finds_files_single_project(self): + def test_finds_stacktrace_paths_single_project(self): self.store_event( data={ "message": "Kaboom!", @@ -41,7 +41,7 @@ def test_finds_files_single_project(self): ) with self.tasks(): - mapping = find_missing_codemappings(organizations=[self.organization]) + mapping = find_missing_codemappings([self.organization]) assert self.organization.slug in mapping result = mapping[self.organization.slug] assert self.project.slug in result @@ -50,7 +50,7 @@ def test_finds_files_single_project(self): "sentry/tasks.py", ] - def test_finds_files_multiple_projects(self): + def test_finds_stacktrace_paths_multiple_projects(self): project_1 = self.create_project(organization=self.organization) project_2 = self.create_project(organization=self.organization) self.store_event( @@ -114,7 +114,7 @@ def test_finds_files_multiple_projects(self): ) with self.tasks(): - mapping = find_missing_codemappings(organizations=[self.organization]) + mapping = find_missing_codemappings([self.organization]) assert self.organization.slug in mapping result = mapping[self.organization.slug] assert project_1.slug in result @@ -128,7 +128,7 @@ def test_finds_files_multiple_projects(self): "sentry/test_file.py", ] - def test_finds_files_multiple_orgs(self): + def test_finds_stacktrace_paths_multiple_orgs(self): new_org = self.create_organization() new_project = self.create_project(organization=new_org) self.store_event( @@ -192,7 +192,7 @@ def test_finds_files_multiple_orgs(self): ) with self.tasks(): - mapping = find_missing_codemappings(organizations=[self.organization, new_org]) + mapping = find_missing_codemappings([self.organization, new_org]) assert self.organization.slug in mapping result_1 = mapping[self.organization.slug] assert self.project.slug in result_1 @@ -285,7 +285,7 @@ def test_handles_duplicates(self): ) with self.tasks(): - mapping = find_missing_codemappings(organizations=[self.organization]) + mapping = find_missing_codemappings([self.organization]) assert self.organization.slug in mapping result = mapping[self.organization.slug] assert self.project.slug in result From e3b6684012e5fbf7032966f9cdff09e1b331514b Mon Sep 17 00:00:00 2001 From: Snigdha Sharma Date: Thu, 20 Oct 2022 13:36:13 -0700 Subject: [PATCH 05/11] Add docstring comments --- src/sentry/tasks/find_missing_codemappings.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/sentry/tasks/find_missing_codemappings.py b/src/sentry/tasks/find_missing_codemappings.py index a96881a5e9571a..cb660cfc67ffcf 100644 --- a/src/sentry/tasks/find_missing_codemappings.py +++ b/src/sentry/tasks/find_missing_codemappings.py @@ -25,13 +25,19 @@ def find_missing_codemappings( organizations: Optional[List[Organization]] = None, ) -> Mapping[str, Mapping[str, List[str]]]: + """ + Generate a map of projects to stacktrace paths for specified organizations, + or all active organizations if unspecified. + + This filters out projects have not had an event in the last 7 days or have + non-python files in the stacktrace. + """ if organizations is None: organizations = Organization.objects.filter(status=OrganizationStatus.ACTIVE) filename_maps = {} for org in organizations: projects = Project.objects.filter(organization=org, first_event__isnull=False) - projects = [ project for project in projects @@ -62,8 +68,10 @@ def get_all_stacktrace_paths(project: Project) -> List[str]: return list(all_stacktrace_paths) -# Get the stacktrace_paths from the stacktrace for the latest event for an issue. def get_stacktrace_paths(data: NodeData) -> Tuple[bool, Set[str]]: + """ + Get the stacktrace_paths from the stacktrace for the latest event for an issue. + """ stacktraces = get_stacktrace(data) stacktrace_paths = set() for stacktrace in stacktraces: From 82bd51caeb5b4b2ed669a9b3713e704af5646a2a Mon Sep 17 00:00:00 2001 From: Snigdha Sharma Date: Thu, 20 Oct 2022 13:49:40 -0700 Subject: [PATCH 06/11] Define new task --- src/sentry/conf/server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 675ee185271b8a..a2b95e78b91883 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -684,6 +684,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): Queue("replays.delete_replay", routing_key="replays.delete_replay"), Queue("counters-0", routing_key="counters-0"), Queue("triggers-0", routing_key="triggers-0"), + Queue("find-missing-codemappings", routing_key="find-missing-codemappings"), ] for queue in CELERY_QUEUES: From d065a56617d06c91f046ca19750eb43faf8670b7 Mon Sep 17 00:00:00 2001 From: Snigdha Sharma <16563948+snigdhas@users.noreply.github.com> Date: Fri, 21 Oct 2022 09:52:59 -0700 Subject: [PATCH 07/11] Update tests/sentry/tasks/test_find_missing_codemappings.py Co-authored-by: Armen Zambrano G. --- tests/sentry/tasks/test_find_missing_codemappings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sentry/tasks/test_find_missing_codemappings.py b/tests/sentry/tasks/test_find_missing_codemappings.py index 3caa70cf727a50..4c0ee35c225973 100644 --- a/tests/sentry/tasks/test_find_missing_codemappings.py +++ b/tests/sentry/tasks/test_find_missing_codemappings.py @@ -245,7 +245,7 @@ def test_skips_stale_projects(self): result = mapping[self.organization.slug] assert self.project.slug not in result - def test_handles_duplicates(self): + def test_handle_duplicate_frame_filenames_within_same_stacktrace(self): self.store_event( data={ "message": "Kaboom!", From 0e065be138eb76b90a72952870b8231daa35b994 Mon Sep 17 00:00:00 2001 From: Snigdha Sharma <16563948+snigdhas@users.noreply.github.com> Date: Fri, 21 Oct 2022 09:53:52 -0700 Subject: [PATCH 08/11] Apply suggestions from code review Co-authored-by: Armen Zambrano G. --- tests/sentry/tasks/test_find_missing_codemappings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sentry/tasks/test_find_missing_codemappings.py b/tests/sentry/tasks/test_find_missing_codemappings.py index 4c0ee35c225973..71cd8a7c3eff0b 100644 --- a/tests/sentry/tasks/test_find_missing_codemappings.py +++ b/tests/sentry/tasks/test_find_missing_codemappings.py @@ -43,7 +43,7 @@ def test_finds_stacktrace_paths_single_project(self): with self.tasks(): mapping = find_missing_codemappings([self.organization]) assert self.organization.slug in mapping - result = mapping[self.organization.slug] + project_to_stacktrace_paths = mapping[self.organization.slug] assert self.project.slug in result assert sorted(result[self.project.slug]) == [ "sentry/models/release.py", From e3e86d5980a2a2ca80879397f2b77589005c4c45 Mon Sep 17 00:00:00 2001 From: Snigdha Sharma Date: Fri, 21 Oct 2022 11:51:40 -0700 Subject: [PATCH 09/11] Fix celery queue --- src/sentry/conf/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index a2b95e78b91883..5f9646ff968413 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -684,7 +684,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): Queue("replays.delete_replay", routing_key="replays.delete_replay"), Queue("counters-0", routing_key="counters-0"), Queue("triggers-0", routing_key="triggers-0"), - Queue("find-missing-codemappings", routing_key="find-missing-codemappings"), + Queue("find_missing_codemappings", routing_key="find_missing_codemappings"), ] for queue in CELERY_QUEUES: From 4694a3885b0073f4c10c6e529beaf15dfcc4f69e Mon Sep 17 00:00:00 2001 From: Snigdha Sharma Date: Fri, 21 Oct 2022 11:51:57 -0700 Subject: [PATCH 10/11] Reduce code duplication in tests --- .../tasks/test_find_missing_codemappings.py | 318 +++++------------- 1 file changed, 84 insertions(+), 234 deletions(-) diff --git a/tests/sentry/tasks/test_find_missing_codemappings.py b/tests/sentry/tasks/test_find_missing_codemappings.py index 71cd8a7c3eff0b..e8ad380c7313b2 100644 --- a/tests/sentry/tasks/test_find_missing_codemappings.py +++ b/tests/sentry/tasks/test_find_missing_codemappings.py @@ -1,3 +1,5 @@ +from copy import deepcopy + from sentry.models.organization import OrganizationStatus from sentry.tasks.find_missing_codemappings import find_missing_codemappings from sentry.testutils import TestCase @@ -8,122 +10,68 @@ class TestCommitContext(TestCase): def setUp(self): self.organization = self.create_organization(status=OrganizationStatus.ACTIVE) self.project = self.create_project(organization=self.organization) + self.test_data_1 = { + "message": "Kaboom!", + "platform": "python", + "timestamp": iso_format(before_now(days=1)), + "stacktrace": { + "frames": [ + { + "function": "handle_set_commits", + "abs_path": "/usr/src/sentry/src/sentry/tasks.py", + "module": "sentry.tasks", + "in_app": False, + "lineno": 30, + "filename": "sentry/tasks.py", + }, + { + "function": "set_commits", + "abs_path": "/usr/src/sentry/src/sentry/models/release.py", + "module": "sentry.models.release", + "in_app": True, + "lineno": 39, + "filename": "sentry/models/release.py", + }, + ] + }, + "fingerprint": ["put-me-in-the-control-group"], + } + self.test_data_2 = deepcopy(self.test_data_1) + self.test_data_2["stacktrace"]["frames"][0]["filename"] = "sentry/test_file.py" + self.test_data_2["stacktrace"]["frames"][1]["filename"] = "sentry/models/test_file.py" + self.test_data_2["fingerprint"] = ["new-group"] + self.test_data_2["timestamp"] = iso_format(before_now(days=2)) def test_finds_stacktrace_paths_single_project(self): - self.store_event( - data={ - "message": "Kaboom!", - "platform": "python", - "timestamp": iso_format(before_now(days=1)), - "stacktrace": { - "frames": [ - { - "function": "handle_set_commits", - "abs_path": "/usr/src/sentry/src/sentry/tasks.py", - "module": "sentry.tasks", - "in_app": False, - "lineno": 30, - "filename": "sentry/tasks.py", - }, - { - "function": "set_commits", - "abs_path": "/usr/src/sentry/src/sentry/models/release.py", - "module": "sentry.models.release", - "in_app": True, - "lineno": 39, - "filename": "sentry/models/release.py", - }, - ] - }, - "fingerprint": ["put-me-in-the-control-group"], - }, - project_id=self.project.id, - ) + self.store_event(data=self.test_data_1, project_id=self.project.id) with self.tasks(): mapping = find_missing_codemappings([self.organization]) assert self.organization.slug in mapping - project_to_stacktrace_paths = mapping[self.organization.slug] - assert self.project.slug in result - assert sorted(result[self.project.slug]) == [ + + stacktrace_paths = mapping[self.organization.slug] + assert self.project.slug in stacktrace_paths + assert sorted(stacktrace_paths[self.project.slug]) == [ "sentry/models/release.py", "sentry/tasks.py", ] def test_finds_stacktrace_paths_multiple_projects(self): - project_1 = self.create_project(organization=self.organization) project_2 = self.create_project(organization=self.organization) - self.store_event( - data={ - "message": "Kaboom!", - "platform": "python", - "timestamp": iso_format(before_now(days=1)), - "stacktrace": { - "frames": [ - { - "function": "handle_set_commits", - "abs_path": "/usr/src/sentry/src/sentry/tasks.py", - "module": "sentry.tasks", - "in_app": False, - "lineno": 30, - "filename": "sentry/tasks.py", - }, - { - "function": "set_commits", - "abs_path": "/usr/src/sentry/src/sentry/models/release.py", - "module": "sentry.models.release", - "in_app": True, - "lineno": 39, - "filename": "sentry/models/release.py", - }, - ] - }, - "fingerprint": ["put-me-in-the-control-group"], - }, - project_id=project_1.id, - ) - - self.store_event( - data={ - "message": "Kaboom!", - "platform": "python", - "timestamp": iso_format(before_now(days=2)), - "stacktrace": { - "frames": [ - { - "function": "test_fn", - "abs_path": "/usr/src/sentry/src/sentry/test_file.py", - "module": "sentry.tasks", - "in_app": False, - "lineno": 30, - "filename": "sentry/test_file.py", - }, - { - "function": "test_fn_2", - "abs_path": "/usr/src/sentry/src/sentry/models/test_file.py", - "module": "sentry.models.release", - "in_app": True, - "lineno": 39, - "filename": "sentry/models/test_file.py", - }, - ] - }, - "fingerprint": ["put-me-in-the-control-group"], - }, - project_id=project_2.id, - ) + self.store_event(data=self.test_data_1, project_id=self.project.id) + self.store_event(data=self.test_data_2, project_id=project_2.id) with self.tasks(): mapping = find_missing_codemappings([self.organization]) assert self.organization.slug in mapping - result = mapping[self.organization.slug] - assert project_1.slug in result - assert sorted(result[project_1.slug]) == [ + stacktrace_paths = mapping[self.organization.slug] + assert self.project.slug in stacktrace_paths + assert sorted(stacktrace_paths[self.project.slug]) == [ "sentry/models/release.py", "sentry/tasks.py", ] - assert project_2.slug in result - assert sorted(result[project_2.slug]) == [ + assert project_2.slug in stacktrace_paths + assert sorted(stacktrace_paths[project_2.slug]) == [ "sentry/models/test_file.py", "sentry/test_file.py", ] @@ -131,165 +79,67 @@ def test_finds_stacktrace_paths_multiple_projects(self): def test_finds_stacktrace_paths_multiple_orgs(self): new_org = self.create_organization() new_project = self.create_project(organization=new_org) - self.store_event( - data={ - "message": "Kaboom!", - "platform": "python", - "timestamp": iso_format(before_now(days=1)), - "stacktrace": { - "frames": [ - { - "function": "handle_set_commits", - "abs_path": "/usr/src/sentry/src/sentry/tasks.py", - "module": "sentry.tasks", - "in_app": False, - "lineno": 30, - "filename": "sentry/tasks.py", - }, - { - "function": "set_commits", - "abs_path": "/usr/src/sentry/src/sentry/models/release.py", - "module": "sentry.models.release", - "in_app": True, - "lineno": 39, - "filename": "sentry/models/release.py", - }, - ] - }, - "fingerprint": ["put-me-in-the-control-group"], - }, - project_id=self.project.id, - ) - - self.store_event( - data={ - "message": "Kaboom!", - "platform": "python", - "timestamp": iso_format(before_now(days=2)), - "stacktrace": { - "frames": [ - { - "function": "test_fn", - "abs_path": "/usr/src/sentry/src/sentry/test_file.py", - "module": "sentry.tasks", - "in_app": False, - "lineno": 30, - "filename": "sentry/test_file.py", - }, - { - "function": "test_fn_2", - "abs_path": "/usr/src/sentry/src/sentry/models/test_file.py", - "module": "sentry.models.release", - "in_app": True, - "lineno": 39, - "filename": "sentry/models/test_file.py", - }, - ] - }, - "fingerprint": ["put-me-in-the-control-group"], - }, - project_id=new_project.id, - ) + self.store_event(self.test_data_1, project_id=self.project.id) + self.store_event(data=self.test_data_2, project_id=new_project.id) with self.tasks(): mapping = find_missing_codemappings([self.organization, new_org]) assert self.organization.slug in mapping - result_1 = mapping[self.organization.slug] - assert self.project.slug in result_1 - assert sorted(result_1[self.project.slug]) == [ + stacktrace_paths = mapping[self.organization.slug] + assert self.project.slug in stacktrace_paths + assert sorted(stacktrace_paths[self.project.slug]) == [ "sentry/models/release.py", "sentry/tasks.py", ] assert new_org.slug in mapping - result_2 = mapping[new_org.slug] - assert new_project.slug in result_2 - assert sorted(result_2[new_project.slug]) == [ + stacktrace_paths = mapping[new_org.slug] + assert new_project.slug in stacktrace_paths + assert sorted(stacktrace_paths[new_project.slug]) == [ "sentry/models/test_file.py", "sentry/test_file.py", ] def test_skips_stale_projects(self): - self.store_event( - data={ - "message": "Kaboom!", - "platform": "python", - "timestamp": iso_format(before_now(days=8)), - "stacktrace": { - "frames": [ - { - "function": "handle_set_commits", - "abs_path": "/usr/src/sentry/src/sentry/tasks.py", - "module": "sentry.tasks", - "in_app": False, - "lineno": 30, - "filename": "sentry/tasks.py", - }, - { - "function": "set_commits", - "abs_path": "/usr/src/sentry/src/sentry/models/release.py", - "module": "sentry.models.release", - "in_app": True, - "lineno": 39, - "filename": "sentry/models/release.py", - }, - ] - }, - "fingerprint": ["put-me-in-the-control-group"], - }, - project_id=self.project.id, - ) + stale_event = self.test_data_1 + stale_event["timestamp"] = iso_format(before_now(days=8)) + self.store_event(data=stale_event, project_id=self.project.id) with self.tasks(): mapping = find_missing_codemappings() assert self.organization.slug in mapping - result = mapping[self.organization.slug] - assert self.project.slug not in result + stacktrace_paths = mapping[self.organization.slug] + assert self.project.slug not in stacktrace_paths - def test_handle_duplicate_frame_filenames_within_same_stacktrace(self): - self.store_event( - data={ - "message": "Kaboom!", - "platform": "python", - "timestamp": iso_format(before_now(days=1)), - "stacktrace": { - "frames": [ - { - "function": "handle_set_commits", - "abs_path": "/usr/src/sentry/src/sentry/tasks.py", - "module": "sentry.tasks", - "in_app": False, - "lineno": 30, - "filename": "sentry/tasks.py", - }, - { - "function": "set_commits", - "abs_path": "/usr/src/sentry/src/sentry/models/release.py", - "module": "sentry.models.release", - "in_app": True, - "lineno": 39, - "filename": "sentry/models/release.py", - }, - { - "function": "handle_set_commits_new", - "abs_path": "/usr/src/sentry/src/sentry/tasks.py", - "module": "sentry.tasks", - "in_app": False, - "lineno": 40, - "filename": "sentry/tasks.py", - }, - ] - }, - "fingerprint": ["put-me-in-the-control-group"], - }, - project_id=self.project.id, - ) + def test_skips_outdated_events(self): + stale_event = self.test_data_2 + stale_event["timestamp"] = iso_format(before_now(days=16)) + self.store_event(data=self.test_data_1, project_id=self.project.id) + self.store_event(data=stale_event, project_id=self.project.id) with self.tasks(): mapping = find_missing_codemappings([self.organization]) assert self.organization.slug in mapping - result = mapping[self.organization.slug] - assert self.project.slug in result - assert sorted(result[self.project.slug]) == [ + stacktrace_paths = mapping[self.organization.slug] + assert self.project.slug in stacktrace_paths + assert sorted(stacktrace_paths[self.project.slug]) == [ "sentry/models/release.py", "sentry/tasks.py", ] + + def test_handle_duplicate_filenames_in_a_project(self): + self.store_event(data=self.test_data_1, project_id=self.project.id) + duplicate_event = self.test_data_2 + duplicate_event["stacktrace"]["frames"].append(self.test_data_1["stacktrace"]["frames"][0]) + self.store_event(data=duplicate_event, project_id=self.project.id) + + with self.tasks(): + mapping = find_missing_codemappings([self.organization]) + assert self.organization.slug in mapping + stacktrace_paths = mapping[self.organization.slug] + assert self.project.slug in stacktrace_paths + assert sorted(stacktrace_paths[self.project.slug]) == [ + "sentry/models/release.py", + "sentry/models/test_file.py", + "sentry/tasks.py", + "sentry/test_file.py", + ] From 8d1a0e12e0db5a5856158c8e1d8cd230db62d13e Mon Sep 17 00:00:00 2001 From: Snigdha Sharma Date: Fri, 21 Oct 2022 13:07:41 -0700 Subject: [PATCH 11/11] Rename files --- mypy.ini | 2 +- src/sentry/conf/server.py | 2 +- ...odemappings.py => derive_code_mappings.py} | 8 ++++---- ...ppings.py => test_derive_code_mappings.py} | 20 +++++++++---------- 4 files changed, 16 insertions(+), 16 deletions(-) rename src/sentry/tasks/{find_missing_codemappings.py => derive_code_mappings.py} (94%) rename tests/sentry/tasks/{test_find_missing_codemappings.py => test_derive_code_mappings.py} (90%) diff --git a/mypy.ini b/mypy.ini index 3284f9fccc3a43..900c4b24b13190 100644 --- a/mypy.ini +++ b/mypy.ini @@ -113,7 +113,7 @@ files = fixtures/mypy-stubs, src/sentry/tasks/store.py, src/sentry/tasks/symbolication.py, src/sentry/tasks/update_user_reports.py, - src/sentry/tasks/find_missing_codemappings.py, + src/sentry/tasks/derive_code_mappings.py, src/sentry/testutils/modelmanifest.py, src/sentry/testutils/silo.py, src/sentry/types/region.py, diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 5f9646ff968413..ba055ffc3d8ca0 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -684,7 +684,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): Queue("replays.delete_replay", routing_key="replays.delete_replay"), Queue("counters-0", routing_key="counters-0"), Queue("triggers-0", routing_key="triggers-0"), - Queue("find_missing_codemappings", routing_key="find_missing_codemappings"), + Queue("derive_code_mappings", routing_key="derive_code_mappings"), ] for queue in CELERY_QUEUES: diff --git a/src/sentry/tasks/find_missing_codemappings.py b/src/sentry/tasks/derive_code_mappings.py similarity index 94% rename from src/sentry/tasks/find_missing_codemappings.py rename to src/sentry/tasks/derive_code_mappings.py index cb660cfc67ffcf..5daf416a053865 100644 --- a/src/sentry/tasks/find_missing_codemappings.py +++ b/src/sentry/tasks/derive_code_mappings.py @@ -14,15 +14,15 @@ ACTIVE_PROJECT_THRESHOLD = timedelta(days=7) GROUP_ANALYSIS_RANGE = timedelta(days=14) -logger = logging.getLogger("sentry.tasks.find_missing_codemappings") +logger = logging.getLogger("sentry.tasks.derive_code_mappings") @instrumented_task( # type: ignore - name="sentry.tasks.find_missing_codemappings", - queue="find_missing_codemappings", + name="sentry.tasks.derive_code_mappings.identify_stacktrace_paths", + queue="derive_code_mappings", max_retries=0, # if we don't backfill it this time, we'll get it the next time ) -def find_missing_codemappings( +def identify_stacktrace_paths( organizations: Optional[List[Organization]] = None, ) -> Mapping[str, Mapping[str, List[str]]]: """ diff --git a/tests/sentry/tasks/test_find_missing_codemappings.py b/tests/sentry/tasks/test_derive_code_mappings.py similarity index 90% rename from tests/sentry/tasks/test_find_missing_codemappings.py rename to tests/sentry/tasks/test_derive_code_mappings.py index e8ad380c7313b2..d5d4cf335fc7cf 100644 --- a/tests/sentry/tasks/test_find_missing_codemappings.py +++ b/tests/sentry/tasks/test_derive_code_mappings.py @@ -1,7 +1,7 @@ from copy import deepcopy from sentry.models.organization import OrganizationStatus -from sentry.tasks.find_missing_codemappings import find_missing_codemappings +from sentry.tasks.derive_code_mappings import identify_stacktrace_paths from sentry.testutils import TestCase from sentry.testutils.helpers.datetime import before_now, iso_format @@ -46,7 +46,7 @@ def test_finds_stacktrace_paths_single_project(self): self.store_event(data=self.test_data_1, project_id=self.project.id) with self.tasks(): - mapping = find_missing_codemappings([self.organization]) + mapping = identify_stacktrace_paths([self.organization]) assert self.organization.slug in mapping stacktrace_paths = mapping[self.organization.slug] @@ -62,7 +62,7 @@ def test_finds_stacktrace_paths_multiple_projects(self): self.store_event(data=self.test_data_2, project_id=project_2.id) with self.tasks(): - mapping = find_missing_codemappings([self.organization]) + mapping = identify_stacktrace_paths([self.organization]) assert self.organization.slug in mapping stacktrace_paths = mapping[self.organization.slug] assert self.project.slug in stacktrace_paths @@ -83,7 +83,7 @@ def test_finds_stacktrace_paths_multiple_orgs(self): self.store_event(data=self.test_data_2, project_id=new_project.id) with self.tasks(): - mapping = find_missing_codemappings([self.organization, new_org]) + mapping = identify_stacktrace_paths([self.organization, new_org]) assert self.organization.slug in mapping stacktrace_paths = mapping[self.organization.slug] assert self.project.slug in stacktrace_paths @@ -100,24 +100,24 @@ def test_finds_stacktrace_paths_multiple_orgs(self): ] def test_skips_stale_projects(self): - stale_event = self.test_data_1 + stale_event = deepcopy(self.test_data_1) stale_event["timestamp"] = iso_format(before_now(days=8)) self.store_event(data=stale_event, project_id=self.project.id) with self.tasks(): - mapping = find_missing_codemappings() + mapping = identify_stacktrace_paths() assert self.organization.slug in mapping stacktrace_paths = mapping[self.organization.slug] assert self.project.slug not in stacktrace_paths def test_skips_outdated_events(self): - stale_event = self.test_data_2 + stale_event = deepcopy(self.test_data_2) stale_event["timestamp"] = iso_format(before_now(days=16)) self.store_event(data=self.test_data_1, project_id=self.project.id) self.store_event(data=stale_event, project_id=self.project.id) with self.tasks(): - mapping = find_missing_codemappings([self.organization]) + mapping = identify_stacktrace_paths([self.organization]) assert self.organization.slug in mapping stacktrace_paths = mapping[self.organization.slug] assert self.project.slug in stacktrace_paths @@ -128,12 +128,12 @@ def test_skips_outdated_events(self): def test_handle_duplicate_filenames_in_a_project(self): self.store_event(data=self.test_data_1, project_id=self.project.id) - duplicate_event = self.test_data_2 + duplicate_event = deepcopy(self.test_data_2) duplicate_event["stacktrace"]["frames"].append(self.test_data_1["stacktrace"]["frames"][0]) self.store_event(data=duplicate_event, project_id=self.project.id) with self.tasks(): - mapping = find_missing_codemappings([self.organization]) + mapping = identify_stacktrace_paths([self.organization]) assert self.organization.slug in mapping stacktrace_paths = mapping[self.organization.slug] assert self.project.slug in stacktrace_paths