-
-
Notifications
You must be signed in to change notification settings - Fork 4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(code_mappings): Files that do not match the package name should not match #40892
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,15 +31,17 @@ def __init__(self, stacktrace_frame_file_path: str) -> None: | |
if stacktrace_frame_file_path.find("/") > -1: | ||
# XXX: This code assumes that all stack trace frames are part of a module | ||
self.root, self.file_and_dir_path = stacktrace_frame_file_path.split("/", 1) | ||
# Does it have more than one level? | ||
|
||
# Check that it does have at least a dir | ||
if self.file_and_dir_path.find("/") > -1: | ||
self.dir_path, self.file_name = self.file_and_dir_path.rsplit("/", 1) | ||
else: | ||
# A package name + a file (e.g. requests/models.py) | ||
# A package name, a file but no dir (e.g. requests/models.py) | ||
self.dir_path = "" | ||
self.file_name = self.file_and_dir_path | ||
else: | ||
self.root = "" | ||
self.dir_path = "" | ||
self.file_and_dir_path = self.full_path | ||
self.file_name = self.full_path | ||
|
||
|
@@ -123,6 +125,16 @@ def _find_code_mapping(self, frame_filename: FrameFilename) -> Union[CodeMapping | |
|
||
return _code_mappings[0] | ||
|
||
def _get_code_mapping_source_path(self, src_file: str, frame_filename: FrameFilename) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This adds the NotImplementedError functionality. |
||
"""Generate the source path of a code mapping | ||
e.g. src/sentry/identity/oauth2.py -> src/sentry | ||
e.g. ssl.py -> raise NotImplementedError | ||
""" | ||
if frame_filename.dir_path != "": | ||
return src_file.rsplit(frame_filename.dir_path)[0].rstrip("/") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the original logic. See down below: - source_path=matched_files[0].rsplit(frame_filename.dir_path)[0].rstrip("/"), |
||
else: | ||
raise NotImplementedError("We do not support top level files.") | ||
|
||
def _generate_code_mapping_from_tree( | ||
self, | ||
repo_full_name: str, | ||
|
@@ -140,8 +152,9 @@ def _generate_code_mapping_from_tree( | |
CodeMapping( | ||
repo=self.trees[repo_full_name].repo, | ||
stacktrace_root=frame_filename.root, # sentry | ||
# e.g. src/sentry/identity/oauth2.py -> src/sentry | ||
source_path=matched_files[0].rsplit(frame_filename.dir_path)[0].rstrip("/"), | ||
source_path=self._get_code_mapping_source_path( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using |
||
matched_files[0], frame_filename | ||
), | ||
) | ||
] | ||
if len(matched_files) == 1 | ||
|
@@ -169,4 +182,15 @@ def _potential_match(self, src_file: str, frame_filename: FrameFilename) -> bool | |
if self._matches_current_code_mappings(src_file, frame_filename): | ||
return False | ||
|
||
return src_file.rfind(frame_filename.file_and_dir_path) > -1 | ||
match = False | ||
# For instance: | ||
# src_file: "src/sentry/integrations/slack/client.py" | ||
# frame_filename.full_path: "sentry/integrations/slack/client.py" | ||
split = src_file.split(frame_filename.file_and_dir_path) | ||
if len(split) > 1: | ||
# This is important because we only want stack frames to match when they | ||
# include the exact package name | ||
# e.g. raven/base.py stackframe should not match this source file: apostello/views/base.py | ||
match = split[0].rfind(f"{frame_filename.root}/") > -1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the new functionality. This prevents false positives and it also allows when there's more than one file match in the repository since it also requires matching the package name. The negative side of this logic is that there are certain Python package names that are not named exactly as their directory in the repository. In our case, I'm adding it a task to find examples in the future. This is safer to prevent false positives, thus, taking it. |
||
|
||
return match |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,20 +25,37 @@ def inject_fixtures(self, caplog): | |
|
||
def setUp(self): | ||
super().setUp() | ||
foo_repo = Repo("Test-Organization/foo", "master") | ||
bar_repo = Repo("Test-Organization/bar", "main") | ||
self.foo_repo = Repo("Test-Organization/foo", "master") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is so I can references to the repo in the tests rather than defining them again. |
||
self.bar_repo = Repo("Test-Organization/bar", "main") | ||
self.code_mapping_helper = CodeMappingTreesHelper( | ||
{ | ||
"sentry": RepoTree(foo_repo, files=sentry_files), | ||
"getsentry": RepoTree(bar_repo, files=["getsentry/web/urls.py"]), | ||
self.foo_repo.name: RepoTree(self.foo_repo, files=sentry_files), | ||
self.bar_repo.name: RepoTree(self.bar_repo, files=["getsentry/web/urls.py"]), | ||
} | ||
) | ||
|
||
self.expected_code_mappings = [ | ||
CodeMapping(foo_repo, "sentry", "src/sentry"), | ||
CodeMapping(foo_repo, "sentry_plugins", "src/sentry_plugins"), | ||
CodeMapping(self.foo_repo, "sentry", "src/sentry"), | ||
CodeMapping(self.foo_repo, "sentry_plugins", "src/sentry_plugins"), | ||
] | ||
|
||
def test_frame_filename_package_and_more_than_one_level(self): | ||
ff = FrameFilename("getsentry/billing/tax/manager.py") | ||
assert f"{ff.root}/{ff.dir_path}/{ff.file_name}" == "getsentry/billing/tax/manager.py" | ||
assert f"{ff.dir_path}/{ff.file_name}" == ff.file_and_dir_path | ||
|
||
def test_frame_filename_package_and_no_levels(self): | ||
ff = FrameFilename("root/bar.py") | ||
assert f"{ff.root}/{ff.file_name}" == "root/bar.py" | ||
assert f"{ff.root}/{ff.file_and_dir_path}" == "root/bar.py" | ||
assert ff.dir_path == "" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the test to handle the other bug I encountered. This is the fix in line 44: + self.dir_path = "" |
||
|
||
def test_frame_filename_no_package(self): | ||
ff = FrameFilename("foo.py") | ||
assert ff.root == "" | ||
assert ff.dir_path == "" | ||
assert ff.file_name == "foo.py" | ||
|
||
def test_frame_filename_repr(self): | ||
path = "getsentry/billing/tax/manager.py" | ||
assert FrameFilename(path).__repr__() == f"FrameFilename: {path}" | ||
|
@@ -52,6 +69,38 @@ def test_buckets_logic(self): | |
"getsentry": [FrameFilename("getsentry/billing/tax/manager.py")], | ||
} | ||
|
||
def test_package_also_matches(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the test that fixes the main bug I encountered. |
||
# We create a new tree helper in order to improve the understability of this test | ||
cmh = CodeMappingTreesHelper( | ||
{self.foo_repo.name: RepoTree(self.foo_repo, files=["apostello/views/base.py"])} | ||
) | ||
cm = cmh._generate_code_mapping_from_tree( | ||
repo_full_name=self.foo_repo.name, | ||
frame_filename=FrameFilename("raven/base.py"), | ||
) | ||
# We should not derive a code mapping since the package name does not match | ||
assert cm == [] | ||
|
||
def test_no_support_for_toplevel_files(self): | ||
file_name = "base.py" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I want to test all the functions that could potential try to handle a single filename and make sure that they all would prevent trying to process it by mistakes. This is to prevent that refactorings would allow for this case scenario. |
||
ff = FrameFilename(file_name) | ||
assert ff.root == "" | ||
assert ff.dir_path == "" | ||
assert ff.file_and_dir_path == file_name | ||
# We create a new tree helper in order to improve the understability of this test | ||
cmh = CodeMappingTreesHelper( | ||
{self.foo_repo.name: RepoTree(self.foo_repo, files=[file_name])} | ||
) | ||
|
||
# We should not derive a code mapping since we do not yet | ||
# support stackframes for non-packaged files | ||
assert cmh.generate_code_mappings([file_name]) == [] | ||
|
||
# Make sure that we raise an error if we hit the code path | ||
assert not cmh._potential_match(file_name, ff) | ||
with pytest.raises(NotImplementedError): | ||
cmh._get_code_mapping_source_path(file_name, ff) | ||
|
||
def test_no_matches(self): | ||
stacktraces = [ | ||
"getsentry/billing/tax/manager.py", | ||
|
@@ -62,37 +111,43 @@ def test_no_matches(self): | |
code_mappings = self.code_mapping_helper.generate_code_mappings(stacktraces) | ||
assert code_mappings == [] | ||
|
||
def test_more_than_one_match_does_not_derive(self): | ||
def test_more_than_one_match_does_derive(self): | ||
stacktraces = [ | ||
# More than one file matches for this, thus, no stack traces will be produced | ||
# More than one file matches for this, however, the package name is taken into account | ||
# - "src/sentry_plugins/slack/client.py", | ||
# - "src/sentry/integrations/slack/client.py", | ||
"sentry_plugins/slack/client.py", | ||
] | ||
code_mappings = self.code_mapping_helper.generate_code_mappings(stacktraces) | ||
assert code_mappings == [ | ||
CodeMapping( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fix to prevent false positives gives us this new match when before we wouldn't. |
||
repo=self.foo_repo, | ||
stacktrace_root="sentry_plugins", | ||
source_path="src/sentry_plugins", | ||
) | ||
] | ||
|
||
def test_no_stacktraces_to_process(self): | ||
code_mappings = self.code_mapping_helper.generate_code_mappings([]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I want to make sure that an empty list would still work. |
||
assert code_mappings == [] | ||
|
||
def test_more_than_one_match_works_when_code_mapping_excludes_other_match(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test doesn't make sense but it may prevent some issues in the future. |
||
stacktraces = [ | ||
"sentry/identity/oauth2.py", | ||
# This file matches two files in the repo, however, because we first | ||
# derive the sentry code mapping we can exclude one of the files | ||
"sentry_plugins/slack/client.py", | ||
] | ||
code_mappings = self.code_mapping_helper.generate_code_mappings(stacktraces) | ||
assert code_mappings == self.expected_code_mappings | ||
|
||
def test_more_than_one_match_works_with_different_order(self): | ||
# We do *not* derive sentry_plugins because we don't derive sentry first | ||
stacktraces = [ | ||
# This file matches twice files in the repo, however, the reprocessing | ||
# feature allows deriving both code mappings | ||
"sentry_plugins/slack/client.py", | ||
"sentry/identity/oauth2.py", | ||
] | ||
code_mappings = self.code_mapping_helper.generate_code_mappings(stacktraces) | ||
# Order matters, this is why we only derive one of the two code mappings | ||
assert code_mappings == self.expected_code_mappings | ||
assert sorted(code_mappings) == sorted(self.expected_code_mappings) | ||
|
||
def test_more_than_one_repo_match(self): | ||
# XXX: There's a chance that we could infer package names but that is risky | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was a bug and it is now tested via
test_frame_filename_*
tests.