From 655f74eeedc7e6e24b7ba45d7e67678a11de7797 Mon Sep 17 00:00:00 2001 From: Mirela Popoveniuc Date: Mon, 21 Jun 2021 11:18:25 +0100 Subject: [PATCH 1/2] Remove current path from the module path in ProfileEncoder. Running the uwsgi sample application with Python 3.8.10-3.9.2 showed incorrect module path in the UI. This was narrowed down to traceback returning different results (a path with "/."). This change makes sure the path is not leaked out. --- codeguru_profiler_agent/sampling_utils.py | 6 +++++ .../sdk_reporter/profile_encoder.py | 23 +++++++++++++++---- .../sdk_reporter/test_sdk_profile_encoder.py | 20 ++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/codeguru_profiler_agent/sampling_utils.py b/codeguru_profiler_agent/sampling_utils.py index 7f47cbc..6b85ed6 100644 --- a/codeguru_profiler_agent/sampling_utils.py +++ b/codeguru_profiler_agent/sampling_utils.py @@ -87,6 +87,12 @@ def _maybe_append_synthetic_frame(result, frame, line_no): def _extract_frames(end_frame, max_depth): stack = list(traceback.walk_stack(end_frame))[::-1][0:max_depth] + # When running the sample app with uwsgi for Python 3.8.10-3.9.2, the traceback command + # returns a file path that contains "/.". + # To not let the path go into the module name, we are removing it later in the ProfileEncoder. + # Examples: + # - file '/Users/mirelap/Documents/workspace/JSON/aws-codeguru-profiler-python-demo-application/sample-demo-django-app/./polls/views.py', line 104, code get_queryset>, 104 + # - file '/Users/mirelap/Documents/workspace/JSON/aws-codeguru-profiler-python-demo-application/sample-demo-django-app/polls/views.py', line 104, code get_queryset>, 104 stack_entries = _extract_stack(stack, max_depth) if len(stack_entries) == max_depth: diff --git a/codeguru_profiler_agent/sdk_reporter/profile_encoder.py b/codeguru_profiler_agent/sdk_reporter/profile_encoder.py index 37940d6..e8866fe 100644 --- a/codeguru_profiler_agent/sdk_reporter/profile_encoder.py +++ b/codeguru_profiler_agent/sdk_reporter/profile_encoder.py @@ -17,17 +17,21 @@ def _get_module_path(file_path, sys_paths): For example, /tmp/bin/python/site-packages/great_app/simple_expansions/simple_interface.py will get turned into great_app.simple_expansions.simple_interface given that the syspath contains /tmp/bin/python/site-packages + + We are making sure we're removing the current path. + For example, '/Users/mirelap/Documents/workspace/JSON/aws-codeguru-profiler-python-demo-application/sample-demo-django-app/./polls/views.py' + will get turned into `polls.views' given that the file path contains the current path. + This should not happen usually, but we've found a case where the "/." is added when calling traceback.walk_stack(..) + in a uwsgi application. Check sampling_utils.py file for details. """ module_path = file_path if platform.system() == "Windows": # In Windows, separator can either be / or \ from experimental result - file_path = file_path.replace("/", os.sep) + module_path = module_path.replace("/", os.sep) - for root in sys_paths: - if root in file_path: - module_path = file_path.replace(root, "") - break + # remove prefix path + module_path = _remove_prefix_path(module_path, sys_paths) # remove suffix module_path = str(Path(module_path).with_suffix("")) @@ -42,6 +46,15 @@ def _get_module_path(file_path, sys_paths): return module_path +def _remove_prefix_path(module_path, sys_paths): + current_path = str(Path().absolute()) + if current_path in module_path: + return module_path.replace(current_path, "").replace("/.", "") + for root in sys_paths: + if root in module_path: + return module_path.replace(root, "") + return module_path + class ProfileEncoder: """ Encodes a given Profile into the JSON version of the ion-based profile format diff --git a/test/unit/sdk_reporter/test_sdk_profile_encoder.py b/test/unit/sdk_reporter/test_sdk_profile_encoder.py index b1ef1af..7d426a7 100644 --- a/test/unit/sdk_reporter/test_sdk_profile_encoder.py +++ b/test/unit/sdk_reporter/test_sdk_profile_encoder.py @@ -13,6 +13,7 @@ import io import gzip from datetime import timedelta +from pathlib import Path from codeguru_profiler_agent.metrics.timer import Timer from codeguru_profiler_agent.model.profile import Profile @@ -329,6 +330,25 @@ def test_it_gzips_the_result_before_writing_to_the_stream(self): assert (len(uncompressed_result) > 0) +class TestModulePathExtractorWithCurrentPath: + @before + def before(self): + self.current_path = str(Path().absolute()) + self.subject = ProfileEncoder(gzip=False, environment=environment).ModulePathExtractor(sys_path=[]) + + def test_it_removes_current_path(self): + file_path = self.current_path + '/polls/views.py' + assert self.subject.get_module_path(file_path) == "polls.views" + + def test_it_removes_current_path_and_slash_and_dot(self): + file_path = self.current_path + '/./polls/views.py' + assert self.subject.get_module_path(file_path) == "polls.views" + + def test_it_does_nothing_when_file_path_has_no_current_path(self): + file_path ='/polls/views.py' + assert self.subject.get_module_path(file_path) == "polls.views" + + class TestModulePathExtractor: @before def before(self): From 90bef671743565b589cfe67acf9278d2ef630bc0 Mon Sep 17 00:00:00 2001 From: Mirela Popoveniuc Date: Mon, 21 Jun 2021 13:34:54 +0100 Subject: [PATCH 2/2] Addressed comments from PR. --- codeguru_profiler_agent/sampling_utils.py | 7 ++----- codeguru_profiler_agent/sdk_reporter/profile_encoder.py | 9 +++++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/codeguru_profiler_agent/sampling_utils.py b/codeguru_profiler_agent/sampling_utils.py index 6b85ed6..3d00e9f 100644 --- a/codeguru_profiler_agent/sampling_utils.py +++ b/codeguru_profiler_agent/sampling_utils.py @@ -87,12 +87,9 @@ def _maybe_append_synthetic_frame(result, frame, line_no): def _extract_frames(end_frame, max_depth): stack = list(traceback.walk_stack(end_frame))[::-1][0:max_depth] - # When running the sample app with uwsgi for Python 3.8.10-3.9.2, the traceback command - # returns a file path that contains "/.". + # When running the sample app with uwsgi for Python 3.8.10 - 3.9.2, the traceback command + # returns a file path that contains "/./" instead of just a "/" between the app directory and the module path. # To not let the path go into the module name, we are removing it later in the ProfileEncoder. - # Examples: - # - file '/Users/mirelap/Documents/workspace/JSON/aws-codeguru-profiler-python-demo-application/sample-demo-django-app/./polls/views.py', line 104, code get_queryset>, 104 - # - file '/Users/mirelap/Documents/workspace/JSON/aws-codeguru-profiler-python-demo-application/sample-demo-django-app/polls/views.py', line 104, code get_queryset>, 104 stack_entries = _extract_stack(stack, max_depth) if len(stack_entries) == max_depth: diff --git a/codeguru_profiler_agent/sdk_reporter/profile_encoder.py b/codeguru_profiler_agent/sdk_reporter/profile_encoder.py index e8866fe..048541a 100644 --- a/codeguru_profiler_agent/sdk_reporter/profile_encoder.py +++ b/codeguru_profiler_agent/sdk_reporter/profile_encoder.py @@ -10,7 +10,6 @@ GZIP_BALANCED_COMPRESSION_LEVEL = 6 DEFAULT_FRAME_COMPONENT_DELIMITER = ":" - def _get_module_path(file_path, sys_paths): """ We tried to remove the python library root path in order to give a reasonable expression of the module path. @@ -23,6 +22,12 @@ def _get_module_path(file_path, sys_paths): will get turned into `polls.views' given that the file path contains the current path. This should not happen usually, but we've found a case where the "/." is added when calling traceback.walk_stack(..) in a uwsgi application. Check sampling_utils.py file for details. + + sampling_utils.py returns different values when calling traceback.walk_stack(..) for uwsgi vs non-uwsgi + for Python 3.8.10-Python 3.9.2. + Examples of results: + - file '/Users/mirelap/Documents/workspace/JSON/aws-codeguru-profiler-python-demo-application/sample-demo-django-app/./polls/views.py', line 104, code get_queryset>, 104 + - file '/Users/mirelap/Documents/workspace/JSON/aws-codeguru-profiler-python-demo-application/sample-demo-django-app/polls/views.py', line 104, code get_queryset>, 104 """ module_path = file_path @@ -49,7 +54,7 @@ def _get_module_path(file_path, sys_paths): def _remove_prefix_path(module_path, sys_paths): current_path = str(Path().absolute()) if current_path in module_path: - return module_path.replace(current_path, "").replace("/.", "") + return module_path.replace(current_path, "").replace("/./", "/") for root in sys_paths: if root in module_path: return module_path.replace(root, "")