Improve the algorithm that generates a file name from the request path

jazzband · Jan 20, 2023 · 3299ce5 · 3299ce5
1 parent d6615bb
commit 3299ce5
Show file tree

Hide file tree

Showing 2 changed files with 56 additions and 20 deletions.
diff --git a/project/tests/test_collector.py b/project/tests/test_collector.py
@@ -47,22 +47,38 @@ def test_finalise(self):
                 self.assertTrue(content)
                 self.assertGreater(len(content), 0)
 
-    def test_profile_file_name(self):
-        request = RequestMinFactory()
-        DataCollector().configure(request)
-        expected_file_name_prefix = request.path.replace('/', '_').lstrip('_')
-        print(expected_file_name_prefix)
+    def test_profile_file_name_with_disabled_extended_file_name(self):
+        SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = False
+        request_path = 'normal/uri/'
+        resulting_prefix = self._get_prof_file_name(request_path)
+        self.assertEqual(resulting_prefix, '')
 
-        with self.subTest("With disabled extended file name"):
-            SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = False
-            DataCollector().finalise()
-            file = DataCollector().request.prof_file
-            result_file_name = file.name.rsplit('/')[-1]
-            self.assertFalse(result_file_name.startswith(f"{expected_file_name_prefix}_"))
+    def test_profile_file_name_with_enabled_extended_file_name(self):
 
-        with self.subTest("With enabled extended file name"):
-            SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = True
-            DataCollector().finalise()
-            file = DataCollector().request.prof_file
-            result_file_name = file.name.rsplit('/')[-1]
-            self.assertTrue(result_file_name.startswith(f"{expected_file_name_prefix}_"))
+        SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = True
+        request_path = 'normal/uri/'
+        resulting_prefix = self._get_prof_file_name(request_path)
+        self.assertEqual(resulting_prefix, 'normal_uri_')
+
+    def test_profile_file_name_with_path_traversal_and_special_char(self):
+        SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = True
+        request_path = 'spÉciàl/.././大/uri/@É/'
+        resulting_prefix = self._get_prof_file_name(request_path)
+        self.assertEqual(resulting_prefix, 'special_uri_e_')
+
+    def test_profile_file_name_with_long_path(self):
+        SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = True
+        request_path = 'long/path/' + 'a' * 100
+        resulting_prefix = self._get_prof_file_name(request_path)
+        # the slugified path is limited to 50 chars, plus the trailing `_`
+        self.assertEqual(len(resulting_prefix), 51)
+
+    @classmethod
+    def _get_prof_file_name(cls, request_path: str) -> str:
+        request = RequestMinFactory()
+        request.path = request_path
+        DataCollector().configure(request)
+        DataCollector().finalise()
+        file_path = DataCollector().request.prof_file.name
+        filename = file_path.rsplit('/')[-1]
+        return filename.replace(f"{request.id}.prof", "")
diff --git a/silk/collector.py b/silk/collector.py
@@ -2,6 +2,8 @@
 import logging
 import marshal
 import pstats
+import re
+import unicodedata
 from io import StringIO
 from threading import local
 
@@ -191,10 +193,28 @@ def finalise(self):
     def register_silk_query(self, *args):
         self.register_objects(TYP_SILK_QUERIES, *args)
 
-    def _get_proposed_file_name(self):
+    def _get_proposed_file_name(self) -> str:
         """Retrieve the profile file name to be proposed to the storage"""
 
         if SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME:
-            request_path = self.request.path.replace('/', '_').lstrip('_')
-            return f"{request_path}_{str(self.request.id)}.prof"
+            slugified_path = slugify_path(self.request.path)
+            return f"{slugified_path}_{str(self.request.id)}.prof"
         return f"{str(self.request.id)}.prof"
+
+
+def slugify_path(request_path: str) -> str:
+    """
+    Replace each run of characters outside [a-zA-Z0-9_] with a single underscore.
+    Convert to lowercase and truncate to 50 characters. Also strip leading and
+    trailing underscores (whitespace and dashes are already replaced by them).
+
+    Inspired from django slugify
+    """
+    request_path = str(request_path)
+    request_path = (
+        unicodedata.normalize("NFKD", request_path)
+        .encode("ascii", "ignore")
+        .decode("ascii")
+    )
+    request_path = request_path.lower()[:50]
+    return re.sub(r'\W+', '_', request_path).strip('_')