Yelp · nickiaconis · Nov 19, 2020 · Nov 19, 2020 · Nov 19, 2020 · Nov 19, 2020
diff --git a/detect_secrets/filters/allowlist.py b/detect_secrets/filters/allowlist.py
@@ -2,100 +2,83 @@
 import re
 from functools import lru_cache
 from typing import Dict
+from typing import Iterable
 from typing import List
 from typing import Pattern
+from typing import Tuple
 
 from ..util.code_snippet import CodeSnippet
 
 
 def is_line_allowlisted(filename: str, line: str, context: CodeSnippet) -> bool:
-    regexes = _get_allowlist_regexes()
-
-    _, ext = os.path.splitext(filename)
-    if ext[1:] in _get_file_based_allowlist_regexes():
-        regexes = _get_file_based_allowlist_regexes()[ext[1:]]
-
-    for regex in regexes:
-        if regex.search(line):
-            return True
-
-    previous_line = context.previous_line
-    regexes = _get_allowlist_nextline_regexes()
-
-    if ext[1:] in _get_file_based_allowlist_nextline_regexes():
-        regexes = _get_file_based_allowlist_nextline_regexes()[ext[1:]]
-
-    for regex in regexes:
-        if regex.search(previous_line):
-            return True
+    # Inline pragmas are searched for on the line itself, nextline pragmas on
+    # the line before it; the regex lists arrive in that same order.
+    payloads = (line, context.previous_line)
+    regex_groups = _get_allowlist_regexes_for_file(filename)
+    for payload, regexes in zip(payloads, regex_groups):
+        if any(regex.search(payload) for regex in regexes):
+            return True
 
     return False
 
 
 @lru_cache(maxsize=1)
-def _get_file_based_allowlist_regexes() -> Dict[str, List[Pattern]]:
+def _get_file_to_index_dict() -> Dict[str, int]:
     # Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily,
     # as more language specific file parsers are implemented.
     # Discussion: https://github.com/Yelp/detect-secrets/pull/105
     return {
-        'yaml': [_get_allowlist_regexes()[0]],
+        'yaml': 0,
     }
 
 
 @lru_cache(maxsize=1)
-def _get_allowlist_regexes() -> List[Pattern]:
-    return [
-        re.compile(r)
-        for r in [
-            # Note: Always use allowlist, whitelist will be deprecated in the future
-            r'[ \t]+{} *pragma: ?(allow|white)list[ -]secret.*?{}[ \t]*$'.format(start, end)
-            for start, end in (
-                ('#', ''),                    # e.g. python or yaml
-                ('//', ''),                   # e.g. golang
-                (r'/\*', r' *\*/'),           # e.g. c
-                ('\'', ''),                   # e.g. visual basic .net
-                ('--', ''),                   # e.g. sql
-                (r'<!--[# \t]*?', ' *?-->'),  # e.g. xml
-                # many other inline comment syntaxes are not included,
-                # because we want to be performant for
-                # any(regex.search(line) for regex in ALLOWLIST_REGEXES)
-                # calls. of course, this won't be a concern if detect-secrets
-                # switches over to implementing file plugins for each supported
-                # filetype.
-            )
-        ]
-    ]
+def _get_comment_tuples() -> Tuple[Tuple[str, str], ...]:
+    return (
+        ('#', ''),                    # e.g. python or yaml
+        ('//', ''),                   # e.g. golang
+        (r'/\*', r' *\*/'),           # e.g. c
+        ('\'', ''),                   # e.g. visual basic .net
+        ('--', ''),                   # e.g. sql
+        (r'<!--[# \t]*?', ' *?-->'),  # e.g. xml
+        # Each entry is a (start, end) pair of regex fragments for one comment
+        # style. Order matters: callers index into this tuple by position.
+        # Many other inline comment syntaxes are deliberately left out to keep
+        # per-line matching cheap; that will stop being a concern if
+        # detect-secrets switches to file plugins for each supported
+        # filetype.
+    )
+
+
+def _get_allowlist_regexes_for_file(filename: str) -> Iterable[List[Pattern]]:
+    # Yield two regex lists for `filename`: inline-pragma patterns first, then
+    # nextline-pragma patterns (the order the caller pairs with line/previous line).
 
+    # Extensions present in the index dict are restricted to that single comment
+    # style; all other files are matched against every known comment style.
+    extension = os.path.splitext(filename)[1][1:]
+    index = _get_file_to_index_dict().get(extension)
+    styles = _get_comment_tuples() if index is None else (_get_comment_tuples()[index],)
 
-@lru_cache(maxsize=1)
-def _get_file_based_allowlist_nextline_regexes() -> Dict[str, List[Pattern]]:
-    # Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily,
-    # as more language specific file parsers are implemented.
-    # Discussion: https://github.com/Yelp/detect-secrets/pull/105
-    return {
-        'yaml': [_get_allowlist_nextline_regexes()[0]],
-    }
+    for nextline in (False, True):
+        yield [
+            _get_allowlist_regexes(comment_tuple=style, nextline=nextline)
+            for style in styles
+        ]
 
 
-@lru_cache(maxsize=1)
-def _get_allowlist_nextline_regexes() -> List[Pattern]:
-    return [
-        re.compile(r)
-        for r in [
-            r'^[ \t]*{} *pragma: ?allowlist[ -]nextline[ -]secret.*?{}[ \t]*$'.format(start, end)
-            for start, end in (
-                ('#', ''),                    # e.g. python or yaml
-                ('//', ''),                   # e.g. golang
-                (r'/\*', r' *\*/'),           # e.g. c
-                ('\'', ''),                   # e.g. visual basic .net
-                ('--', ''),                   # e.g. sql
-                (r'<!--[# \t]*?', ' *?-->'),  # e.g. xml
-                # many other inline comment syntaxes are not included,
-                # because we want to be performant for
-                # any(regex.search(line) for regex in ALLOWLIST_REGEXES)
-                # calls. of course, this won't be a concern if detect-secrets
-                # switches over to implementing file plugins for each supported
-                # filetype.
-            )
-        ]
-    ]
+# Note: keep maxsize at 2x the number of comment styles (inline + nextline regex each)
+@lru_cache(maxsize=12)
+def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pattern:
+    # Builds the pragma regex for a single (start, end) comment style.
+    start, end = comment_tuple[0], comment_tuple[1]
+    if nextline:
+        # Nextline form: only leading whitespace may precede the comment.
+        prefix, keyword, qualifier = r'^[ \t]*', r'allowlist', r'[ -]nextline'
+    else:
+        # Note: Always use allowlist, whitelist will be deprecated in the future
+        prefix, keyword, qualifier = r'[ \t]+', r'(allow|white)list', ''
+    template = r'{}{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'
+    return re.compile(
+        template.format(prefix, start, keyword, qualifier, end),
+    )
diff --git a/tests/filters/allowlist_filter_test.py b/tests/filters/allowlist_filter_test.py
@@ -4,23 +4,26 @@
 from detect_secrets.util.code_snippet import CodeSnippet
 
 
-@pytest.mark.parametrize(
-    'prefix, suffix',
-    (
-        ('#', ''),
-        ('# ', ' more text'),
+EXAMPLE_COMMENT_PARTS = (
+    ('#', ''),
+    ('# ', ' more text'),
 
-        ('//', ''),
-        ('// ', ' more text'),
+    ('//', ''),
+    ('// ', ' more text'),
 
-        ('/*', '*/'),
-        ('/* ', ' */'),
+    ('/*', '*/'),
+    ('/* ', ' */'),
 
-        ('--', ''),
-        ('-- ', ' more text'),
+    ('--', ''),
+    ('-- ', ' more text'),
 
-        ('<!--', '-->'),
-    ),
+    ('<!--', '-->'),
+)
+
+
+@pytest.mark.parametrize(
+    'prefix, suffix',
+    EXAMPLE_COMMENT_PARTS,
 )
 def test_basic(prefix, suffix):
     line = f'AKIAEXAMPLE  {prefix}pragma: allowlist secret{suffix}'
@@ -33,21 +36,7 @@ def test_basic(prefix, suffix):
 
 @pytest.mark.parametrize(
     'prefix, suffix',
-    (
-        ('#', ''),
-        ('# ', ' more text'),
-
-        ('//', ''),
-        ('// ', ' more text'),
-
-        ('/*', '*/'),
-        ('/* ', ' */'),
-
-        ('--', ''),
-        ('-- ', ' more text'),
-
-        ('<!--', '-->'),
-    ),
+    EXAMPLE_COMMENT_PARTS,
 )
 def test_nextline(prefix, suffix):
     comment = f'{prefix}pragma: allowlist nextline secret{suffix}'