From ee2a3a2e63ea1d5d8b28b5ebab4baf4988bb9fad Mon Sep 17 00:00:00 2001
From: Nick Iaconis <nick.iaconis@gmail.com>
Date: Wed, 18 Nov 2020 16:05:39 -0800
Subject: [PATCH 1/6] pragma: allowlist nextline secret

---
 detect_secrets/filters/allowlist.py    | 48 +++++++++++++++++-
 detect_secrets/util/code_snippet.py    |  6 +++
 tests/filters/allowlist_filter_test.py | 68 ++++++++++++++++++++++++--
 tests/util/code_snippet_test.py        |  8 +++
 4 files changed, 126 insertions(+), 4 deletions(-)

diff --git a/detect_secrets/filters/allowlist.py b/detect_secrets/filters/allowlist.py
index 23e885ad5..e6847f07d 100644
--- a/detect_secrets/filters/allowlist.py
+++ b/detect_secrets/filters/allowlist.py
@@ -5,8 +5,10 @@
 from typing import List
 from typing import Pattern
 
+from ..util.code_snippet import CodeSnippet
 
-def is_line_allowlisted(filename: str, line: str) -> bool:
+
+def is_line_allowlisted(filename: str, line: str, context: CodeSnippet) -> bool:
     regexes = _get_allowlist_regexes()
 
     _, ext = os.path.splitext(filename)
@@ -17,6 +19,16 @@ def is_line_allowlisted(filename: str, line: str) -> bool:
         if regex.search(line):
             return True
 
+    previous_line = context.previous_line
+    regexes = _get_allowlist_nextline_regexes()
+
+    if ext[1:] in _get_file_based_allowlist_nextline_regexes():
+        regexes = _get_file_based_allowlist_nextline_regexes()[ext[1:]]
+
+    for regex in regexes:
+        if regex.search(previous_line):
+            return True
+
     return False
 
 
@@ -53,3 +65,37 @@ def _get_allowlist_regexes() -> List[Pattern]:
             )
         ]
     ]
+
+
+@lru_cache(maxsize=1)
+def _get_file_based_allowlist_nextline_regexes() -> Dict[str, List[Pattern]]:
+    # Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily,
+    # as more language specific file parsers are implemented.
+    # Discussion: https://github.com/Yelp/detect-secrets/pull/105
+    return {
+        'yaml': [_get_allowlist_nextline_regexes()[0]],
+    }
+
+
+@lru_cache(maxsize=1)
+def _get_allowlist_nextline_regexes() -> List[Pattern]:
+    return [
+        re.compile(r)
+        for r in [
+            r'^[ \t]*{} *pragma: ?allowlist[ -]nextline[ -]secret.*?{}[ \t]*$'.format(start, end)
+            for start, end in (
+                ('#', ''),                    # e.g. python or yaml
+                ('//', ''),                   # e.g. golang
+                (r'/\*', r' *\*/'),           # e.g. c
+                ('\'', ''),                   # e.g. visual basic .net
+                ('--', ''),                   # e.g. sql
+                (r'<!--[# \t]*?', ' *?-->'),  # e.g. xml
+                # many other inline comment syntaxes are not included,
+                # because we want to be performant for
+                # any(regex.search(line) for regex in ALLOWLIST_REGEXES)
+                # calls. of course, this won't be a concern if detect-secrets
+                # switches over to implementing file plugins for each supported
+                # filetype.
+            )
+        ]
+    ]
diff --git a/detect_secrets/util/code_snippet.py b/detect_secrets/util/code_snippet.py
index d12037cc1..386274fff 100644
--- a/detect_secrets/util/code_snippet.py
+++ b/detect_secrets/util/code_snippet.py
@@ -49,6 +49,12 @@ def __init__(self, snippet: List[str], start_line: int, target_index: int) -> No
     def target_line(self) -> str:
         return self.lines[self.target_index]
 
+    @property
+    def previous_line(self) -> str:
+        if self.target_index == 0 or len(self.lines) < self.target_index:
+            return ''
+        return self.lines[self.target_index - 1]
+
     @target_line.setter
     def target_line(self, value: str) -> None:
         self.lines[self.target_index] = value
diff --git a/tests/filters/allowlist_filter_test.py b/tests/filters/allowlist_filter_test.py
index 7cbd3384f..438e46a26 100644
--- a/tests/filters/allowlist_filter_test.py
+++ b/tests/filters/allowlist_filter_test.py
@@ -1,6 +1,7 @@
 import pytest
 
 from detect_secrets.filters.allowlist import is_line_allowlisted
+from detect_secrets.util.code_snippet import CodeSnippet
 
 
 @pytest.mark.parametrize(
@@ -22,16 +23,57 @@
     ),
 )
 def test_basic(prefix, suffix):
+    line = f'AKIAEXAMPLE  {prefix}pragma: allowlist secret{suffix}'
     assert is_line_allowlisted(
         'filename',
-        f'AKIAEXAMPLE  {prefix}pragma: allowlist secret{suffix}',
+        line,
+        CodeSnippet([line], 0, 0),
     )
 
 
+@pytest.mark.parametrize(
+    'prefix, suffix',
+    (
+        ('#', ''),
+        ('# ', ' more text'),
+
+        ('//', ''),
+        ('// ', ' more text'),
+
+        ('/*', '*/'),
+        ('/* ', ' */'),
+
+        ('--', ''),
+        ('-- ', ' more text'),
+
+        ('<!--', '-->'),
+    ),
+)
+def test_nextline(prefix, suffix):
+    comment = f'{prefix}pragma: allowlist nextline secret{suffix}'
+    line = 'AKIAEXAMPLE'
+    assert is_line_allowlisted(
+        'filename',
+        line,
+        CodeSnippet([comment, line], 0, 1),
+    )
+
+
+def test_nextline_exclusivity():
+    line = 'AKIAEXAMPLE  # pragma: allowlist nextline secret'
+    assert is_line_allowlisted(
+        'filename',
+        line,
+        CodeSnippet([line], 0, 0),
+    ) is False
+
+
 def test_backwards_compatibility():
+    line = 'AKIAEXAMPLE  # pragma: whitelist secret'
     assert is_line_allowlisted(
         'filename',
-        'AKIAEXAMPLE  # pragma: whitelist secret',
+        line,
+        CodeSnippet([line], 0, 0),
     )
 
 
@@ -43,4 +85,24 @@ def test_backwards_compatibility():
     ),
 )
 def test_file_based_regexes(line, expected_result):
-    assert is_line_allowlisted('filename.yaml', line) is expected_result
+    assert is_line_allowlisted(
+        'filename.yaml',
+        line,
+        CodeSnippet([line], 0, 0),
+    ) is expected_result
+
+
+@pytest.mark.parametrize(
+    'comment, expected_result',
+    (
+        ('# pragma: allowlist nextline secret', True),
+        ('// pragma: allowlist nextline secret', False),
+    ),
+)
+def test_file_based_nextline_regexes(comment, expected_result):
+    line = 'key: value'
+    assert is_line_allowlisted(
+        'filename.yaml',
+        line,
+        CodeSnippet([comment, line], 0, 1),
+    ) is expected_result
diff --git a/tests/util/code_snippet_test.py b/tests/util/code_snippet_test.py
index 11a6a913c..718d3125d 100644
--- a/tests/util/code_snippet_test.py
+++ b/tests/util/code_snippet_test.py
@@ -16,3 +16,11 @@ def test_basic(line_number, expected):
     assert ''.join(
         list(get_code_snippet(list('abcde'), line_number, lines_of_context=2)),
     ) == expected
+
+
+def test_target_line():
+    assert get_code_snippet(list('abcde'), 3, lines_of_context=2).target_line == 'c'
+
+
+def test_previous_line():
+    assert get_code_snippet(list('abcde'), 3, lines_of_context=2).previous_line == 'b'

From 3c0dcf2d932e6ea666c525715333ca5e59ff3137 Mon Sep 17 00:00:00 2001
From: Nick Iaconis <nick.iaconis@gmail.com>
Date: Thu, 19 Nov 2020 11:23:26 -0800
Subject: [PATCH 2/6] DRY up prefix/suffix & share code between regexes

---
 detect_secrets/filters/allowlist.py    | 129 +++++++++++--------------
 tests/filters/allowlist_filter_test.py |  45 ++++-----
 2 files changed, 73 insertions(+), 101 deletions(-)

diff --git a/detect_secrets/filters/allowlist.py b/detect_secrets/filters/allowlist.py
index e6847f07d..191ccc2f4 100644
--- a/detect_secrets/filters/allowlist.py
+++ b/detect_secrets/filters/allowlist.py
@@ -2,100 +2,83 @@
 import re
 from functools import lru_cache
 from typing import Dict
+from typing import Iterable
 from typing import List
 from typing import Pattern
+from typing import Tuple
 
 from ..util.code_snippet import CodeSnippet
 
 
 def is_line_allowlisted(filename: str, line: str, context: CodeSnippet) -> bool:
-    regexes = _get_allowlist_regexes()
-
-    _, ext = os.path.splitext(filename)
-    if ext[1:] in _get_file_based_allowlist_regexes():
-        regexes = _get_file_based_allowlist_regexes()[ext[1:]]
-
-    for regex in regexes:
-        if regex.search(line):
-            return True
-
-    previous_line = context.previous_line
-    regexes = _get_allowlist_nextline_regexes()
-
-    if ext[1:] in _get_file_based_allowlist_nextline_regexes():
-        regexes = _get_file_based_allowlist_nextline_regexes()[ext[1:]]
-
-    for regex in regexes:
-        if regex.search(previous_line):
-            return True
+    for payload, regexes in zip(
+        [line, context.previous_line],
+        _get_allowlist_regexes_for_file(filename),
+    ):
+        for regex in regexes:
+            if regex.search(payload):
+                return True
 
     return False
 
 
 @lru_cache(maxsize=1)
-def _get_file_based_allowlist_regexes() -> Dict[str, List[Pattern]]:
+def _get_file_to_index_dict() -> Dict[str, int]:
     # Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily,
     # as more language specific file parsers are implemented.
     # Discussion: https://github.com/Yelp/detect-secrets/pull/105
     return {
-        'yaml': [_get_allowlist_regexes()[0]],
+        'yaml': 0,
     }
 
 
 @lru_cache(maxsize=1)
-def _get_allowlist_regexes() -> List[Pattern]:
-    return [
-        re.compile(r)
-        for r in [
-            # Note: Always use allowlist, whitelist will be deprecated in the future
-            r'[ \t]+{} *pragma: ?(allow|white)list[ -]secret.*?{}[ \t]*$'.format(start, end)
-            for start, end in (
-                ('#', ''),                    # e.g. python or yaml
-                ('//', ''),                   # e.g. golang
-                (r'/\*', r' *\*/'),           # e.g. c
-                ('\'', ''),                   # e.g. visual basic .net
-                ('--', ''),                   # e.g. sql
-                (r'<!--[# \t]*?', ' *?-->'),  # e.g. xml
-                # many other inline comment syntaxes are not included,
-                # because we want to be performant for
-                # any(regex.search(line) for regex in ALLOWLIST_REGEXES)
-                # calls. of course, this won't be a concern if detect-secrets
-                # switches over to implementing file plugins for each supported
-                # filetype.
-            )
-        ]
-    ]
+def _get_comment_tuples() -> Iterable[Tuple[str, str]]:
+    return (
+        ('#', ''),                    # e.g. python or yaml
+        ('//', ''),                   # e.g. golang
+        (r'/\*', r' *\*/'),           # e.g. c
+        ('\'', ''),                   # e.g. visual basic .net
+        ('--', ''),                   # e.g. sql
+        (r'<!--[# \t]*?', ' *?-->'),  # e.g. xml
+        # many other inline comment syntaxes are not included,
+        # because we want to be performant for
+        # any(regex.search(line) for regex in ALLOWLIST_REGEXES)
+        # calls. of course, this won't be a concern if detect-secrets
+        # switches over to implementing file plugins for each supported
+        # filetype.
+    )
+
+
+def _get_allowlist_regexes_for_file(filename: str) -> Iterable[List[Pattern]]:
+    comment_tuples = _get_comment_tuples()
 
+    _, ext = os.path.splitext(filename)
+    if ext[1:] in _get_file_to_index_dict():
+        comment_tuples = (comment_tuples[_get_file_to_index_dict()[ext[1:]]],)
 
-@lru_cache(maxsize=1)
-def _get_file_based_allowlist_nextline_regexes() -> Dict[str, List[Pattern]]:
-    # Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily,
-    # as more language specific file parsers are implemented.
-    # Discussion: https://github.com/Yelp/detect-secrets/pull/105
-    return {
-        'yaml': [_get_allowlist_nextline_regexes()[0]],
-    }
+    yield [
+        _get_allowlist_regexes(comment_tuple=t, nextline=False)
+        for t in comment_tuples
+    ]
+    yield [
+        _get_allowlist_regexes(comment_tuple=t, nextline=True)
+        for t in comment_tuples
+    ]
 
 
-@lru_cache(maxsize=1)
-def _get_allowlist_nextline_regexes() -> List[Pattern]:
-    return [
-        re.compile(r)
-        for r in [
-            r'^[ \t]*{} *pragma: ?allowlist[ -]nextline[ -]secret.*?{}[ \t]*$'.format(start, end)
-            for start, end in (
-                ('#', ''),                    # e.g. python or yaml
-                ('//', ''),                   # e.g. golang
-                (r'/\*', r' *\*/'),           # e.g. c
-                ('\'', ''),                   # e.g. visual basic .net
-                ('--', ''),                   # e.g. sql
-                (r'<!--[# \t]*?', ' *?-->'),  # e.g. xml
-                # many other inline comment syntaxes are not included,
-                # because we want to be performant for
-                # any(regex.search(line) for regex in ALLOWLIST_REGEXES)
-                # calls. of course, this won't be a concern if detect-secrets
-                # switches over to implementing file plugins for each supported
-                # filetype.
-            )
-        ]
-    ]
+# Note: Cache size should be 2x the number of comment types
+@lru_cache(maxsize=12)
+def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pattern:
+    start = comment_tuple[0]
+    end = comment_tuple[1]
+    return re.compile(
+        # Note: Always use allowlist, whitelist will be deprecated in the future
+        r'{}{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format(
+            r'^[ \t]*' if nextline else r'[ \t]+',
+            start,
+            r'allowlist' if nextline else r'(allow|white)list',
+            r'[ -]nextline' if nextline else '',
+            end,
+        ),
+    )
diff --git a/tests/filters/allowlist_filter_test.py b/tests/filters/allowlist_filter_test.py
index 438e46a26..5f2ed86ff 100644
--- a/tests/filters/allowlist_filter_test.py
+++ b/tests/filters/allowlist_filter_test.py
@@ -4,23 +4,26 @@
 from detect_secrets.util.code_snippet import CodeSnippet
 
 
-@pytest.mark.parametrize(
-    'prefix, suffix',
-    (
-        ('#', ''),
-        ('# ', ' more text'),
+EXAMPLE_COMMENT_PARTS = (
+    ('#', ''),
+    ('# ', ' more text'),
 
-        ('//', ''),
-        ('// ', ' more text'),
+    ('//', ''),
+    ('// ', ' more text'),
 
-        ('/*', '*/'),
-        ('/* ', ' */'),
+    ('/*', '*/'),
+    ('/* ', ' */'),
 
-        ('--', ''),
-        ('-- ', ' more text'),
+    ('--', ''),
+    ('-- ', ' more text'),
 
-        ('<!--', '-->'),
-    ),
+    ('<!--', '-->'),
+)
+
+
+@pytest.mark.parametrize(
+    'prefix, suffix',
+    EXAMPLE_COMMENT_PARTS,
 )
 def test_basic(prefix, suffix):
     line = f'AKIAEXAMPLE  {prefix}pragma: allowlist secret{suffix}'
@@ -33,21 +36,7 @@ def test_basic(prefix, suffix):
 
 @pytest.mark.parametrize(
     'prefix, suffix',
-    (
-        ('#', ''),
-        ('# ', ' more text'),
-
-        ('//', ''),
-        ('// ', ' more text'),
-
-        ('/*', '*/'),
-        ('/* ', ' */'),
-
-        ('--', ''),
-        ('-- ', ' more text'),
-
-        ('<!--', '-->'),
-    ),
+    EXAMPLE_COMMENT_PARTS,
 )
 def test_nextline(prefix, suffix):
     comment = f'{prefix}pragma: allowlist nextline secret{suffix}'

From 8cf35fb1b10b2f92e03221d37beb018ea846e44d Mon Sep 17 00:00:00 2001
From: Nick Iaconis <nick.iaconis@gmail.com>
Date: Thu, 19 Nov 2020 13:17:19 -0800
Subject: [PATCH 3/6] inject context into filter

---
 detect_secrets/core/scan.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/detect_secrets/core/scan.py b/detect_secrets/core/scan.py
index a397bad39..6b3c64f28 100644
--- a/detect_secrets/core/scan.py
+++ b/detect_secrets/core/scan.py
@@ -162,7 +162,15 @@ def _process_line_based_plugins(
 
         # We apply line-specific filters, and see whether that allows us to quit early.
         if any([
-            inject_variables_into_function(filter_fn, filename=filename, line=line)
+            inject_variables_into_function(
+                filter_fn,
+                filename=filename,
+                line=line,
+                context=get_code_snippet(
+                    lines=line_content,
+                    line_number=line_number,
+                ),
+            )
             for filter_fn in get_filters_with_parameter('line')
         ]):
             continue

From 192b0a109a756d7abe1084114e91f0ff98325805 Mon Sep 17 00:00:00 2001
From: Nick Iaconis <nick.iaconis@gmail.com>
Date: Thu, 19 Nov 2020 13:18:41 -0800
Subject: [PATCH 4/6] document rationale for no text before a nextline pragma

---
 detect_secrets/filters/allowlist.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/detect_secrets/filters/allowlist.py b/detect_secrets/filters/allowlist.py
index 191ccc2f4..e66bce7af 100644
--- a/detect_secrets/filters/allowlist.py
+++ b/detect_secrets/filters/allowlist.py
@@ -73,10 +73,11 @@ def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pa
     start = comment_tuple[0]
     end = comment_tuple[1]
     return re.compile(
-        # Note: Always use allowlist, whitelist will be deprecated in the future
-        r'{}{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format(
-            r'^[ \t]*' if nextline else r'[ \t]+',
+        r'{}[ \t]*{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format(
+            # Note: No text can precede a nextline pragma, this prevents obscuring what is allowed
+            r'^' if nextline else '',
             start,
+            # Note: Always use allowlist, whitelist will be deprecated in the future
             r'allowlist' if nextline else r'(allow|white)list',
             r'[ -]nextline' if nextline else '',
             end,

From 2660b94e2ffd9380aea24de031c591507d7c3016 Mon Sep 17 00:00:00 2001
From: Nick Iaconis <nick.iaconis+github@gmail.com>
Date: Mon, 30 Nov 2020 10:42:43 -0800
Subject: [PATCH 5/6] Update detect_secrets/filters/allowlist.py

Co-authored-by: Aaron Loo <domanchi@users.noreply.github.com>
---
 detect_secrets/filters/allowlist.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/detect_secrets/filters/allowlist.py b/detect_secrets/filters/allowlist.py
index e66bce7af..f3a4e81c3 100644
--- a/detect_secrets/filters/allowlist.py
+++ b/detect_secrets/filters/allowlist.py
@@ -75,6 +75,9 @@ def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pa
     return re.compile(
         r'{}[ \t]*{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format(
             # Note: No text can precede a nextline pragma, this prevents obscuring what is allowed
+            # For instance, we want to prevent the following case from working:
+            #     foo = 'bar' # pragma: allowlist nextline secret
+            #     pass = 'hunter2'
             r'^' if nextline else '',
             start,
             # Note: Always use allowlist, whitelist will be deprecated in the future

From cf72d81b9f41e7c872c261f044063f01bf9398ff Mon Sep 17 00:00:00 2001
From: Nick Iaconis <nick.iaconis@gmail.com>
Date: Mon, 30 Nov 2020 10:46:53 -0800
Subject: [PATCH 6/6] store code snippet in variable instead of recreating

---
 detect_secrets/core/scan.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/detect_secrets/core/scan.py b/detect_secrets/core/scan.py
index 6b3c64f28..d5685f09a 100644
--- a/detect_secrets/core/scan.py
+++ b/detect_secrets/core/scan.py
@@ -159,6 +159,10 @@ def _process_line_based_plugins(
     # filters return True.
     for line_number, line in lines:
         line = line.rstrip()
+        code_snippet = get_code_snippet(
+            lines=line_content,
+            line_number=line_number,
+        )
 
         # We apply line-specific filters, and see whether that allows us to quit early.
         if any([
@@ -166,10 +170,7 @@ def _process_line_based_plugins(
                 filter_fn,
                 filename=filename,
                 line=line,
-                context=get_code_snippet(
-                    lines=line_content,
-                    line_number=line_number,
-                ),
+                context=code_snippet,
             )
             for filter_fn in get_filters_with_parameter('line')
         ]):
@@ -184,10 +185,7 @@ def _process_line_based_plugins(
                         secret=secret.secret_value,
                         plugin=plugin,
                         line=line,
-                        context=get_code_snippet(
-                            lines=line_content,
-                            line_number=line_number,
-                        ),
+                        context=code_snippet,
                     ):
                         log.debug(f'Skipping "{secret.secret_value}" due to `{filter_fn.path}`.')
                         break