From ee2a3a2e63ea1d5d8b28b5ebab4baf4988bb9fad Mon Sep 17 00:00:00 2001 From: Nick Iaconis Date: Wed, 18 Nov 2020 16:05:39 -0800 Subject: [PATCH 1/6] pragma: allowlist nextline secret --- detect_secrets/filters/allowlist.py | 48 +++++++++++++++++- detect_secrets/util/code_snippet.py | 6 +++ tests/filters/allowlist_filter_test.py | 68 ++++++++++++++++++++++++-- tests/util/code_snippet_test.py | 8 +++ 4 files changed, 126 insertions(+), 4 deletions(-) diff --git a/detect_secrets/filters/allowlist.py b/detect_secrets/filters/allowlist.py index 23e885ad5..e6847f07d 100644 --- a/detect_secrets/filters/allowlist.py +++ b/detect_secrets/filters/allowlist.py @@ -5,8 +5,10 @@ from typing import List from typing import Pattern +from ..util.code_snippet import CodeSnippet -def is_line_allowlisted(filename: str, line: str) -> bool: + +def is_line_allowlisted(filename: str, line: str, context: CodeSnippet) -> bool: regexes = _get_allowlist_regexes() _, ext = os.path.splitext(filename) @@ -17,6 +19,16 @@ def is_line_allowlisted(filename: str, line: str) -> bool: if regex.search(line): return True + previous_line = context.previous_line + regexes = _get_allowlist_nextline_regexes() + + if ext[1:] in _get_file_based_allowlist_nextline_regexes(): + regexes = _get_file_based_allowlist_nextline_regexes()[ext[1:]] + + for regex in regexes: + if regex.search(previous_line): + return True + return False @@ -53,3 +65,37 @@ def _get_allowlist_regexes() -> List[Pattern]: ) ] ] + + +@lru_cache(maxsize=1) +def _get_file_based_allowlist_nextline_regexes() -> Dict[str, List[Pattern]]: + # Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily, + # as more language specific file parsers are implemented. + # Discussion: https://github.com/Yelp/detect-secrets/pull/105 + return { + 'yaml': [_get_allowlist_nextline_regexes()[0]], + } + + +@lru_cache(maxsize=1) +def _get_allowlist_nextline_regexes() -> List[Pattern]: + return [ + re.compile(r) + for r in [ + r'^[ \t]*{} *pragma: ?allowlist[ -]nextline[ -]secret.*?{}[ \t]*$'.format(start, end) + for start, end in ( + ('#', ''), # e.g. python or yaml + ('//', ''), # e.g. golang + (r'/\*', r' *\*/'), # e.g. c + ('\'', ''), # e.g. visual basic .net + ('--', ''), # e.g. sql + (r''), # e.g. xml + # many other inline comment syntaxes are not included, + # because we want to be performant for + # any(regex.search(line) for regex in ALLOWLIST_REGEXES) + # calls. of course, this won't be a concern if detect-secrets + # switches over to implementing file plugins for each supported + # filetype. + ) + ] + ] diff --git a/detect_secrets/util/code_snippet.py b/detect_secrets/util/code_snippet.py index d12037cc1..386274fff 100644 --- a/detect_secrets/util/code_snippet.py +++ b/detect_secrets/util/code_snippet.py @@ -49,6 +49,12 @@ def __init__(self, snippet: List[str], start_line: int, target_index: int) -> No def target_line(self) -> str: return self.lines[self.target_index] + @property + def previous_line(self) -> str: + if self.target_index == 0 or len(self.lines) < self.target_index: + return '' + return self.lines[self.target_index - 1] + @target_line.setter def target_line(self, value: str) -> None: self.lines[self.target_index] = value diff --git a/tests/filters/allowlist_filter_test.py b/tests/filters/allowlist_filter_test.py index 7cbd3384f..438e46a26 100644 --- a/tests/filters/allowlist_filter_test.py +++ b/tests/filters/allowlist_filter_test.py @@ -1,6 +1,7 @@ import pytest from detect_secrets.filters.allowlist import is_line_allowlisted +from detect_secrets.util.code_snippet import CodeSnippet @pytest.mark.parametrize( @@ -22,16 +23,57 @@ ), ) def test_basic(prefix, suffix): + line = f'AKIAEXAMPLE {prefix}pragma: allowlist secret{suffix}' assert is_line_allowlisted( 'filename', - f'AKIAEXAMPLE {prefix}pragma: allowlist secret{suffix}', + line, + CodeSnippet([line], 0, 0), ) +@pytest.mark.parametrize( + 'prefix, suffix', + ( + ('#', ''), + ('# ', ' more text'), + + ('//', ''), + ('// ', ' more text'), + + ('/*', '*/'), + ('/* ', ' */'), + + ('--', ''), + ('-- ', ' more text'), + + (''), + ), +) +def test_nextline(prefix, suffix): + comment = f'{prefix}pragma: allowlist nextline secret{suffix}' + line = 'AKIAEXAMPLE' + assert is_line_allowlisted( + 'filename', + line, + CodeSnippet([comment, line], 0, 1), + ) + + +def test_nextline_exclusivity(): + line = 'AKIAEXAMPLE # pragma: allowlist nextline secret' + assert is_line_allowlisted( + 'filename', + line, + CodeSnippet([line], 0, 0), + ) is False + + def test_backwards_compatibility(): + line = 'AKIAEXAMPLE # pragma: whitelist secret' assert is_line_allowlisted( 'filename', - 'AKIAEXAMPLE # pragma: whitelist secret', + line, + CodeSnippet([line], 0, 0), ) @@ -43,4 +85,24 @@ def test_backwards_compatibility(): ), ) def test_file_based_regexes(line, expected_result): - assert is_line_allowlisted('filename.yaml', line) is expected_result + assert is_line_allowlisted( + 'filename.yaml', + line, + CodeSnippet([line], 0, 0), + ) is expected_result + + +@pytest.mark.parametrize( + 'comment, expected_result', + ( + ('# pragma: allowlist nextline secret', True), + ('// pragma: allowlist nextline secret', False), + ), +) +def test_file_based_nextline_regexes(comment, expected_result): + line = 'key: value' + assert is_line_allowlisted( + 'filename.yaml', + line, + CodeSnippet([comment, line], 0, 1), + ) is expected_result diff --git a/tests/util/code_snippet_test.py b/tests/util/code_snippet_test.py index 11a6a913c..718d3125d 100644 --- a/tests/util/code_snippet_test.py +++ b/tests/util/code_snippet_test.py @@ -16,3 +16,11 @@ def test_basic(line_number, expected): assert ''.join( list(get_code_snippet(list('abcde'), line_number, lines_of_context=2)), ) == expected + + +def test_target_line(): + assert get_code_snippet(list('abcde'), 3, lines_of_context=2).target_line == 'c' + + +def test_previous_line(): + assert get_code_snippet(list('abcde'), 3, lines_of_context=2).previous_line == 'b' From 3c0dcf2d932e6ea666c525715333ca5e59ff3137 Mon Sep 17 00:00:00 2001 From: Nick Iaconis Date: Thu, 19 Nov 2020 11:23:26 -0800 Subject: [PATCH 2/6] DRY up prefix/suffix & share code between regexes --- detect_secrets/filters/allowlist.py | 129 +++++++++++-------------- tests/filters/allowlist_filter_test.py | 45 ++++----- 2 files changed, 73 insertions(+), 101 deletions(-) diff --git a/detect_secrets/filters/allowlist.py b/detect_secrets/filters/allowlist.py index e6847f07d..191ccc2f4 100644 --- a/detect_secrets/filters/allowlist.py +++ b/detect_secrets/filters/allowlist.py @@ -2,100 +2,83 @@ import re from functools import lru_cache from typing import Dict +from typing import Iterable from typing import List from typing import Pattern +from typing import Tuple from ..util.code_snippet import CodeSnippet def is_line_allowlisted(filename: str, line: str, context: CodeSnippet) -> bool: - regexes = _get_allowlist_regexes() - - _, ext = os.path.splitext(filename) - if ext[1:] in _get_file_based_allowlist_regexes(): - regexes = _get_file_based_allowlist_regexes()[ext[1:]] - - for regex in regexes: - if regex.search(line): - return True - - previous_line = context.previous_line - regexes = _get_allowlist_nextline_regexes() - - if ext[1:] in _get_file_based_allowlist_nextline_regexes(): - regexes = _get_file_based_allowlist_nextline_regexes()[ext[1:]] - - for regex in regexes: - if regex.search(previous_line): - return True + for payload, regexes in zip( + [line, context.previous_line], + _get_allowlist_regexes_for_file(filename), + ): + for regex in regexes: + if regex.search(payload): + return True return False @lru_cache(maxsize=1) -def _get_file_based_allowlist_regexes() -> Dict[str, List[Pattern]]: +def _get_file_to_index_dict() -> Dict[str, int]: # Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily, # as more language specific file parsers are implemented. # Discussion: https://github.com/Yelp/detect-secrets/pull/105 return { - 'yaml': [_get_allowlist_regexes()[0]], + 'yaml': 0, } @lru_cache(maxsize=1) -def _get_allowlist_regexes() -> List[Pattern]: - return [ - re.compile(r) - for r in [ - # Note: Always use allowlist, whitelist will be deprecated in the future - r'[ \t]+{} *pragma: ?(allow|white)list[ -]secret.*?{}[ \t]*$'.format(start, end) - for start, end in ( - ('#', ''), # e.g. python or yaml - ('//', ''), # e.g. golang - (r'/\*', r' *\*/'), # e.g. c - ('\'', ''), # e.g. visual basic .net - ('--', ''), # e.g. sql - (r''), # e.g. xml - # many other inline comment syntaxes are not included, - # because we want to be performant for - # any(regex.search(line) for regex in ALLOWLIST_REGEXES) - # calls. of course, this won't be a concern if detect-secrets - # switches over to implementing file plugins for each supported - # filetype. - ) - ] - ] +def _get_comment_tuples() -> Iterable[Tuple[str, str]]: + return ( + ('#', ''), # e.g. python or yaml + ('//', ''), # e.g. golang + (r'/\*', r' *\*/'), # e.g. c + ('\'', ''), # e.g. visual basic .net + ('--', ''), # e.g. sql + (r''), # e.g. xml + # many other inline comment syntaxes are not included, + # because we want to be performant for + # any(regex.search(line) for regex in ALLOWLIST_REGEXES) + # calls. of course, this won't be a concern if detect-secrets + # switches over to implementing file plugins for each supported + # filetype. + ) + + +def _get_allowlist_regexes_for_file(filename: str) -> Iterable[List[Pattern]]: + comment_tuples = _get_comment_tuples() + _, ext = os.path.splitext(filename) + if ext[1:] in _get_file_to_index_dict(): + comment_tuples = (comment_tuples[_get_file_to_index_dict()[ext[1:]]],) -@lru_cache(maxsize=1) -def _get_file_based_allowlist_nextline_regexes() -> Dict[str, List[Pattern]]: - # Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily, - # as more language specific file parsers are implemented. - # Discussion: https://github.com/Yelp/detect-secrets/pull/105 - return { - 'yaml': [_get_allowlist_nextline_regexes()[0]], - } + yield [ + _get_allowlist_regexes(comment_tuple=t, nextline=False) + for t in comment_tuples + ] + yield [ + _get_allowlist_regexes(comment_tuple=t, nextline=True) + for t in comment_tuples + ] -@lru_cache(maxsize=1) -def _get_allowlist_nextline_regexes() -> List[Pattern]: - return [ - re.compile(r) - for r in [ - r'^[ \t]*{} *pragma: ?allowlist[ -]nextline[ -]secret.*?{}[ \t]*$'.format(start, end) - for start, end in ( - ('#', ''), # e.g. python or yaml - ('//', ''), # e.g. golang - (r'/\*', r' *\*/'), # e.g. c - ('\'', ''), # e.g. visual basic .net - ('--', ''), # e.g. sql - (r''), # e.g. xml - # many other inline comment syntaxes are not included, - # because we want to be performant for - # any(regex.search(line) for regex in ALLOWLIST_REGEXES) - # calls. of course, this won't be a concern if detect-secrets - # switches over to implementing file plugins for each supported - # filetype. - ) - ] - ] +# Note: Cache size should be 2x the number of comment types +@lru_cache(maxsize=12) +def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pattern: + start = comment_tuple[0] + end = comment_tuple[1] + return re.compile( + # Note: Always use allowlist, whitelist will be deprecated in the future + r'{}{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format( + r'^[ \t]*' if nextline else r'[ \t]+', + start, + r'allowlist' if nextline else r'(allow|white)list', + r'[ -]nextline' if nextline else '', + end, + ), + ) diff --git a/tests/filters/allowlist_filter_test.py b/tests/filters/allowlist_filter_test.py index 438e46a26..5f2ed86ff 100644 --- a/tests/filters/allowlist_filter_test.py +++ b/tests/filters/allowlist_filter_test.py @@ -4,23 +4,26 @@ from detect_secrets.util.code_snippet import CodeSnippet -@pytest.mark.parametrize( - 'prefix, suffix', - ( - ('#', ''), - ('# ', ' more text'), +EXAMPLE_COMMENT_PARTS = ( + ('#', ''), + ('# ', ' more text'), - ('//', ''), - ('// ', ' more text'), + ('//', ''), + ('// ', ' more text'), - ('/*', '*/'), - ('/* ', ' */'), + ('/*', '*/'), + ('/* ', ' */'), - ('--', ''), - ('-- ', ' more text'), + ('--', ''), + ('-- ', ' more text'), - (''), - ), + (''), +) + + +@pytest.mark.parametrize( + 'prefix, suffix', + EXAMPLE_COMMENT_PARTS, ) def test_basic(prefix, suffix): line = f'AKIAEXAMPLE {prefix}pragma: allowlist secret{suffix}' @@ -33,21 +36,7 @@ def test_basic(prefix, suffix): @pytest.mark.parametrize( 'prefix, suffix', - ( - ('#', ''), - ('# ', ' more text'), - - ('//', ''), - ('// ', ' more text'), - - ('/*', '*/'), - ('/* ', ' */'), - - ('--', ''), - ('-- ', ' more text'), - - (''), - ), + EXAMPLE_COMMENT_PARTS, ) def test_nextline(prefix, suffix): comment = f'{prefix}pragma: allowlist nextline secret{suffix}' From 8cf35fb1b10b2f92e03221d37beb018ea846e44d Mon Sep 17 00:00:00 2001 From: Nick Iaconis Date: Thu, 19 Nov 2020 13:17:19 -0800 Subject: [PATCH 3/6] inject context into filter --- detect_secrets/core/scan.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/detect_secrets/core/scan.py b/detect_secrets/core/scan.py index a397bad39..6b3c64f28 100644 --- a/detect_secrets/core/scan.py +++ b/detect_secrets/core/scan.py @@ -162,7 +162,15 @@ def _process_line_based_plugins( # We apply line-specific filters, and see whether that allows us to quit early. if any([ - inject_variables_into_function(filter_fn, filename=filename, line=line) + inject_variables_into_function( + filter_fn, + filename=filename, + line=line, + context=get_code_snippet( + lines=line_content, + line_number=line_number, + ), + ) for filter_fn in get_filters_with_parameter('line') ]): continue From 192b0a109a756d7abe1084114e91f0ff98325805 Mon Sep 17 00:00:00 2001 From: Nick Iaconis Date: Thu, 19 Nov 2020 13:18:41 -0800 Subject: [PATCH 4/6] document rationale for no text before a nextline pragma --- detect_secrets/filters/allowlist.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/detect_secrets/filters/allowlist.py b/detect_secrets/filters/allowlist.py index 191ccc2f4..e66bce7af 100644 --- a/detect_secrets/filters/allowlist.py +++ b/detect_secrets/filters/allowlist.py @@ -73,10 +73,11 @@ def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pa start = comment_tuple[0] end = comment_tuple[1] return re.compile( - # Note: Always use allowlist, whitelist will be deprecated in the future - r'{}{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format( - r'^[ \t]*' if nextline else r'[ \t]+', + r'{}[ \t]*{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format( + # Note: No text can precede a nextline pragma, this prevents obscuring what is allowed + r'^' if nextline else '', start, + # Note: Always use allowlist, whitelist will be deprecated in the future r'allowlist' if nextline else r'(allow|white)list', r'[ -]nextline' if nextline else '', end, From 2660b94e2ffd9380aea24de031c591507d7c3016 Mon Sep 17 00:00:00 2001 From: Nick Iaconis Date: Mon, 30 Nov 2020 10:42:43 -0800 Subject: [PATCH 5/6] Update detect_secrets/filters/allowlist.py Co-authored-by: Aaron Loo --- detect_secrets/filters/allowlist.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/detect_secrets/filters/allowlist.py b/detect_secrets/filters/allowlist.py index e66bce7af..f3a4e81c3 100644 --- a/detect_secrets/filters/allowlist.py +++ b/detect_secrets/filters/allowlist.py @@ -75,6 +75,9 @@ def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pa return re.compile( r'{}[ \t]*{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format( # Note: No text can precede a nextline pragma, this prevents obscuring what is allowed + # For instance, we want to prevent the following case from working: + # foo = 'bar' # pragma: allowlist nextline secret + # pass = 'hunter2' r'^' if nextline else '', start, # Note: Always use allowlist, whitelist will be deprecated in the future From cf72d81b9f41e7c872c261f044063f01bf9398ff Mon Sep 17 00:00:00 2001 From: Nick Iaconis Date: Mon, 30 Nov 2020 10:46:53 -0800 Subject: [PATCH 6/6] store code snippet in variable instead of recreating --- detect_secrets/core/scan.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/detect_secrets/core/scan.py b/detect_secrets/core/scan.py index 6b3c64f28..d5685f09a 100644 --- a/detect_secrets/core/scan.py +++ b/detect_secrets/core/scan.py @@ -159,6 +159,10 @@ def _process_line_based_plugins( # filters return True. for line_number, line in lines: line = line.rstrip() + code_snippet = get_code_snippet( + lines=line_content, + line_number=line_number, + ) # We apply line-specific filters, and see whether that allows us to quit early. if any([ @@ -166,10 +170,7 @@ def _process_line_based_plugins( filter_fn, filename=filename, line=line, - context=get_code_snippet( - lines=line_content, - line_number=line_number, - ), + context=code_snippet, ) for filter_fn in get_filters_with_parameter('line') ]): @@ -184,10 +185,7 @@ def _process_line_based_plugins( secret=secret.secret_value, plugin=plugin, line=line, - context=get_code_snippet( - lines=line_content, - line_number=line_number, - ), + context=code_snippet, ): log.debug(f'Skipping "{secret.secret_value}" due to `{filter_fn.path}`.') break