Skip to content

Commit

Permalink
Merge remote-tracking branch nickiaconis/nextline-pragma into pre-v1-…
Browse files Browse the repository at this point in the history
…launch
  • Loading branch information
Aaron Loo committed Dec 2, 2020
2 parents da17012 + cf72d81 commit b73c473
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 57 deletions.
28 changes: 19 additions & 9 deletions detect_secrets/core/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,14 @@ def _scan_for_allowlisted_secrets_in_lines(
get_settings().disable_filters('detect_secrets.filters.allowlist.is_line_allowlisted')
get_filters.cache_clear()

for line_number, line in lines:
line = line.rstrip()

if not is_line_allowlisted(filename, line):
line_numbers, lines = zip(*lines)
lines = [line.rstrip() for line in lines]
for line_number, line in zip(line_numbers, lines):
if not is_line_allowlisted(
filename,
line,
context=get_code_snippet(lines, line_number),
):
continue

if any([
Expand Down Expand Up @@ -277,10 +281,19 @@ def _process_line_based_plugins(
# filters return True.
for line_number, line in lines:
line = line.rstrip()
code_snippet = get_code_snippet(
lines=line_content,
line_number=line_number,
)

# We apply line-specific filters, and see whether that allows us to quit early.
if any([
inject_variables_into_function(filter_fn, filename=filename, line=line)
inject_variables_into_function(
filter_fn,
filename=filename,
line=line,
context=code_snippet,
)
for filter_fn in get_filters_with_parameter('line')
]):
continue
Expand All @@ -294,10 +307,7 @@ def _process_line_based_plugins(
secret=secret.secret_value,
plugin=plugin,
line=line,
context=get_code_snippet(
lines=line_content,
line_number=line_number,
),
context=code_snippet,
):
log.debug(f'Skipping "{secret.secret_value}" due to `{filter_fn.path}`.')
break
Expand Down
97 changes: 65 additions & 32 deletions detect_secrets/filters/allowlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,54 +2,87 @@
import re
from functools import lru_cache
from typing import Dict
from typing import Iterable
from typing import List
from typing import Pattern
from typing import Tuple

from ..util.code_snippet import CodeSnippet

def is_line_allowlisted(filename: str, line: str) -> bool:
regexes = _get_allowlist_regexes()

_, ext = os.path.splitext(filename)
if ext[1:] in _get_file_based_allowlist_regexes():
regexes = _get_file_based_allowlist_regexes()[ext[1:]]

for regex in regexes:
if regex.search(line):
return True
def is_line_allowlisted(filename: str, line: str, context: CodeSnippet) -> bool:
for payload, regexes in zip(
[line, context.previous_line],
_get_allowlist_regexes_for_file(filename),
):
for regex in regexes:
if regex.search(payload):
return True

return False


@lru_cache(maxsize=1)
def _get_file_based_allowlist_regexes() -> Dict[str, List[Pattern]]:
def _get_file_to_index_dict() -> Dict[str, int]:
# Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily,
# as more language specific file parsers are implemented.
# Discussion: https://github.com/Yelp/detect-secrets/pull/105
return {
'yaml': [_get_allowlist_regexes()[0]],
'yaml': 0,
}


@lru_cache(maxsize=1)
def _get_allowlist_regexes() -> List[Pattern]:
return [
re.compile(r)
for r in [
# Note: Always use allowlist, whitelist will be deprecated in the future
r'[ \t]+{} *pragma: ?(allow|white)list[ -]secret.*?{}[ \t]*$'.format(start, end)
for start, end in (
('#', ''), # e.g. python or yaml
('//', ''), # e.g. golang
(r'/\*', r' *\*/'), # e.g. c
('\'', ''), # e.g. visual basic .net
('--', ''), # e.g. sql
(r'<!--[# \t]*?', ' *?-->'), # e.g. xml
# many other inline comment syntaxes are not included,
# because we want to be performant for
# any(regex.search(line) for regex in ALLOWLIST_REGEXES)
# calls. of course, this won't be a concern if detect-secrets
# switches over to implementing file plugins for each supported
# filetype.
)
]
def _get_comment_tuples() -> Iterable[Tuple[str, str]]:
return (
('#', ''), # e.g. python or yaml
('//', ''), # e.g. golang
(r'/\*', r' *\*/'), # e.g. c
('\'', ''), # e.g. visual basic .net
('--', ''), # e.g. sql
(r'<!--[# \t]*?', ' *?-->'), # e.g. xml
# many other inline comment syntaxes are not included,
# because we want to be performant for
# any(regex.search(line) for regex in ALLOWLIST_REGEXES)
# calls. of course, this won't be a concern if detect-secrets
# switches over to implementing file plugins for each supported
# filetype.
)


def _get_allowlist_regexes_for_file(filename: str) -> Iterable[List[Pattern]]:
comment_tuples = _get_comment_tuples()

_, ext = os.path.splitext(filename)
if ext[1:] in _get_file_to_index_dict():
comment_tuples = (comment_tuples[_get_file_to_index_dict()[ext[1:]]],)

yield [
_get_allowlist_regexes(comment_tuple=t, nextline=False)
for t in comment_tuples
]
yield [
_get_allowlist_regexes(comment_tuple=t, nextline=True)
for t in comment_tuples
]


# Note: Cache size should be 2x the number of comment types
@lru_cache(maxsize=12)
def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pattern:
start = comment_tuple[0]
end = comment_tuple[1]
return re.compile(
r'{}[ \t]*{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format(
# Note: No text can precede a nextline pragma, this prevents obscuring what is allowed
# For instance, we want to prevent the following case from working:
# foo = 'bar' # pragma: allowlist nextline secret
# pass = 'hunter2'
r'^' if nextline else '',
start,
# Note: Always use allowlist, whitelist will be deprecated in the future
r'allowlist' if nextline else r'(allow|white)list',
r'[ -]nextline' if nextline else '',
end,
),
)
6 changes: 6 additions & 0 deletions detect_secrets/util/code_snippet.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ def __init__(self, snippet: List[str], start_line: int, target_index: int) -> No
def target_line(self) -> str:
return self.lines[self.target_index]

@property
def previous_line(self) -> str:
if self.target_index == 0 or len(self.lines) < self.target_index:
return ''
return self.lines[self.target_index - 1]

@target_line.setter
def target_line(self, value: str) -> None:
self.lines[self.target_index] = value
Expand Down
83 changes: 67 additions & 16 deletions tests/filters/allowlist_filter_test.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,68 @@
import pytest

from detect_secrets.filters.allowlist import is_line_allowlisted
from detect_secrets.util.code_snippet import CodeSnippet


@pytest.mark.parametrize(
'prefix, suffix',
(
('#', ''),
('# ', ' more text'),
EXAMPLE_COMMENT_PARTS = (
('#', ''),
('# ', ' more text'),

('//', ''),
('// ', ' more text'),
('//', ''),
('// ', ' more text'),

('/*', '*/'),
('/* ', ' */'),
('/*', '*/'),
('/* ', ' */'),

('--', ''),
('-- ', ' more text'),
('--', ''),
('-- ', ' more text'),

('<!--', '-->'),
),
('<!--', '-->'),
)


@pytest.mark.parametrize(
'prefix, suffix',
EXAMPLE_COMMENT_PARTS,
)
def test_basic(prefix, suffix):
line = f'AKIAEXAMPLE {prefix}pragma: allowlist secret{suffix}'
assert is_line_allowlisted(
'filename',
f'AKIAEXAMPLE {prefix}pragma: allowlist secret{suffix}',
line,
CodeSnippet([line], 0, 0),
)


@pytest.mark.parametrize(
'prefix, suffix',
EXAMPLE_COMMENT_PARTS,
)
def test_nextline(prefix, suffix):
comment = f'{prefix}pragma: allowlist nextline secret{suffix}'
line = 'AKIAEXAMPLE'
assert is_line_allowlisted(
'filename',
line,
CodeSnippet([comment, line], 0, 1),
)


def test_nextline_exclusivity():
line = 'AKIAEXAMPLE # pragma: allowlist nextline secret'
assert is_line_allowlisted(
'filename',
line,
CodeSnippet([line], 0, 0),
) is False


def test_backwards_compatibility():
line = 'AKIAEXAMPLE # pragma: whitelist secret'
assert is_line_allowlisted(
'filename',
'AKIAEXAMPLE # pragma: whitelist secret',
line,
CodeSnippet([line], 0, 0),
)


Expand All @@ -43,4 +74,24 @@ def test_backwards_compatibility():
),
)
def test_file_based_regexes(line, expected_result):
assert is_line_allowlisted('filename.yaml', line) is expected_result
assert is_line_allowlisted(
'filename.yaml',
line,
CodeSnippet([line], 0, 0),
) is expected_result


@pytest.mark.parametrize(
'comment, expected_result',
(
('# pragma: allowlist nextline secret', True),
('// pragma: allowlist nextline secret', False),
),
)
def test_file_based_nextline_regexes(comment, expected_result):
line = 'key: value'
assert is_line_allowlisted(
'filename.yaml',
line,
CodeSnippet([comment, line], 0, 1),
) is expected_result
8 changes: 8 additions & 0 deletions tests/util/code_snippet_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,11 @@ def test_basic(line_number, expected):
assert ''.join(
list(get_code_snippet(list('abcde'), line_number, lines_of_context=2)),
) == expected


def test_target_line():
assert get_code_snippet(list('abcde'), 3, lines_of_context=2).target_line == 'c'


def test_previous_line():
assert get_code_snippet(list('abcde'), 3, lines_of_context=2).previous_line == 'b'

0 comments on commit b73c473

Please sign in to comment.