Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a "skip next line" pragma #367

Closed
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion detect_secrets/core/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,15 @@ def _process_line_based_plugins(

# We apply line-specific filters, and see whether that allows us to quit early.
if any([
inject_variables_into_function(filter_fn, filename=filename, line=line)
inject_variables_into_function(
filter_fn,
filename=filename,
line=line,
context=get_code_snippet(
lines=line_content,
line_number=line_number,
),
nickiaconis marked this conversation as resolved.
Show resolved Hide resolved
)
for filter_fn in get_filters_with_parameter('line')
]):
continue
Expand Down
94 changes: 62 additions & 32 deletions detect_secrets/filters/allowlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,54 +2,84 @@
import re
from functools import lru_cache
from typing import Dict
from typing import Iterable
from typing import List
from typing import Pattern
from typing import Tuple

from ..util.code_snippet import CodeSnippet

def is_line_allowlisted(filename: str, line: str) -> bool:
regexes = _get_allowlist_regexes()

_, ext = os.path.splitext(filename)
if ext[1:] in _get_file_based_allowlist_regexes():
regexes = _get_file_based_allowlist_regexes()[ext[1:]]

for regex in regexes:
if regex.search(line):
return True
def is_line_allowlisted(filename: str, line: str, context: CodeSnippet) -> bool:
for payload, regexes in zip(
[line, context.previous_line],
_get_allowlist_regexes_for_file(filename),
):
for regex in regexes:
if regex.search(payload):
return True

return False


@lru_cache(maxsize=1)
def _get_file_based_allowlist_regexes() -> Dict[str, List[Pattern]]:
def _get_file_to_index_dict() -> Dict[str, int]:
# Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily,
# as more language specific file parsers are implemented.
# Discussion: https://github.com/Yelp/detect-secrets/pull/105
return {
'yaml': [_get_allowlist_regexes()[0]],
'yaml': 0,
}


@lru_cache(maxsize=1)
def _get_allowlist_regexes() -> List[Pattern]:
return [
re.compile(r)
for r in [
# Note: Always use allowlist, whitelist will be deprecated in the future
r'[ \t]+{} *pragma: ?(allow|white)list[ -]secret.*?{}[ \t]*$'.format(start, end)
for start, end in (
('#', ''), # e.g. python or yaml
('//', ''), # e.g. golang
(r'/\*', r' *\*/'), # e.g. c
('\'', ''), # e.g. visual basic .net
('--', ''), # e.g. sql
(r'<!--[# \t]*?', ' *?-->'), # e.g. xml
# many other inline comment syntaxes are not included,
# because we want to be performant for
# any(regex.search(line) for regex in ALLOWLIST_REGEXES)
# calls. of course, this won't be a concern if detect-secrets
# switches over to implementing file plugins for each supported
# filetype.
)
]
def _get_comment_tuples() -> Iterable[Tuple[str, str]]:
return (
('#', ''), # e.g. python or yaml
('//', ''), # e.g. golang
(r'/\*', r' *\*/'), # e.g. c
('\'', ''), # e.g. visual basic .net
('--', ''), # e.g. sql
(r'<!--[# \t]*?', ' *?-->'), # e.g. xml
# many other inline comment syntaxes are not included,
# because we want to be performant for
# any(regex.search(line) for regex in ALLOWLIST_REGEXES)
# calls. of course, this won't be a concern if detect-secrets
# switches over to implementing file plugins for each supported
# filetype.
)


def _get_allowlist_regexes_for_file(filename: str) -> Iterable[List[Pattern]]:
comment_tuples = _get_comment_tuples()

_, ext = os.path.splitext(filename)
if ext[1:] in _get_file_to_index_dict():
comment_tuples = (comment_tuples[_get_file_to_index_dict()[ext[1:]]],)

yield [
_get_allowlist_regexes(comment_tuple=t, nextline=False)
for t in comment_tuples
]
yield [
_get_allowlist_regexes(comment_tuple=t, nextline=True)
for t in comment_tuples
]


# Note: Cache size should be 2x the number of comment types
@lru_cache(maxsize=12)
def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pattern:
start = comment_tuple[0]
end = comment_tuple[1]
return re.compile(
r'{}[ \t]*{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format(
# Note: No text can precede a nextline pragma, this prevents obscuring what is allowed
r'^' if nextline else '',
nickiaconis marked this conversation as resolved.
Show resolved Hide resolved
start,
# Note: Always use allowlist, whitelist will be deprecated in the future
r'allowlist' if nextline else r'(allow|white)list',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also fine if these are the same. Supporting whitelist is just for backwards compatibility, and I'm fine with keeping that backwards compatibility around for the nextline regex too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind one way or the other, but my thinking was that, b/c nextline is new, it doesn't have any backward compatibility to contend with. Thus, requiring allowlist for nextline prepares uses of it for the future removal of whitelist, whereas allowing both opens the possibility of further entrenching whitelist.

r'[ -]nextline' if nextline else '',
end,
),
)
6 changes: 6 additions & 0 deletions detect_secrets/util/code_snippet.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ def __init__(self, snippet: List[str], start_line: int, target_index: int) -> No
def target_line(self) -> str:
return self.lines[self.target_index]

@property
def previous_line(self) -> str:
if self.target_index == 0 or len(self.lines) < self.target_index:
return ''
return self.lines[self.target_index - 1]

@target_line.setter
def target_line(self, value: str) -> None:
self.lines[self.target_index] = value
Expand Down
83 changes: 67 additions & 16 deletions tests/filters/allowlist_filter_test.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,68 @@
import pytest

from detect_secrets.filters.allowlist import is_line_allowlisted
from detect_secrets.util.code_snippet import CodeSnippet


@pytest.mark.parametrize(
'prefix, suffix',
(
('#', ''),
('# ', ' more text'),
EXAMPLE_COMMENT_PARTS = (
('#', ''),
('# ', ' more text'),

('//', ''),
('// ', ' more text'),
('//', ''),
('// ', ' more text'),

('/*', '*/'),
('/* ', ' */'),
('/*', '*/'),
('/* ', ' */'),

('--', ''),
('-- ', ' more text'),
('--', ''),
('-- ', ' more text'),

('<!--', '-->'),
),
('<!--', '-->'),
)


@pytest.mark.parametrize(
'prefix, suffix',
EXAMPLE_COMMENT_PARTS,
)
def test_basic(prefix, suffix):
line = f'AKIAEXAMPLE {prefix}pragma: allowlist secret{suffix}'
assert is_line_allowlisted(
'filename',
f'AKIAEXAMPLE {prefix}pragma: allowlist secret{suffix}',
line,
CodeSnippet([line], 0, 0),
)


@pytest.mark.parametrize(
'prefix, suffix',
EXAMPLE_COMMENT_PARTS,
)
def test_nextline(prefix, suffix):
comment = f'{prefix}pragma: allowlist nextline secret{suffix}'
line = 'AKIAEXAMPLE'
assert is_line_allowlisted(
'filename',
line,
CodeSnippet([comment, line], 0, 1),
)


def test_nextline_exclusivity():
line = 'AKIAEXAMPLE # pragma: allowlist nextline secret'
assert is_line_allowlisted(
'filename',
line,
CodeSnippet([line], 0, 0),
) is False


def test_backwards_compatibility():
line = 'AKIAEXAMPLE # pragma: whitelist secret'
assert is_line_allowlisted(
'filename',
'AKIAEXAMPLE # pragma: whitelist secret',
line,
CodeSnippet([line], 0, 0),
)


Expand All @@ -43,4 +74,24 @@ def test_backwards_compatibility():
),
)
def test_file_based_regexes(line, expected_result):
assert is_line_allowlisted('filename.yaml', line) is expected_result
assert is_line_allowlisted(
'filename.yaml',
line,
CodeSnippet([line], 0, 0),
) is expected_result


@pytest.mark.parametrize(
'comment, expected_result',
(
('# pragma: allowlist nextline secret', True),
('// pragma: allowlist nextline secret', False),
),
)
def test_file_based_nextline_regexes(comment, expected_result):
line = 'key: value'
assert is_line_allowlisted(
'filename.yaml',
line,
CodeSnippet([comment, line], 0, 1),
) is expected_result
8 changes: 8 additions & 0 deletions tests/util/code_snippet_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,11 @@ def test_basic(line_number, expected):
assert ''.join(
list(get_code_snippet(list('abcde'), line_number, lines_of_context=2)),
) == expected


def test_target_line():
assert get_code_snippet(list('abcde'), 3, lines_of_context=2).target_line == 'c'


def test_previous_line():
assert get_code_snippet(list('abcde'), 3, lines_of_context=2).previous_line == 'b'