Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a "skip next line" pragma #367

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 56 additions & 73 deletions detect_secrets/filters/allowlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,100 +2,83 @@
import re
from functools import lru_cache
from typing import Dict
from typing import Iterable
from typing import List
from typing import Pattern
from typing import Tuple

from ..util.code_snippet import CodeSnippet


def is_line_allowlisted(filename: str, line: str, context: CodeSnippet) -> bool:
regexes = _get_allowlist_regexes()

_, ext = os.path.splitext(filename)
if ext[1:] in _get_file_based_allowlist_regexes():
regexes = _get_file_based_allowlist_regexes()[ext[1:]]

for regex in regexes:
if regex.search(line):
return True

previous_line = context.previous_line
regexes = _get_allowlist_nextline_regexes()

if ext[1:] in _get_file_based_allowlist_nextline_regexes():
regexes = _get_file_based_allowlist_nextline_regexes()[ext[1:]]

for regex in regexes:
if regex.search(previous_line):
return True
for payload, regexes in zip(
[line, context.previous_line],
_get_allowlist_regexes_for_file(filename),
):
for regex in regexes:
if regex.search(payload):
return True

return False


@lru_cache(maxsize=1)
def _get_file_based_allowlist_regexes() -> Dict[str, List[Pattern]]:
def _get_file_to_index_dict() -> Dict[str, int]:
# Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily,
# as more language specific file parsers are implemented.
# Discussion: https://github.com/Yelp/detect-secrets/pull/105
return {
'yaml': [_get_allowlist_regexes()[0]],
'yaml': 0,
}


@lru_cache(maxsize=1)
def _get_allowlist_regexes() -> List[Pattern]:
return [
re.compile(r)
for r in [
# Note: Always use allowlist, whitelist will be deprecated in the future
r'[ \t]+{} *pragma: ?(allow|white)list[ -]secret.*?{}[ \t]*$'.format(start, end)
for start, end in (
('#', ''), # e.g. python or yaml
('//', ''), # e.g. golang
(r'/\*', r' *\*/'), # e.g. c
('\'', ''), # e.g. visual basic .net
('--', ''), # e.g. sql
(r'<!--[# \t]*?', ' *?-->'), # e.g. xml
# many other inline comment syntaxes are not included,
# because we want to be performant for
# any(regex.search(line) for regex in ALLOWLIST_REGEXES)
# calls. of course, this won't be a concern if detect-secrets
# switches over to implementing file plugins for each supported
# filetype.
)
]
]
def _get_comment_tuples() -> Iterable[Tuple[str, str]]:
return (
('#', ''), # e.g. python or yaml
('//', ''), # e.g. golang
(r'/\*', r' *\*/'), # e.g. c
('\'', ''), # e.g. visual basic .net
('--', ''), # e.g. sql
(r'<!--[# \t]*?', ' *?-->'), # e.g. xml
# many other inline comment syntaxes are not included,
# because we want to be performant for
# any(regex.search(line) for regex in ALLOWLIST_REGEXES)
# calls. of course, this won't be a concern if detect-secrets
# switches over to implementing file plugins for each supported
# filetype.
)


def _get_allowlist_regexes_for_file(filename: str) -> Iterable[List[Pattern]]:
comment_tuples = _get_comment_tuples()

_, ext = os.path.splitext(filename)
if ext[1:] in _get_file_to_index_dict():
comment_tuples = (comment_tuples[_get_file_to_index_dict()[ext[1:]]],)

@lru_cache(maxsize=1)
def _get_file_based_allowlist_nextline_regexes() -> Dict[str, List[Pattern]]:
# Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily,
# as more language specific file parsers are implemented.
# Discussion: https://github.com/Yelp/detect-secrets/pull/105
return {
'yaml': [_get_allowlist_nextline_regexes()[0]],
}
yield [
_get_allowlist_regexes(comment_tuple=t, nextline=False)
for t in comment_tuples
]
yield [
_get_allowlist_regexes(comment_tuple=t, nextline=True)
for t in comment_tuples
]


@lru_cache(maxsize=1)
def _get_allowlist_nextline_regexes() -> List[Pattern]:
return [
re.compile(r)
for r in [
r'^[ \t]*{} *pragma: ?allowlist[ -]nextline[ -]secret.*?{}[ \t]*$'.format(start, end)
for start, end in (
('#', ''), # e.g. python or yaml
('//', ''), # e.g. golang
(r'/\*', r' *\*/'), # e.g. c
('\'', ''), # e.g. visual basic .net
('--', ''), # e.g. sql
(r'<!--[# \t]*?', ' *?-->'), # e.g. xml
# many other inline comment syntaxes are not included,
# because we want to be performant for
# any(regex.search(line) for regex in ALLOWLIST_REGEXES)
# calls. of course, this won't be a concern if detect-secrets
# switches over to implementing file plugins for each supported
# filetype.
)
]
]
# Note: Cache size should be 2x the number of comment types
@lru_cache(maxsize=12)
def _get_allowlist_regexes(comment_tuple: Tuple[str, str], nextline: bool) -> Pattern:
start = comment_tuple[0]
end = comment_tuple[1]
return re.compile(
# Note: Always use allowlist, whitelist will be deprecated in the future
r'{}{} *pragma: ?{}{}[ -]secret.*?{}[ \t]*$'.format(
r'^[ \t]*' if nextline else r'[ \t]+',
nickiaconis marked this conversation as resolved.
Show resolved Hide resolved
start,
r'allowlist' if nextline else r'(allow|white)list',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also fine if these are the same. Supporting whitelist is just for backwards compatibility, and I'm fine with keeping that backwards compatibility around for the nextline regex too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind one way or the other, but my thinking was that, b/c nextline is new, it doesn't have any backward compatibility to contend with. Thus, requiring allowlist for nextline prepares uses of it for the future removal of whitelist, whereas allowing both opens the possibility of further entrenching whitelist.

r'[ -]nextline' if nextline else '',
end,
),
)
45 changes: 17 additions & 28 deletions tests/filters/allowlist_filter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,26 @@
from detect_secrets.util.code_snippet import CodeSnippet


@pytest.mark.parametrize(
'prefix, suffix',
(
('#', ''),
('# ', ' more text'),
EXAMPLE_COMMENT_PARTS = (
('#', ''),
('# ', ' more text'),

('//', ''),
('// ', ' more text'),
('//', ''),
('// ', ' more text'),

('/*', '*/'),
('/* ', ' */'),
('/*', '*/'),
('/* ', ' */'),

('--', ''),
('-- ', ' more text'),
('--', ''),
('-- ', ' more text'),

('<!--', '-->'),
),
('<!--', '-->'),
)


@pytest.mark.parametrize(
'prefix, suffix',
EXAMPLE_COMMENT_PARTS,
)
def test_basic(prefix, suffix):
line = f'AKIAEXAMPLE {prefix}pragma: allowlist secret{suffix}'
Expand All @@ -33,21 +36,7 @@ def test_basic(prefix, suffix):

@pytest.mark.parametrize(
'prefix, suffix',
(
('#', ''),
('# ', ' more text'),

('//', ''),
('// ', ' more text'),

('/*', '*/'),
('/* ', ' */'),

('--', ''),
('-- ', ' more text'),

('<!--', '-->'),
),
EXAMPLE_COMMENT_PARTS,
)
def test_nextline(prefix, suffix):
comment = f'{prefix}pragma: allowlist nextline secret{suffix}'
Expand Down