Skip to content

Commit

Permalink
🔭[Keyword Plugin] Precision improvements
Browse files Browse the repository at this point in the history
Made quotes required in Python files/added regexes for this
Added a Filetype Enum and `determine_file_type` function

Replaced 'pass' with 'db_pass' in BLACKLIST
Added 'aws_secret_access_key' to BLACKLIST
Added some trailing char cases to FALSE_POSITIVES

:boom: Changed secret_type to 'Secret Keyword'
  • Loading branch information
KevinHock committed Dec 28, 2018
1 parent f15366e commit e01d818
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 30 deletions.
7 changes: 4 additions & 3 deletions detect_secrets/core/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from ..plugins.core import initialize
from ..plugins.high_entropy_strings import HighEntropyStringsPlugin
from ..plugins.keyword import determine_file_type
from ..plugins.keyword import KeywordDetector
from .baseline import format_baseline_for_output
from .baseline import merge_results
Expand Down Expand Up @@ -554,7 +555,7 @@ def _highlight_secret(
for raw_secret in _raw_secret_generator(
plugin,
secret_line,
is_php_file=filename.endswith('.php'),
filetype=determine_file_type(filename),
):
secret_obj = PotentialSecret(
plugin.secret_type,
Expand Down Expand Up @@ -583,10 +584,10 @@ def _highlight_secret(
)


def _raw_secret_generator(plugin, secret_line, is_php_file):
def _raw_secret_generator(plugin, secret_line, filetype):
"""Generates raw secrets by re-scanning the line, with the specified plugin"""
if isinstance(plugin, KeywordDetector):
for raw_secret in plugin.secret_generator(secret_line, is_php_file):
for raw_secret in plugin.secret_generator(secret_line, filetype=filetype):
yield raw_secret
else:
for raw_secret in plugin.secret_generator(secret_line):
Expand Down
71 changes: 61 additions & 10 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from __future__ import absolute_import

import re
from enum import Enum

from .base import BasePlugin
from detect_secrets.core.potential_secret import PotentialSecret
Expand All @@ -36,7 +37,8 @@
BLACKLIST = (
'apikey',
'api_key',
'pass',
'aws_secret_access_key',
'db_pass',
'password',
'passwd',
'private_key',
Expand All @@ -49,24 +51,40 @@
'""',
'""):',
'false',
'false):',
'none',
'none,',
'none}',
'not',
'password)',
'password},',
'true',
'true):',
)
FOLLOWED_BY_COLON_RE = re.compile(
# e.g. token:
# e.g. api_key: foo
r'({})(("|\')?):(\s*?)(("|\')?)([^\s]+)(\5)'.format(
r'|'.join(BLACKLIST),
),
)
FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE = re.compile(
# e.g. api_key: "foo"
r'({})(("|\')?):(\s*?)(("|\'))([^\s]+)(\5)'.format(
r'|'.join(BLACKLIST),
),
)
FOLLOWED_BY_EQUAL_SIGNS_RE = re.compile(
# e.g. my_password =
# e.g. my_password = bar
r'({})()(\s*?)=(\s*?)(("|\')?)([^\s]+)(\5)'.format(
r'|'.join(BLACKLIST),
),
)
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE = re.compile(
# e.g. my_password = "bar"
r'({})()(\s*?)=(\s*?)(("|\'))([^\s]+)(\5)'.format(
r'|'.join(BLACKLIST),
),
)
FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE = re.compile(
# e.g. private_key "something";
r'({})([^\s]*?)(\s*?)("|\')([^\s]+)(\4);'.format(
Expand All @@ -78,21 +96,45 @@
FOLLOWED_BY_EQUAL_SIGNS_RE: 7,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5,
}
PYTHON_BLACKLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE: 7,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE: 7,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5,
}


class FileType(Enum):
PYTHON = 1
PHP = 2
OTHER = 3


def determine_file_type(filename):
"""
:param filename: str
:rtype: FileType
"""
if filename.endswith('.py'):
return FileType.PYTHON
elif filename.endswith('.php'):
return FileType.PHP
return FileType.OTHER


class KeywordDetector(BasePlugin):
"""This checks if blacklisted keywords
are present in the analyzed string.
"""

secret_type = 'Password'
secret_type = 'Secret Keyword'

def analyze_string(self, string, line_num, filename):
output = {}

for identifier in self.secret_generator(
string,
is_php_file=filename.endswith('.php'),
filetype=determine_file_type(filename),
):
secret = PotentialSecret(
self.secret_type,
Expand All @@ -104,26 +146,35 @@ def analyze_string(self, string, line_num, filename):

return output

def secret_generator(self, string, is_php_file):
def secret_generator(self, string, filetype):
lowered_string = string.lower()

for REGEX, group_number in BLACKLIST_REGEX_TO_GROUP.items():
if filetype == FileType.PYTHON:
blacklist_RE_to_group = PYTHON_BLACKLIST_REGEX_TO_GROUP
else:
blacklist_RE_to_group = BLACKLIST_REGEX_TO_GROUP

for REGEX, group_number in blacklist_RE_to_group.items():
match = REGEX.search(lowered_string)
if match:
lowered_secret = match.group(group_number)

# ([^\s]+) guarantees lowered_secret is not ''
if not probably_false_positive(lowered_secret, is_php_file):
if not probably_false_positive(
lowered_secret,
filetype=filetype,
):
yield lowered_secret


def probably_false_positive(lowered_secret, is_php_file):
def probably_false_positive(lowered_secret, filetype):
if (
'fake' in lowered_secret or
'self.' in lowered_secret or
lowered_secret in FALSE_POSITIVES or
# If it is a .php file, do not report $variables
(
is_php_file and
filetype == FileType.PHP and
lowered_secret[0] == '$'
)
):
Expand Down
2 changes: 1 addition & 1 deletion testing/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

def potential_secret_factory(type_='type', filename='filename', secret='secret', lineno=1):
"""This is only marginally better than creating PotentialSecret objects directly,
because of default values.
because of the default values.
"""
return PotentialSecret(type_, filename, secret, lineno)

Expand Down
40 changes: 39 additions & 1 deletion tests/core/audit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ def test_secret_not_found(self, mock_printer):
""")[1:-1]

def test_secret_in_yaml_file(self, mock_printer):
def test_hex_high_entropy_secret_in_yaml_file(self, mock_printer):
with self._mock_sed_call(
line_containing_secret='api key: 123456789a',
):
Expand Down Expand Up @@ -644,6 +644,44 @@ def test_secret_in_yaml_file(self, mock_printer):
""")[1:-1]

def test_keyword_secret_in_yaml_file(self, mock_printer):
with self._mock_sed_call(
line_containing_secret='api_key: yerba',
):
self.run_logic(
secret=potential_secret_factory(
type_='Secret Keyword',
filename='filenameB',
secret='yerba',
lineno=15,
).json(),
settings=[
{
'name': 'KeywordDetector',
},
],
)

assert mock_printer.message == textwrap.dedent("""
Secret: 1 of 2
Filename: filenameB
Secret Type: Secret Keyword
----------
10:a
11:b
12:c
13:d
14:e
15:api_key: yerba
16:e
17:d
18:c
19:b
20:a
----------
""")[1:-1]


class TestGetUserDecision(object):

Expand Down
98 changes: 84 additions & 14 deletions tests/plugins/keyword_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,53 @@ class TestKeywordDetector(object):
'file_content',
[
# FOLLOWED_BY_COLON_RE
"'theapikey': 'ho)pe]nob(ody[finds>-_$#thisone'",
'"theapikey": "ho)pe]nob(ody[finds>-_$#thisone"',
'apikey: ho)pe]nob(ody[finds>-_$#thisone',
'apikey:ho)pe]nob(ody[finds>-_$#thisone',
'theapikey:ho)pe]nob(ody[finds>-_$#thisone',
'apikey: "ho)pe]nob(ody[finds>-_$#thisone"',
"apikey: 'ho)pe]nob(ody[finds>-_$#thisone'",
# FOLLOWED_BY_EQUAL_SIGNS_RE
'my_password=ho)pe]nob(ody[finds>-_$#thisone',
'my_password= ho)pe]nob(ody[finds>-_$#thisone',
'my_password =ho)pe]nob(ody[finds>-_$#thisone',
'my_password = ho)pe]nob(ody[finds>-_$#thisone',
'my_password =ho)pe]nob(ody[finds>-_$#thisone',
'the_password=ho)pe]nob(ody[finds>-_$#thisone\n',
'the_password= "ho)pe]nob(ody[finds>-_$#thisone"\n',
'the_password=\'ho)pe]nob(ody[finds>-_$#thisone\'\n',
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
'apikey "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo"ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes
'private_key \'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_keyfoo\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_key\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes
],
)
def test_analyze_positives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename')
assert len(output) == 1
for potential_secret in output:
assert 'mock_filename' == potential_secret.filename
assert (
potential_secret.secret_hash
== PotentialSecret.hash_secret('ho)pe]nob(ody[finds>-_$#thisone')
)

@pytest.mark.parametrize(
'file_content',
[
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
"'theapikey': 'hope]nobody[finds>-_$#thisone'",
'"theapikey": "hope]nobody[finds>-_$#thisone"',
'apikey: hope]nobody[finds>-_$#thisone',
'apikey:hope]nobody[finds>-_$#thisone',
'theapikey:hope]nobody[finds>-_$#thisone',
'apikey: "hope]nobody[finds>-_$#thisone"',
"apikey: 'hope]nobody[finds>-_$#thisone'",
# FOLLOWED_BY_EQUAL_SIGNS_RE
'my_password=hope]nobody[finds>-_$#thisone',
'my_password= hope]nobody[finds>-_$#thisone',
'my_password =hope]nobody[finds>-_$#thisone',
'my_password = hope]nobody[finds>-_$#thisone',
'my_password =hope]nobody[finds>-_$#thisone',
'the_password=hope]nobody[finds>-_$#thisone\n',
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
'the_password= "hope]nobody[finds>-_$#thisone"\n',
'the_password=\'hope]nobody[finds>-_$#thisone\'\n',
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
Expand All @@ -39,14 +72,14 @@ class TestKeywordDetector(object):
'fooprivate_key\'hope]nobody[finds>-_$#thisone\';', # Single-quotes
],
)
def test_analyze_positives(self, file_content):
def test_analyze_python_positives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename')
output = logic.analyze(f, 'mock_filename.py')
assert len(output) == 1
for potential_secret in output:
assert 'mock_filename' == potential_secret.filename
assert 'mock_filename.py' == potential_secret.filename
assert (
potential_secret.secret_hash
== PotentialSecret.hash_secret('hope]nobody[finds>-_$#thisone')
Expand All @@ -66,7 +99,6 @@ def test_analyze_positives(self, file_content):
'theapikey: "somefakekey"', # 'fake' in the secret
'theapikeyforfoo:hopenobodyfindsthisone', # Characters between apikey and :
# FOLLOWED_BY_EQUAL_SIGNS_RE
'$password = $input;', # Skip anything starting with $ in php files
'some_key = "real_secret"', # We cannot make 'key' a Keyword, too noisy
'my_password = foo(hey)you', # Has a ( followed by a )
"my_password = request.json_body['hey']", # Has a [ followed by a ]
Expand All @@ -76,11 +108,49 @@ def test_analyze_positives(self, file_content):
'my_password = "fakesecret"', # 'fake' in the secret
'login(username=username, password=password)', # secret is password)
'open(self, password = ""):', # secrets is ""):
# '',
],
)
def test_analyze_negatives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.foo')
assert len(output) == 0

@pytest.mark.parametrize(
'file_content',
[
# FOLLOWED_BY_EQUAL_SIGNS_RE
'$password = $input;', # Skip anything starting with $ in php files
],
)
def test_analyze_php_negatives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.php')
assert len(output) == 0

@pytest.mark.parametrize(
'file_content',
[
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
'apikey: hope]nobody[finds>-_$#thisone', # No quotes
'apikey:hope]nobody[finds>-_$#thisone', # No quotes
'theapikey:hope]nobody[finds>-_$#thisone', # No quotes
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
'my_password=hope]nobody[finds>-_$#thisone', # No quotes
'my_password= hope]nobody[finds>-_$#thisone', # No quotes
'my_password =hope]nobody[finds>-_$#thisone', # No quotes
'my_password = hope]nobody[finds>-_$#thisone', # No quotes
'my_password =hope]nobody[finds>-_$#thisone', # No quotes
'the_password=hope]nobody[finds>-_$#thisone\n', # No quotes
],
)
def test_analyze_python_negatives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.py')
assert len(output) == 0
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ commands =
coverage erase
coverage run -m pytest tests
coverage report --show-missing --include=tests/* --fail-under 100
coverage report --show-missing --include=detect_secrets/* --fail-under 96
coverage report --show-missing --include=detect_secrets/* --fail-under 97
pre-commit run --all-files

[testenv:venv]
Expand Down

0 comments on commit e01d818

Please sign in to comment.