Skip to content

Commit

Permalink
🔭[Keyword Plugin] Precision improvements
Browse files Browse the repository at this point in the history
Added Javascript specific false-positive checks
Added ${ before } heuristic for  e.g. ${link}
Added more false-positives to FALSE_POSITIVES

:zap: keyword_test.py
Make STANDARD_NEGATIVES list and STANDARD_POSITIVES set for DRYness
  • Loading branch information
KevinHock committed Jan 3, 2019
1 parent d314550 commit a29108b
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 83 deletions.
52 changes: 43 additions & 9 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,34 @@
FALSE_POSITIVES = (
"''",
"''):",
"')",
"'this",
'""',
'""):',
'")',
'<a',
'#pass',
'#password',
'<aws_secret_access_key>',
'<password>',
'dummy_secret',
'false',
'false):',
'none',
'none,',
'none}',
'not',
'null,',
'password)',
'password,',
'password},',
'string,',
'string}',
'string}}',
'test-access-key',
'true',
'true):',
'{',
)
FOLLOWED_BY_COLON_RE = re.compile(
# e.g. api_key: foo
Expand Down Expand Up @@ -104,8 +120,9 @@


class FileType(Enum):
PYTHON = 1
PHP = 2
JAVASCRIPT = 0
PHP = 1
PYTHON = 2
OTHER = 3


Expand All @@ -115,7 +132,9 @@ def determine_file_type(filename):
:rtype: FileType
"""
if filename.endswith('.py'):
if filename.endswith('.js'):
return FileType.JAVASCRIPT
elif filename.endswith('.py'):
return FileType.PYTHON
elif filename.endswith('.php'):
return FileType.PHP
Expand Down Expand Up @@ -170,17 +189,23 @@ def secret_generator(self, string, filetype):
def probably_false_positive(lowered_secret, filetype):
if (
'fake' in lowered_secret
or 'self.' in lowered_secret
or lowered_secret in FALSE_POSITIVES or
# If it is a .php file, do not report $variables
(
or 'forgot' in lowered_secret
or lowered_secret in FALSE_POSITIVES
or (
filetype == FileType.JAVASCRIPT
and (
lowered_secret.startswith('this.')
or lowered_secret.startswith('fs.read')
or lowered_secret == 'new'
)
) or ( # If it is a .php file, do not report $variables
filetype == FileType.PHP
and lowered_secret[0] == '$'
)
):
return True

# No function calls
# Heuristic for no function calls
try:
if (
lowered_secret.index('(') < lowered_secret.index(')')
Expand All @@ -189,7 +214,7 @@ def probably_false_positive(lowered_secret, filetype):
except ValueError:
pass

# Skip over e.g. request.json_body['hey']
# Heuristic for e.g. request.json_body['hey']
try:
if (
lowered_secret.index('[') < lowered_secret.index(']')
Expand All @@ -198,4 +223,13 @@ def probably_false_positive(lowered_secret, filetype):
except ValueError:
pass

# Heuristic for e.g. ${link}
try:
if (
lowered_secret.index('${') < lowered_secret.index('}')
):
return True
except ValueError:
pass

return False
164 changes: 90 additions & 74 deletions tests/plugins/keyword_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,67 @@
from testing.mocks import mock_file_object


STANDARD_NEGATIVES = [
# FOLLOWED_BY_COLON_RE
'theapikey: ""', # Nothing in the quotes
'theapikey: "somefakekey"', # 'fake' in the secret
'theapikeyforfoo:hopenobodyfindsthisone', # Characters between apikey and :
# FOLLOWED_BY_EQUAL_SIGNS_RE
'some_key = "real_secret"', # We cannot make 'key' a Keyword, too noisy
'my_password = foo(hey)you', # Has a ( followed by a )
"my_password = request.json_body['hey']", # Has a [ followed by a ]
'my_password = ""', # Nothing in the quotes
"my_password = ''", # Nothing in the quotes
'my_password = True', # 'True' is a known false-positive
'my_password = "fakesecret"', # 'fake' in the secret
'login(username=username, password=password)', # secret is password)
'open(self, password = ""):', # secrets is ""):
'open(self, password = ""):', # secrets is ""):
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
'private_key "";', # Nothing in the quotes
'private_key \'"no spaces\';', # Has whitespace in the secret
'private_key "fake";', # 'fake' in the secret
'private_key "hopenobodyfindsthisone\';', # Double-quote does not match single-quote
'private_key \'hopenobodyfindsthisone";', # Single-quote does not match double-quote
'password: ${link}', # Has a ${ followed by a }
]
STANDARD_POSITIVES = {
# FOLLOWED_BY_COLON_RE
"'theapikey': 'h}o)p${e]nob(ody[finds>-_$#thisone'",
'"theapikey": "h}o)p${e]nob(ody[finds>-_$#thisone"',
'apikey: h}o)p${e]nob(ody[finds>-_$#thisone',
'apikey:h}o)p${e]nob(ody[finds>-_$#thisone',
'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone',
'apikey: "h}o)p${e]nob(ody[finds>-_$#thisone"',
"apikey: 'h}o)p${e]nob(ody[finds>-_$#thisone'",
# FOLLOWED_BY_EQUAL_SIGNS_RE
'some_dict["secret"] = "h}o)p${e]nob(ody[finds>-_$#thisone"',
"some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone",
'my_password=h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password= h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password = h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n',
'the_password= "h}o)p${e]nob(ody[finds>-_$#thisone"\n',
'the_password=\'h}o)p${e]nob(ody[finds>-_$#thisone\'\n',
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
'apikey "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo"h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes
'private_key \'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_keyfoo\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_key\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes
}


class TestKeywordDetector(object):

@pytest.mark.parametrize(
'file_content',
[
# FOLLOWED_BY_COLON_RE
"'theapikey': 'ho)pe]nob(ody[finds>-_$#thisone'",
'"theapikey": "ho)pe]nob(ody[finds>-_$#thisone"',
'apikey: ho)pe]nob(ody[finds>-_$#thisone',
'apikey:ho)pe]nob(ody[finds>-_$#thisone',
'theapikey:ho)pe]nob(ody[finds>-_$#thisone',
'apikey: "ho)pe]nob(ody[finds>-_$#thisone"',
"apikey: 'ho)pe]nob(ody[finds>-_$#thisone'",
# FOLLOWED_BY_EQUAL_SIGNS_RE
'some_dict["secret"] = "ho)pe]nob(ody[finds>-_$#thisone"',
"some_dict['secret'] = ho)pe]nob(ody[finds>-_$#thisone",
'my_password=ho)pe]nob(ody[finds>-_$#thisone',
'my_password= ho)pe]nob(ody[finds>-_$#thisone',
'my_password =ho)pe]nob(ody[finds>-_$#thisone',
'my_password = ho)pe]nob(ody[finds>-_$#thisone',
'my_password =ho)pe]nob(ody[finds>-_$#thisone',
'the_password=ho)pe]nob(ody[finds>-_$#thisone\n',
'the_password= "ho)pe]nob(ody[finds>-_$#thisone"\n',
'the_password=\'ho)pe]nob(ody[finds>-_$#thisone\'\n',
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
'apikey "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo"ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes
'private_key \'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_keyfoo\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_key\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes
],
STANDARD_POSITIVES,
)
def test_analyze_positives(self, file_content):
def test_analyze_standard_positives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
Expand All @@ -51,29 +78,25 @@ def test_analyze_positives(self, file_content):
assert 'mock_filename' == potential_secret.filename
assert (
potential_secret.secret_hash
== PotentialSecret.hash_secret('ho)pe]nob(ody[finds>-_$#thisone')
== PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone')
)

@pytest.mark.parametrize(
'file_content',
[
STANDARD_POSITIVES - {
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
"'theapikey': 'hope]nobody[finds>-_$#thisone'",
'"theapikey": "hope]nobody[finds>-_$#thisone"',
'apikey: "hope]nobody[finds>-_$#thisone"',
"apikey: 'hope]nobody[finds>-_$#thisone'",
'apikey: h}o)p${e]nob(ody[finds>-_$#thisone',
'apikey:h}o)p${e]nob(ody[finds>-_$#thisone',
'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone',
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
'some_dict["secret"] = "hope]nobody[finds>-_$#thisone"',
'the_password= "hope]nobody[finds>-_$#thisone"\n',
'the_password=\'hope]nobody[finds>-_$#thisone\'\n',
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
'apikey "hope]nobody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo "hope]nobody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo"hope]nobody[finds>-_$#thisone";', # Double-quotes
'private_key \'hope]nobody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_keyfoo\'hope]nobody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_key\'hope]nobody[finds>-_$#thisone\';', # Single-quotes
],
"some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone",
'my_password=h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password= h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password = h}o)p${e]nob(ody[finds>-_$#thisone',
'my_password =h}o)p${e]nob(ody[finds>-_$#thisone',
'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n',
},
)
def test_analyze_python_positives(self, file_content):
logic = KeywordDetector()
Expand All @@ -85,45 +108,38 @@ def test_analyze_python_positives(self, file_content):
assert 'mock_filename.py' == potential_secret.filename
assert (
potential_secret.secret_hash
== PotentialSecret.hash_secret('hope]nobody[finds>-_$#thisone')
== PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone')
)

@pytest.mark.parametrize(
'file_content',
[
'negative',
STANDARD_NEGATIVES,
)
def test_analyze_standard_negatives(self, negative):
logic = KeywordDetector()

f = mock_file_object(negative)
output = logic.analyze(f, 'mock_filename.foo')
assert len(output) == 0

@pytest.mark.parametrize(
'js_negative',
STANDARD_NEGATIVES + [
# FOLLOWED_BY_COLON_RE
'private_key "";', # Nothing in the quotes
'private_key \'"no spaces\';', # Has whitespace in the secret
'private_key "fake";', # 'fake' in the secret
'private_key "hopenobodyfindsthisone\';', # Double-quote does not match single-quote
'private_key \'hopenobodyfindsthisone";', # Single-quote does not match double-quote
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
'theapikey: ""', # Nothing in the quotes
'theapikey: "somefakekey"', # 'fake' in the secret
'theapikeyforfoo:hopenobodyfindsthisone', # Characters between apikey and :
# FOLLOWED_BY_EQUAL_SIGNS_RE
'some_key = "real_secret"', # We cannot make 'key' a Keyword, too noisy
'my_password = foo(hey)you', # Has a ( followed by a )
"my_password = request.json_body['hey']", # Has a [ followed by a ]
'my_password = ""', # Nothing in the quotes
"my_password = ''", # Nothing in the quotes
'my_password = True', # 'True' is a known false-positive
'my_password = "fakesecret"', # 'fake' in the secret
'login(username=username, password=password)', # secret is password)
'open(self, password = ""):', # secrets is ""):
# '',
'apiKey: this.apiKey,',
"apiKey: fs.readFileSync('foo',",
],
)
def test_analyze_negatives(self, file_content):
def test_analyze_javascript_negatives(self, js_negative):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.foo')
f = mock_file_object(js_negative)
output = logic.analyze(f, 'mock_filename.js')
assert len(output) == 0

@pytest.mark.parametrize(
'secret_starting_with_dollar_sign',
[
STANDARD_NEGATIVES + [
# FOLLOWED_BY_EQUAL_SIGNS_RE
'$password = $input;',
],
Expand All @@ -137,7 +153,7 @@ def test_analyze_php_negatives(self, secret_starting_with_dollar_sign):

@pytest.mark.parametrize(
'secret_with_no_quote',
[
STANDARD_NEGATIVES + [
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
'apikey: hope]nobody[finds>-_$#thisone',
'apikey:hope]nobody[finds>-_$#thisone',
Expand Down

0 comments on commit a29108b

Please sign in to comment.