Skip to content

Commit

Permalink
🔭[Keyword Plugin] Precision improvements
Browse files Browse the repository at this point in the history
Made quotes required in Python files/added regexes for this
Added a Filetype Enum

Replaced 'pass' with 'db_pass' in BLACKLIST
Added 'aws_secret_access_key' to BLACKLIST
Added some trailing char cases to FALSE_POSITIVES

:boom: Changed secret_type to 'Secret Keyword'
  • Loading branch information
KevinHock committed Dec 28, 2018
1 parent f15366e commit ea99830
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 24 deletions.
65 changes: 55 additions & 10 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from __future__ import absolute_import

import re
from enum import Enum

from .base import BasePlugin
from detect_secrets.core.potential_secret import PotentialSecret
Expand All @@ -36,7 +37,8 @@
BLACKLIST = (
'apikey',
'api_key',
'pass',
'aws_secret_access_key',
'db_pass',
'password',
'passwd',
'private_key',
Expand All @@ -49,24 +51,40 @@
'""',
'""):',
'false',
'false):',
'none',
'none,',
'none}',
'not',
'password)',
'password},',
'true',
'true):',
)
FOLLOWED_BY_COLON_RE = re.compile(
# e.g. token:
# e.g. api_key: foo
r'({})(("|\')?):(\s*?)(("|\')?)([^\s]+)(\5)'.format(
r'|'.join(BLACKLIST),
),
)
FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE = re.compile(
# e.g. api_key: "foo"
r'({})(("|\')?):(\s*?)(("|\'))([^\s]+)(\5)'.format(
r'|'.join(BLACKLIST),
),
)
FOLLOWED_BY_EQUAL_SIGNS_RE = re.compile(
# e.g. my_password =
# e.g. my_password = bar
r'({})()(\s*?)=(\s*?)(("|\')?)([^\s]+)(\5)'.format(
r'|'.join(BLACKLIST),
),
)
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE = re.compile(
# e.g. my_password = "bar"
r'({})()(\s*?)=(\s*?)(("|\'))([^\s]+)(\5)'.format(
r'|'.join(BLACKLIST),
),
)
FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE = re.compile(
# e.g. private_key "something";
r'({})([^\s]*?)(\s*?)("|\')([^\s]+)(\4);'.format(
Expand All @@ -78,21 +96,39 @@
FOLLOWED_BY_EQUAL_SIGNS_RE: 7,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5,
}
PYTHON_BLACKLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE: 7,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE: 7,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5,
}


class FileType(Enum):
PYTHON = 1
PHP = 2
OTHER = 3


class KeywordDetector(BasePlugin):
"""This checks if blacklisted keywords
are present in the analyzed string.
"""

secret_type = 'Password'
secret_type = 'Secret Keyword'

def analyze_string(self, string, line_num, filename):
output = {}

if filename.endswith('.py'):
filetype = FileType.PYTHON
elif filename.endswith('.php'):
filetype = FileType.PHP
else:
filetype = FileType.OTHER

for identifier in self.secret_generator(
string,
is_php_file=filename.endswith('.php'),
filetype=filetype,
):
secret = PotentialSecret(
self.secret_type,
Expand All @@ -104,26 +140,35 @@ def analyze_string(self, string, line_num, filename):

return output

def secret_generator(self, string, is_php_file):
def secret_generator(self, string, filetype):
lowered_string = string.lower()

for REGEX, group_number in BLACKLIST_REGEX_TO_GROUP.items():
if filetype == FileType.PYTHON:
blacklist_RE_to_group = PYTHON_BLACKLIST_REGEX_TO_GROUP
else:
blacklist_RE_to_group = BLACKLIST_REGEX_TO_GROUP

for REGEX, group_number in blacklist_RE_to_group.items():
match = REGEX.search(lowered_string)
if match:
lowered_secret = match.group(group_number)

# ([^\s]+) guarantees lowered_secret is not ''
if not probably_false_positive(lowered_secret, is_php_file):
if not probably_false_positive(
lowered_secret,
filetype=filetype,
):
yield lowered_secret


def probably_false_positive(lowered_secret, is_php_file):
def probably_false_positive(lowered_secret, filetype):
if (
'fake' in lowered_secret or
'self.' in lowered_secret or
lowered_secret in FALSE_POSITIVES or
# If it is a .php file, do not report $variables
(
is_php_file and
filetype == FileType.PHP and
lowered_secret[0] == '$'
)
):
Expand Down
98 changes: 84 additions & 14 deletions tests/plugins/keyword_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,53 @@ class TestKeywordDetector(object):
'file_content',
[
# FOLLOWED_BY_COLON_RE
"'theapikey': 'ho)pe]nob(ody[finds>-_$#thisone'",
'"theapikey": "ho)pe]nob(ody[finds>-_$#thisone"',
'apikey: ho)pe]nob(ody[finds>-_$#thisone',
'apikey:ho)pe]nob(ody[finds>-_$#thisone',
'theapikey:ho)pe]nob(ody[finds>-_$#thisone',
'apikey: "ho)pe]nob(ody[finds>-_$#thisone"',
"apikey: 'ho)pe]nob(ody[finds>-_$#thisone'",
# FOLLOWED_BY_EQUAL_SIGNS_RE
'my_password=ho)pe]nob(ody[finds>-_$#thisone',
'my_password= ho)pe]nob(ody[finds>-_$#thisone',
'my_password =ho)pe]nob(ody[finds>-_$#thisone',
'my_password = ho)pe]nob(ody[finds>-_$#thisone',
'my_password =ho)pe]nob(ody[finds>-_$#thisone',
'the_password=ho)pe]nob(ody[finds>-_$#thisone\n',
'the_password= "ho)pe]nob(ody[finds>-_$#thisone"\n',
'the_password=\'ho)pe]nob(ody[finds>-_$#thisone\'\n',
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
'apikey "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes
'fooapikeyfoo"ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes
'private_key \'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_keyfoo\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes
'fooprivate_key\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes
],
)
def test_analyze_positives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename')
assert len(output) == 1
for potential_secret in output:
assert 'mock_filename' == potential_secret.filename
assert (
potential_secret.secret_hash
== PotentialSecret.hash_secret('ho)pe]nob(ody[finds>-_$#thisone')
)

@pytest.mark.parametrize(
'file_content',
[
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
"'theapikey': 'hope]nobody[finds>-_$#thisone'",
'"theapikey": "hope]nobody[finds>-_$#thisone"',
'apikey: hope]nobody[finds>-_$#thisone',
'apikey:hope]nobody[finds>-_$#thisone',
'theapikey:hope]nobody[finds>-_$#thisone',
'apikey: "hope]nobody[finds>-_$#thisone"',
"apikey: 'hope]nobody[finds>-_$#thisone'",
# FOLLOWED_BY_EQUAL_SIGNS_RE
'my_password=hope]nobody[finds>-_$#thisone',
'my_password= hope]nobody[finds>-_$#thisone',
'my_password =hope]nobody[finds>-_$#thisone',
'my_password = hope]nobody[finds>-_$#thisone',
'my_password =hope]nobody[finds>-_$#thisone',
'the_password=hope]nobody[finds>-_$#thisone\n',
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
'the_password= "hope]nobody[finds>-_$#thisone"\n',
'the_password=\'hope]nobody[finds>-_$#thisone\'\n',
# FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE
Expand All @@ -39,14 +72,14 @@ class TestKeywordDetector(object):
'fooprivate_key\'hope]nobody[finds>-_$#thisone\';', # Single-quotes
],
)
def test_analyze_positives(self, file_content):
def test_analyze_python_positives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename')
output = logic.analyze(f, 'mock_filename.py')
assert len(output) == 1
for potential_secret in output:
assert 'mock_filename' == potential_secret.filename
assert 'mock_filename.py' == potential_secret.filename
assert (
potential_secret.secret_hash
== PotentialSecret.hash_secret('hope]nobody[finds>-_$#thisone')
Expand All @@ -66,7 +99,6 @@ def test_analyze_positives(self, file_content):
'theapikey: "somefakekey"', # 'fake' in the secret
'theapikeyforfoo:hopenobodyfindsthisone', # Characters between apikey and :
# FOLLOWED_BY_EQUAL_SIGNS_RE
'$password = $input;', # Skip anything starting with $ in php files
'some_key = "real_secret"', # We cannot make 'key' a Keyword, too noisy
'my_password = foo(hey)you', # Has a ( followed by a )
"my_password = request.json_body['hey']", # Has a [ followed by a ]
Expand All @@ -76,11 +108,49 @@ def test_analyze_positives(self, file_content):
'my_password = "fakesecret"', # 'fake' in the secret
'login(username=username, password=password)', # secret is password)
'open(self, password = ""):', # secrets is ""):
# '',
],
)
def test_analyze_negatives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.foo')
assert len(output) == 0

@pytest.mark.parametrize(
'file_content',
[
# FOLLOWED_BY_EQUAL_SIGNS_RE
'$password = $input;', # Skip anything starting with $ in php files
],
)
def test_analyze_php_negatives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.php')
assert len(output) == 0

@pytest.mark.parametrize(
'file_content',
[
# FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE
'apikey: hope]nobody[finds>-_$#thisone', # No quotes
'apikey:hope]nobody[finds>-_$#thisone', # No quotes
'theapikey:hope]nobody[finds>-_$#thisone', # No quotes
# FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE
'my_password=hope]nobody[finds>-_$#thisone', # No quotes
'my_password= hope]nobody[finds>-_$#thisone', # No quotes
'my_password =hope]nobody[finds>-_$#thisone', # No quotes
'my_password = hope]nobody[finds>-_$#thisone', # No quotes
'my_password =hope]nobody[finds>-_$#thisone', # No quotes
'the_password=hope]nobody[finds>-_$#thisone\n', # No quotes
],
)
def test_analyze_python_negatives(self, file_content):
logic = KeywordDetector()

f = mock_file_object(file_content)
output = logic.analyze(f, 'mock_filename.py')
assert len(output) == 0

0 comments on commit ea99830

Please sign in to comment.