From b9c80f9b7b7aba210d24a14067045788291bf307 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 22 Oct 2018 11:11:02 -0700 Subject: [PATCH 01/16] Use different regexes in KeywordDetector to improve accuracy --- detect_secrets/plugins/keyword.py | 55 ++++++++++++++++++++----- tests/plugins/keyword_test.py | 68 ++++++++++++++++++++++++++++--- 2 files changed, 106 insertions(+), 17 deletions(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 5ce4d461b..3159afb55 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -26,22 +26,54 @@ """ from __future__ import absolute_import +import re + from .base import BasePlugin from detect_secrets.core.potential_secret import PotentialSecret from detect_secrets.plugins.core.constants import WHITELIST_REGEX +# Note: All values here should be lowercase BLACKLIST = ( - # NOTE all values here should be lowercase, - # otherwise _secret_generator can fail to match them - 'pass =', + 'apikey', + 'api_key', + 'pass', 'password', 'passwd', - 'pwd', + 'private_key', 'secret', 'secrete', 'token', ) +# Uses lazy quantifiers +BLACKLIST_REGEX = re.compile( + # Followed by double-quotes and a semi-colon + # e.g. private_key "something"; + r'|'.join( + '(.*?){}(.*?)"(.*?)";'.format( + value, + ) + for value in BLACKLIST + ) + '|' + + # Followed by a : + # e.g. token: + r'|'.join( + '(.*?){}[:](.*?)'.format( + value, + ) + for value in BLACKLIST + ) + '|' + + # Follwed by an = sign + # e.g. my_password = + r'|'.join( + '(.*?){}(.*?)=(.*?)'.format( + value, + ) + for value in BLACKLIST + ) + + # pwd has to start with pwd, it is too common + '|^pwd(.*?)=(.*?)', +) class KeywordDetector(BasePlugin): @@ -57,7 +89,8 @@ def analyze_string(self, string, line_num, filename): if WHITELIST_REGEX.search(string): return output - for identifier in self.secret_generator(string): + identifier = self.secret_generator(string) + if identifier: secret = PotentialSecret( self.secret_type, filename, @@ -68,10 +101,10 @@ def analyze_string(self, string, line_num, filename): return output - def _secret_generator(self, lowercase_string): - for line in BLACKLIST: - if line in lowercase_string: - yield line - def secret_generator(self, string): - return self._secret_generator(string.lower()) + match = BLACKLIST_REGEX.search( + string.lower(), + ) + if not match: + return None + return match.group() diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 94ddc648f..14701022b 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -13,23 +13,79 @@ class TestKeywordDetector(object): 'file_content', [ ( - 'login_somewhere --http-password hopenobodyfindsthisone\n' + 'apikey "something";' ), ( - 'token = "noentropy"' + 'token "something";' + ), + ( + 'apikey:' + ), + ( + 'the_token:' + ), + ( + 'my_password =' + ), + ( + 'some_token_for_something =' + ), + ( + 'pwd = foo' ), ( - 'PASSWORD = "verysimple"' + 'private_key "something";' + ), + + ( + 'passwordone=foo\n' + ), + ( + 'API_KEY=hopenobodyfindsthisone\n' + ), + ( + 'token = "noentropy"' ), ], ) - def test_analyze(self, file_content): + def test_analyze_positives(self, file_content): logic = KeywordDetector() f = mock_file_object(file_content) output = logic.analyze(f, 'mock_filename') + print('output is {}'.format(output)) + for k in output: + print(str(k)) assert len(output) == 1 for potential_secret in output: assert 'mock_filename' == potential_secret.filename - generated = list(logic.secret_generator(file_content)) - assert len(generated) == len(output) + + @pytest.mark.parametrize( + 'file_content', + [ + ( + 'private_key \'something\';' # Single-quotes not double-quotes + ), + ( + 'passwordonefoo\n' # No = or anything + ), + ( + 'api_keyhopenobodyfindsthisone:\n' # Has char's in between api_key and : + ), + ( + 'my_pwd =' # Does not start with pwd + ), + ( + 'token "noentropy;' # No 2nd double-quote + ), + ( + 'token noentropy;' # No quotes + ), + ], + ) + def test_analyze_negatives(self, file_content): + logic = KeywordDetector() + + f = mock_file_object(file_content) + output = logic.analyze(f, 'mock_filename') + assert len(output) == 0 From c4e76762ec58b92cf8826b338c33149f1dd21d38 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 22 Oct 2018 14:22:01 -0700 Subject: [PATCH 02/16] Trim regexes down, include secret 'foo'; as well. --- detect_secrets/__init__.py | 2 +- detect_secrets/plugins/base.py | 3 +++ detect_secrets/plugins/keyword.py | 18 +++++++++--------- tests/plugins/keyword_test.py | 11 ++++------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/detect_secrets/__init__.py b/detect_secrets/__init__.py index 54c820fa6..1da3331f0 100644 --- a/detect_secrets/__init__.py +++ b/detect_secrets/__init__.py @@ -1 +1 @@ -VERSION = '0.10.3' +VERSION = '0.10.4' diff --git a/detect_secrets/plugins/base.py b/detect_secrets/plugins/base.py index 457625d9a..1c754acd3 100644 --- a/detect_secrets/plugins/base.py +++ b/detect_secrets/plugins/base.py @@ -47,6 +47,9 @@ def secret_generator(self, string): :type string: str :param string: the secret to scan + + :rtype: iter + :returns: Of all the identifiers found """ raise NotImplementedError diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 3159afb55..8e5a0bdef 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -49,8 +49,9 @@ BLACKLIST_REGEX = re.compile( # Followed by double-quotes and a semi-colon # e.g. private_key "something"; + # e.g. private_key 'something'; r'|'.join( - '(.*?){}(.*?)"(.*?)";'.format( + '{}(.*?)("|\')(.*?)(\'|");'.format( value, ) for value in BLACKLIST @@ -58,7 +59,7 @@ # Followed by a : # e.g. token: r'|'.join( - '(.*?){}[:](.*?)'.format( + '{}:'.format( value, ) for value in BLACKLIST @@ -66,13 +67,13 @@ # Follwed by an = sign # e.g. my_password = r'|'.join( - '(.*?){}(.*?)=(.*?)'.format( + '{}(.*?)='.format( value, ) for value in BLACKLIST ) + - # pwd has to start with pwd, it is too common - '|^pwd(.*?)=(.*?)', + # For `pwd` it has to start with pwd after whitespace, it is too common + '|\\spwd(.*?)=', ) @@ -89,8 +90,7 @@ def analyze_string(self, string, line_num, filename): if WHITELIST_REGEX.search(string): return output - identifier = self.secret_generator(string) - if identifier: + for identifier in self.secret_generator(string): secret = PotentialSecret( self.secret_type, filename, @@ -106,5 +106,5 @@ def secret_generator(self, string): string.lower(), ) if not match: - return None - return match.group() + return [] + return [match.group()] diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 14701022b..73705244b 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -18,6 +18,9 @@ class TestKeywordDetector(object): ( 'token "something";' ), + ( + 'private_key \'"something\';' # Single-quotes not double-quotes + ), ( 'apikey:' ), @@ -31,7 +34,7 @@ class TestKeywordDetector(object): 'some_token_for_something =' ), ( - 'pwd = foo' + ' pwd = foo' ), ( 'private_key "something";' @@ -53,9 +56,6 @@ def test_analyze_positives(self, file_content): f = mock_file_object(file_content) output = logic.analyze(f, 'mock_filename') - print('output is {}'.format(output)) - for k in output: - print(str(k)) assert len(output) == 1 for potential_secret in output: assert 'mock_filename' == potential_secret.filename @@ -63,9 +63,6 @@ def test_analyze_positives(self, file_content): @pytest.mark.parametrize( 'file_content', [ - ( - 'private_key \'something\';' # Single-quotes not double-quotes - ), ( 'passwordonefoo\n' # No = or anything ), From fce28376e532cd51e4d18b5679fb193b9f24ac30 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 22 Oct 2018 15:40:16 -0700 Subject: [PATCH 03/16] Do not alert if whitespace is in secret --- detect_secrets/plugins/keyword.py | 2 +- tests/plugins/keyword_test.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 8e5a0bdef..6f4ef4b9b 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -51,7 +51,7 @@ # e.g. private_key "something"; # e.g. private_key 'something'; r'|'.join( - '{}(.*?)("|\')(.*?)(\'|");'.format( + '{}(.*?)("|\')(\\S*?)(\'|");'.format( value, ) for value in BLACKLIST diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 73705244b..7e93a4fad 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -63,6 +63,9 @@ def test_analyze_positives(self, file_content): @pytest.mark.parametrize( 'file_content', [ + ( + 'private_key \'"no spaces\';' # Has whitespace in-between + ), ( 'passwordonefoo\n' # No = or anything ), From 3167cb1f713e722fe1535882eeb613859ef79c2d Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 30 Oct 2018 18:15:58 -0700 Subject: [PATCH 04/16] [Keyword Plugin] Add 3 re's and their secret groups in dict --- detect_secrets/plugins/keyword.py | 59 ++++++++++++++----------------- tests/plugins/keyword_test.py | 56 +++++++++++++++++------------ 2 files changed, 60 insertions(+), 55 deletions(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 6f4ef4b9b..1df65c59c 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -45,36 +45,29 @@ 'secrete', 'token', ) -# Uses lazy quantifiers -BLACKLIST_REGEX = re.compile( - # Followed by double-quotes and a semi-colon - # e.g. private_key "something"; - # e.g. private_key 'something'; - r'|'.join( - '{}(.*?)("|\')(\\S*?)(\'|");'.format( - value, - ) - for value in BLACKLIST - ) + '|' + - # Followed by a : +FOLLOWED_BY_COLON_RE = re.compile( # e.g. token: - r'|'.join( - '{}:'.format( - value, - ) - for value in BLACKLIST - ) + '|' + - # Follwed by an = sign + r'({}):(\s*?)([^\s]+)'.format( + r'|'.join(BLACKLIST), + ), +) +FOLLOWED_BY_EQUAL_SIGNS_RE = re.compile( # e.g. my_password = - r'|'.join( - '{}(.*?)='.format( - value, - ) - for value in BLACKLIST - ) + - # For `pwd` it has to start with pwd after whitespace, it is too common - '|\\spwd(.*?)=', + r'({})([^\s]*?)(\s*?)=(\s*?)([^\s]+)'.format( + r'|'.join(BLACKLIST), + ), ) +FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE = re.compile( + # e.g. private_key "something"; + r'({})([^\s]*?)(\s*?)("|\')([^\s]+)(\4);'.format( + r'|'.join(BLACKLIST), + ), +) +BLACKLIST_REGEX_TO_GROUP = { + FOLLOWED_BY_COLON_RE: 3, + FOLLOWED_BY_EQUAL_SIGNS_RE: 5, + FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5, +} class KeywordDetector(BasePlugin): @@ -102,9 +95,9 @@ def analyze_string(self, string, line_num, filename): return output def secret_generator(self, string): - match = BLACKLIST_REGEX.search( - string.lower(), - ) - if not match: - return [] - return [match.group()] + lowered_string = string.lower() + + for REGEX, group_number in BLACKLIST_REGEX_TO_GROUP.items(): + match = REGEX.search(lowered_string) + if match: + yield match.group(group_number) diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 7e93a4fad..fb6a91009 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -3,6 +3,7 @@ import pytest +from detect_secrets.core.potential_secret import PotentialSecret from detect_secrets.plugins.keyword import KeywordDetector from testing.mocks import mock_file_object @@ -12,42 +13,53 @@ class TestKeywordDetector(object): @pytest.mark.parametrize( 'file_content', [ + # FOLLOWED_BY_COLON_RE ( - 'apikey "something";' + 'apikey: hopenobodyfindsthisone' ), ( - 'token "something";' + 'apikey:hopenobodyfindsthisone' ), ( - 'private_key \'"something\';' # Single-quotes not double-quotes + 'theapikey:hopenobodyfindsthisone' ), + # FOLLOWED_BY_EQUAL_SIGNS_RE ( - 'apikey:' + 'my_password=hopenobodyfindsthisone' ), ( - 'the_token:' + 'my_password= hopenobodyfindsthisone' ), ( - 'my_password =' + 'my_password =hopenobodyfindsthisone' ), ( - 'some_token_for_something =' + 'my_password_for_stuff = hopenobodyfindsthisone' ), ( - ' pwd = foo' + 'my_password_for_stuff =hopenobodyfindsthisone' ), ( - 'private_key "something";' + 'passwordone=hopenobodyfindsthisone\n' + ), + # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE + ( + 'apikey "hopenobodyfindsthisone";' # Double-quotes ), - ( - 'passwordone=foo\n' + 'fooapikeyfoo "hopenobodyfindsthisone";' # Double-quotes ), ( - 'API_KEY=hopenobodyfindsthisone\n' + 'fooapikeyfoo"hopenobodyfindsthisone";' # Double-quotes ), ( - 'token = "noentropy"' + 'private_key \'hopenobodyfindsthisone\';' # Single-quotes + ), + ( + 'fooprivate_keyfoo\'hopenobodyfindsthisone\';' # Single-quotes + ), + ( + 'fooprivate_key\'hopenobodyfindsthisone\';' # Single-quotes ), ], ) @@ -59,27 +71,27 @@ def test_analyze_positives(self, file_content): assert len(output) == 1 for potential_secret in output: assert 'mock_filename' == potential_secret.filename + assert ( + potential_secret.secret_hash + == PotentialSecret.hash_secret('hopenobodyfindsthisone') + ) @pytest.mark.parametrize( 'file_content', [ + # FOLLOWED_BY_COLON_RE ( 'private_key \'"no spaces\';' # Has whitespace in-between ), ( - 'passwordonefoo\n' # No = or anything - ), - ( - 'api_keyhopenobodyfindsthisone:\n' # Has char's in between api_key and : - ), - ( - 'my_pwd =' # Does not start with pwd + 'private_key "hopenobodyfindsthisone\';' # Double-quote does not match single-quote ), ( - 'token "noentropy;' # No 2nd double-quote + 'private_key \'hopenobodyfindsthisone";' # Single-quote does not match double-quote ), + # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE ( - 'token noentropy;' # No quotes + 'theapikeyforfoo:hopenobodyfindsthisone' # Characters between apikey and : ), ], ) From b89209f09933fc895fd57ed4acc8a1bb4504db07 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 31 Oct 2018 15:10:32 -0700 Subject: [PATCH 05/16] Add a few more Keyword negative test-cases --- tests/plugins/keyword_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index fb6a91009..3ec207b65 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -80,6 +80,9 @@ def test_analyze_positives(self, file_content): 'file_content', [ # FOLLOWED_BY_COLON_RE + ( + 'private_key "";' # Nothing in the quotes + ), ( 'private_key \'"no spaces\';' # Has whitespace in-between ), @@ -93,6 +96,13 @@ def test_analyze_positives(self, file_content): ( 'theapikeyforfoo:hopenobodyfindsthisone' # Characters between apikey and : ), + ( + 'theapikey:' # Nothing after : + ), + # FOLLOWED_BY_EQUAL_SIGNS_RE + ( + 'my_password_for_stuff =' # Nothing after = + ), ], ) def test_analyze_negatives(self, file_content): From 431ad1298b23c574a66824a9f137c071a33927d3 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 1 Nov 2018 12:45:39 -0700 Subject: [PATCH 06/16] Added a variety of accuracy improvements to Keyword Plugin (see tests) Turned on Keyword detector by default Down-graded version to '0.1.666' to test it on a few repos without causing havoc --- detect_secrets/__init__.py | 2 +- detect_secrets/core/usage.py | 4 +- detect_secrets/plugins/keyword.py | 23 ++++++++-- tests/core/usage_test.py | 1 + tests/main_test.py | 2 + tests/plugins/keyword_test.py | 76 +++++++++++++++++++++++-------- 6 files changed, 79 insertions(+), 29 deletions(-) diff --git a/detect_secrets/__init__.py b/detect_secrets/__init__.py index 5308a7d1e..a4ff83fbf 100644 --- a/detect_secrets/__init__.py +++ b/detect_secrets/__init__.py @@ -1 +1 @@ -VERSION = '0.10.5' +VERSION = '0.1.666' # Will not merge PR to master until fully tested :D diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index 7776bcfdc..d9f257151 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -329,9 +329,7 @@ def _add_opt_out_options(self): plugin.disable_flag_text, action='store_true', help=plugin.disable_help_text, - # Temporarily disabling the KeywordDetector - # Until we can test its effectiveness on more repositories - default=True if plugin.disable_flag_text == '--no-keyword-scan' else False, + default=False, ) return self diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 1df65c59c..a9e2c6a39 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -45,15 +45,22 @@ 'secrete', 'token', ) +FALSE_POSITIVES = ( + "''", + '""', + 'false', + 'none', + 'true', +) FOLLOWED_BY_COLON_RE = re.compile( # e.g. token: - r'({}):(\s*?)([^\s]+)'.format( + r'({})(("|\')?):(\s*?)(("|\')?)([^\s]+)(\5)'.format( r'|'.join(BLACKLIST), ), ) FOLLOWED_BY_EQUAL_SIGNS_RE = re.compile( # e.g. my_password = - r'({})([^\s]*?)(\s*?)=(\s*?)([^\s]+)'.format( + r'({})([^\s]*?)(\s*?)=(\s*?)(("|\')?)([^\s]+)(\5)'.format( r'|'.join(BLACKLIST), ), ) @@ -64,8 +71,8 @@ ), ) BLACKLIST_REGEX_TO_GROUP = { - FOLLOWED_BY_COLON_RE: 3, - FOLLOWED_BY_EQUAL_SIGNS_RE: 5, + FOLLOWED_BY_COLON_RE: 7, + FOLLOWED_BY_EQUAL_SIGNS_RE: 7, FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5, } @@ -100,4 +107,10 @@ def secret_generator(self, string): for REGEX, group_number in BLACKLIST_REGEX_TO_GROUP.items(): match = REGEX.search(lowered_string) if match: - yield match.group(group_number) + secret = match.group(group_number) + if ( + secret and + 'fake' not in secret and + secret not in FALSE_POSITIVES + ): + yield secret diff --git a/tests/core/usage_test.py b/tests/core/usage_test.py index 1cf679f2a..880049e00 100644 --- a/tests/core/usage_test.py +++ b/tests/core/usage_test.py @@ -32,6 +32,7 @@ def test_consolidates_output_basic(self): 'Base64HighEntropyString': { 'base64_limit': 4.5, }, + 'KeywordDetector': {}, 'PrivateKeyDetector': {}, } assert not hasattr(args, 'no_private_key_scan') diff --git a/tests/main_test.py b/tests/main_test.py index faccfa958..d99e394a2 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -90,6 +90,7 @@ def test_scan_string_basic(self, mock_baseline_initialize): Base64HighEntropyString: False (3.459) BasicAuthDetector : False HexHighEntropyString : True (3.459) + KeywordDetector : False PrivateKeyDetector : False """)[1:] @@ -106,6 +107,7 @@ def test_scan_string_cli_overrides_stdin(self): Base64HighEntropyString: False (2.585) BasicAuthDetector : False HexHighEntropyString : False (2.121) + KeywordDetector : False PrivateKeyDetector : False """)[1:] diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 3ec207b65..28b315421 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -15,51 +15,69 @@ class TestKeywordDetector(object): [ # FOLLOWED_BY_COLON_RE ( - 'apikey: hopenobodyfindsthisone' + "'theapikey': 'hopenobodyfinds>-_$#thisone'" ), ( - 'apikey:hopenobodyfindsthisone' + '"theapikey": "hopenobodyfinds>-_$#thisone"' ), ( - 'theapikey:hopenobodyfindsthisone' + 'apikey: hopenobodyfinds>-_$#thisone' + ), + ( + 'apikey:hopenobodyfinds>-_$#thisone' + ), + ( + 'theapikey:hopenobodyfinds>-_$#thisone' + ), + ( + 'apikey: "hopenobodyfinds>-_$#thisone"' + ), + ( + "apikey: 'hopenobodyfinds>-_$#thisone'" ), # FOLLOWED_BY_EQUAL_SIGNS_RE ( - 'my_password=hopenobodyfindsthisone' + 'my_password=hopenobodyfinds>-_$#thisone' + ), + ( + 'my_password= hopenobodyfinds>-_$#thisone' ), ( - 'my_password= hopenobodyfindsthisone' + 'my_password =hopenobodyfinds>-_$#thisone' ), ( - 'my_password =hopenobodyfindsthisone' + 'my_password_for_stuff = hopenobodyfinds>-_$#thisone' ), ( - 'my_password_for_stuff = hopenobodyfindsthisone' + 'my_password_for_stuff =hopenobodyfinds>-_$#thisone' ), ( - 'my_password_for_stuff =hopenobodyfindsthisone' + 'passwordone=hopenobodyfinds>-_$#thisone\n' ), ( - 'passwordone=hopenobodyfindsthisone\n' + 'passwordone= "hopenobodyfinds>-_$#thisone"\n' + ), + ( + 'passwordone=\'hopenobodyfinds>-_$#thisone\'\n' ), # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE ( - 'apikey "hopenobodyfindsthisone";' # Double-quotes + 'apikey "hopenobodyfinds>-_$#thisone";' # Double-quotes ), ( - 'fooapikeyfoo "hopenobodyfindsthisone";' # Double-quotes + 'fooapikeyfoo "hopenobodyfinds>-_$#thisone";' # Double-quotes ), ( - 'fooapikeyfoo"hopenobodyfindsthisone";' # Double-quotes + 'fooapikeyfoo"hopenobodyfinds>-_$#thisone";' # Double-quotes ), ( - 'private_key \'hopenobodyfindsthisone\';' # Single-quotes + 'private_key \'hopenobodyfinds>-_$#thisone\';' # Single-quotes ), ( - 'fooprivate_keyfoo\'hopenobodyfindsthisone\';' # Single-quotes + 'fooprivate_keyfoo\'hopenobodyfinds>-_$#thisone\';' # Single-quotes ), ( - 'fooprivate_key\'hopenobodyfindsthisone\';' # Single-quotes + 'fooprivate_key\'hopenobodyfinds>-_$#thisone\';' # Single-quotes ), ], ) @@ -73,7 +91,7 @@ def test_analyze_positives(self, file_content): assert 'mock_filename' == potential_secret.filename assert ( potential_secret.secret_hash - == PotentialSecret.hash_secret('hopenobodyfindsthisone') + == PotentialSecret.hash_secret('hopenobodyfinds>-_$#thisone') ) @pytest.mark.parametrize( @@ -84,7 +102,10 @@ def test_analyze_positives(self, file_content): 'private_key "";' # Nothing in the quotes ), ( - 'private_key \'"no spaces\';' # Has whitespace in-between + 'private_key \'"no spaces\';' # Has whitespace in the secret + ), + ( + 'private_key "fake";' # 'fake' in the secret ), ( 'private_key "hopenobodyfindsthisone\';' # Double-quote does not match single-quote @@ -94,14 +115,29 @@ def test_analyze_positives(self, file_content): ), # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE ( - 'theapikeyforfoo:hopenobodyfindsthisone' # Characters between apikey and : + 'theapikey: ""' # Nothing in the quotes ), ( - 'theapikey:' # Nothing after : + 'theapikey: "somefakekey"' # 'fake' in the secret + ), + ( + 'theapikeyforfoo:hopenobodyfindsthisone' # Characters between apikey and : ), # FOLLOWED_BY_EQUAL_SIGNS_RE ( - 'my_password_for_stuff =' # Nothing after = + 'some_key = "real_secret"' # We cannot make 'key' a Keyword, too noisy + ), + ( + 'my_password_for_stuff = ""' # Nothing in the quotes + ), + ( + "my_password_for_stuff = ''" # Nothing in the quotes + ), + ( + 'my_password_for_stuff = True' # 'True' is a known false-positive + ), + ( + 'my_password_for_stuff = "fakesecret"' # 'fake' in the secret ), ], ) From a27659aa24f2e4cb4522bd9c57805684fb3016ef Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 13 Dec 2018 17:12:54 -0800 Subject: [PATCH 07/16] :telescope:[Keyword Plugin] Filter false-positives Filter out $variables for PHP files Filter out `(|[` followed by `)|]` Add `not`, more empty quotes and `password` variable names to FALSE_POSITIVES --- detect_secrets/plugins/keyword.py | 59 ++++++++++++++++++++++++++----- tests/plugins/keyword_test.py | 17 ++++++++- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index a9e2c6a39..b1a49e54e 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -47,9 +47,14 @@ ) FALSE_POSITIVES = ( "''", + "''):", '""', + '""):', 'false', 'none', + 'not', + 'password)', + 'password},', 'true', ) FOLLOWED_BY_COLON_RE = re.compile( @@ -90,7 +95,10 @@ def analyze_string(self, string, line_num, filename): if WHITELIST_REGEX.search(string): return output - for identifier in self.secret_generator(string): + for identifier in self.secret_generator( + string, + is_php_file=filename.endswith('.php'), + ): secret = PotentialSecret( self.secret_type, filename, @@ -101,16 +109,49 @@ def analyze_string(self, string, line_num, filename): return output - def secret_generator(self, string): + def secret_generator(self, string, is_php_file): lowered_string = string.lower() for REGEX, group_number in BLACKLIST_REGEX_TO_GROUP.items(): match = REGEX.search(lowered_string) if match: - secret = match.group(group_number) - if ( - secret and - 'fake' not in secret and - secret not in FALSE_POSITIVES - ): - yield secret + lowered_secret = match.group(group_number) + + if not lowered_secret: + continue + + if not probably_false_positive(lowered_secret, is_php_file): + yield lowered_secret + + +def probably_false_positive(lowered_secret, is_php_file): + if ( + 'fake' in lowered_secret or + lowered_secret in FALSE_POSITIVES or + # If it is a .php file, do not report $variables + ( + is_php_file and + lowered_secret[0] == '$' + ) + ): + return True + + # No function calls + try: + if ( + lowered_secret.index('(') < lowered_secret.index(')') + ): + return True + except ValueError: + pass + + # Skip over e.g. request.json_body['hey'] + try: + if ( + lowered_secret.index('[') < lowered_secret.index(']') + ): + return True + except ValueError: + pass + + return False diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 28b315421..694fc62e0 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -124,9 +124,18 @@ def test_analyze_positives(self, file_content): 'theapikeyforfoo:hopenobodyfindsthisone' # Characters between apikey and : ), # FOLLOWED_BY_EQUAL_SIGNS_RE + ( + '$password = $input;' # Skip anything starting with $ in php files + ), ( 'some_key = "real_secret"' # We cannot make 'key' a Keyword, too noisy ), + ( + 'my_password_for_stuff = foo(hey)you' # Has a ( followed by a ) + ), + ( + "my_password_for_stuff = request.json_body['hey']" # Has a [ followed by a ] + ), ( 'my_password_for_stuff = ""' # Nothing in the quotes ), @@ -139,11 +148,17 @@ def test_analyze_positives(self, file_content): ( 'my_password_for_stuff = "fakesecret"' # 'fake' in the secret ), + ( + 'login(username=username, password=password)' # secret is password) + ), + ( + 'open(self, password = ""):' # secrets is ""): + ), ], ) def test_analyze_negatives(self, file_content): logic = KeywordDetector() f = mock_file_object(file_content) - output = logic.analyze(f, 'mock_filename') + output = logic.analyze(f, 'mock_filename.php') assert len(output) == 0 From d8f4e29fb675729840e43b50182f8eeb2e491f5e Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 13 Dec 2018 18:30:41 -0800 Subject: [PATCH 08/16] :snake: Make tests pass After merging in master --- detect_secrets/core/audit.py | 25 +++++++++++++++---- .../{first_line.py => first_line.php} | 2 +- tests/core/audit_test.py | 4 +-- tests/main_test.py | 4 +-- tests/pre_commit_hook_test.py | 3 +++ 5 files changed, 28 insertions(+), 10 deletions(-) rename test_data/short_files/{first_line.py => first_line.php} (76%) diff --git a/detect_secrets/core/audit.py b/detect_secrets/core/audit.py index 9b373940d..d2b8db425 100644 --- a/detect_secrets/core/audit.py +++ b/detect_secrets/core/audit.py @@ -9,6 +9,7 @@ from ..plugins.core import initialize from ..plugins.high_entropy_strings import HighEntropyStringsPlugin +from ..plugins.keyword import KeywordDetector from .baseline import format_baseline_for_output from .baseline import merge_results from .bidirectional_iterator import BidirectionalIterator @@ -518,7 +519,13 @@ def _get_secret_with_context( ) -def _highlight_secret(secret_line, secret_lineno, secret, filename, plugin_settings): +def _highlight_secret( + secret_line, + secret_lineno, + secret, + filename, + plugin_settings, +): """ :type secret_line: str :param secret_line: the line on which the secret is found @@ -544,7 +551,11 @@ def _highlight_secret(secret_line, secret_lineno, secret, filename, plugin_setti plugin_settings, ) - for raw_secret in _raw_secret_generator(plugin, secret_line): + for raw_secret in _raw_secret_generator( + plugin, + secret_line, + is_php_file=filename.endswith('.php'), + ): secret_obj = PotentialSecret( plugin.secret_type, filename, @@ -572,10 +583,14 @@ def _highlight_secret(secret_line, secret_lineno, secret, filename, plugin_setti ) -def _raw_secret_generator(plugin, secret_line): +def _raw_secret_generator(plugin, secret_line, is_php_file): """Generates raw secrets by re-scanning the line, with the specified plugin""" - for raw_secret in plugin.secret_generator(secret_line): - yield raw_secret + if isinstance(plugin, KeywordDetector): + for raw_secret in plugin.secret_generator(secret_line, is_php_file): + yield raw_secret + else: + for raw_secret in plugin.secret_generator(secret_line): + yield raw_secret if issubclass(plugin.__class__, HighEntropyStringsPlugin): with plugin.non_quoted_string_regex(strict=False): diff --git a/test_data/short_files/first_line.py b/test_data/short_files/first_line.php similarity index 76% rename from test_data/short_files/first_line.py rename to test_data/short_files/first_line.php index ae20b49c3..9b1d12e67 100644 --- a/test_data/short_files/first_line.py +++ b/test_data/short_files/first_line.php @@ -1,4 +1,4 @@ -secret = 'BEEF0123456789a' +seecret = 'BEEF0123456789a' skipped_sequential_false_positive = '0123456789a' print('second line') var = 'third line' diff --git a/tests/core/audit_test.py b/tests/core/audit_test.py index 43335a8ff..feb136299 100644 --- a/tests/core/audit_test.py +++ b/tests/core/audit_test.py @@ -335,7 +335,7 @@ def test_compare(self, mock_printer): # These files come after, because filenames are sorted first assert headers[2] == textwrap.dedent(""" Secret: 3 of 4 - Filename: test_data/short_files/first_line.py + Filename: test_data/short_files/first_line.php Secret Type: Hex High Entropy String Status: >> REMOVED << """)[1:] @@ -406,7 +406,7 @@ def old_baseline(self): ], # This entire file will be "removed" - 'test_data/short_files/first_line.py': [ + 'test_data/short_files/first_line.php': [ { 'hashed_secret': '0de9a11b3f37872868ca49ecd726c955e25b6e21', 'line_number': 1, diff --git a/tests/main_test.py b/tests/main_test.py index 8d757b7f2..7298362ba 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -194,9 +194,9 @@ def test_old_baseline_ignored_with_update_flag( 'filename, expected_output', [ ( - 'test_data/short_files/first_line.py', + 'test_data/short_files/first_line.php', textwrap.dedent(""" - 1:secret = 'BEEF0123456789a' + 1:seecret = 'BEEF0123456789a' 2:skipped_sequential_false_positive = '0123456789a' 3:print('second line') 4:var = 'third line' diff --git a/tests/pre_commit_hook_test.py b/tests/pre_commit_hook_test.py index 0b7c08317..7727365ac 100644 --- a/tests/pre_commit_hook_test.py +++ b/tests/pre_commit_hook_test.py @@ -145,6 +145,9 @@ def test_that_baseline_gets_updated( 'hex_limit': 3, 'name': 'HexHighEntropyString', }, + { + 'name': 'KeywordDetector', + }, { 'name': 'PrivateKeyDetector', }, From ed6a374b8429a7ad9ec981e760522b7026064458 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 14 Dec 2018 11:31:54 -0800 Subject: [PATCH 09/16] :snake: Improve test coverage Trim uncovered code Change tox to ensure tests are covered 100% --- detect_secrets/plugins/keyword.py | 4 +-- tests/core/audit_test.py | 10 ++---- tests/core/secrets_collection_test.py | 2 +- tests/plugins/base_test.py | 2 +- tests/plugins/keyword_test.py | 44 +++++++++++++-------------- tox.ini | 3 +- 6 files changed, 30 insertions(+), 35 deletions(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index e9a7eb151..d5734e80a 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -113,9 +113,7 @@ def secret_generator(self, string, is_php_file): if match: lowered_secret = match.group(group_number) - if not lowered_secret: - continue - + # ([^\s]+) guarantees lowered_secret is not '' if not probably_false_positive(lowered_secret, is_php_file): yield lowered_secret diff --git a/tests/core/audit_test.py b/tests/core/audit_test.py index feb136299..868e66a9a 100644 --- a/tests/core/audit_test.py +++ b/tests/core/audit_test.py @@ -65,7 +65,7 @@ def test_making_decisions(self, mock_printer): def test_quit_half_way(self, mock_printer): modified_baseline = deepcopy(self.baseline) - for secrets in modified_baseline['results'].values(): + for secrets in modified_baseline['results'].values(): # pragma: no cover secrets[0]['is_secret'] = False break @@ -147,8 +147,7 @@ def test_go_back_several_steps(self, mock_printer): for secrets in modified_baseline['results'].values(): for secret in secrets: value = values_to_inject.pop(0) - if value is not None: - secret['is_secret'] = value + secret['is_secret'] = value self.run_logic( ['s', 'y', 'b', 's', 'b', 'b', 'n', 'n', 'n'], @@ -174,10 +173,7 @@ def run_logic(self, inputs, modified_baseline=None, input_baseline=None): ) as m: audit.audit_baseline('will_be_mocked') - if not modified_baseline: - assert m.call_args[0][1] == self.baseline - else: - assert m.call_args[0][1] == modified_baseline + assert m.call_args[0][1] == modified_baseline @contextmanager def mock_env(self, user_inputs=None, baseline=None): diff --git a/tests/core/secrets_collection_test.py b/tests/core/secrets_collection_test.py index eb2d37eee..310b6ac5c 100644 --- a/tests/core/secrets_collection_test.py +++ b/tests/core/secrets_collection_test.py @@ -350,7 +350,7 @@ def assert_loaded_collection_is_original_collection(self, original, new): assert original[key] == new[key] -class MockBasePlugin(BasePlugin): +class MockBasePlugin(BasePlugin): # pragma: no cover """Abstract testing class, to implement abstract methods.""" def analyze_string(self, value): diff --git a/tests/plugins/base_test.py b/tests/plugins/base_test.py index 98ee81faf..f3c80e3f6 100644 --- a/tests/plugins/base_test.py +++ b/tests/plugins/base_test.py @@ -6,7 +6,7 @@ def test_fails_if_no_secret_type_defined(): - class MockPlugin(BasePlugin): + class MockPlugin(BasePlugin): # pragma: no cover def analyze_string(self, *args, **kwargs): pass diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 694fc62e0..d879d94a9 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -15,69 +15,69 @@ class TestKeywordDetector(object): [ # FOLLOWED_BY_COLON_RE ( - "'theapikey': 'hopenobodyfinds>-_$#thisone'" + "'theapikey': 'hope]nobody[finds>-_$#thisone'" ), ( - '"theapikey": "hopenobodyfinds>-_$#thisone"' + '"theapikey": "hope]nobody[finds>-_$#thisone"' ), ( - 'apikey: hopenobodyfinds>-_$#thisone' + 'apikey: hope]nobody[finds>-_$#thisone' ), ( - 'apikey:hopenobodyfinds>-_$#thisone' + 'apikey:hope]nobody[finds>-_$#thisone' ), ( - 'theapikey:hopenobodyfinds>-_$#thisone' + 'theapikey:hope]nobody[finds>-_$#thisone' ), ( - 'apikey: "hopenobodyfinds>-_$#thisone"' + 'apikey: "hope]nobody[finds>-_$#thisone"' ), ( - "apikey: 'hopenobodyfinds>-_$#thisone'" + "apikey: 'hope]nobody[finds>-_$#thisone'" ), # FOLLOWED_BY_EQUAL_SIGNS_RE ( - 'my_password=hopenobodyfinds>-_$#thisone' + 'my_password=hope]nobody[finds>-_$#thisone' ), ( - 'my_password= hopenobodyfinds>-_$#thisone' + 'my_password= hope]nobody[finds>-_$#thisone' ), ( - 'my_password =hopenobodyfinds>-_$#thisone' + 'my_password =hope]nobody[finds>-_$#thisone' ), ( - 'my_password_for_stuff = hopenobodyfinds>-_$#thisone' + 'my_password_for_stuff = hope]nobody[finds>-_$#thisone' ), ( - 'my_password_for_stuff =hopenobodyfinds>-_$#thisone' + 'my_password_for_stuff =hope]nobody[finds>-_$#thisone' ), ( - 'passwordone=hopenobodyfinds>-_$#thisone\n' + 'passwordone=hope]nobody[finds>-_$#thisone\n' ), ( - 'passwordone= "hopenobodyfinds>-_$#thisone"\n' + 'passwordone= "hope]nobody[finds>-_$#thisone"\n' ), ( - 'passwordone=\'hopenobodyfinds>-_$#thisone\'\n' + 'passwordone=\'hope]nobody[finds>-_$#thisone\'\n' ), # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE ( - 'apikey "hopenobodyfinds>-_$#thisone";' # Double-quotes + 'apikey "hope]nobody[finds>-_$#thisone";' # Double-quotes ), ( - 'fooapikeyfoo "hopenobodyfinds>-_$#thisone";' # Double-quotes + 'fooapikeyfoo "hope]nobody[finds>-_$#thisone";' # Double-quotes ), ( - 'fooapikeyfoo"hopenobodyfinds>-_$#thisone";' # Double-quotes + 'fooapikeyfoo"hope]nobody[finds>-_$#thisone";' # Double-quotes ), ( - 'private_key \'hopenobodyfinds>-_$#thisone\';' # Single-quotes + 'private_key \'hope]nobody[finds>-_$#thisone\';' # Single-quotes ), ( - 'fooprivate_keyfoo\'hopenobodyfinds>-_$#thisone\';' # Single-quotes + 'fooprivate_keyfoo\'hope]nobody[finds>-_$#thisone\';' # Single-quotes ), ( - 'fooprivate_key\'hopenobodyfinds>-_$#thisone\';' # Single-quotes + 'fooprivate_key\'hope]nobody[finds>-_$#thisone\';' # Single-quotes ), ], ) @@ -91,7 +91,7 @@ def test_analyze_positives(self, file_content): assert 'mock_filename' == potential_secret.filename assert ( potential_secret.secret_hash - == PotentialSecret.hash_secret('hopenobodyfinds>-_$#thisone') + == PotentialSecret.hash_secret('hope]nobody[finds>-_$#thisone') ) @pytest.mark.parametrize( diff --git a/tox.ini b/tox.ini index 9ef1b4aa1..cf758ec71 100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,8 @@ deps = -rrequirements-dev.txt commands = coverage erase coverage run -m pytest tests - coverage report --show-missing --fail-under 98 + coverage report --show-missing --include=tests/* --fail-under 100 + coverage report --show-missing --include=detect_secrets/* --fail-under 96 pre-commit run --all-files [testenv:venv] From 3fd9e87a654fc72e0ddd04014c86e2510a0f026a Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 14 Dec 2018 11:54:54 -0800 Subject: [PATCH 10/16] :telescope:[Keyword Plugin] Precision improvements Removed `token` as a keyword Made FOLLOWED_BY_EQUAL_SIGNS_RE require variable ends with keyword --- detect_secrets/plugins/keyword.py | 3 +-- tests/plugins/keyword_test.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index d5734e80a..6bf0b2963 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -42,7 +42,6 @@ 'private_key', 'secret', 'secrete', - 'token', ) FALSE_POSITIVES = ( "''", @@ -64,7 +63,7 @@ ) FOLLOWED_BY_EQUAL_SIGNS_RE = re.compile( # e.g. my_password = - r'({})([^\s]*?)(\s*?)=(\s*?)(("|\')?)([^\s]+)(\5)'.format( + r'({})()(\s*?)=(\s*?)(("|\')?)([^\s]+)(\5)'.format( r'|'.join(BLACKLIST), ), ) diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index d879d94a9..652387c58 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -46,19 +46,19 @@ class TestKeywordDetector(object): 'my_password =hope]nobody[finds>-_$#thisone' ), ( - 'my_password_for_stuff = hope]nobody[finds>-_$#thisone' + 'my_password = hope]nobody[finds>-_$#thisone' ), ( - 'my_password_for_stuff =hope]nobody[finds>-_$#thisone' + 'my_password =hope]nobody[finds>-_$#thisone' ), ( - 'passwordone=hope]nobody[finds>-_$#thisone\n' + 'the_password=hope]nobody[finds>-_$#thisone\n' ), ( - 'passwordone= "hope]nobody[finds>-_$#thisone"\n' + 'the_password= "hope]nobody[finds>-_$#thisone"\n' ), ( - 'passwordone=\'hope]nobody[finds>-_$#thisone\'\n' + 'the_password=\'hope]nobody[finds>-_$#thisone\'\n' ), # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE ( @@ -131,22 +131,22 @@ def test_analyze_positives(self, file_content): 'some_key = "real_secret"' # We cannot make 'key' a Keyword, too noisy ), ( - 'my_password_for_stuff = foo(hey)you' # Has a ( followed by a ) + 'my_password = foo(hey)you' # Has a ( followed by a ) ), ( - "my_password_for_stuff = request.json_body['hey']" # Has a [ followed by a ] + "my_password = request.json_body['hey']" # Has a [ followed by a ] ), ( - 'my_password_for_stuff = ""' # Nothing in the quotes + 'my_password = ""' # Nothing in the quotes ), ( - "my_password_for_stuff = ''" # Nothing in the quotes + "my_password = ''" # Nothing in the quotes ), ( - 'my_password_for_stuff = True' # 'True' is a known false-positive + 'my_password = True' # 'True' is a known false-positive ), ( - 'my_password_for_stuff = "fakesecret"' # 'fake' in the secret + 'my_password = "fakesecret"' # 'fake' in the secret ), ( 'login(username=username, password=password)' # secret is password) From f15366ea6dbfb248a37a301fa3a58e6cdacfecf1 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 21 Dec 2018 12:19:39 -0800 Subject: [PATCH 11/16] :zap: Remove unnecessary wrapping parens In keyword_test.py --- tests/plugins/keyword_test.py | 156 +++++++++------------------------- 1 file changed, 39 insertions(+), 117 deletions(-) diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 652387c58..704fa726c 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -14,71 +14,29 @@ class TestKeywordDetector(object): 'file_content', [ # FOLLOWED_BY_COLON_RE - ( - "'theapikey': 'hope]nobody[finds>-_$#thisone'" - ), - ( - '"theapikey": "hope]nobody[finds>-_$#thisone"' - ), - ( - 'apikey: hope]nobody[finds>-_$#thisone' - ), - ( - 'apikey:hope]nobody[finds>-_$#thisone' - ), - ( - 'theapikey:hope]nobody[finds>-_$#thisone' - ), - ( - 'apikey: "hope]nobody[finds>-_$#thisone"' - ), - ( - "apikey: 'hope]nobody[finds>-_$#thisone'" - ), + "'theapikey': 'hope]nobody[finds>-_$#thisone'", + '"theapikey": "hope]nobody[finds>-_$#thisone"', + 'apikey: hope]nobody[finds>-_$#thisone', + 'apikey:hope]nobody[finds>-_$#thisone', + 'theapikey:hope]nobody[finds>-_$#thisone', + 'apikey: "hope]nobody[finds>-_$#thisone"', + "apikey: 'hope]nobody[finds>-_$#thisone'", # FOLLOWED_BY_EQUAL_SIGNS_RE - ( - 'my_password=hope]nobody[finds>-_$#thisone' - ), - ( - 'my_password= hope]nobody[finds>-_$#thisone' - ), - ( - 'my_password =hope]nobody[finds>-_$#thisone' - ), - ( - 'my_password = hope]nobody[finds>-_$#thisone' - ), - ( - 'my_password =hope]nobody[finds>-_$#thisone' - ), - ( - 'the_password=hope]nobody[finds>-_$#thisone\n' - ), - ( - 'the_password= "hope]nobody[finds>-_$#thisone"\n' - ), - ( - 'the_password=\'hope]nobody[finds>-_$#thisone\'\n' - ), + 'my_password=hope]nobody[finds>-_$#thisone', + 'my_password= hope]nobody[finds>-_$#thisone', + 'my_password =hope]nobody[finds>-_$#thisone', + 'my_password = hope]nobody[finds>-_$#thisone', + 'my_password =hope]nobody[finds>-_$#thisone', + 'the_password=hope]nobody[finds>-_$#thisone\n', + 'the_password= "hope]nobody[finds>-_$#thisone"\n', + 'the_password=\'hope]nobody[finds>-_$#thisone\'\n', # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE - ( - 'apikey "hope]nobody[finds>-_$#thisone";' # Double-quotes - ), - ( - 'fooapikeyfoo "hope]nobody[finds>-_$#thisone";' # Double-quotes - ), - ( - 'fooapikeyfoo"hope]nobody[finds>-_$#thisone";' # Double-quotes - ), - ( - 'private_key \'hope]nobody[finds>-_$#thisone\';' # Single-quotes - ), - ( - 'fooprivate_keyfoo\'hope]nobody[finds>-_$#thisone\';' # Single-quotes - ), - ( - 'fooprivate_key\'hope]nobody[finds>-_$#thisone\';' # Single-quotes - ), + 'apikey "hope]nobody[finds>-_$#thisone";', # Double-quotes + 'fooapikeyfoo "hope]nobody[finds>-_$#thisone";', # Double-quotes + 'fooapikeyfoo"hope]nobody[finds>-_$#thisone";', # Double-quotes + 'private_key \'hope]nobody[finds>-_$#thisone\';', # Single-quotes + 'fooprivate_keyfoo\'hope]nobody[finds>-_$#thisone\';', # Single-quotes + 'fooprivate_key\'hope]nobody[finds>-_$#thisone\';', # Single-quotes ], ) def test_analyze_positives(self, file_content): @@ -98,62 +56,26 @@ def test_analyze_positives(self, file_content): 'file_content', [ # FOLLOWED_BY_COLON_RE - ( - 'private_key "";' # Nothing in the quotes - ), - ( - 'private_key \'"no spaces\';' # Has whitespace in the secret - ), - ( - 'private_key "fake";' # 'fake' in the secret - ), - ( - 'private_key "hopenobodyfindsthisone\';' # Double-quote does not match single-quote - ), - ( - 'private_key \'hopenobodyfindsthisone";' # Single-quote does not match double-quote - ), + 'private_key "";', # Nothing in the quotes + 'private_key \'"no spaces\';', # Has whitespace in the secret + 'private_key "fake";', # 'fake' in the secret + 'private_key "hopenobodyfindsthisone\';', # Double-quote does not match single-quote + 'private_key \'hopenobodyfindsthisone";', # Single-quote does not match double-quote # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE - ( - 'theapikey: ""' # Nothing in the quotes - ), - ( - 'theapikey: "somefakekey"' # 'fake' in the secret - ), - ( - 'theapikeyforfoo:hopenobodyfindsthisone' # Characters between apikey and : - ), + 'theapikey: ""', # Nothing in the quotes + 'theapikey: "somefakekey"', # 'fake' in the secret + 'theapikeyforfoo:hopenobodyfindsthisone', # Characters between apikey and : # FOLLOWED_BY_EQUAL_SIGNS_RE - ( - '$password = $input;' # Skip anything starting with $ in php files - ), - ( - 'some_key = "real_secret"' # We cannot make 'key' a Keyword, too noisy - ), - ( - 'my_password = foo(hey)you' # Has a ( followed by a ) - ), - ( - "my_password = request.json_body['hey']" # Has a [ followed by a ] - ), - ( - 'my_password = ""' # Nothing in the quotes - ), - ( - "my_password = ''" # Nothing in the quotes - ), - ( - 'my_password = True' # 'True' is a known false-positive - ), - ( - 'my_password = "fakesecret"' # 'fake' in the secret - ), - ( - 'login(username=username, password=password)' # secret is password) - ), - ( - 'open(self, password = ""):' # secrets is ""): - ), + '$password = $input;', # Skip anything starting with $ in php files + 'some_key = "real_secret"', # We cannot make 'key' a Keyword, too noisy + 'my_password = foo(hey)you', # Has a ( followed by a ) + "my_password = request.json_body['hey']", # Has a [ followed by a ] + 'my_password = ""', # Nothing in the quotes + "my_password = ''", # Nothing in the quotes + 'my_password = True', # 'True' is a known false-positive + 'my_password = "fakesecret"', # 'fake' in the secret + 'login(username=username, password=password)', # secret is password) + 'open(self, password = ""):', # secrets is ""): ], ) def test_analyze_negatives(self, file_content): From e01d818ad118b0b1fccb1cc9b406e7aa1539e242 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 28 Dec 2018 13:15:10 -0800 Subject: [PATCH 12/16] :telescope:[Keyword Plugin] Precision improvements Made quotes required in Python files/added regexes for this Added a Filetype Enum and `determine_file_type` function Replaced 'pass' with 'db_pass' in BLACKLIST Added 'aws_secret_access_key' to BLACKLIST Added some trailing char cases to FALSE_POSITIVES :boom: Changed secret_type to 'Secret Keyword' --- detect_secrets/core/audit.py | 7 ++- detect_secrets/plugins/keyword.py | 71 ++++++++++++++++++---- testing/factories.py | 2 +- tests/core/audit_test.py | 40 ++++++++++++- tests/plugins/keyword_test.py | 98 ++++++++++++++++++++++++++----- tox.ini | 2 +- 6 files changed, 190 insertions(+), 30 deletions(-) diff --git a/detect_secrets/core/audit.py b/detect_secrets/core/audit.py index d2b8db425..6e8548011 100644 --- a/detect_secrets/core/audit.py +++ b/detect_secrets/core/audit.py @@ -9,6 +9,7 @@ from ..plugins.core import initialize from ..plugins.high_entropy_strings import HighEntropyStringsPlugin +from ..plugins.keyword import determine_file_type from ..plugins.keyword import KeywordDetector from .baseline import format_baseline_for_output from .baseline import merge_results @@ -554,7 +555,7 @@ def _highlight_secret( for raw_secret in _raw_secret_generator( plugin, secret_line, - is_php_file=filename.endswith('.php'), + filetype=determine_file_type(filename), ): secret_obj = PotentialSecret( plugin.secret_type, @@ -583,10 +584,10 @@ def _highlight_secret( ) -def _raw_secret_generator(plugin, secret_line, is_php_file): +def _raw_secret_generator(plugin, secret_line, filetype): """Generates raw secrets by re-scanning the line, with the specified plugin""" if isinstance(plugin, KeywordDetector): - for raw_secret in plugin.secret_generator(secret_line, is_php_file): + for raw_secret in plugin.secret_generator(secret_line, filetype=filetype): yield raw_secret else: for raw_secret in plugin.secret_generator(secret_line): diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 6bf0b2963..0a571540a 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -27,6 +27,7 @@ from __future__ import absolute_import import re +from enum import Enum from .base import BasePlugin from detect_secrets.core.potential_secret import PotentialSecret @@ -36,7 +37,8 @@ BLACKLIST = ( 'apikey', 'api_key', - 'pass', + 'aws_secret_access_key', + 'db_pass', 'password', 'passwd', 'private_key', @@ -49,24 +51,40 @@ '""', '""):', 'false', + 'false):', 'none', + 'none,', + 'none}', 'not', 'password)', 'password},', 'true', + 'true):', ) FOLLOWED_BY_COLON_RE = re.compile( - # e.g. token: + # e.g. api_key: foo r'({})(("|\')?):(\s*?)(("|\')?)([^\s]+)(\5)'.format( r'|'.join(BLACKLIST), ), ) +FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE = re.compile( + # e.g. api_key: "foo" + r'({})(("|\')?):(\s*?)(("|\'))([^\s]+)(\5)'.format( + r'|'.join(BLACKLIST), + ), +) FOLLOWED_BY_EQUAL_SIGNS_RE = re.compile( - # e.g. my_password = + # e.g. my_password = bar r'({})()(\s*?)=(\s*?)(("|\')?)([^\s]+)(\5)'.format( r'|'.join(BLACKLIST), ), ) +FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE = re.compile( + # e.g. my_password = "bar" + r'({})()(\s*?)=(\s*?)(("|\'))([^\s]+)(\5)'.format( + r'|'.join(BLACKLIST), + ), +) FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE = re.compile( # e.g. private_key "something"; r'({})([^\s]*?)(\s*?)("|\')([^\s]+)(\4);'.format( @@ -78,6 +96,30 @@ FOLLOWED_BY_EQUAL_SIGNS_RE: 7, FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5, } +PYTHON_BLACKLIST_REGEX_TO_GROUP = { + FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE: 7, + FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE: 7, + FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5, +} + + +class FileType(Enum): + PYTHON = 1 + PHP = 2 + OTHER = 3 + + +def determine_file_type(filename): + """ + :param filename: str + + :rtype: FileType + """ + if filename.endswith('.py'): + return FileType.PYTHON + elif filename.endswith('.php'): + return FileType.PHP + return FileType.OTHER class KeywordDetector(BasePlugin): @@ -85,14 +127,14 @@ class KeywordDetector(BasePlugin): are present in the analyzed string. """ - secret_type = 'Password' + secret_type = 'Secret Keyword' def analyze_string(self, string, line_num, filename): output = {} for identifier in self.secret_generator( string, - is_php_file=filename.endswith('.php'), + filetype=determine_file_type(filename), ): secret = PotentialSecret( self.secret_type, @@ -104,26 +146,35 @@ def analyze_string(self, string, line_num, filename): return output - def secret_generator(self, string, is_php_file): + def secret_generator(self, string, filetype): lowered_string = string.lower() - for REGEX, group_number in BLACKLIST_REGEX_TO_GROUP.items(): + if filetype == FileType.PYTHON: + blacklist_RE_to_group = PYTHON_BLACKLIST_REGEX_TO_GROUP + else: + blacklist_RE_to_group = BLACKLIST_REGEX_TO_GROUP + + for REGEX, group_number in blacklist_RE_to_group.items(): match = REGEX.search(lowered_string) if match: lowered_secret = match.group(group_number) # ([^\s]+) guarantees lowered_secret is not '' - if not probably_false_positive(lowered_secret, is_php_file): + if not probably_false_positive( + lowered_secret, + filetype=filetype, + ): yield lowered_secret -def probably_false_positive(lowered_secret, is_php_file): +def probably_false_positive(lowered_secret, filetype): if ( 'fake' in lowered_secret or + 'self.' in lowered_secret or lowered_secret in FALSE_POSITIVES or # If it is a .php file, do not report $variables ( - is_php_file and + filetype == FileType.PHP and lowered_secret[0] == '$' ) ): diff --git a/testing/factories.py b/testing/factories.py index 67f65e409..66699465a 100644 --- a/testing/factories.py +++ b/testing/factories.py @@ -6,7 +6,7 @@ def potential_secret_factory(type_='type', filename='filename', secret='secret', lineno=1): """This is only marginally better than creating PotentialSecret objects directly, - because of default values. + because of the default values. """ return PotentialSecret(type_, filename, secret, lineno) diff --git a/tests/core/audit_test.py b/tests/core/audit_test.py index 868e66a9a..755db4516 100644 --- a/tests/core/audit_test.py +++ b/tests/core/audit_test.py @@ -605,7 +605,7 @@ def test_secret_not_found(self, mock_printer): """)[1:-1] - def test_secret_in_yaml_file(self, mock_printer): + def test_hex_high_entropy_secret_in_yaml_file(self, mock_printer): with self._mock_sed_call( line_containing_secret='api key: 123456789a', ): @@ -644,6 +644,44 @@ def test_secret_in_yaml_file(self, mock_printer): """)[1:-1] + def test_keyword_secret_in_yaml_file(self, mock_printer): + with self._mock_sed_call( + line_containing_secret='api_key: yerba', + ): + self.run_logic( + secret=potential_secret_factory( + type_='Secret Keyword', + filename='filenameB', + secret='yerba', + lineno=15, + ).json(), + settings=[ + { + 'name': 'KeywordDetector', + }, + ], + ) + + assert mock_printer.message == textwrap.dedent(""" + Secret: 1 of 2 + Filename: filenameB + Secret Type: Secret Keyword + ---------- + 10:a + 11:b + 12:c + 13:d + 14:e + 15:api_key: yerba + 16:e + 17:d + 18:c + 19:b + 20:a + ---------- + + """)[1:-1] + class TestGetUserDecision(object): diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 704fa726c..5b9f89d4d 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -14,20 +14,53 @@ class TestKeywordDetector(object): 'file_content', [ # FOLLOWED_BY_COLON_RE + "'theapikey': 'ho)pe]nob(ody[finds>-_$#thisone'", + '"theapikey": "ho)pe]nob(ody[finds>-_$#thisone"', + 'apikey: ho)pe]nob(ody[finds>-_$#thisone', + 'apikey:ho)pe]nob(ody[finds>-_$#thisone', + 'theapikey:ho)pe]nob(ody[finds>-_$#thisone', + 'apikey: "ho)pe]nob(ody[finds>-_$#thisone"', + "apikey: 'ho)pe]nob(ody[finds>-_$#thisone'", + # FOLLOWED_BY_EQUAL_SIGNS_RE + 'my_password=ho)pe]nob(ody[finds>-_$#thisone', + 'my_password= ho)pe]nob(ody[finds>-_$#thisone', + 'my_password =ho)pe]nob(ody[finds>-_$#thisone', + 'my_password = ho)pe]nob(ody[finds>-_$#thisone', + 'my_password =ho)pe]nob(ody[finds>-_$#thisone', + 'the_password=ho)pe]nob(ody[finds>-_$#thisone\n', + 'the_password= "ho)pe]nob(ody[finds>-_$#thisone"\n', + 'the_password=\'ho)pe]nob(ody[finds>-_$#thisone\'\n', + # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE + 'apikey "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes + 'fooapikeyfoo "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes + 'fooapikeyfoo"ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes + 'private_key \'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes + 'fooprivate_keyfoo\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes + 'fooprivate_key\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes + ], + ) + def test_analyze_positives(self, file_content): + logic = KeywordDetector() + + f = mock_file_object(file_content) + output = logic.analyze(f, 'mock_filename') + assert len(output) == 1 + for potential_secret in output: + assert 'mock_filename' == potential_secret.filename + assert ( + potential_secret.secret_hash + == PotentialSecret.hash_secret('ho)pe]nob(ody[finds>-_$#thisone') + ) + + @pytest.mark.parametrize( + 'file_content', + [ + # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE "'theapikey': 'hope]nobody[finds>-_$#thisone'", '"theapikey": "hope]nobody[finds>-_$#thisone"', - 'apikey: hope]nobody[finds>-_$#thisone', - 'apikey:hope]nobody[finds>-_$#thisone', - 'theapikey:hope]nobody[finds>-_$#thisone', 'apikey: "hope]nobody[finds>-_$#thisone"', "apikey: 'hope]nobody[finds>-_$#thisone'", - # FOLLOWED_BY_EQUAL_SIGNS_RE - 'my_password=hope]nobody[finds>-_$#thisone', - 'my_password= hope]nobody[finds>-_$#thisone', - 'my_password =hope]nobody[finds>-_$#thisone', - 'my_password = hope]nobody[finds>-_$#thisone', - 'my_password =hope]nobody[finds>-_$#thisone', - 'the_password=hope]nobody[finds>-_$#thisone\n', + # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE 'the_password= "hope]nobody[finds>-_$#thisone"\n', 'the_password=\'hope]nobody[finds>-_$#thisone\'\n', # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE @@ -39,14 +72,14 @@ class TestKeywordDetector(object): 'fooprivate_key\'hope]nobody[finds>-_$#thisone\';', # Single-quotes ], ) - def test_analyze_positives(self, file_content): + def test_analyze_python_positives(self, file_content): logic = KeywordDetector() f = mock_file_object(file_content) - output = logic.analyze(f, 'mock_filename') + output = logic.analyze(f, 'mock_filename.py') assert len(output) == 1 for potential_secret in output: - assert 'mock_filename' == potential_secret.filename + assert 'mock_filename.py' == potential_secret.filename assert ( potential_secret.secret_hash == PotentialSecret.hash_secret('hope]nobody[finds>-_$#thisone') @@ -66,7 +99,6 @@ def test_analyze_positives(self, file_content): 'theapikey: "somefakekey"', # 'fake' in the secret 'theapikeyforfoo:hopenobodyfindsthisone', # Characters between apikey and : # FOLLOWED_BY_EQUAL_SIGNS_RE - '$password = $input;', # Skip anything starting with $ in php files 'some_key = "real_secret"', # We cannot make 'key' a Keyword, too noisy 'my_password = foo(hey)you', # Has a ( followed by a ) "my_password = request.json_body['hey']", # Has a [ followed by a ] @@ -76,11 +108,49 @@ def test_analyze_positives(self, file_content): 'my_password = "fakesecret"', # 'fake' in the secret 'login(username=username, password=password)', # secret is password) 'open(self, password = ""):', # secrets is ""): + # '', ], ) def test_analyze_negatives(self, file_content): logic = KeywordDetector() + f = mock_file_object(file_content) + output = logic.analyze(f, 'mock_filename.foo') + assert len(output) == 0 + + @pytest.mark.parametrize( + 'file_content', + [ + # FOLLOWED_BY_EQUAL_SIGNS_RE + '$password = $input;', # Skip anything starting with $ in php files + ], + ) + def test_analyze_php_negatives(self, file_content): + logic = KeywordDetector() + f = mock_file_object(file_content) output = logic.analyze(f, 'mock_filename.php') assert len(output) == 0 + + @pytest.mark.parametrize( + 'file_content', + [ + # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE + 'apikey: hope]nobody[finds>-_$#thisone', # No quotes + 'apikey:hope]nobody[finds>-_$#thisone', # No quotes + 'theapikey:hope]nobody[finds>-_$#thisone', # No quotes + # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE + 'my_password=hope]nobody[finds>-_$#thisone', # No quotes + 'my_password= hope]nobody[finds>-_$#thisone', # No quotes + 'my_password =hope]nobody[finds>-_$#thisone', # No quotes + 'my_password = hope]nobody[finds>-_$#thisone', # No quotes + 'my_password =hope]nobody[finds>-_$#thisone', # No quotes + 'the_password=hope]nobody[finds>-_$#thisone\n', # No quotes + ], + ) + def test_analyze_python_negatives(self, file_content): + logic = KeywordDetector() + + f = mock_file_object(file_content) + output = logic.analyze(f, 'mock_filename.py') + assert len(output) == 0 diff --git a/tox.ini b/tox.ini index cf758ec71..8bcfa3762 100644 --- a/tox.ini +++ b/tox.ini @@ -12,7 +12,7 @@ commands = coverage erase coverage run -m pytest tests coverage report --show-missing --include=tests/* --fail-under 100 - coverage report --show-missing --include=detect_secrets/* --fail-under 96 + coverage report --show-missing --include=detect_secrets/* --fail-under 97 pre-commit run --all-files [testenv:venv] From 4581aa8aafa49034bbda1b3b8475b4f84561838a Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 28 Dec 2018 14:49:56 -0800 Subject: [PATCH 13/16] :mortar_board: Eg. -> E.g. --- README.md | 2 +- detect_secrets/core/usage.py | 2 +- testing/factories.py | 2 +- testing/mocks.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e42310301..f659b336c 100644 --- a/README.md +++ b/README.md @@ -181,7 +181,7 @@ rejected as a potential secret. This preset amount can be adjusted in several ways: * Specifying it within the config file, for server scanning. -* Specifying it with command line flags (eg. `--base64-limit`) +* Specifying it with command line flags (e.g. `--base64-limit`) Lowering these limits will identify more potential secrets, but also create more false positives. Adjust these limits to suit your needs. diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index 058c7d8db..d6ae7c73b 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -368,7 +368,7 @@ def _convert_flag_text_to_argument_name(flag_text): """This just emulates argparse's underlying logic. :type flag_text: str - :param flag_text: eg. `--no-hex-string-scan` + :param flag_text: e.g. `--no-hex-string-scan` :return: `no_hex_string_scan` """ return flag_text[2:].replace('-', '_') diff --git a/testing/factories.py b/testing/factories.py index 66699465a..7f605bb5c 100644 --- a/testing/factories.py +++ b/testing/factories.py @@ -15,7 +15,7 @@ def secrets_collection_factory(secrets=None, plugins=(), exclude_regex=''): """ :type secrets: list(dict) :param secrets: list of params to pass to add_secret. - Eg. [ {'secret': 'blah'}, ] + E.g. [ {'secret': 'blah'}, ] :type plugins: tuple :type exclude_regex: str diff --git a/testing/mocks.py b/testing/mocks.py index c1e468958..a6c9ce340 100644 --- a/testing/mocks.py +++ b/testing/mocks.py @@ -19,7 +19,7 @@ def mock_git_calls(subprocess_namespace, cases): :type cases: iterable(SubprocessMock) :type subprocess_namespace: str :param subprocess_namespace: should be the namespace referring to check_output. - Eg. `detect_secrets.pre_commit_hook.subprocess.check_output` + E.g. `detect_secrets.pre_commit_hook.subprocess.check_output` """ # We need to use a dictionary, because python2.7 does not support # the `nonlocal` keyword (and needs to share scope with From b7e48ab98c79b25bd38a4bf0d27f8b0e60e47884 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 28 Dec 2018 15:40:54 -0800 Subject: [PATCH 14/16] :telescope:[Keyword Plugin] Handle dict['keyword'] By adding an optional `((\'|")])?` to the regexes This is to catch 'foo' in e.g. `some_dict["secret"] = "foo"` --- detect_secrets/plugins/keyword.py | 18 ++++++++-------- tests/plugins/keyword_test.py | 36 +++++++++++++++++-------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 0a571540a..23830599e 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -75,13 +75,13 @@ ) FOLLOWED_BY_EQUAL_SIGNS_RE = re.compile( # e.g. my_password = bar - r'({})()(\s*?)=(\s*?)(("|\')?)([^\s]+)(\5)'.format( + r'({})((\'|")])?()(\s*?)=(\s*?)(("|\')?)([^\s]+)(\7)'.format( r'|'.join(BLACKLIST), ), ) FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE = re.compile( # e.g. my_password = "bar" - r'({})()(\s*?)=(\s*?)(("|\'))([^\s]+)(\5)'.format( + r'({})((\'|")])?()(\s*?)=(\s*?)(("|\'))([^\s]+)(\7)'.format( r'|'.join(BLACKLIST), ), ) @@ -93,12 +93,12 @@ ) BLACKLIST_REGEX_TO_GROUP = { FOLLOWED_BY_COLON_RE: 7, - FOLLOWED_BY_EQUAL_SIGNS_RE: 7, + FOLLOWED_BY_EQUAL_SIGNS_RE: 9, FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5, } PYTHON_BLACKLIST_REGEX_TO_GROUP = { FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE: 7, - FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE: 7, + FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE: 9, FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5, } @@ -169,13 +169,13 @@ def secret_generator(self, string, filetype): def probably_false_positive(lowered_secret, filetype): if ( - 'fake' in lowered_secret or - 'self.' in lowered_secret or - lowered_secret in FALSE_POSITIVES or + 'fake' in lowered_secret + or 'self.' in lowered_secret + or lowered_secret in FALSE_POSITIVES or # If it is a .php file, do not report $variables ( - filetype == FileType.PHP and - lowered_secret[0] == '$' + filetype == FileType.PHP + and lowered_secret[0] == '$' ) ): return True diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 5b9f89d4d..4392e9940 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -22,6 +22,8 @@ class TestKeywordDetector(object): 'apikey: "ho)pe]nob(ody[finds>-_$#thisone"', "apikey: 'ho)pe]nob(ody[finds>-_$#thisone'", # FOLLOWED_BY_EQUAL_SIGNS_RE + 'some_dict["secret"] = "ho)pe]nob(ody[finds>-_$#thisone"', + "some_dict['secret'] = ho)pe]nob(ody[finds>-_$#thisone", 'my_password=ho)pe]nob(ody[finds>-_$#thisone', 'my_password= ho)pe]nob(ody[finds>-_$#thisone', 'my_password =ho)pe]nob(ody[finds>-_$#thisone', @@ -61,6 +63,7 @@ def test_analyze_positives(self, file_content): 'apikey: "hope]nobody[finds>-_$#thisone"', "apikey: 'hope]nobody[finds>-_$#thisone'", # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE + 'some_dict["secret"] = "hope]nobody[finds>-_$#thisone"', 'the_password= "hope]nobody[finds>-_$#thisone"\n', 'the_password=\'hope]nobody[finds>-_$#thisone\'\n', # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE @@ -119,38 +122,39 @@ def test_analyze_negatives(self, file_content): assert len(output) == 0 @pytest.mark.parametrize( - 'file_content', + 'secret_starting_with_dollar_sign', [ # FOLLOWED_BY_EQUAL_SIGNS_RE - '$password = $input;', # Skip anything starting with $ in php files + '$password = $input;', ], ) - def test_analyze_php_negatives(self, file_content): + def test_analyze_php_negatives(self, secret_starting_with_dollar_sign): logic = KeywordDetector() - f = mock_file_object(file_content) + f = mock_file_object(secret_starting_with_dollar_sign) output = logic.analyze(f, 'mock_filename.php') assert len(output) == 0 @pytest.mark.parametrize( - 'file_content', + 'secret_with_no_quote', [ # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE - 'apikey: hope]nobody[finds>-_$#thisone', # No quotes - 'apikey:hope]nobody[finds>-_$#thisone', # No quotes - 'theapikey:hope]nobody[finds>-_$#thisone', # No quotes + 'apikey: hope]nobody[finds>-_$#thisone', + 'apikey:hope]nobody[finds>-_$#thisone', + 'theapikey:hope]nobody[finds>-_$#thisone', # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE - 'my_password=hope]nobody[finds>-_$#thisone', # No quotes - 'my_password= hope]nobody[finds>-_$#thisone', # No quotes - 'my_password =hope]nobody[finds>-_$#thisone', # No quotes - 'my_password = hope]nobody[finds>-_$#thisone', # No quotes - 'my_password =hope]nobody[finds>-_$#thisone', # No quotes - 'the_password=hope]nobody[finds>-_$#thisone\n', # No quotes + "some_dict['secret'] = hope]nobody[finds>-_$#thisone", + 'my_password=hope]nobody[finds>-_$#thisone', + 'my_password= hope]nobody[finds>-_$#thisone', + 'my_password =hope]nobody[finds>-_$#thisone', + 'my_password = hope]nobody[finds>-_$#thisone', + 'my_password =hope]nobody[finds>-_$#thisone', + 'the_password=hope]nobody[finds>-_$#thisone\n', ], ) - def test_analyze_python_negatives(self, file_content): + def test_analyze_python_negatives(self, secret_with_no_quote): logic = KeywordDetector() - f = mock_file_object(file_content) + f = mock_file_object(secret_with_no_quote) output = logic.analyze(f, 'mock_filename.py') assert len(output) == 0 From d3145509cb2ee2fe76368ac4b8c9865ffc05cc0b Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 28 Dec 2018 16:14:14 -0800 Subject: [PATCH 15/16] :snake:[consistency] 2 spaces before pragma comment --- detect_secrets/core/audit.py | 6 +++--- detect_secrets/pre_commit_hook.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/detect_secrets/core/audit.py b/detect_secrets/core/audit.py index 388194c62..99c7982a3 100644 --- a/detect_secrets/core/audit.py +++ b/detect_secrets/core/audit.py @@ -167,7 +167,7 @@ def compare_baselines(old_baseline_filename, new_baseline_filename): print('Quitting...') break - if decision == 'b': # pragma: no cover + if decision == 'b': # pragma: no cover current_index -= 2 secret_iterator.step_back_on_next_iteration() @@ -307,11 +307,11 @@ def _comparison_generator(old_list, new_list, compare_fn): new_index += 1 -def _clear_screen(): # pragma: no cover +def _clear_screen(): # pragma: no cover subprocess.call(['clear']) -def _print_context( # pragma: no cover +def _print_context( # pragma: no cover filename, secret, count, diff --git a/detect_secrets/pre_commit_hook.py b/detect_secrets/pre_commit_hook.py index f07710f47..f8ed92a3b 100644 --- a/detect_secrets/pre_commit_hook.py +++ b/detect_secrets/pre_commit_hook.py @@ -102,7 +102,7 @@ def get_baseline(baseline_filename): ) -def _get_baseline_string_from_file(filename): # pragma: no cover +def _get_baseline_string_from_file(filename): # pragma: no cover """Breaking this function up for mockability.""" try: with open(filename) as f: From a29108b584cb61c6cadd73916ad798fbb9cf15ec Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 3 Jan 2019 14:50:46 -0800 Subject: [PATCH 16/16] :telescope:[Keyword Plugin] Precision improvements Added Javascript specific false-positive checks Added ${ before } heuristic for e.g. ${link} Added more false-positives to FALSE_POSITIVES :zap: keyword_test.py Make STANDARD_NEGATIVES list and STANDARD_POSITIVES set for DRYness --- detect_secrets/plugins/keyword.py | 52 ++++++++-- tests/plugins/keyword_test.py | 164 ++++++++++++++++-------------- 2 files changed, 133 insertions(+), 83 deletions(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 23830599e..f23713fe7 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -48,18 +48,34 @@ FALSE_POSITIVES = ( "''", "''):", + "')", + "'this", '""', '""):', + '")', + '', + '', + 'dummy_secret', 'false', 'false):', 'none', 'none,', 'none}', 'not', + 'null,', 'password)', + 'password,', 'password},', + 'string,', + 'string}', + 'string}}', + 'test-access-key', 'true', 'true):', + '{', ) FOLLOWED_BY_COLON_RE = re.compile( # e.g. api_key: foo @@ -104,8 +120,9 @@ class FileType(Enum): - PYTHON = 1 - PHP = 2 + JAVASCRIPT = 0 + PHP = 1 + PYTHON = 2 OTHER = 3 @@ -115,7 +132,9 @@ def determine_file_type(filename): :rtype: FileType """ - if filename.endswith('.py'): + if filename.endswith('.js'): + return FileType.JAVASCRIPT + elif filename.endswith('.py'): return FileType.PYTHON elif filename.endswith('.php'): return FileType.PHP @@ -170,17 +189,23 @@ def secret_generator(self, string, filetype): def probably_false_positive(lowered_secret, filetype): if ( 'fake' in lowered_secret - or 'self.' in lowered_secret - or lowered_secret in FALSE_POSITIVES or - # If it is a .php file, do not report $variables - ( + or 'forgot' in lowered_secret + or lowered_secret in FALSE_POSITIVES + or ( + filetype == FileType.JAVASCRIPT + and ( + lowered_secret.startswith('this.') + or lowered_secret.startswith('fs.read') + or lowered_secret == 'new' + ) + ) or ( # If it is a .php file, do not report $variables filetype == FileType.PHP and lowered_secret[0] == '$' ) ): return True - # No function calls + # Heuristic for no function calls try: if ( lowered_secret.index('(') < lowered_secret.index(')') @@ -189,7 +214,7 @@ def probably_false_positive(lowered_secret, filetype): except ValueError: pass - # Skip over e.g. request.json_body['hey'] + # Heuristic for e.g. request.json_body['hey'] try: if ( lowered_secret.index('[') < lowered_secret.index(']') @@ -198,4 +223,13 @@ def probably_false_positive(lowered_secret, filetype): except ValueError: pass + # Heuristic for e.g. ${link} + try: + if ( + lowered_secret.index('${') < lowered_secret.index('}') + ): + return True + except ValueError: + pass + return False diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 4392e9940..4cdfaa0c3 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -8,40 +8,67 @@ from testing.mocks import mock_file_object +STANDARD_NEGATIVES = [ + # FOLLOWED_BY_COLON_RE + 'theapikey: ""', # Nothing in the quotes + 'theapikey: "somefakekey"', # 'fake' in the secret + 'theapikeyforfoo:hopenobodyfindsthisone', # Characters between apikey and : + # FOLLOWED_BY_EQUAL_SIGNS_RE + 'some_key = "real_secret"', # We cannot make 'key' a Keyword, too noisy + 'my_password = foo(hey)you', # Has a ( followed by a ) + "my_password = request.json_body['hey']", # Has a [ followed by a ] + 'my_password = ""', # Nothing in the quotes + "my_password = ''", # Nothing in the quotes + 'my_password = True', # 'True' is a known false-positive + 'my_password = "fakesecret"', # 'fake' in the secret + 'login(username=username, password=password)', # secret is password) + 'open(self, password = ""):', # secrets is ""): + 'open(self, password = ""):', # secrets is ""): + # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE + 'private_key "";', # Nothing in the quotes + 'private_key \'"no spaces\';', # Has whitespace in the secret + 'private_key "fake";', # 'fake' in the secret + 'private_key "hopenobodyfindsthisone\';', # Double-quote does not match single-quote + 'private_key \'hopenobodyfindsthisone";', # Single-quote does not match double-quote + 'password: ${link}', # Has a ${ followed by a } +] +STANDARD_POSITIVES = { + # FOLLOWED_BY_COLON_RE + "'theapikey': 'h}o)p${e]nob(ody[finds>-_$#thisone'", + '"theapikey": "h}o)p${e]nob(ody[finds>-_$#thisone"', + 'apikey: h}o)p${e]nob(ody[finds>-_$#thisone', + 'apikey:h}o)p${e]nob(ody[finds>-_$#thisone', + 'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone', + 'apikey: "h}o)p${e]nob(ody[finds>-_$#thisone"', + "apikey: 'h}o)p${e]nob(ody[finds>-_$#thisone'", + # FOLLOWED_BY_EQUAL_SIGNS_RE + 'some_dict["secret"] = "h}o)p${e]nob(ody[finds>-_$#thisone"', + "some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone", + 'my_password=h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password= h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password =h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password = h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password =h}o)p${e]nob(ody[finds>-_$#thisone', + 'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n', + 'the_password= "h}o)p${e]nob(ody[finds>-_$#thisone"\n', + 'the_password=\'h}o)p${e]nob(ody[finds>-_$#thisone\'\n', + # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE + 'apikey "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes + 'fooapikeyfoo "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes + 'fooapikeyfoo"h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes + 'private_key \'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes + 'fooprivate_keyfoo\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes + 'fooprivate_key\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes +} + + class TestKeywordDetector(object): @pytest.mark.parametrize( 'file_content', - [ - # FOLLOWED_BY_COLON_RE - "'theapikey': 'ho)pe]nob(ody[finds>-_$#thisone'", - '"theapikey": "ho)pe]nob(ody[finds>-_$#thisone"', - 'apikey: ho)pe]nob(ody[finds>-_$#thisone', - 'apikey:ho)pe]nob(ody[finds>-_$#thisone', - 'theapikey:ho)pe]nob(ody[finds>-_$#thisone', - 'apikey: "ho)pe]nob(ody[finds>-_$#thisone"', - "apikey: 'ho)pe]nob(ody[finds>-_$#thisone'", - # FOLLOWED_BY_EQUAL_SIGNS_RE - 'some_dict["secret"] = "ho)pe]nob(ody[finds>-_$#thisone"', - "some_dict['secret'] = ho)pe]nob(ody[finds>-_$#thisone", - 'my_password=ho)pe]nob(ody[finds>-_$#thisone', - 'my_password= ho)pe]nob(ody[finds>-_$#thisone', - 'my_password =ho)pe]nob(ody[finds>-_$#thisone', - 'my_password = ho)pe]nob(ody[finds>-_$#thisone', - 'my_password =ho)pe]nob(ody[finds>-_$#thisone', - 'the_password=ho)pe]nob(ody[finds>-_$#thisone\n', - 'the_password= "ho)pe]nob(ody[finds>-_$#thisone"\n', - 'the_password=\'ho)pe]nob(ody[finds>-_$#thisone\'\n', - # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE - 'apikey "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes - 'fooapikeyfoo "ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes - 'fooapikeyfoo"ho)pe]nob(ody[finds>-_$#thisone";', # Double-quotes - 'private_key \'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes - 'fooprivate_keyfoo\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes - 'fooprivate_key\'ho)pe]nob(ody[finds>-_$#thisone\';', # Single-quotes - ], + STANDARD_POSITIVES, ) - def test_analyze_positives(self, file_content): + def test_analyze_standard_positives(self, file_content): logic = KeywordDetector() f = mock_file_object(file_content) @@ -51,29 +78,25 @@ def test_analyze_positives(self, file_content): assert 'mock_filename' == potential_secret.filename assert ( potential_secret.secret_hash - == PotentialSecret.hash_secret('ho)pe]nob(ody[finds>-_$#thisone') + == PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone') ) @pytest.mark.parametrize( 'file_content', - [ + STANDARD_POSITIVES - { # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE - "'theapikey': 'hope]nobody[finds>-_$#thisone'", - '"theapikey": "hope]nobody[finds>-_$#thisone"', - 'apikey: "hope]nobody[finds>-_$#thisone"', - "apikey: 'hope]nobody[finds>-_$#thisone'", + 'apikey: h}o)p${e]nob(ody[finds>-_$#thisone', + 'apikey:h}o)p${e]nob(ody[finds>-_$#thisone', + 'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone', # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE - 'some_dict["secret"] = "hope]nobody[finds>-_$#thisone"', - 'the_password= "hope]nobody[finds>-_$#thisone"\n', - 'the_password=\'hope]nobody[finds>-_$#thisone\'\n', - # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE - 'apikey "hope]nobody[finds>-_$#thisone";', # Double-quotes - 'fooapikeyfoo "hope]nobody[finds>-_$#thisone";', # Double-quotes - 'fooapikeyfoo"hope]nobody[finds>-_$#thisone";', # Double-quotes - 'private_key \'hope]nobody[finds>-_$#thisone\';', # Single-quotes - 'fooprivate_keyfoo\'hope]nobody[finds>-_$#thisone\';', # Single-quotes - 'fooprivate_key\'hope]nobody[finds>-_$#thisone\';', # Single-quotes - ], + "some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone", + 'my_password=h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password= h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password =h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password = h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password =h}o)p${e]nob(ody[finds>-_$#thisone', + 'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n', + }, ) def test_analyze_python_positives(self, file_content): logic = KeywordDetector() @@ -85,45 +108,38 @@ def test_analyze_python_positives(self, file_content): assert 'mock_filename.py' == potential_secret.filename assert ( potential_secret.secret_hash - == PotentialSecret.hash_secret('hope]nobody[finds>-_$#thisone') + == PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone') ) @pytest.mark.parametrize( - 'file_content', - [ + 'negative', + STANDARD_NEGATIVES, + ) + def test_analyze_standard_negatives(self, negative): + logic = KeywordDetector() + + f = mock_file_object(negative) + output = logic.analyze(f, 'mock_filename.foo') + assert len(output) == 0 + + @pytest.mark.parametrize( + 'js_negative', + STANDARD_NEGATIVES + [ # FOLLOWED_BY_COLON_RE - 'private_key "";', # Nothing in the quotes - 'private_key \'"no spaces\';', # Has whitespace in the secret - 'private_key "fake";', # 'fake' in the secret - 'private_key "hopenobodyfindsthisone\';', # Double-quote does not match single-quote - 'private_key \'hopenobodyfindsthisone";', # Single-quote does not match double-quote - # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE - 'theapikey: ""', # Nothing in the quotes - 'theapikey: "somefakekey"', # 'fake' in the secret - 'theapikeyforfoo:hopenobodyfindsthisone', # Characters between apikey and : - # FOLLOWED_BY_EQUAL_SIGNS_RE - 'some_key = "real_secret"', # We cannot make 'key' a Keyword, too noisy - 'my_password = foo(hey)you', # Has a ( followed by a ) - "my_password = request.json_body['hey']", # Has a [ followed by a ] - 'my_password = ""', # Nothing in the quotes - "my_password = ''", # Nothing in the quotes - 'my_password = True', # 'True' is a known false-positive - 'my_password = "fakesecret"', # 'fake' in the secret - 'login(username=username, password=password)', # secret is password) - 'open(self, password = ""):', # secrets is ""): - # '', + 'apiKey: this.apiKey,', + "apiKey: fs.readFileSync('foo',", ], ) - def test_analyze_negatives(self, file_content): + def test_analyze_javascript_negatives(self, js_negative): logic = KeywordDetector() - f = mock_file_object(file_content) - output = logic.analyze(f, 'mock_filename.foo') + f = mock_file_object(js_negative) + output = logic.analyze(f, 'mock_filename.js') assert len(output) == 0 @pytest.mark.parametrize( 'secret_starting_with_dollar_sign', - [ + STANDARD_NEGATIVES + [ # FOLLOWED_BY_EQUAL_SIGNS_RE '$password = $input;', ], @@ -137,7 +153,7 @@ def test_analyze_php_negatives(self, secret_starting_with_dollar_sign): @pytest.mark.parametrize( 'secret_with_no_quote', - [ + STANDARD_NEGATIVES + [ # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE 'apikey: hope]nobody[finds>-_$#thisone', 'apikey:hope]nobody[finds>-_$#thisone',