diff --git a/README.md b/README.md index 24c74e763..64c47c995 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ rejected as a potential secret. This preset amount can be adjusted in several ways: * Specifying it within the config file, for server scanning. -* Specifying it with command line flags (eg. `--base64-limit`) +* Specifying it with command line flags (e.g. `--base64-limit`) Lowering these limits will identify more potential secrets, but also create more false positives. Adjust these limits to suit your needs. diff --git a/detect_secrets/core/audit.py b/detect_secrets/core/audit.py index bedbfca08..99c7982a3 100644 --- a/detect_secrets/core/audit.py +++ b/detect_secrets/core/audit.py @@ -9,6 +9,8 @@ from ..plugins.core import initialize from ..plugins.high_entropy_strings import HighEntropyStringsPlugin +from ..plugins.keyword import determine_file_type +from ..plugins.keyword import KeywordDetector from .baseline import format_baseline_for_output from .baseline import merge_results from .bidirectional_iterator import BidirectionalIterator @@ -165,7 +167,7 @@ def compare_baselines(old_baseline_filename, new_baseline_filename): print('Quitting...') break - if decision == 'b': # pragma: no cover + if decision == 'b': # pragma: no cover current_index -= 2 secret_iterator.step_back_on_next_iteration() @@ -305,11 +307,11 @@ def _comparison_generator(old_list, new_list, compare_fn): new_index += 1 -def _clear_screen(): # pragma: no cover +def _clear_screen(): # pragma: no cover subprocess.call(['clear']) -def _print_context( # pragma: no cover +def _print_context( # pragma: no cover filename, secret, count, @@ -518,7 +520,13 @@ def _get_secret_with_context( ) -def _highlight_secret(secret_line, secret_lineno, secret, filename, plugin_settings): +def _highlight_secret( + secret_line, + secret_lineno, + secret, + filename, + plugin_settings, +): """ :type secret_line: str :param secret_line: the line on which the secret is found @@ -544,7 +552,11 @@ def _highlight_secret(secret_line, secret_lineno, secret, filename, plugin_setti plugin_settings, ) - for raw_secret in _raw_secret_generator(plugin, secret_line): + for raw_secret in _raw_secret_generator( + plugin, + secret_line, + filetype=determine_file_type(filename), + ): secret_obj = PotentialSecret( plugin.secret_type, filename, @@ -572,10 +584,14 @@ def _highlight_secret(secret_line, secret_lineno, secret, filename, plugin_setti ) -def _raw_secret_generator(plugin, secret_line): +def _raw_secret_generator(plugin, secret_line, filetype): """Generates raw secrets by re-scanning the line, with the specified plugin""" - for raw_secret in plugin.secret_generator(secret_line): - yield raw_secret + if isinstance(plugin, KeywordDetector): + for raw_secret in plugin.secret_generator(secret_line, filetype=filetype): + yield raw_secret + else: + for raw_secret in plugin.secret_generator(secret_line): + yield raw_secret if issubclass(plugin.__class__, HighEntropyStringsPlugin): with plugin.non_quoted_string_regex(strict=False): diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index b17514f1a..d6ae7c73b 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -348,9 +348,7 @@ def _add_opt_out_options(self): plugin.disable_flag_text, action='store_true', help=plugin.disable_help_text, - # Temporarily disabling the KeywordDetector - # Until we can test its effectiveness on more repositories - default=True if plugin.disable_flag_text == '--no-keyword-scan' else False, + default=False, ) return self @@ -370,7 +368,7 @@ def _convert_flag_text_to_argument_name(flag_text): """This just emulates argparse's underlying logic. :type flag_text: str - :param flag_text: eg. `--no-hex-string-scan` + :param flag_text: e.g. `--no-hex-string-scan` :return: `no_hex_string_scan` """ return flag_text[2:].replace('-', '_') diff --git a/detect_secrets/plugins/base.py b/detect_secrets/plugins/base.py index c52861996..ee6aa4688 100644 --- a/detect_secrets/plugins/base.py +++ b/detect_secrets/plugins/base.py @@ -53,6 +53,9 @@ def secret_generator(self, string): :type string: str :param string: the secret to scan + + :rtype: iter + :returns: Of all the identifiers found """ raise NotImplementedError diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index aa865f93c..f23713fe7 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -26,21 +26,119 @@ """ from __future__ import absolute_import +import re +from enum import Enum + from .base import BasePlugin from detect_secrets.core.potential_secret import PotentialSecret +# Note: All values here should be lowercase BLACKLIST = ( - # NOTE all values here should be lowercase, - # otherwise _secret_generator can fail to match them - 'pass =', + 'apikey', + 'api_key', + 'aws_secret_access_key', + 'db_pass', 'password', 'passwd', - 'pwd', + 'private_key', 'secret', 'secrete', - 'token', ) +FALSE_POSITIVES = ( + "''", + "''):", + "')", + "'this", + '""', + '""):', + '")', + '', + '', + 'dummy_secret', + 'false', + 'false):', + 'none', + 'none,', + 'none}', + 'not', + 'null,', + 'password)', + 'password,', + 'password},', + 'string,', + 'string}', + 'string}}', + 'test-access-key', + 'true', + 'true):', + '{', +) +FOLLOWED_BY_COLON_RE = re.compile( + # e.g. api_key: foo + r'({})(("|\')?):(\s*?)(("|\')?)([^\s]+)(\5)'.format( + r'|'.join(BLACKLIST), + ), +) +FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE = re.compile( + # e.g. api_key: "foo" + r'({})(("|\')?):(\s*?)(("|\'))([^\s]+)(\5)'.format( + r'|'.join(BLACKLIST), + ), +) +FOLLOWED_BY_EQUAL_SIGNS_RE = re.compile( + # e.g. my_password = bar + r'({})((\'|")])?()(\s*?)=(\s*?)(("|\')?)([^\s]+)(\7)'.format( + r'|'.join(BLACKLIST), + ), +) +FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE = re.compile( + # e.g. my_password = "bar" + r'({})((\'|")])?()(\s*?)=(\s*?)(("|\'))([^\s]+)(\7)'.format( + r'|'.join(BLACKLIST), + ), +) +FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE = re.compile( + # e.g. private_key "something"; + r'({})([^\s]*?)(\s*?)("|\')([^\s]+)(\4);'.format( + r'|'.join(BLACKLIST), + ), +) +BLACKLIST_REGEX_TO_GROUP = { + FOLLOWED_BY_COLON_RE: 7, + FOLLOWED_BY_EQUAL_SIGNS_RE: 9, + FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5, +} +PYTHON_BLACKLIST_REGEX_TO_GROUP = { + FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE: 7, + FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE: 9, + FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE: 5, +} + + +class FileType(Enum): + JAVASCRIPT = 0 + PHP = 1 + PYTHON = 2 + OTHER = 3 + + +def determine_file_type(filename): + """ + :param filename: str + + :rtype: FileType + """ + if filename.endswith('.js'): + return FileType.JAVASCRIPT + elif filename.endswith('.py'): + return FileType.PYTHON + elif filename.endswith('.php'): + return FileType.PHP + return FileType.OTHER class KeywordDetector(BasePlugin): @@ -48,12 +146,15 @@ class KeywordDetector(BasePlugin): are present in the analyzed string. """ - secret_type = 'Password' + secret_type = 'Secret Keyword' def analyze_string(self, string, line_num, filename): output = {} - for identifier in self.secret_generator(string): + for identifier in self.secret_generator( + string, + filetype=determine_file_type(filename), + ): secret = PotentialSecret( self.secret_type, filename, @@ -64,10 +165,71 @@ def analyze_string(self, string, line_num, filename): return output - def _secret_generator(self, lowercase_string): - for line in BLACKLIST: - if line in lowercase_string: - yield line + def secret_generator(self, string, filetype): + lowered_string = string.lower() + + if filetype == FileType.PYTHON: + blacklist_RE_to_group = PYTHON_BLACKLIST_REGEX_TO_GROUP + else: + blacklist_RE_to_group = BLACKLIST_REGEX_TO_GROUP + + for REGEX, group_number in blacklist_RE_to_group.items(): + match = REGEX.search(lowered_string) + if match: + lowered_secret = match.group(group_number) + + # ([^\s]+) guarantees lowered_secret is not '' + if not probably_false_positive( + lowered_secret, + filetype=filetype, + ): + yield lowered_secret + + +def probably_false_positive(lowered_secret, filetype): + if ( + 'fake' in lowered_secret + or 'forgot' in lowered_secret + or lowered_secret in FALSE_POSITIVES + or ( + filetype == FileType.JAVASCRIPT + and ( + lowered_secret.startswith('this.') + or lowered_secret.startswith('fs.read') + or lowered_secret == 'new' + ) + ) or ( # If it is a .php file, do not report $variables + filetype == FileType.PHP + and lowered_secret[0] == '$' + ) + ): + return True + + # Heuristic for no function calls + try: + if ( + lowered_secret.index('(') < lowered_secret.index(')') + ): + return True + except ValueError: + pass + + # Heuristic for e.g. request.json_body['hey'] + try: + if ( + lowered_secret.index('[') < lowered_secret.index(']') + ): + return True + except ValueError: + pass + + # Heuristic for e.g. ${link} + try: + if ( + lowered_secret.index('${') < lowered_secret.index('}') + ): + return True + except ValueError: + pass - def secret_generator(self, string): - return self._secret_generator(string.lower()) + return False diff --git a/detect_secrets/pre_commit_hook.py b/detect_secrets/pre_commit_hook.py index f07710f47..f8ed92a3b 100644 --- a/detect_secrets/pre_commit_hook.py +++ b/detect_secrets/pre_commit_hook.py @@ -102,7 +102,7 @@ def get_baseline(baseline_filename): ) -def _get_baseline_string_from_file(filename): # pragma: no cover +def _get_baseline_string_from_file(filename): # pragma: no cover """Breaking this function up for mockability.""" try: with open(filename) as f: diff --git a/test_data/short_files/first_line.py b/test_data/short_files/first_line.php similarity index 76% rename from test_data/short_files/first_line.py rename to test_data/short_files/first_line.php index ae20b49c3..9b1d12e67 100644 --- a/test_data/short_files/first_line.py +++ b/test_data/short_files/first_line.php @@ -1,4 +1,4 @@ -secret = 'BEEF0123456789a' +seecret = 'BEEF0123456789a' skipped_sequential_false_positive = '0123456789a' print('second line') var = 'third line' diff --git a/testing/factories.py b/testing/factories.py index 67f65e409..7f605bb5c 100644 --- a/testing/factories.py +++ b/testing/factories.py @@ -6,7 +6,7 @@ def potential_secret_factory(type_='type', filename='filename', secret='secret', lineno=1): """This is only marginally better than creating PotentialSecret objects directly, - because of default values. + because of the default values. """ return PotentialSecret(type_, filename, secret, lineno) @@ -15,7 +15,7 @@ def secrets_collection_factory(secrets=None, plugins=(), exclude_regex=''): """ :type secrets: list(dict) :param secrets: list of params to pass to add_secret. - Eg. [ {'secret': 'blah'}, ] + E.g. [ {'secret': 'blah'}, ] :type plugins: tuple :type exclude_regex: str diff --git a/testing/mocks.py b/testing/mocks.py index 18e1d23f6..4e138b838 100644 --- a/testing/mocks.py +++ b/testing/mocks.py @@ -19,7 +19,7 @@ def mock_git_calls(subprocess_namespace, cases): :type cases: iterable(SubprocessMock) :type subprocess_namespace: str :param subprocess_namespace: should be the namespace referring to check_output. - Eg. `detect_secrets.pre_commit_hook.subprocess.check_output` + E.g. `detect_secrets.pre_commit_hook.subprocess.check_output` """ # We need to use a dictionary, because python2.7 does not support # the `nonlocal` keyword (and needs to share scope with diff --git a/tests/core/audit_test.py b/tests/core/audit_test.py index a085b8888..a5d25ecf9 100644 --- a/tests/core/audit_test.py +++ b/tests/core/audit_test.py @@ -65,7 +65,7 @@ def test_making_decisions(self, mock_printer): def test_quit_half_way(self, mock_printer): modified_baseline = deepcopy(self.baseline) - for secrets in modified_baseline['results'].values(): + for secrets in modified_baseline['results'].values(): # pragma: no cover secrets[0]['is_secret'] = False break @@ -147,8 +147,7 @@ def test_go_back_several_steps(self, mock_printer): for secrets in modified_baseline['results'].values(): for secret in secrets: value = values_to_inject.pop(0) - if value is not None: - secret['is_secret'] = value + secret['is_secret'] = value self.run_logic( ['s', 'y', 'b', 's', 'b', 'b', 'n', 'n', 'n'], @@ -174,10 +173,7 @@ def run_logic(self, inputs, modified_baseline=None, input_baseline=None): ) as m: audit.audit_baseline('will_be_mocked') - if not modified_baseline: - assert m.call_args[0][1] == self.baseline - else: - assert m.call_args[0][1] == modified_baseline + assert m.call_args[0][1] == modified_baseline @contextmanager def mock_env(self, user_inputs=None, baseline=None): @@ -329,7 +325,7 @@ def test_compare(self, mock_printer): # These files come after, because filenames are sorted first assert uncolor(headers[2]) == textwrap.dedent(""" Secret: 3 of 4 - Filename: test_data/short_files/first_line.py + Filename: test_data/short_files/first_line.php Secret Type: Hex High Entropy String Status: >> REMOVED << """)[1:] @@ -400,7 +396,7 @@ def old_baseline(self): ], # This entire file will be "removed" - 'test_data/short_files/first_line.py': [ + 'test_data/short_files/first_line.php': [ { 'hashed_secret': '0de9a11b3f37872868ca49ecd726c955e25b6e21', 'line_number': 1, @@ -597,7 +593,7 @@ def test_secret_not_found(self, mock_printer): """)[1:-1] - def test_secret_in_yaml_file(self, mock_printer): + def test_hex_high_entropy_secret_in_yaml_file(self, mock_printer): with self._mock_sed_call( line_containing_secret='api key: 123456789a', ): @@ -636,6 +632,44 @@ def test_secret_in_yaml_file(self, mock_printer): """)[1:-1] + def test_keyword_secret_in_yaml_file(self, mock_printer): + with self._mock_sed_call( + line_containing_secret='api_key: yerba', + ): + self.run_logic( + secret=potential_secret_factory( + type_='Secret Keyword', + filename='filenameB', + secret='yerba', + lineno=15, + ).json(), + settings=[ + { + 'name': 'KeywordDetector', + }, + ], + ) + + assert uncolor(mock_printer.message) == textwrap.dedent(""" + Secret: 1 of 2 + Filename: filenameB + Secret Type: Secret Keyword + ---------- + 10:a + 11:b + 12:c + 13:d + 14:e + 15:api_key: yerba + 16:e + 17:d + 18:c + 19:b + 20:a + ---------- + + """)[1:-1] + class TestGetUserDecision(object): diff --git a/tests/core/secrets_collection_test.py b/tests/core/secrets_collection_test.py index eb2d37eee..310b6ac5c 100644 --- a/tests/core/secrets_collection_test.py +++ b/tests/core/secrets_collection_test.py @@ -350,7 +350,7 @@ def assert_loaded_collection_is_original_collection(self, original, new): assert original[key] == new[key] -class MockBasePlugin(BasePlugin): +class MockBasePlugin(BasePlugin): # pragma: no cover """Abstract testing class, to implement abstract methods.""" def analyze_string(self, value): diff --git a/tests/core/usage_test.py b/tests/core/usage_test.py index 6ca66087f..edd290cd3 100644 --- a/tests/core/usage_test.py +++ b/tests/core/usage_test.py @@ -32,6 +32,7 @@ def test_consolidates_output_basic(self): 'Base64HighEntropyString': { 'base64_limit': 4.5, }, + 'KeywordDetector': {}, 'PrivateKeyDetector': {}, 'AWSKeyDetector': {}, } diff --git a/tests/main_test.py b/tests/main_test.py index 015cc7c6c..160ab5739 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -65,6 +65,7 @@ def test_scan_string_basic(self, mock_baseline_initialize): Base64HighEntropyString: False (3.459) BasicAuthDetector : False HexHighEntropyString : True (3.459) + KeywordDetector : False PrivateKeyDetector : False """)[1:] @@ -82,6 +83,7 @@ def test_scan_string_cli_overrides_stdin(self): Base64HighEntropyString: False (2.585) BasicAuthDetector : False HexHighEntropyString : False (2.121) + KeywordDetector : False PrivateKeyDetector : False """)[1:] @@ -166,9 +168,9 @@ def test_old_baseline_ignored_with_update_flag( 'filename, expected_output', [ ( - 'test_data/short_files/first_line.py', + 'test_data/short_files/first_line.php', textwrap.dedent(""" - 1:secret = 'BEEF0123456789a' + 1:seecret = 'BEEF0123456789a' 2:skipped_sequential_false_positive = '0123456789a' 3:print('second line') 4:var = 'third line' diff --git a/tests/plugins/base_test.py b/tests/plugins/base_test.py index 3d2da92ba..f3c80e3f6 100644 --- a/tests/plugins/base_test.py +++ b/tests/plugins/base_test.py @@ -6,7 +6,7 @@ def test_fails_if_no_secret_type_defined(): - class MockPlugin(BasePlugin): # pragma: no cover + class MockPlugin(BasePlugin): # pragma: no cover def analyze_string(self, *args, **kwargs): pass diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 94ddc648f..4cdfaa0c3 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -3,27 +3,72 @@ import pytest +from detect_secrets.core.potential_secret import PotentialSecret from detect_secrets.plugins.keyword import KeywordDetector from testing.mocks import mock_file_object +STANDARD_NEGATIVES = [ + # FOLLOWED_BY_COLON_RE + 'theapikey: ""', # Nothing in the quotes + 'theapikey: "somefakekey"', # 'fake' in the secret + 'theapikeyforfoo:hopenobodyfindsthisone', # Characters between apikey and : + # FOLLOWED_BY_EQUAL_SIGNS_RE + 'some_key = "real_secret"', # We cannot make 'key' a Keyword, too noisy + 'my_password = foo(hey)you', # Has a ( followed by a ) + "my_password = request.json_body['hey']", # Has a [ followed by a ] + 'my_password = ""', # Nothing in the quotes + "my_password = ''", # Nothing in the quotes + 'my_password = True', # 'True' is a known false-positive + 'my_password = "fakesecret"', # 'fake' in the secret + 'login(username=username, password=password)', # secret is password) + 'open(self, password = ""):', # secrets is ""): + 'open(self, password = ""):', # secrets is ""): + # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE + 'private_key "";', # Nothing in the quotes + 'private_key \'"no spaces\';', # Has whitespace in the secret + 'private_key "fake";', # 'fake' in the secret + 'private_key "hopenobodyfindsthisone\';', # Double-quote does not match single-quote + 'private_key \'hopenobodyfindsthisone";', # Single-quote does not match double-quote + 'password: ${link}', # Has a ${ followed by a } +] +STANDARD_POSITIVES = { + # FOLLOWED_BY_COLON_RE + "'theapikey': 'h}o)p${e]nob(ody[finds>-_$#thisone'", + '"theapikey": "h}o)p${e]nob(ody[finds>-_$#thisone"', + 'apikey: h}o)p${e]nob(ody[finds>-_$#thisone', + 'apikey:h}o)p${e]nob(ody[finds>-_$#thisone', + 'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone', + 'apikey: "h}o)p${e]nob(ody[finds>-_$#thisone"', + "apikey: 'h}o)p${e]nob(ody[finds>-_$#thisone'", + # FOLLOWED_BY_EQUAL_SIGNS_RE + 'some_dict["secret"] = "h}o)p${e]nob(ody[finds>-_$#thisone"', + "some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone", + 'my_password=h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password= h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password =h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password = h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password =h}o)p${e]nob(ody[finds>-_$#thisone', + 'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n', + 'the_password= "h}o)p${e]nob(ody[finds>-_$#thisone"\n', + 'the_password=\'h}o)p${e]nob(ody[finds>-_$#thisone\'\n', + # FOLLOWED_BY_QUOTES_AND_SEMICOLON_RE + 'apikey "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes + 'fooapikeyfoo "h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes + 'fooapikeyfoo"h}o)p${e]nob(ody[finds>-_$#thisone";', # Double-quotes + 'private_key \'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes + 'fooprivate_keyfoo\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes + 'fooprivate_key\'h}o)p${e]nob(ody[finds>-_$#thisone\';', # Single-quotes +} + + class TestKeywordDetector(object): @pytest.mark.parametrize( 'file_content', - [ - ( - 'login_somewhere --http-password hopenobodyfindsthisone\n' - ), - ( - 'token = "noentropy"' - ), - ( - 'PASSWORD = "verysimple"' - ), - ], + STANDARD_POSITIVES, ) - def test_analyze(self, file_content): + def test_analyze_standard_positives(self, file_content): logic = KeywordDetector() f = mock_file_object(file_content) @@ -31,5 +76,101 @@ def test_analyze(self, file_content): assert len(output) == 1 for potential_secret in output: assert 'mock_filename' == potential_secret.filename - generated = list(logic.secret_generator(file_content)) - assert len(generated) == len(output) + assert ( + potential_secret.secret_hash + == PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone') + ) + + @pytest.mark.parametrize( + 'file_content', + STANDARD_POSITIVES - { + # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE + 'apikey: h}o)p${e]nob(ody[finds>-_$#thisone', + 'apikey:h}o)p${e]nob(ody[finds>-_$#thisone', + 'theapikey:h}o)p${e]nob(ody[finds>-_$#thisone', + # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE + "some_dict['secret'] = h}o)p${e]nob(ody[finds>-_$#thisone", + 'my_password=h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password= h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password =h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password = h}o)p${e]nob(ody[finds>-_$#thisone', + 'my_password =h}o)p${e]nob(ody[finds>-_$#thisone', + 'the_password=h}o)p${e]nob(ody[finds>-_$#thisone\n', + }, + ) + def test_analyze_python_positives(self, file_content): + logic = KeywordDetector() + + f = mock_file_object(file_content) + output = logic.analyze(f, 'mock_filename.py') + assert len(output) == 1 + for potential_secret in output: + assert 'mock_filename.py' == potential_secret.filename + assert ( + potential_secret.secret_hash + == PotentialSecret.hash_secret('h}o)p${e]nob(ody[finds>-_$#thisone') + ) + + @pytest.mark.parametrize( + 'negative', + STANDARD_NEGATIVES, + ) + def test_analyze_standard_negatives(self, negative): + logic = KeywordDetector() + + f = mock_file_object(negative) + output = logic.analyze(f, 'mock_filename.foo') + assert len(output) == 0 + + @pytest.mark.parametrize( + 'js_negative', + STANDARD_NEGATIVES + [ + # FOLLOWED_BY_COLON_RE + 'apiKey: this.apiKey,', + "apiKey: fs.readFileSync('foo',", + ], + ) + def test_analyze_javascript_negatives(self, js_negative): + logic = KeywordDetector() + + f = mock_file_object(js_negative) + output = logic.analyze(f, 'mock_filename.js') + assert len(output) == 0 + + @pytest.mark.parametrize( + 'secret_starting_with_dollar_sign', + STANDARD_NEGATIVES + [ + # FOLLOWED_BY_EQUAL_SIGNS_RE + '$password = $input;', + ], + ) + def test_analyze_php_negatives(self, secret_starting_with_dollar_sign): + logic = KeywordDetector() + + f = mock_file_object(secret_starting_with_dollar_sign) + output = logic.analyze(f, 'mock_filename.php') + assert len(output) == 0 + + @pytest.mark.parametrize( + 'secret_with_no_quote', + STANDARD_NEGATIVES + [ + # FOLLOWED_BY_COLON_QUOTES_REQUIRED_RE + 'apikey: hope]nobody[finds>-_$#thisone', + 'apikey:hope]nobody[finds>-_$#thisone', + 'theapikey:hope]nobody[finds>-_$#thisone', + # FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_RE + "some_dict['secret'] = hope]nobody[finds>-_$#thisone", + 'my_password=hope]nobody[finds>-_$#thisone', + 'my_password= hope]nobody[finds>-_$#thisone', + 'my_password =hope]nobody[finds>-_$#thisone', + 'my_password = hope]nobody[finds>-_$#thisone', + 'my_password =hope]nobody[finds>-_$#thisone', + 'the_password=hope]nobody[finds>-_$#thisone\n', + ], + ) + def test_analyze_python_negatives(self, secret_with_no_quote): + logic = KeywordDetector() + + f = mock_file_object(secret_with_no_quote) + output = logic.analyze(f, 'mock_filename.py') + assert len(output) == 0 diff --git a/tests/pre_commit_hook_test.py b/tests/pre_commit_hook_test.py index 0b7c08317..7727365ac 100644 --- a/tests/pre_commit_hook_test.py +++ b/tests/pre_commit_hook_test.py @@ -145,6 +145,9 @@ def test_that_baseline_gets_updated( 'hex_limit': 3, 'name': 'HexHighEntropyString', }, + { + 'name': 'KeywordDetector', + }, { 'name': 'PrivateKeyDetector', }, diff --git a/tox.ini b/tox.ini index 9ef1b4aa1..8bcfa3762 100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,8 @@ deps = -rrequirements-dev.txt commands = coverage erase coverage run -m pytest tests - coverage report --show-missing --fail-under 98 + coverage report --show-missing --include=tests/* --fail-under 100 + coverage report --show-missing --include=detect_secrets/* --fail-under 97 pre-commit run --all-files [testenv:venv]