From 57abcc1349fd863bf26bb55be0d66496832b9c9f Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 17 Sep 2018 16:21:37 -0700 Subject: [PATCH 1/6] Added a PasswordDetector plugin Added _remove_password_secrets_if_line_reported_already Because is often the case that e.g. SUPER_SECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJ' is reported both by the PasswordDetector and another plugin. To minimize diff size, we will simply not report findings from the PasswordDetector if another plugin reports a secret on the same line. --- detect_secrets/core/secrets_collection.py | 40 +++++++++++-- detect_secrets/core/usage.py | 5 ++ detect_secrets/plugins/core/initialize.py | 1 + detect_secrets/plugins/password.py | 68 +++++++++++++++++++++++ detect_secrets/plugins/private_key.py | 2 +- test_data/files/file_with_no_secrets.py | 3 +- test_data/short_files/last_line.ini | 4 +- test_data/short_files/middle_line.yml | 4 +- tests/core/secrets_collection_test.py | 49 ++++++++++++++-- tests/core/usage_test.py | 1 + tests/main_test.py | 10 ++-- tests/plugins/password_test.py | 30 ++++++++++ 12 files changed, 195 insertions(+), 22 deletions(-) create mode 100644 detect_secrets/plugins/password.py create mode 100644 tests/plugins/password_test.py diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index 7b24cda6c..e57dcdd93 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -251,18 +251,48 @@ def _results_accumulator(self, filename): Caller is responsible for updating the dictionary with results of plugin analysis. """ - results = {} + file_results = {} for plugin in self.plugins: - yield results, plugin + yield file_results, plugin - if not results: + if not file_results: return + self._remove_password_secrets_if_line_reported_already( + file_results, + ) + if filename not in self.data: - self.data[filename] = results + self.data[filename] = file_results else: - self.data[filename].update(results) + self.data[filename].update(file_results) + + def _remove_password_secrets_if_line_reported_already( + self, + file_results, + ): + """ + It is often the case that e.g. + SUPER_SECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJ' + is reported both by the PasswordDetector and another plugin. + + To minimize diff size, we will simply not report findings from + the PasswordDetector if another plugin reports a secret on the + same line. + """ + password_secrets = list() + line_numbers_of_other_plugins = set() + + for secret in file_results: + if secret.type == 'Password': + password_secrets.append(secret) + else: + line_numbers_of_other_plugins.add(secret.lineno) + + for password_secret in password_secrets: + if password_secret.lineno in line_numbers_of_other_plugins: + del file_results[password_secret] def _extract_secrets_from_file(self, f, filename): """Extract secrets from a given file object. diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index efef69fe4..2248642e5 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -225,6 +225,11 @@ class PluginOptions(object): disable_flag_text='--no-basic-auth-scan', disable_help_text='Disables scanning for Basic Auth formatted URIs.', ), + PluginDescriptor( + classname='PasswordDetector', + disable_flag_text='--no-password-scan', + disable_help_text='Disables scanning for passwords.', + ), ] def __init__(self, parser): diff --git a/detect_secrets/plugins/core/initialize.py b/detect_secrets/plugins/core/initialize.py index ae9ea4c43..bbf51fbd7 100644 --- a/detect_secrets/plugins/core/initialize.py +++ b/detect_secrets/plugins/core/initialize.py @@ -8,6 +8,7 @@ from ..basic_auth import BasicAuthDetector # noqa: F401 from ..high_entropy_strings import Base64HighEntropyString # noqa: F401 from ..high_entropy_strings import HexHighEntropyString # noqa: F401 +from ..password import PasswordDetector # noqa: F401 from ..private_key import PrivateKeyDetector # noqa: F401 from detect_secrets.core.log import log diff --git a/detect_secrets/plugins/password.py b/detect_secrets/plugins/password.py new file mode 100644 index 000000000..c01f62de6 --- /dev/null +++ b/detect_secrets/plugins/password.py @@ -0,0 +1,68 @@ +""" +This code was extracted in part from +https://github.com/PyCQA/bandit. Using similar heuristic logic, +we adapted it to fit our plugin infrastructure, to create an organized, +concerted effort in detecting all type of secrets in code. + +Copyright (c) 2014 Hewlett-Packard Development Company, L.P. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" +from __future__ import absolute_import + +from .base import BasePlugin +from detect_secrets.core.potential_secret import PotentialSecret + + +BLACKLIST = ( + 'PASS =', + 'password', + 'passwd', + 'pwd', + 'secret', + 'secrete', + 'token', +) + + +class PasswordDetector(BasePlugin): + """This checks for private keys by determining whether the blacklisted + lines are present in the analyzed string. + """ + + secret_type = 'Password' + + def analyze_string(self, string, line_num, filename): + output = {} + + for identifier in self.secret_generator(string.lower()): + secret = PotentialSecret( + self.secret_type, + filename, + line_num, + identifier, + ) + output[secret] = secret + + return output + + def secret_generator(self, string): + for line in BLACKLIST: + if line in string: + yield line diff --git a/detect_secrets/plugins/private_key.py b/detect_secrets/plugins/private_key.py index 7596363f3..a4e256457 100644 --- a/detect_secrets/plugins/private_key.py +++ b/detect_secrets/plugins/private_key.py @@ -1,7 +1,7 @@ """ This code was extracted in part from https://github.com/pre-commit/pre-commit-hooks. Using similar heuristic logic, -we adapt it to fit our plugin infrastructure, to create an organized, +we adapted it to fit our plugin infrastructure, to create an organized, concerted effort in detecting all type of secrets in code. Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys diff --git a/test_data/files/file_with_no_secrets.py b/test_data/files/file_with_no_secrets.py index ea832f442..9f7372a8c 100644 --- a/test_data/files/file_with_no_secrets.py +++ b/test_data/files/file_with_no_secrets.py @@ -1,6 +1,5 @@ #!/usr/bin/python -# Will change this later. -SUPER_SECRET_VALUE = "this is just a long string, like a user facing error message" +REGULAR_VALUE = "this is just a long string, like a user facing error message" def main(): diff --git a/test_data/short_files/last_line.ini b/test_data/short_files/last_line.ini index 7f0793d24..167e62e96 100644 --- a/test_data/short_files/last_line.ini +++ b/test_data/short_files/last_line.ini @@ -1,5 +1,5 @@ [some section] -secrets_for_no_one_to_find = +secreets_for_no_one_to_find = hunter2 - password123 + passsword123 BEEF0123456789a diff --git a/test_data/short_files/middle_line.yml b/test_data/short_files/middle_line.yml index db82aae55..e96ea3f81 100644 --- a/test_data/short_files/middle_line.yml +++ b/test_data/short_files/middle_line.yml @@ -1,6 +1,6 @@ deploy: user: aaronloo - password: + passhword: secure: thequickbrownfoxjumpsoverthelazydog on: - repo: Yelp/detect-secrets + repo: Yelp/detect-sechrets diff --git a/tests/core/secrets_collection_test.py b/tests/core/secrets_collection_test.py index 851c56a89..6a1bac87c 100644 --- a/tests/core/secrets_collection_test.py +++ b/tests/core/secrets_collection_test.py @@ -93,6 +93,30 @@ def test_success_multiple_plugins(self): line_numbers = [entry.lineno for entry in logic.data['filename']] assert set(line_numbers) == set([1, 2, 3]) + def test_removal_of_password_plugin_secrets_if_reported_already(self): + logic = secrets_collection_factory( + secrets=[ + { + 'filename': 'filename', + 'lineno': 3, + }, + ], + plugins=( + MockPasswordPluginValue(), + MockPluginFileValue(), + ), + ) + + with mock_open('junk text here'): + logic.scan_file('filename') + + # One from only the MockPluginFileValue, and one from existing secret + # This is because the MockPasswordPluginValue was not placed into data + assert len(logic.data['filename']) == 2 + + line_numbers = [entry.lineno for entry in logic.data['filename']] + assert set(line_numbers) == set([2, 3]) + def test_unicode_decode_error(self, mock_log): logic = secrets_collection_factory( plugins=(MockPluginFileValue(),), @@ -203,12 +227,14 @@ def test_optional_type(self, filename, secret_hash, expected_value): ) def test_explicit_type_for_optimization(self, type_, is_none): with self._mock_secret_hash(): - logic = secrets_collection_factory(secrets=[ - { - 'filename': 'filename', - 'type_': 'type', - }, - ]) + logic = secrets_collection_factory( + secrets=[ + { + 'filename': 'filename', + 'type_': 'type', + }, + ], + ) assert (logic.get_secret('filename', 'secret_hash', type_) is None) == is_none @@ -358,4 +384,15 @@ def analyze(self, f, filename): return {secret: secret} +class MockPasswordPluginValue(MockBasePlugin): + + secret_type = 'mock_plugin_file_value' + + def analyze(self, f, filename): + password_secret = PotentialSecret('Password', filename, 2, f.read().strip()) + return { + password_secret: password_secret, + } + + MockUnicodeDecodeError = UnicodeDecodeError('encoding type', b'subject', 0, 1, 'exception message') diff --git a/tests/core/usage_test.py b/tests/core/usage_test.py index 1cf679f2a..81bac6071 100644 --- a/tests/core/usage_test.py +++ b/tests/core/usage_test.py @@ -32,6 +32,7 @@ def test_consolidates_output_basic(self): 'Base64HighEntropyString': { 'base64_limit': 4.5, }, + 'PasswordDetector': {}, 'PrivateKeyDetector': {}, } assert not hasattr(args, 'no_private_key_scan') diff --git a/tests/main_test.py b/tests/main_test.py index 27e0da729..f29ff5ebc 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -86,6 +86,7 @@ def test_scan_string_basic(self, mock_baseline_initialize): Base64HighEntropyString: False (3.459) BasicAuthDetector : False HexHighEntropyString : True (3.459) + PasswordDetector : False PrivateKeyDetector : False """)[1:] @@ -102,6 +103,7 @@ def test_scan_string_cli_overrides_stdin(self): Base64HighEntropyString: False (2.585) BasicAuthDetector : False HexHighEntropyString : False (2.121) + PasswordDetector : False PrivateKeyDetector : False """)[1:] @@ -190,19 +192,19 @@ def test_old_baseline_ignored_with_update_flag( textwrap.dedent(""" 1:deploy: 2: user: aaronloo - 3: password: + 3: passhword: 4: secure: thequickbrownfoxjumpsoverthelazydog 5: on: - 6: repo: Yelp/detect-secrets + 6: repo: Yelp/detect-sechrets """)[1:-1], ), ( 'test_data/short_files/last_line.ini', textwrap.dedent(""" 1:[some section] - 2:secrets_for_no_one_to_find = + 2:secreets_for_no_one_to_find = 3: hunter2 - 4: password123 + 4: passsword123 5: BEEF0123456789a """)[1:-1], ), diff --git a/tests/plugins/password_test.py b/tests/plugins/password_test.py new file mode 100644 index 000000000..c6d0c9fe3 --- /dev/null +++ b/tests/plugins/password_test.py @@ -0,0 +1,30 @@ +from __future__ import absolute_import +from __future__ import unicode_literals + +import pytest + +from detect_secrets.plugins.password import PasswordDetector +from testing.mocks import mock_file_object + + +class TestPasswordDetector(object): + + @pytest.mark.parametrize( + 'file_content', + [ + ( + 'login_somewhere --http-password hopenobodyfindsthisone\n' + ), + ( + 'token = "noentropy"' + ), + ], + ) + def test_analyze(self, file_content): + logic = PasswordDetector() + + f = mock_file_object(file_content) + output = logic.analyze(f, 'mock_filename') + assert len(output) == 1 + for potential_secret in output: + assert 'mock_filename' == potential_secret.filename From 60267d952ba48ed109535dfbaf9abe461613eb84 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 18 Sep 2018 16:10:06 -0700 Subject: [PATCH 2/6] Rename password plugin to keyword plugin --- detect_secrets/core/secrets_collection.py | 8 ++++---- detect_secrets/core/usage.py | 6 +++--- detect_secrets/plugins/core/initialize.py | 2 +- detect_secrets/plugins/{password.py => keyword.py} | 6 +++--- tests/core/usage_test.py | 2 +- tests/main_test.py | 4 ++-- tests/plugins/{password_test.py => keyword_test.py} | 6 +++--- 7 files changed, 17 insertions(+), 17 deletions(-) rename detect_secrets/plugins/{password.py => keyword.py} (93%) rename tests/plugins/{password_test.py => keyword_test.py} (83%) diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index e57dcdd93..0e2bc952c 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -259,7 +259,7 @@ def _results_accumulator(self, filename): if not file_results: return - self._remove_password_secrets_if_line_reported_already( + self._remove_keyword_secrets_if_line_reported_already( file_results, ) @@ -268,17 +268,17 @@ def _results_accumulator(self, filename): else: self.data[filename].update(file_results) - def _remove_password_secrets_if_line_reported_already( + def _remove_keyword_secrets_if_line_reported_already( self, file_results, ): """ It is often the case that e.g. SUPER_SECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJ' - is reported both by the PasswordDetector and another plugin. + is reported both by the KeywordDetector and another plugin. To minimize diff size, we will simply not report findings from - the PasswordDetector if another plugin reports a secret on the + the KeywordDetector if another plugin reports a secret on the same line. """ password_secrets = list() diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index 2248642e5..64cf3d4ff 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -226,9 +226,9 @@ class PluginOptions(object): disable_help_text='Disables scanning for Basic Auth formatted URIs.', ), PluginDescriptor( - classname='PasswordDetector', - disable_flag_text='--no-password-scan', - disable_help_text='Disables scanning for passwords.', + classname='KeywordDetector', + disable_flag_text='--no-keyword-scan', + disable_help_text='Disables scanning for secret keywords.', ), ] diff --git a/detect_secrets/plugins/core/initialize.py b/detect_secrets/plugins/core/initialize.py index bbf51fbd7..7a32b2971 100644 --- a/detect_secrets/plugins/core/initialize.py +++ b/detect_secrets/plugins/core/initialize.py @@ -8,7 +8,7 @@ from ..basic_auth import BasicAuthDetector # noqa: F401 from ..high_entropy_strings import Base64HighEntropyString # noqa: F401 from ..high_entropy_strings import HexHighEntropyString # noqa: F401 -from ..password import PasswordDetector # noqa: F401 +from ..keyword import KeywordDetector # noqa: F401 from ..private_key import PrivateKeyDetector # noqa: F401 from detect_secrets.core.log import log diff --git a/detect_secrets/plugins/password.py b/detect_secrets/plugins/keyword.py similarity index 93% rename from detect_secrets/plugins/password.py rename to detect_secrets/plugins/keyword.py index c01f62de6..c34c2dcdb 100644 --- a/detect_secrets/plugins/password.py +++ b/detect_secrets/plugins/keyword.py @@ -41,9 +41,9 @@ ) -class PasswordDetector(BasePlugin): - """This checks for private keys by determining whether the blacklisted - lines are present in the analyzed string. +class KeywordDetector(BasePlugin): + """This checks if blacklisted keywords + are present in the analyzed string. """ secret_type = 'Password' diff --git a/tests/core/usage_test.py b/tests/core/usage_test.py index 81bac6071..880049e00 100644 --- a/tests/core/usage_test.py +++ b/tests/core/usage_test.py @@ -32,7 +32,7 @@ def test_consolidates_output_basic(self): 'Base64HighEntropyString': { 'base64_limit': 4.5, }, - 'PasswordDetector': {}, + 'KeywordDetector': {}, 'PrivateKeyDetector': {}, } assert not hasattr(args, 'no_private_key_scan') diff --git a/tests/main_test.py b/tests/main_test.py index f29ff5ebc..482e7e116 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -86,7 +86,7 @@ def test_scan_string_basic(self, mock_baseline_initialize): Base64HighEntropyString: False (3.459) BasicAuthDetector : False HexHighEntropyString : True (3.459) - PasswordDetector : False + KeywordDetector : False PrivateKeyDetector : False """)[1:] @@ -103,7 +103,7 @@ def test_scan_string_cli_overrides_stdin(self): Base64HighEntropyString: False (2.585) BasicAuthDetector : False HexHighEntropyString : False (2.121) - PasswordDetector : False + KeywordDetector : False PrivateKeyDetector : False """)[1:] diff --git a/tests/plugins/password_test.py b/tests/plugins/keyword_test.py similarity index 83% rename from tests/plugins/password_test.py rename to tests/plugins/keyword_test.py index c6d0c9fe3..5d72e32f5 100644 --- a/tests/plugins/password_test.py +++ b/tests/plugins/keyword_test.py @@ -3,11 +3,11 @@ import pytest -from detect_secrets.plugins.password import PasswordDetector +from detect_secrets.plugins.keyword import KeywordDetector from testing.mocks import mock_file_object -class TestPasswordDetector(object): +class TestKeywordDetector(object): @pytest.mark.parametrize( 'file_content', @@ -21,7 +21,7 @@ class TestPasswordDetector(object): ], ) def test_analyze(self, file_content): - logic = PasswordDetector() + logic = KeywordDetector() f = mock_file_object(file_content) output = logic.analyze(f, 'mock_filename') From 29b3b78414519296e622ead22f7893c0eaa8ed9f Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 18 Sep 2018 17:26:34 -0700 Subject: [PATCH 3/6] Default lineno to be 0 in PotentialSecret, Change typ to be list --- detect_secrets/core/audit.py | 4 ---- detect_secrets/core/potential_secret.py | 21 +++++++++++-------- detect_secrets/core/secrets_collection.py | 4 ++-- detect_secrets/plugins/basic_auth.py | 2 +- .../plugins/high_entropy_strings.py | 2 +- detect_secrets/plugins/keyword.py | 2 +- detect_secrets/plugins/private_key.py | 2 +- tests/core/baseline_test.py | 4 ++-- tests/core/secrets_collection_test.py | 6 +++--- 9 files changed, 23 insertions(+), 24 deletions(-) diff --git a/detect_secrets/core/audit.py b/detect_secrets/core/audit.py index 08df73846..e79b33880 100644 --- a/detect_secrets/core/audit.py +++ b/detect_secrets/core/audit.py @@ -321,10 +321,6 @@ def _highlight_secret(secret_line, secret, filename, plugin_settings): plugin.secret_type, filename, secret=raw_secret, - - # This doesn't matter, because PotentialSecret only uses - # line numbers for logging, and we're not logging it. - lineno=0, ) # There could be more than two secrets on the same line. diff --git a/detect_secrets/core/potential_secret.py b/detect_secrets/core/potential_secret.py index 8603b9983..0fc0b03c6 100644 --- a/detect_secrets/core/potential_secret.py +++ b/detect_secrets/core/potential_secret.py @@ -18,30 +18,30 @@ def __init__( self, typ, filename, - lineno, secret, + lineno=0, is_secret=None, ): """ - :type typ: str - :param typ: human-readable secret type, defined by the plugin + :type typ: list(str) + :param typ: human-readable secret types, defined by the plugins that generated this PotentialSecret. - e.g. "High Entropy String" + e.g. ["High Entropy String"] :type filename: str :param filename: name of file that this secret was found + :type secret: str + :param secret: the actual secret identified + :type lineno: int :param lineno: location of secret, within filename. Merely used as a reference for easy triage. - :type secret: str - :param secret: the actual secret identified - :type is_secret: bool|None :param is_secret: whether or not the secret is a true- or false- positive """ - self.type = typ + self.type = [typ] self.filename = filename self.lineno = lineno self.secret_hash = self.hash_secret(secret) @@ -87,7 +87,10 @@ def __ne__(self, other): def __hash__(self): return hash( - tuple([getattr(self, x) for x in self.fields_to_compare]), + tuple( + getattr(self, x) + for x in self.fields_to_compare + ), ) def __str__(self): # pragma: no cover diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index 0e2bc952c..6694c4d8c 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -88,8 +88,8 @@ def _load_baseline_from_dict(cls, data): secret = PotentialSecret( item['type'], filename, - item['line_number'], secret='will be replaced', + lineno=item['line_number'], is_secret=item.get('is_secret'), ) secret.secret_hash = item['hashed_secret'] @@ -204,7 +204,7 @@ def get_secret(self, filename, secret, type_=None): if type_: # Optimized lookup, because we know the type of secret # (and therefore, its hash) - tmp_secret = PotentialSecret(type_, filename, 0, 'will be overriden') + tmp_secret = PotentialSecret(type_, filename, secret='will be overriden') tmp_secret.secret_hash = secret if tmp_secret in self.data[filename]: diff --git a/detect_secrets/plugins/basic_auth.py b/detect_secrets/plugins/basic_auth.py index 399fde2e4..772b0d9e6 100644 --- a/detect_secrets/plugins/basic_auth.py +++ b/detect_secrets/plugins/basic_auth.py @@ -22,8 +22,8 @@ def analyze_string(self, string, line_num, filename): secret = PotentialSecret( self.secret_type, filename, - line_num, result, + line_num, ) output[secret] = secret diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py index ce489c40d..54d9110d2 100644 --- a/detect_secrets/plugins/high_entropy_strings.py +++ b/detect_secrets/plugins/high_entropy_strings.py @@ -102,7 +102,7 @@ def analyze_string(self, string, line_num, filename): for result in self.secret_generator(string): if self.is_sequential_string(result): continue - secret = PotentialSecret(self.secret_type, filename, line_num, result) + secret = PotentialSecret(self.secret_type, filename, result, line_num) output[secret] = secret return output diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index c34c2dcdb..d5d067af1 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -55,8 +55,8 @@ def analyze_string(self, string, line_num, filename): secret = PotentialSecret( self.secret_type, filename, - line_num, identifier, + line_num, ) output[secret] = secret diff --git a/detect_secrets/plugins/private_key.py b/detect_secrets/plugins/private_key.py index a4e256457..dd667c4db 100644 --- a/detect_secrets/plugins/private_key.py +++ b/detect_secrets/plugins/private_key.py @@ -55,8 +55,8 @@ def analyze_string(self, string, line_num, filename): secret = PotentialSecret( self.secret_type, filename, - line_num, identifier, + line_num, ) output[secret] = secret diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 9a84b06c3..66f428fbe 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -178,7 +178,7 @@ def test_new_secret_line_old_file(self): results = get_secrets_not_in_baseline(new_findings, baseline) assert len(results.data['filename']) == 1 - secretA = PotentialSecret('type', 'filename', 1, 'secret1') + secretA = PotentialSecret('type', 'filename', 'secret1', 1) assert results.data['filename'][secretA].secret_hash == \ PotentialSecret.hash_secret('secret1') assert baseline.data == backup_baseline @@ -201,7 +201,7 @@ def test_rolled_creds(self): assert len(results.data['filename']) == 1 - secretA = PotentialSecret('type', 'filename', 1, 'secret_new') + secretA = PotentialSecret('type', 'filename', 'secret_new', 1) assert results.data['filename'][secretA].secret_hash == \ PotentialSecret.hash_secret('secret_new') assert baseline.data == backup_baseline diff --git a/tests/core/secrets_collection_test.py b/tests/core/secrets_collection_test.py index 6a1bac87c..ff739e3b8 100644 --- a/tests/core/secrets_collection_test.py +++ b/tests/core/secrets_collection_test.py @@ -369,7 +369,7 @@ class MockPluginFixedValue(MockBasePlugin): def analyze(self, f, filename): # We're not testing the plugin's ability to analyze secrets, so # it doesn't matter what we return - secret = PotentialSecret('mock fixed value type', filename, 1, 'asdf') + secret = PotentialSecret('mock fixed value type', filename, 'asdf', 1) return {secret: secret} @@ -380,7 +380,7 @@ class MockPluginFileValue(MockBasePlugin): def analyze(self, f, filename): # We're not testing the plugin's ability to analyze secrets, so # it doesn't matter what we return - secret = PotentialSecret('mock file value type', filename, 2, f.read().strip()) + secret = PotentialSecret('mock file value type', filename, f.read().strip(), 2) return {secret: secret} @@ -389,7 +389,7 @@ class MockPasswordPluginValue(MockBasePlugin): secret_type = 'mock_plugin_file_value' def analyze(self, f, filename): - password_secret = PotentialSecret('Password', filename, 2, f.read().strip()) + password_secret = PotentialSecret('Password', filename, f.read().strip(), 2) return { password_secret: password_secret, } From 4ce95562b7ffaf06637ec7d3c464776f29f0d025 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 19 Sep 2018 14:27:07 -0700 Subject: [PATCH 4/6] Do not try to remove password plugin reported secrets even if other plugins report that same line, change PotentialSecret .type back to a single string --- detect_secrets/core/potential_secret.py | 6 ++--- detect_secrets/core/secrets_collection.py | 30 ----------------------- 2 files changed, 3 insertions(+), 33 deletions(-) diff --git a/detect_secrets/core/potential_secret.py b/detect_secrets/core/potential_secret.py index 0fc0b03c6..96ecc08a2 100644 --- a/detect_secrets/core/potential_secret.py +++ b/detect_secrets/core/potential_secret.py @@ -23,10 +23,10 @@ def __init__( is_secret=None, ): """ - :type typ: list(str) + :type typ: str :param typ: human-readable secret types, defined by the plugins that generated this PotentialSecret. - e.g. ["High Entropy String"] + e.g. "High Entropy String" :type filename: str :param filename: name of file that this secret was found @@ -41,7 +41,7 @@ def __init__( :type is_secret: bool|None :param is_secret: whether or not the secret is a true- or false- positive """ - self.type = [typ] + self.type = typ self.filename = filename self.lineno = lineno self.secret_hash = self.hash_secret(secret) diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index 6694c4d8c..fae288b2e 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -259,41 +259,11 @@ def _results_accumulator(self, filename): if not file_results: return - self._remove_keyword_secrets_if_line_reported_already( - file_results, - ) - if filename not in self.data: self.data[filename] = file_results else: self.data[filename].update(file_results) - def _remove_keyword_secrets_if_line_reported_already( - self, - file_results, - ): - """ - It is often the case that e.g. - SUPER_SECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJ' - is reported both by the KeywordDetector and another plugin. - - To minimize diff size, we will simply not report findings from - the KeywordDetector if another plugin reports a secret on the - same line. - """ - password_secrets = list() - line_numbers_of_other_plugins = set() - - for secret in file_results: - if secret.type == 'Password': - password_secrets.append(secret) - else: - line_numbers_of_other_plugins.add(secret.lineno) - - for password_secret in password_secrets: - if password_secret.lineno in line_numbers_of_other_plugins: - del file_results[password_secret] - def _extract_secrets_from_file(self, f, filename): """Extract secrets from a given file object. From e4911bfde5ae56a1ce38cd2e4252a20c10f8e8fe Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 19 Sep 2018 17:30:34 -0700 Subject: [PATCH 5/6] Fix a few tests --- testing/factories.py | 6 +++--- tests/core/potential_secret_test.py | 2 +- tests/core/secrets_collection_test.py | 6 ++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/testing/factories.py b/testing/factories.py index d64101dda..67f65e409 100644 --- a/testing/factories.py +++ b/testing/factories.py @@ -4,11 +4,11 @@ from detect_secrets.core.secrets_collection import SecretsCollection -def potential_secret_factory(type_='type', filename='filename', lineno=1, secret='secret'): +def potential_secret_factory(type_='type', filename='filename', secret='secret', lineno=1): """This is only marginally better than creating PotentialSecret objects directly, because of default values. """ - return PotentialSecret(type_, filename, lineno, secret) + return PotentialSecret(type_, filename, secret, lineno) def secrets_collection_factory(secrets=None, plugins=(), exclude_regex=''): @@ -51,7 +51,7 @@ def _add_secret(collection, type_='type', secret='secret', filename='filename', tmp_secret = potential_secret_factory( type_=type_, filename=filename, - lineno=lineno, secret=secret, + lineno=lineno, ) collection.data[filename][tmp_secret] = tmp_secret diff --git a/tests/core/potential_secret_test.py b/tests/core/potential_secret_test.py index 3e3381a2c..75a86a134 100644 --- a/tests/core/potential_secret_test.py +++ b/tests/core/potential_secret_test.py @@ -9,7 +9,7 @@ class TestPotentialSecret(object): @pytest.mark.parametrize( - 'a,b,is_equal', + 'a, b, is_equal', [ ( potential_secret_factory(lineno=1), diff --git a/tests/core/secrets_collection_test.py b/tests/core/secrets_collection_test.py index ff739e3b8..eb2d37eee 100644 --- a/tests/core/secrets_collection_test.py +++ b/tests/core/secrets_collection_test.py @@ -93,7 +93,7 @@ def test_success_multiple_plugins(self): line_numbers = [entry.lineno for entry in logic.data['filename']] assert set(line_numbers) == set([1, 2, 3]) - def test_removal_of_password_plugin_secrets_if_reported_already(self): + def test_reporting_of_password_plugin_secrets_if_reported_already(self): logic = secrets_collection_factory( secrets=[ { @@ -110,9 +110,7 @@ def test_removal_of_password_plugin_secrets_if_reported_already(self): with mock_open('junk text here'): logic.scan_file('filename') - # One from only the MockPluginFileValue, and one from existing secret - # This is because the MockPasswordPluginValue was not placed into data - assert len(logic.data['filename']) == 2 + assert len(logic.data['filename']) == 3 line_numbers = [entry.lineno for entry in logic.data['filename']] assert set(line_numbers) == set([2, 3]) From d912ced2e7a7607f06794e3d916b10e52536bd5c Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 20 Sep 2018 11:33:04 -0700 Subject: [PATCH 6/6] Add whitelist to Keyword plugin, fix the rest of the tests --- detect_secrets/core/potential_secret.py | 2 +- detect_secrets/plugins/keyword.py | 4 ++++ test_data/files/file_with_secrets.py | 2 +- test_data/short_files/first_line.py | 2 +- tests/main_test.py | 2 +- 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/detect_secrets/core/potential_secret.py b/detect_secrets/core/potential_secret.py index 96ecc08a2..a48d33ea6 100644 --- a/detect_secrets/core/potential_secret.py +++ b/detect_secrets/core/potential_secret.py @@ -24,7 +24,7 @@ def __init__( ): """ :type typ: str - :param typ: human-readable secret types, defined by the plugins + :param typ: human-readable secret type, defined by the plugin that generated this PotentialSecret. e.g. "High Entropy String" diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index d5d067af1..cfe723d53 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -28,6 +28,7 @@ from .base import BasePlugin from detect_secrets.core.potential_secret import PotentialSecret +from detect_secrets.plugins.core.constants import WHITELIST_REGEX BLACKLIST = ( @@ -51,6 +52,9 @@ class KeywordDetector(BasePlugin): def analyze_string(self, string, line_num, filename): output = {} + if WHITELIST_REGEX.search(string): + return output + for identifier in self.secret_generator(string.lower()): secret = PotentialSecret( self.secret_type, diff --git a/test_data/files/file_with_secrets.py b/test_data/files/file_with_secrets.py index eeaaa5707..802a99100 100644 --- a/test_data/files/file_with_secrets.py +++ b/test_data/files/file_with_secrets.py @@ -1,6 +1,6 @@ #!/usr/bin/python # Will change this later. -SUPER_SECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5' +SUPER_SEECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5' VERY_SECRET_TOO = 'f6CGV4aMM9zedoh3OUNbSakBymo7yplB' # pragma: whitelist secret diff --git a/test_data/short_files/first_line.py b/test_data/short_files/first_line.py index ae20b49c3..9b1d12e67 100644 --- a/test_data/short_files/first_line.py +++ b/test_data/short_files/first_line.py @@ -1,4 +1,4 @@ -secret = 'BEEF0123456789a' +seecret = 'BEEF0123456789a' skipped_sequential_false_positive = '0123456789a' print('second line') var = 'third line' diff --git a/tests/main_test.py b/tests/main_test.py index 482e7e116..8f936a4c6 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -181,7 +181,7 @@ def test_old_baseline_ignored_with_update_flag( ( 'test_data/short_files/first_line.py', textwrap.dedent(""" - 1:secret = 'BEEF0123456789a' + 1:seecret = 'BEEF0123456789a' 2:skipped_sequential_false_positive = '0123456789a' 3:print('second line') 4:var = 'third line'