diff --git a/detect_secrets/core/audit.py b/detect_secrets/core/audit.py index 08df73846..e79b33880 100644 --- a/detect_secrets/core/audit.py +++ b/detect_secrets/core/audit.py @@ -321,10 +321,6 @@ def _highlight_secret(secret_line, secret, filename, plugin_settings): plugin.secret_type, filename, secret=raw_secret, - - # This doesn't matter, because PotentialSecret only uses - # line numbers for logging, and we're not logging it. - lineno=0, ) # There could be more than two secrets on the same line. diff --git a/detect_secrets/core/potential_secret.py b/detect_secrets/core/potential_secret.py index 8603b9983..a48d33ea6 100644 --- a/detect_secrets/core/potential_secret.py +++ b/detect_secrets/core/potential_secret.py @@ -18,8 +18,8 @@ def __init__( self, typ, filename, - lineno, secret, + lineno=0, is_secret=None, ): """ @@ -31,13 +31,13 @@ def __init__( :type filename: str :param filename: name of file that this secret was found + :type secret: str + :param secret: the actual secret identified + :type lineno: int :param lineno: location of secret, within filename. Merely used as a reference for easy triage. - :type secret: str - :param secret: the actual secret identified - :type is_secret: bool|None :param is_secret: whether or not the secret is a true- or false- positive """ @@ -87,7 +87,10 @@ def __ne__(self, other): def __hash__(self): return hash( - tuple([getattr(self, x) for x in self.fields_to_compare]), + tuple( + getattr(self, x) + for x in self.fields_to_compare + ), ) def __str__(self): # pragma: no cover diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index 7b24cda6c..fae288b2e 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -88,8 +88,8 @@ def _load_baseline_from_dict(cls, data): secret = PotentialSecret( item['type'], filename, - item['line_number'], secret='will be replaced', + lineno=item['line_number'], is_secret=item.get('is_secret'), ) secret.secret_hash = item['hashed_secret'] @@ -204,7 +204,7 @@ def get_secret(self, filename, secret, type_=None): if type_: # Optimized lookup, because we know the type of secret # (and therefore, its hash) - tmp_secret = PotentialSecret(type_, filename, 0, 'will be overriden') + tmp_secret = PotentialSecret(type_, filename, secret='will be overriden') tmp_secret.secret_hash = secret if tmp_secret in self.data[filename]: @@ -251,18 +251,18 @@ def _results_accumulator(self, filename): Caller is responsible for updating the dictionary with results of plugin analysis. """ - results = {} + file_results = {} for plugin in self.plugins: - yield results, plugin + yield file_results, plugin - if not results: + if not file_results: return if filename not in self.data: - self.data[filename] = results + self.data[filename] = file_results else: - self.data[filename].update(results) + self.data[filename].update(file_results) def _extract_secrets_from_file(self, f, filename): """Extract secrets from a given file object. diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index efef69fe4..64cf3d4ff 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -225,6 +225,11 @@ class PluginOptions(object): disable_flag_text='--no-basic-auth-scan', disable_help_text='Disables scanning for Basic Auth formatted URIs.', ), + PluginDescriptor( + classname='KeywordDetector', + disable_flag_text='--no-keyword-scan', + disable_help_text='Disables scanning for secret keywords.', + ), ] def __init__(self, parser): diff --git a/detect_secrets/plugins/basic_auth.py b/detect_secrets/plugins/basic_auth.py index 399fde2e4..772b0d9e6 100644 --- a/detect_secrets/plugins/basic_auth.py +++ b/detect_secrets/plugins/basic_auth.py @@ -22,8 +22,8 @@ def analyze_string(self, string, line_num, filename): secret = PotentialSecret( self.secret_type, filename, - line_num, result, + line_num, ) output[secret] = secret diff --git a/detect_secrets/plugins/core/initialize.py b/detect_secrets/plugins/core/initialize.py index ae9ea4c43..7a32b2971 100644 --- a/detect_secrets/plugins/core/initialize.py +++ b/detect_secrets/plugins/core/initialize.py @@ -8,6 +8,7 @@ from ..basic_auth import BasicAuthDetector # noqa: F401 from ..high_entropy_strings import Base64HighEntropyString # noqa: F401 from ..high_entropy_strings import HexHighEntropyString # noqa: F401 +from ..keyword import KeywordDetector # noqa: F401 from ..private_key import PrivateKeyDetector # noqa: F401 from detect_secrets.core.log import log diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py index ce489c40d..54d9110d2 100644 --- a/detect_secrets/plugins/high_entropy_strings.py +++ b/detect_secrets/plugins/high_entropy_strings.py @@ -102,7 +102,7 @@ def analyze_string(self, string, line_num, filename): for result in self.secret_generator(string): if self.is_sequential_string(result): continue - secret = PotentialSecret(self.secret_type, filename, line_num, result) + secret = PotentialSecret(self.secret_type, filename, result, line_num) output[secret] = secret return output diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py new file mode 100644 index 000000000..cfe723d53 --- /dev/null +++ b/detect_secrets/plugins/keyword.py @@ -0,0 +1,72 @@ +""" +This code was extracted in part from +https://github.com/PyCQA/bandit. Using similar heuristic logic, +we adapted it to fit our plugin infrastructure, to create an organized, +concerted effort in detecting all type of secrets in code. + +Copyright (c) 2014 Hewlett-Packard Development Company, L.P. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" +from __future__ import absolute_import + +from .base import BasePlugin +from detect_secrets.core.potential_secret import PotentialSecret +from detect_secrets.plugins.core.constants import WHITELIST_REGEX + + +BLACKLIST = ( + 'PASS =', + 'password', + 'passwd', + 'pwd', + 'secret', + 'secrete', + 'token', +) + + +class KeywordDetector(BasePlugin): + """This checks if blacklisted keywords + are present in the analyzed string. + """ + + secret_type = 'Password' + + def analyze_string(self, string, line_num, filename): + output = {} + + if WHITELIST_REGEX.search(string): + return output + + for identifier in self.secret_generator(string.lower()): + secret = PotentialSecret( + self.secret_type, + filename, + identifier, + line_num, + ) + output[secret] = secret + + return output + + def secret_generator(self, string): + for line in BLACKLIST: + if line in string: + yield line diff --git a/detect_secrets/plugins/private_key.py b/detect_secrets/plugins/private_key.py index 7596363f3..dd667c4db 100644 --- a/detect_secrets/plugins/private_key.py +++ b/detect_secrets/plugins/private_key.py @@ -1,7 +1,7 @@ """ This code was extracted in part from https://github.com/pre-commit/pre-commit-hooks. Using similar heuristic logic, -we adapt it to fit our plugin infrastructure, to create an organized, +we adapted it to fit our plugin infrastructure, to create an organized, concerted effort in detecting all type of secrets in code. Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys @@ -55,8 +55,8 @@ def analyze_string(self, string, line_num, filename): secret = PotentialSecret( self.secret_type, filename, - line_num, identifier, + line_num, ) output[secret] = secret diff --git a/test_data/files/file_with_no_secrets.py b/test_data/files/file_with_no_secrets.py index ea832f442..9f7372a8c 100644 --- a/test_data/files/file_with_no_secrets.py +++ b/test_data/files/file_with_no_secrets.py @@ -1,6 +1,5 @@ #!/usr/bin/python -# Will change this later. -SUPER_SECRET_VALUE = "this is just a long string, like a user facing error message" +REGULAR_VALUE = "this is just a long string, like a user facing error message" def main(): diff --git a/test_data/files/file_with_secrets.py b/test_data/files/file_with_secrets.py index eeaaa5707..802a99100 100644 --- a/test_data/files/file_with_secrets.py +++ b/test_data/files/file_with_secrets.py @@ -1,6 +1,6 @@ #!/usr/bin/python # Will change this later. -SUPER_SECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5' +SUPER_SEECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5' VERY_SECRET_TOO = 'f6CGV4aMM9zedoh3OUNbSakBymo7yplB' # pragma: whitelist secret diff --git a/test_data/short_files/first_line.py b/test_data/short_files/first_line.py index ae20b49c3..9b1d12e67 100644 --- a/test_data/short_files/first_line.py +++ b/test_data/short_files/first_line.py @@ -1,4 +1,4 @@ -secret = 'BEEF0123456789a' +seecret = 'BEEF0123456789a' skipped_sequential_false_positive = '0123456789a' print('second line') var = 'third line' diff --git a/test_data/short_files/last_line.ini b/test_data/short_files/last_line.ini index 7f0793d24..167e62e96 100644 --- a/test_data/short_files/last_line.ini +++ b/test_data/short_files/last_line.ini @@ -1,5 +1,5 @@ [some section] -secrets_for_no_one_to_find = +secreets_for_no_one_to_find = hunter2 - password123 + passsword123 BEEF0123456789a diff --git a/test_data/short_files/middle_line.yml b/test_data/short_files/middle_line.yml index db82aae55..e96ea3f81 100644 --- a/test_data/short_files/middle_line.yml +++ b/test_data/short_files/middle_line.yml @@ -1,6 +1,6 @@ deploy: user: aaronloo - password: + passhword: secure: thequickbrownfoxjumpsoverthelazydog on: - repo: Yelp/detect-secrets + repo: Yelp/detect-sechrets diff --git a/testing/factories.py b/testing/factories.py index d64101dda..67f65e409 100644 --- a/testing/factories.py +++ b/testing/factories.py @@ -4,11 +4,11 @@ from detect_secrets.core.secrets_collection import SecretsCollection -def potential_secret_factory(type_='type', filename='filename', lineno=1, secret='secret'): +def potential_secret_factory(type_='type', filename='filename', secret='secret', lineno=1): """This is only marginally better than creating PotentialSecret objects directly, because of default values. """ - return PotentialSecret(type_, filename, lineno, secret) + return PotentialSecret(type_, filename, secret, lineno) def secrets_collection_factory(secrets=None, plugins=(), exclude_regex=''): @@ -51,7 +51,7 @@ def _add_secret(collection, type_='type', secret='secret', filename='filename', tmp_secret = potential_secret_factory( type_=type_, filename=filename, - lineno=lineno, secret=secret, + lineno=lineno, ) collection.data[filename][tmp_secret] = tmp_secret diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 9a84b06c3..66f428fbe 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -178,7 +178,7 @@ def test_new_secret_line_old_file(self): results = get_secrets_not_in_baseline(new_findings, baseline) assert len(results.data['filename']) == 1 - secretA = PotentialSecret('type', 'filename', 1, 'secret1') + secretA = PotentialSecret('type', 'filename', 'secret1', 1) assert results.data['filename'][secretA].secret_hash == \ PotentialSecret.hash_secret('secret1') assert baseline.data == backup_baseline @@ -201,7 +201,7 @@ def test_rolled_creds(self): assert len(results.data['filename']) == 1 - secretA = PotentialSecret('type', 'filename', 1, 'secret_new') + secretA = PotentialSecret('type', 'filename', 'secret_new', 1) assert results.data['filename'][secretA].secret_hash == \ PotentialSecret.hash_secret('secret_new') assert baseline.data == backup_baseline diff --git a/tests/core/potential_secret_test.py b/tests/core/potential_secret_test.py index 3e3381a2c..75a86a134 100644 --- a/tests/core/potential_secret_test.py +++ b/tests/core/potential_secret_test.py @@ -9,7 +9,7 @@ class TestPotentialSecret(object): @pytest.mark.parametrize( - 'a,b,is_equal', + 'a, b, is_equal', [ ( potential_secret_factory(lineno=1), diff --git a/tests/core/secrets_collection_test.py b/tests/core/secrets_collection_test.py index 851c56a89..eb2d37eee 100644 --- a/tests/core/secrets_collection_test.py +++ b/tests/core/secrets_collection_test.py @@ -93,6 +93,28 @@ def test_success_multiple_plugins(self): line_numbers = [entry.lineno for entry in logic.data['filename']] assert set(line_numbers) == set([1, 2, 3]) + def test_reporting_of_password_plugin_secrets_if_reported_already(self): + logic = secrets_collection_factory( + secrets=[ + { + 'filename': 'filename', + 'lineno': 3, + }, + ], + plugins=( + MockPasswordPluginValue(), + MockPluginFileValue(), + ), + ) + + with mock_open('junk text here'): + logic.scan_file('filename') + + assert len(logic.data['filename']) == 3 + + line_numbers = [entry.lineno for entry in logic.data['filename']] + assert set(line_numbers) == set([2, 3]) + def test_unicode_decode_error(self, mock_log): logic = secrets_collection_factory( plugins=(MockPluginFileValue(),), @@ -203,12 +225,14 @@ def test_optional_type(self, filename, secret_hash, expected_value): ) def test_explicit_type_for_optimization(self, type_, is_none): with self._mock_secret_hash(): - logic = secrets_collection_factory(secrets=[ - { - 'filename': 'filename', - 'type_': 'type', - }, - ]) + logic = secrets_collection_factory( + secrets=[ + { + 'filename': 'filename', + 'type_': 'type', + }, + ], + ) assert (logic.get_secret('filename', 'secret_hash', type_) is None) == is_none @@ -343,7 +367,7 @@ class MockPluginFixedValue(MockBasePlugin): def analyze(self, f, filename): # We're not testing the plugin's ability to analyze secrets, so # it doesn't matter what we return - secret = PotentialSecret('mock fixed value type', filename, 1, 'asdf') + secret = PotentialSecret('mock fixed value type', filename, 'asdf', 1) return {secret: secret} @@ -354,8 +378,19 @@ class MockPluginFileValue(MockBasePlugin): def analyze(self, f, filename): # We're not testing the plugin's ability to analyze secrets, so # it doesn't matter what we return - secret = PotentialSecret('mock file value type', filename, 2, f.read().strip()) + secret = PotentialSecret('mock file value type', filename, f.read().strip(), 2) return {secret: secret} +class MockPasswordPluginValue(MockBasePlugin): + + secret_type = 'mock_plugin_file_value' + + def analyze(self, f, filename): + password_secret = PotentialSecret('Password', filename, f.read().strip(), 2) + return { + password_secret: password_secret, + } + + MockUnicodeDecodeError = UnicodeDecodeError('encoding type', b'subject', 0, 1, 'exception message') diff --git a/tests/core/usage_test.py b/tests/core/usage_test.py index 1cf679f2a..880049e00 100644 --- a/tests/core/usage_test.py +++ b/tests/core/usage_test.py @@ -32,6 +32,7 @@ def test_consolidates_output_basic(self): 'Base64HighEntropyString': { 'base64_limit': 4.5, }, + 'KeywordDetector': {}, 'PrivateKeyDetector': {}, } assert not hasattr(args, 'no_private_key_scan') diff --git a/tests/main_test.py b/tests/main_test.py index 27e0da729..8f936a4c6 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -86,6 +86,7 @@ def test_scan_string_basic(self, mock_baseline_initialize): Base64HighEntropyString: False (3.459) BasicAuthDetector : False HexHighEntropyString : True (3.459) + KeywordDetector : False PrivateKeyDetector : False """)[1:] @@ -102,6 +103,7 @@ def test_scan_string_cli_overrides_stdin(self): Base64HighEntropyString: False (2.585) BasicAuthDetector : False HexHighEntropyString : False (2.121) + KeywordDetector : False PrivateKeyDetector : False """)[1:] @@ -179,7 +181,7 @@ def test_old_baseline_ignored_with_update_flag( ( 'test_data/short_files/first_line.py', textwrap.dedent(""" - 1:secret = 'BEEF0123456789a' + 1:seecret = 'BEEF0123456789a' 2:skipped_sequential_false_positive = '0123456789a' 3:print('second line') 4:var = 'third line' @@ -190,19 +192,19 @@ def test_old_baseline_ignored_with_update_flag( textwrap.dedent(""" 1:deploy: 2: user: aaronloo - 3: password: + 3: passhword: 4: secure: thequickbrownfoxjumpsoverthelazydog 5: on: - 6: repo: Yelp/detect-secrets + 6: repo: Yelp/detect-sechrets """)[1:-1], ), ( 'test_data/short_files/last_line.ini', textwrap.dedent(""" 1:[some section] - 2:secrets_for_no_one_to_find = + 2:secreets_for_no_one_to_find = 3: hunter2 - 4: password123 + 4: passsword123 5: BEEF0123456789a """)[1:-1], ), diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py new file mode 100644 index 000000000..5d72e32f5 --- /dev/null +++ b/tests/plugins/keyword_test.py @@ -0,0 +1,30 @@ +from __future__ import absolute_import +from __future__ import unicode_literals + +import pytest + +from detect_secrets.plugins.keyword import KeywordDetector +from testing.mocks import mock_file_object + + +class TestKeywordDetector(object): + + @pytest.mark.parametrize( + 'file_content', + [ + ( + 'login_somewhere --http-password hopenobodyfindsthisone\n' + ), + ( + 'token = "noentropy"' + ), + ], + ) + def test_analyze(self, file_content): + logic = KeywordDetector() + + f = mock_file_object(file_content) + output = logic.analyze(f, 'mock_filename') + assert len(output) == 1 + for potential_secret in output: + assert 'mock_filename' == potential_secret.filename