Yelp · KevinHock · Sep 20, 2018 · Sep 17, 2018 · Sep 18, 2018 · Sep 19, 2018
diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py
@@ -251,18 +251,48 @@ def _results_accumulator(self, filename):
                  Caller is responsible for updating the dictionary with
                  results of plugin analysis.
         """
-        results = {}
+        file_results = {}
 
         for plugin in self.plugins:
-            yield results, plugin
+            yield file_results, plugin
 
-        if not results:
+        if not file_results:
             return
 
+        self._remove_password_secrets_if_line_reported_already(
+            file_results,
+        )
+
         if filename not in self.data:
-            self.data[filename] = results
+            self.data[filename] = file_results
         else:
-            self.data[filename].update(results)
+            self.data[filename].update(file_results)
+
+    def _remove_password_secrets_if_line_reported_already(
+        self,
+        file_results,
+    ):
+        """
+        It is often the case that e.g.
+            SUPER_SECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJ'
+        is reported both by the PasswordDetector and another plugin.
+
+        To minimize diff size, we will simply not report findings from
+        the PasswordDetector if another plugin reports a secret on the
+        same line.
+        """
+        password_secrets = list()
+        line_numbers_of_other_plugins = set()
+
+        for secret in file_results:
+            if secret.type == 'Password':
+                password_secrets.append(secret)
+            else:
+                line_numbers_of_other_plugins.add(secret.lineno)
+
+        for password_secret in password_secrets:
+            if password_secret.lineno in line_numbers_of_other_plugins:
+                del file_results[password_secret]
 
     def _extract_secrets_from_file(self, f, filename):
         """Extract secrets from a given file object.

diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py
@@ -225,6 +225,11 @@ class PluginOptions(object):
             disable_flag_text='--no-basic-auth-scan',
             disable_help_text='Disables scanning for Basic Auth formatted URIs.',
         ),
+        PluginDescriptor(
+            classname='PasswordDetector',
+            disable_flag_text='--no-password-scan',
+            disable_help_text='Disables scanning for passwords.',
+        ),
     ]
 
     def __init__(self, parser):

diff --git a/detect_secrets/plugins/core/initialize.py b/detect_secrets/plugins/core/initialize.py
@@ -8,6 +8,7 @@
 from ..basic_auth import BasicAuthDetector                  # noqa: F401
 from ..high_entropy_strings import Base64HighEntropyString  # noqa: F401
 from ..high_entropy_strings import HexHighEntropyString     # noqa: F401
+from ..password import PasswordDetector                     # noqa: F401
 from ..private_key import PrivateKeyDetector                # noqa: F401
 from detect_secrets.core.log import log
 

diff --git a/detect_secrets/plugins/password.py b/detect_secrets/plugins/password.py
@@ -0,0 +1,68 @@
+"""
+This code was extracted in part from
+https://github.com/PyCQA/bandit. Using similar heuristic logic,
+we adapted it to fit our plugin infrastructure, to create an organized,
+concerted effort in detecting all type of secrets in code.
+
+Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+from __future__ import absolute_import
+
+from .base import BasePlugin
+from detect_secrets.core.potential_secret import PotentialSecret
+
+
+BLACKLIST = (
+    'PASS =',
+    'password',
+    'passwd',
+    'pwd',
+    'secret',
+    'secrete',
+    'token',
+)
+
+
+class PasswordDetector(BasePlugin):
+    """This checks for private keys by determining whether the blacklisted
+    lines are present in the analyzed string.
+    """
+
+    secret_type = 'Password'
+
+    def analyze_string(self, string, line_num, filename):
+        output = {}
+
+        for identifier in self.secret_generator(string.lower()):
+            secret = PotentialSecret(
+                self.secret_type,
+                filename,
+                line_num,
+                identifier,
+            )
+            output[secret] = secret
+
+        return output
+
+    def secret_generator(self, string):
+        for line in BLACKLIST:
+            if line in string:
+                yield line
diff --git a/detect_secrets/plugins/private_key.py b/detect_secrets/plugins/private_key.py
@@ -1,7 +1,7 @@
 """
 This code was extracted in part from
 https://github.com/pre-commit/pre-commit-hooks. Using similar heuristic logic,
-we adapt it to fit our plugin infrastructure, to create an organized,
+we adapted it to fit our plugin infrastructure, to create an organized,
 concerted effort in detecting all type of secrets in code.
 
 Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys

diff --git a/test_data/files/file_with_no_secrets.py b/test_data/files/file_with_no_secrets.py
@@ -1,6 +1,5 @@
 #!/usr/bin/python
-# Will change this later.
-SUPER_SECRET_VALUE = "this is just a long string, like a user facing error message"
+REGULAR_VALUE = "this is just a long string, like a user facing error message"
 
 
 def main():

diff --git a/test_data/short_files/last_line.ini b/test_data/short_files/last_line.ini
@@ -1,5 +1,5 @@
 [some section]
-secrets_for_no_one_to_find =
+secreets_for_no_one_to_find =
     hunter2
-    password123
+    passsword123
     BEEF0123456789a
diff --git a/test_data/short_files/middle_line.yml b/test_data/short_files/middle_line.yml
@@ -1,6 +1,6 @@
 deploy:
     user: aaronloo
-    password:
+    passhword:
         secure: thequickbrownfoxjumpsoverthelazydog
     on:
-        repo: Yelp/detect-secrets
+        repo: Yelp/detect-sechrets
diff --git a/tests/core/secrets_collection_test.py b/tests/core/secrets_collection_test.py
@@ -93,6 +93,30 @@ def test_success_multiple_plugins(self):
         line_numbers = [entry.lineno for entry in logic.data['filename']]
         assert set(line_numbers) == set([1, 2, 3])
 
+    def test_removal_of_password_plugin_secrets_if_reported_already(self):
+        logic = secrets_collection_factory(
+            secrets=[
+                {
+                    'filename': 'filename',
+                    'lineno': 3,
+                },
+            ],
+            plugins=(
+                MockPasswordPluginValue(),
+                MockPluginFileValue(),
+            ),
+        )
+
+        with mock_open('junk text here'):
+            logic.scan_file('filename')
+
+        # One from only the MockPluginFileValue, and one from existing secret
+        # This is because the MockPasswordPluginValue was not placed into data
+        assert len(logic.data['filename']) == 2
+
+        line_numbers = [entry.lineno for entry in logic.data['filename']]
+        assert set(line_numbers) == set([2, 3])
+
     def test_unicode_decode_error(self, mock_log):
         logic = secrets_collection_factory(
             plugins=(MockPluginFileValue(),),
@@ -203,12 +227,14 @@ def test_optional_type(self, filename, secret_hash, expected_value):
     )
     def test_explicit_type_for_optimization(self, type_, is_none):
         with self._mock_secret_hash():
-            logic = secrets_collection_factory(secrets=[
-                {
-                    'filename': 'filename',
-                    'type_': 'type',
-                },
-            ])
+            logic = secrets_collection_factory(
+                secrets=[
+                    {
+                        'filename': 'filename',
+                        'type_': 'type',
+                    },
+                ],
+            )
 
         assert (logic.get_secret('filename', 'secret_hash', type_) is None) == is_none
 
@@ -358,4 +384,15 @@ def analyze(self, f, filename):
         return {secret: secret}
 
 
+class MockPasswordPluginValue(MockBasePlugin):
+
+    secret_type = 'mock_plugin_file_value'
+
+    def analyze(self, f, filename):
+        password_secret = PotentialSecret('Password', filename, 2, f.read().strip())
+        return {
+            password_secret: password_secret,
+        }
+
+
 MockUnicodeDecodeError = UnicodeDecodeError('encoding type', b'subject', 0, 1, 'exception message')
diff --git a/tests/core/usage_test.py b/tests/core/usage_test.py
@@ -32,6 +32,7 @@ def test_consolidates_output_basic(self):
             'Base64HighEntropyString': {
                 'base64_limit': 4.5,
             },
+            'PasswordDetector': {},
             'PrivateKeyDetector': {},
         }
         assert not hasattr(args, 'no_private_key_scan')

diff --git a/tests/main_test.py b/tests/main_test.py
@@ -86,6 +86,7 @@ def test_scan_string_basic(self, mock_baseline_initialize):
                 Base64HighEntropyString: False (3.459)
                 BasicAuthDetector      : False
                 HexHighEntropyString   : True  (3.459)
+                PasswordDetector       : False
                 PrivateKeyDetector     : False
             """)[1:]
 
@@ -102,6 +103,7 @@ def test_scan_string_cli_overrides_stdin(self):
                 Base64HighEntropyString: False (2.585)
                 BasicAuthDetector      : False
                 HexHighEntropyString   : False (2.121)
+                PasswordDetector       : False
                 PrivateKeyDetector     : False
             """)[1:]
 
@@ -190,19 +192,19 @@ def test_old_baseline_ignored_with_update_flag(
                 textwrap.dedent("""
                     1:deploy:
                     2:    user: aaronloo
-                    3:    password:
+                    3:    passhword:
                     4:        secure: thequickbrownfoxjumpsoverthelazydog
                     5:    on:
-                    6:        repo: Yelp/detect-secrets
+                    6:        repo: Yelp/detect-sechrets
                 """)[1:-1],
             ),
             (
                 'test_data/short_files/last_line.ini',
                 textwrap.dedent("""
                     1:[some section]
-                    2:secrets_for_no_one_to_find =
+                    2:secreets_for_no_one_to_find =
                     3:    hunter2
-                    4:    password123
+                    4:    passsword123
                     5:    BEEF0123456789a
                 """)[1:-1],
             ),

diff --git a/tests/plugins/password_test.py b/tests/plugins/password_test.py
@@ -0,0 +1,30 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import pytest
+
+from detect_secrets.plugins.password import PasswordDetector
+from testing.mocks import mock_file_object
+
+
+class TestPasswordDetector(object):
+
+    @pytest.mark.parametrize(
+        'file_content',
+        [
+            (
+                'login_somewhere --http-password hopenobodyfindsthisone\n'
+            ),
+            (
+                'token = "noentropy"'
+            ),
+        ],
+    )
+    def test_analyze(self, file_content):
+        logic = PasswordDetector()
+
+        f = mock_file_object(file_content)
+        output = logic.analyze(f, 'mock_filename')
+        assert len(output) == 1
+        for potential_secret in output:
+            assert 'mock_filename' == potential_secret.filename