Adding CLI functionality to check strings in an ad hoc manner

Yelp · Sep 6, 2018 · e28021e · e28021e
1 parent be0614b
commit e28021e
Show file tree

Hide file tree

Showing 5 changed files with 122 additions and 14 deletions.
diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py
@@ -83,7 +83,9 @@ def __init__(self, subparser):
         )
 
     def add_arguments(self):
+        """Calls ``_add_initialize_baseline_argument`` and
+        ``_add_adhoc_scanning_argument`` on this object, then lets
+        ``PluginOptions`` add its own arguments to the same parser.
+
+        :returns: self, so that calls can be chained
+        """
-        self._add_initialize_baseline_argument()
+        # The ``_add_*`` helpers return self, which is what allows them to
+        # be chained here.
+        self._add_initialize_baseline_argument()\
+            ._add_adhoc_scanning_argument()
+
         PluginOptions(self.parser).add_arguments()
 
         return self
@@ -124,6 +126,18 @@ def _add_initialize_baseline_argument(self):
 
         return self
 
+    def _add_adhoc_scanning_argument(self):
+        """Registers the optional ``--string`` flag on ``self.parser``.
+
+        With argparse semantics, ``nargs='?'`` makes the value optional:
+        ``--string VALUE`` stores VALUE, while a bare ``--string`` stores
+        ``const`` (True).
+
+        :returns: self, so that calls can be chained
+        """
+        self.parser.add_argument(
+            '--string',
+            nargs='?',
+            const=True,
+            help=(
+                'Scans an individual string, and displays configured '
+                'plugins\' verdict.'
+            ),
+        )
+        return self
+
 
 class AuditOptions(object):
 

diff --git a/detect_secrets/main.py b/detect_secrets/main.py
@@ -26,29 +26,56 @@ def main(argv=None):
         log.set_debug_level(args.verbose)
 
     if args.action == 'scan':
-        output = json.dumps(
-            _perform_scan(args),
-            indent=2,
-            sort_keys=True,
-        )
+        # Plugins are *always* rescanned with fresh settings, because
+        # we want to get the latest updates.
+        plugins = initialize.from_parser_builder(args.plugins)
+        if args.string:
+            line = args.string
+
+            if isinstance(args.string, bool):
+                line = sys.stdin.read().splitlines()[0]
+
+            _scan_string(line, plugins)
 
-        if args.import_filename:
-            _write_to_file(args.import_filename[0], output)
         else:
-            print(output)
+            output = json.dumps(
+                _perform_scan(args, plugins),
+                indent=2,
+                sort_keys=True,
+            )
+
+            if args.import_filename:
+                _write_to_file(args.import_filename[0], output)
+            else:
+                print(output)
 
     elif args.action == 'audit':
         audit.audit_baseline(args.filename[0])
 
     return 0
 
 
-def _perform_scan(args):
-    old_baseline = _get_existing_baseline(args.import_filename)
+def _scan_string(line, plugins):
+    """Prints every plugin's ad hoc verdict for a single string.
+
+    One output line is produced per plugin, in the form
+    ``<plugin class name>: <plugin.adhoc_scan(line)>``. Class names are
+    padded to the width of the longest one so that the colons line up, and
+    the lines are sorted before being printed.
+
+    :type line: str
+    :param line: the string handed to each plugin's ``adhoc_scan``
+
+    :param plugins: plugin instances providing ``adhoc_scan``. They are
+        iterated twice, and ``max`` raises ValueError if there are none.
+    """
+    longest_plugin_name_length = max(
+        map(
+            lambda x: len(x.__class__.__name__),
+            plugins,
+        ),
+    )
 
-    # Plugins are *always* rescanned with fresh settings, because
-    # we want to get the latest updates.
-    plugins = initialize.from_parser_builder(args.plugins)
+    # The %-substitution builds a template such as '{:23}: {}', which pads
+    # the class name to the width computed above.
+    output = [
+        ('{:%d}: {}' % longest_plugin_name_length).format(
+            plugin.__class__.__name__,
+            plugin.adhoc_scan(line),
+        )
+        for plugin in plugins
+    ]
+
+    print('\n'.join(sorted(output)))
+
+
+def _perform_scan(args, plugins):
+    old_baseline = _get_existing_baseline(args.import_filename)
 
     # Favors --exclude argument over existing baseline's regex (if exists)
     if args.exclude:

diff --git a/detect_secrets/plugins/base.py b/detect_secrets/plugins/base.py
@@ -50,6 +50,30 @@ def secret_generator(self, string):
         """
         raise NotImplementedError
 
+    def adhoc_scan(self, string):
+        """To support faster discovery, we want the ability to conveniently
+        check what different plugins say regarding a single line/secret. This
+        supports that.
+
+        This is very similar to self.analyze_string, but gives subclasses
+        the flexibility to add any other notable info (rather than just a
+        PotentialSecret type). e.g. HighEntropyStrings adds the Shannon
+        entropy on which it based its decision.
+
+        :type string: str
+        :param string: the string to analyze
+
+        :rtype: str
+        :returns: in this base implementation, 'True' if analyze_string
+            returned a truthy result and 'False' otherwise. The caller
+            displays it in the format
+            <classname>: <returned-value>
+        """
+        # TODO: Handle multiple secrets on single line.
+        # The second and third arguments are placeholders for an ad hoc scan
+        # (presumably a line number and a filename -- confirm against
+        # analyze_string's signature).
+        results = self.analyze_string(string, 0, 'does_not_matter')
+        if not results:
+            return 'False'
+        else:
+            return 'True'
+
     @property
     def __dict__(self):
         return {

diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py
@@ -119,6 +119,21 @@ def secret_generator(self, string):
             if entropy_value > self.entropy_limit:
                 yield result
 
+    def adhoc_scan(self, string):
+        """Reports this plugin's verdict on a single string, together with
+        the string's Shannon entropy when ``self.regex`` matches it.
+
+        :type string: str
+        :param string: the string to analyze
+
+        :rtype: str
+        :returns: 'False' or 'True ' depending on whether analyze_string
+            returned anything; when self.regex matches the string, its
+            Shannon entropy (rounded to 3 decimal places) is appended in
+            parentheses, e.g. 'True  (3.459)'
+        """
+        # Since it's an individual string, it's just bad UX to require quotes
+        # around the expected secret.
+        with self.non_quoted_string_regex():
+            results = self.analyze_string(string, 0, 'does_not_matter')
+
+            # NOTE: Trailing space allows for nicer formatting
+            output = 'False' if not results else 'True '
+            # The entropy is computed over the whole input string, not only
+            # over the part that self.regex matched.
+            if self.regex.search(string):
+                output += ' ({})'.format(
+                    round(self.calculate_shannon_entropy(string), 3),
+                )
+
+            return output
+
     @contextmanager
     def non_quoted_string_regex(self, strict=True):
         """For certain file formats, strings need not necessarily follow the

diff --git a/tests/main_test.py b/tests/main_test.py
@@ -75,6 +75,34 @@ def test_scan_with_excludes_flag(self, mock_baseline_initialize):
             False,
         )
 
+    def test_scan_string_basic(self, mock_baseline_initialize):
+        # A bare `--string` (no value) takes the string to scan from the
+        # mocked stdin.
+        with mock_stdin(
+            '012345678ab',
+        ), mock_printer(
+            main_module,
+        ) as printer_shim:
+            assert main('scan --string'.split()) == 0
+            # The [1:] drops the leading newline of the triple-quoted
+            # literal before comparing against the printed output.
+            assert printer_shim.message == textwrap.dedent("""
+                Base64HighEntropyString: False (3.459)
+                HexHighEntropyString   : True  (3.459)
+                PrivateKeyDetector     : False
+            """)[1:]
+
+        # Ad hoc scanning must not trigger whatever
+        # `mock_baseline_initialize` patches.
+        mock_baseline_initialize.assert_not_called()
+
+    def test_scan_string_cli_overrides_stdin(self):
+        # stdin still has content here, but the value given on the command
+        # line ('012345') is the one that gets scanned.
+        with mock_stdin(
+            '012345678ab',
+        ), mock_printer(
+            main_module,
+        ) as printer_shim:
+            assert main('scan --string 012345'.split()) == 0
+            # The [1:] drops the leading newline of the triple-quoted
+            # literal before comparing against the printed output.
+            assert printer_shim.message == textwrap.dedent("""
+                Base64HighEntropyString: False (2.585)
+                HexHighEntropyString   : False (2.121)
+                PrivateKeyDetector     : False
+            """)[1:]
+
     def test_scan_with_all_files_flag(self, mock_baseline_initialize):
         with mock_stdin():
             assert main('scan --all-files'.split()) == 0