From d686ef5c1fc22d54eefddc634cf38f406726c1b9 Mon Sep 17 00:00:00 2001
From: Aaron Loo <aaronloo@yelp.com>
Date: Thu, 5 Jul 2018 10:11:16 -0700
Subject: [PATCH] adding cli functionality to check strings in an adhoc manner

---
 detect_secrets/core/usage.py                  | 17 +++++++-
 detect_secrets/main.py                        | 39 ++++++++++++++++---
 detect_secrets/plugins/base.py                | 23 +++++++++++
 .../plugins/high_entropy_strings.py           | 15 +++++++
 tests/main_test.py                            | 28 +++++++++++++
 5 files changed, 115 insertions(+), 7 deletions(-)

diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py
index b94c9e7b3..8d4f8692a 100644
--- a/detect_secrets/core/usage.py
+++ b/detect_secrets/core/usage.py
@@ -83,7 +83,9 @@ def __init__(self, subparser):
         )
 
     def add_arguments(self):
-        self._add_initialize_baseline_argument()
+        self._add_initialize_baseline_argument()\
+            ._add_adhoc_scanning_argument()
+
         PluginOptions(self.parser).add_arguments()
 
         return self
@@ -118,6 +120,19 @@ def _add_initialize_baseline_argument(self):
 
         return self
 
+    def _add_adhoc_scanning_argument(self):
+        self.parser.add_argument(
+            '--string',
+            nargs='?',  # the value is optional
+            const=True,  # bare --string => True; main() then reads stdin
+            help=(
+                'Scans an individual string, and displays configured '
+                'plugins\' verdict.'
+            ),
+        )
+
+        return self  # returned so that calls can be chained
+
 
 class AuditOptions(object):
 
diff --git a/detect_secrets/main.py b/detect_secrets/main.py
index 8449b1c00..4afc53502 100644
--- a/detect_secrets/main.py
+++ b/detect_secrets/main.py
@@ -26,9 +26,21 @@ def main(argv=None):
         log.set_debug_level(args.verbose)
 
     if args.action == 'scan':
+        # Plugins are *always* rescanned with fresh settings, because
+        # we want to get the latest updates.
+        plugins = initialize.from_parser_builder(args.plugins)
+
+        if args.string:
+            line = args.string
+            if isinstance(args.string, bool):
+                line = sys.stdin.read().splitlines()[0]
+
+            _scan_string(line, plugins)
+            return 0
+
         print(
             json.dumps(
-                _perform_scan(args),
+                _perform_scan(args, plugins),
                 indent=2,
                 sort_keys=True,
             ),
@@ -40,12 +52,27 @@ def main(argv=None):
     return 0
 
 
-def _perform_scan(args):
-    old_baseline = _get_existing_baseline(args.import_filename)
+def _scan_string(line, plugins):
+    longest_plugin_name_length = max(  # raises ValueError if plugins is empty
+        map(
+            lambda x: len(x.__class__.__name__),
+            plugins,
+        ),
+    )
 
-    # Plugins are *always* rescanned with fresh settings, because
-    # we want to get the latest updates.
-    plugins = initialize.from_parser_builder(args.plugins)
+    output = [  # "<name>: <verdict>" rows, names padded to equal width
+        ('{:%d}: {}' % longest_plugin_name_length).format(
+            plugin.__class__.__name__,
+            plugin.adhoc_scan(line),
+        )
+        for plugin in plugins
+    ]
+
+    print('\n'.join(sorted(output)))  # rows in alphabetical order
+
+
+def _perform_scan(args, plugins):
+    old_baseline = _get_existing_baseline(args.import_filename)
 
     # Favors --exclude argument over existing baseline's regex (if exists)
     if args.exclude:
diff --git a/detect_secrets/plugins/base.py b/detect_secrets/plugins/base.py
index afae65fb5..5f812a66f 100644
--- a/detect_secrets/plugins/base.py
+++ b/detect_secrets/plugins/base.py
@@ -50,6 +50,29 @@ def secret_generator(self, string):  # pragma: no cover
         """
         pass
 
+    def adhoc_scan(self, string):
+        """To support faster discovery, we want the ability to conveniently
+        check what different plugins say regarding a single line/secret. This
+        supports that.
+
+        This is very similar to self.analyze_string, but gives subclasses the
+        flexibility to add any other notable info (rather than just a
+        PotentialSecret type). e.g. HighEntropyStrings appends the Shannon
+        entropy of the string to its verdict.
+
+        :type string: str
+        :param string: the string to analyze
+        :rtype: str
+        :returns: the verdict ('True' or 'False'), which the caller displays
+            in the format <classname>: <returned-value>
+        """
+        # TODO: Handle multiple secrets on single line.
+        results = self.analyze_string(string, 0, 'does_not_matter')
+        if not results:
+            return 'False'
+        else:
+            return 'True'
+
     @property
     def __dict__(self):
         return {
diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py
index 8762d3ddd..d13b56f37 100644
--- a/detect_secrets/plugins/high_entropy_strings.py
+++ b/detect_secrets/plugins/high_entropy_strings.py
@@ -101,6 +101,21 @@ def secret_generator(self, string):
             if entropy_value > self.entropy_limit:
                 yield result
 
+    def adhoc_scan(self, string):
+        # Since it's an individual string typed by the user, it's just bad UX
+        # to require quotes around the expected secret.
+        with self.non_quoted_string_regex():
+            results = self.analyze_string(string, 0, 'does_not_matter')
+
+            # NOTE: Trailing space pads 'True' to the width of 'False'
+            output = 'False' if not results else 'True '
+            if self.regex.search(string):  # shown for either verdict
+                output += ' ({})'.format(
+                    round(self.calculate_shannon_entropy(string), 3),
+                )
+
+            return output
+
     @contextmanager
     def non_quoted_string_regex(self, strict=True):
         """For certain file formats, strings need not necessarily follow the
diff --git a/tests/main_test.py b/tests/main_test.py
index a2adce6cd..6412497d2 100644
--- a/tests/main_test.py
+++ b/tests/main_test.py
@@ -73,6 +73,34 @@ def test_scan_with_excludes_flag(self, mock_baseline_initialize):
             '.',
         )
 
+    def test_scan_string_basic(self, mock_baseline_initialize):
+        with mock_stdin(
+            '0123456789a',  # no --string value, so this is what gets scanned
+        ), mock_printer(
+            main_module,
+        ) as printer_shim:
+            assert main('scan --string'.split()) == 0
+            assert printer_shim.message == textwrap.dedent("""
+                Base64HighEntropyString: False (3.459)
+                HexHighEntropyString   : True  (3.459)
+                PrivateKeyDetector     : False
+            """)[1:]
+
+        mock_baseline_initialize.assert_not_called()
+
+    def test_scan_string_cli_overrides_stdin(self):
+        with mock_stdin(
+            '0123456789a',  # must be ignored: --string carries its own value
+        ), mock_printer(
+            main_module,
+        ) as printer_shim:
+            assert main('scan --string 012345'.split()) == 0
+            assert printer_shim.message == textwrap.dedent("""
+                Base64HighEntropyString: False (2.585)
+                HexHighEntropyString   : False (2.121)
+                PrivateKeyDetector     : False
+            """)[1:]
+
     def test_reads_from_stdin(self, mock_merge_baseline):
         with mock_stdin(json.dumps({'key': 'value'})):
             assert main(['scan']) == 0