From d686ef5c1fc22d54eefddc634cf38f406726c1b9 Mon Sep 17 00:00:00 2001
From: Aaron Loo
Date: Thu, 5 Jul 2018 10:11:16 -0700
Subject: [PATCH] adding cli functionality to check strings in an adhoc manner

---
 detect_secrets/core/usage.py                  | 17 +++++-
 detect_secrets/main.py                        | 53 ++++++++++++++++---
 detect_secrets/plugins/base.py                | 23 ++++++++
 .../plugins/high_entropy_strings.py           | 15 ++++++
 tests/main_test.py                            | 28 ++++++++++
 5 files changed, 129 insertions(+), 7 deletions(-)

diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py
index b94c9e7b3..8d4f8692a 100644
--- a/detect_secrets/core/usage.py
+++ b/detect_secrets/core/usage.py
@@ -83,7 +83,9 @@ def __init__(self, subparser):
         )
 
     def add_arguments(self):
-        self._add_initialize_baseline_argument()
+        self._add_initialize_baseline_argument()\
+            ._add_adhoc_scanning_argument()
+
         PluginOptions(self.parser).add_arguments()
 
         return self
@@ -118,6 +120,19 @@ def _add_initialize_baseline_argument(self):
 
         return self
 
+    def _add_adhoc_scanning_argument(self):
+        self.parser.add_argument(
+            '--string',
+            nargs='?',
+            const=True,
+            help=(
+                'Scans an individual string, and displays configured '
+                'plugins\' verdict.'
+            ),
+        )
+
+        return self
+
 
 class AuditOptions(object):
 
diff --git a/detect_secrets/main.py b/detect_secrets/main.py
index 8449b1c00..4afc53502 100644
--- a/detect_secrets/main.py
+++ b/detect_secrets/main.py
@@ -26,9 +26,24 @@ def main(argv=None):
     log.set_debug_level(args.verbose)
 
     if args.action == 'scan':
+        # Plugins are *always* rescanned with fresh settings, because
+        # we want to get the latest updates.
+        plugins = initialize.from_parser_builder(args.plugins)
+
+        if args.string:
+            line = args.string
+            if isinstance(args.string, bool):
+                # Bare `--string`: take the first line of stdin. Empty
+                # stdin has no first line, so scan '' instead of crashing.
+                stdin_lines = sys.stdin.read().splitlines()
+                line = stdin_lines[0] if stdin_lines else ''
+
+            _scan_string(line, plugins)
+            return 0
+
         print(
             json.dumps(
-                _perform_scan(args),
+                _perform_scan(args, plugins),
                 indent=2,
                 sort_keys=True,
             ),
@@ -40,12 +55,38 @@ def main(argv=None):
     return 0
 
 
-def _perform_scan(args):
-    old_baseline = _get_existing_baseline(args.import_filename)
+def _scan_string(line, plugins):
+    """Prints every plugin's adhoc verdict for a single string.
+
+    :type line: str
+    :param line: the string to scan
+    :param plugins: sequence of initialized plugin objects
+    """
+    if not plugins:
+        # max() raises ValueError on an empty sequence, and there is
+        # nothing to report anyway.
+        return
+
+    longest_plugin_name_length = max(
+        map(
+            lambda x: len(x.__class__.__name__),
+            plugins,
+        ),
+    )
 
-    # Plugins are *always* rescanned with fresh settings, because
-    # we want to get the latest updates.
-    plugins = initialize.from_parser_builder(args.plugins)
+    output = [
+        ('{:%d}: {}' % longest_plugin_name_length).format(
+            plugin.__class__.__name__,
+            plugin.adhoc_scan(line),
+        )
+        for plugin in plugins
+    ]
+
+    print('\n'.join(sorted(output)))
+
+
+def _perform_scan(args, plugins):
+    old_baseline = _get_existing_baseline(args.import_filename)
 
     # Favors --exclude argument over existing baseline's regex (if exists)
     if args.exclude:
diff --git a/detect_secrets/plugins/base.py b/detect_secrets/plugins/base.py
index afae65fb5..5f812a66f 100644
--- a/detect_secrets/plugins/base.py
+++ b/detect_secrets/plugins/base.py
@@ -50,6 +50,29 @@ def secret_generator(self, string):  # pragma: no cover
         """
         pass
 
+    def adhoc_scan(self, string):
+        """To support faster discovery, we want the ability to conveniently
+        check what different plugins say regarding a single line/secret. This
+        supports that.
+
+        This is very similar to self.analyze_string, but allows the flexibility
+        for subclasses to add any other notable info (rather than just a
+        PotentialSecret type). e.g. HighEntropyStrings adds their Shannon
+        entropy in which they made their decision.
+
+        :type string: str
+        :param string: the string to analyze
+        :rtype: str
+        :returns: descriptive string that fits the format
+            <classname>: <returned-value>
+        """
+        # TODO: Handle multiple secrets on single line.
+        results = self.analyze_string(string, 0, 'does_not_matter')
+        if not results:
+            return 'False'
+        else:
+            return 'True'
+
     @property
     def __dict__(self):
         return {
diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py
index 8762d3ddd..d13b56f37 100644
--- a/detect_secrets/plugins/high_entropy_strings.py
+++ b/detect_secrets/plugins/high_entropy_strings.py
@@ -101,6 +101,21 @@ def secret_generator(self, string):
             if entropy_value > self.entropy_limit:
                 yield result
 
+    def adhoc_scan(self, string):
+        # Since it's an individual string, it's just bad UX to require quotes
+        # around the expected secret.
+        with self.non_quoted_string_regex():
+            results = self.analyze_string(string, 0, 'does_not_matter')
+
+        # NOTE: Trailing space allows for nicer formatting
+        output = 'False' if not results else 'True '
+        if self.regex.search(string):
+            output += ' ({})'.format(
+                round(self.calculate_shannon_entropy(string), 3),
+            )
+
+        return output
+
     @contextmanager
     def non_quoted_string_regex(self, strict=True):
         """For certain file formats, strings need not necessarily follow the
diff --git a/tests/main_test.py b/tests/main_test.py
index a2adce6cd..6412497d2 100644
--- a/tests/main_test.py
+++ b/tests/main_test.py
@@ -73,6 +73,34 @@ def test_scan_with_excludes_flag(self, mock_baseline_initialize):
             '.',
         )
 
+    def test_scan_string_basic(self, mock_baseline_initialize):
+        with mock_stdin(
+            '0123456789a',
+        ), mock_printer(
+            main_module,
+        ) as printer_shim:
+            assert main('scan --string'.split()) == 0
+            assert printer_shim.message == textwrap.dedent("""
+                Base64HighEntropyString: False (3.459)
+                HexHighEntropyString   : True  (3.459)
+                PrivateKeyDetector     : False
+            """)[1:]
+
+        mock_baseline_initialize.assert_not_called()
+
+    def test_scan_string_cli_overrides_stdin(self):
+        with mock_stdin(
+            '0123456789a',
+        ), mock_printer(
+            main_module,
+        ) as printer_shim:
+            assert main('scan --string 012345'.split()) == 0
+            assert printer_shim.message == textwrap.dedent("""
+                Base64HighEntropyString: False (2.585)
+                HexHighEntropyString   : False (2.121)
+                PrivateKeyDetector     : False
+            """)[1:]
+
     def test_reads_from_stdin(self, mock_merge_baseline):
         with mock_stdin(json.dumps({'key': 'value'})):
             assert main(['scan']) == 0