supporting multiword adhoc string scans

Yelp · Mar 25, 2020 · 849ebaa · 849ebaa
1 parent 79dad49
commit 849ebaa
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 5 deletions.
diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py
@@ -144,7 +144,7 @@ def secret_generator(self, string, *args, **kwargs):
     def adhoc_scan(self, string):
         # Since it's an individual string, it's just bad UX to require quotes
         # around the expected secret.
-        with self.non_quoted_string_regex():
+        with self.non_quoted_string_regex(is_exact_match=False):
             results = self.analyze_line(
                 string,
                 line_num=0,
@@ -153,24 +153,50 @@ def adhoc_scan(self, string):
 
             # NOTE: Trailing space allows for nicer formatting
             output = 'False' if not results else 'True '
-            if self.regex.search(string):
+            if results:
                 output += ' ({})'.format(
-                    round(self.calculate_shannon_entropy(string), 3),
+                    round(
+                        self.calculate_shannon_entropy(
+                            list(results.keys())[0].secret_value,
+                        ),
+                        3,
+                    ),
                 )
+            elif ' ' not in string:
+                # In the case where the string is a single word, and it
+                # matches the regex, we can show the entropy calculation,
+                # to assist investigation when it's unclear *why* something
+                # is not flagged.
+                #
+                # Conversely, if there are multiple words in the string,
+                # the entropy value would be confusing, since it's not clear
+                # which word the entropy is calculated for.
+                matches = self.regex.search(string)
+                if matches and matches.group(1) == string:
+                    output += ' ({})'.format(
+                        round(self.calculate_shannon_entropy(string), 3),
+                    )
 
             return output
 
     @contextmanager
-    def non_quoted_string_regex(self):
+    def non_quoted_string_regex(self, is_exact_match=True):
         """For certain file formats, strings need not necessarily follow the
         normal convention of being denoted by single or double quotes. In these
         cases, we modify the regex accordingly.
 
         Public, because detect_secrets.core.audit needs to reference it.
+
+        :param is_exact_match: True if the entire string must match as a
+            single candidate secret. Use False if the string is a line of
+            text and you want to find a secret anywhere within that line.
         """
         old_regex = self.regex
 
-        regex_alternative = r'^([{}]+)$'.format(re.escape(self.charset))
+        regex_alternative = r'([{}]+)'.format(re.escape(self.charset))
+        if is_exact_match:
+            regex_alternative = r'^' + regex_alternative + r'$'
+
         self.regex = re.compile(regex_alternative)
 
         try:

diff --git a/tests/main_test.py b/tests/main_test.py
@@ -136,6 +136,11 @@ def test_scan_with_exclude_args(self, mock_baseline_initialize):
                 'False (2.252)',
                 'False',
             ),
+            (
+                'key: 012345678ab',
+                'False',
+                'True  (3.459)',
+            ),
         ],
     )
     def test_scan_string_basic(