Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Supporting multiword adhoc string scans #287

Merged
merged 1 commit into from
Mar 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions detect_secrets/plugins/high_entropy_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def secret_generator(self, string, *args, **kwargs):
def adhoc_scan(self, string):
# Since it's an individual string, it's just bad UX to require quotes
# around the expected secret.
with self.non_quoted_string_regex():
with self.non_quoted_string_regex(is_exact_match=False):
results = self.analyze_line(
string,
line_num=0,
Expand All @@ -153,24 +153,51 @@ def adhoc_scan(self, string):

# NOTE: Trailing space allows for nicer formatting
output = 'False' if not results else 'True '
if self.regex.search(string):
if results:
# We currently assume that there's at most one secret per line.
output += ' ({})'.format(
round(self.calculate_shannon_entropy(string), 3),
round(
self.calculate_shannon_entropy(
list(results.keys())[0].secret_value,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Maybe add a `# There can only be 1 secret in the results` comment.

),
3,
),
)
elif ' ' not in string:
# In the case where the string is a single word, and it
# matches the regex, we can show the entropy calculation,
# to assist investigation when it's unclear *why* something
# is not flagged.
#
# Conversely, if there are multiple words in the string,
# the entropy value would be confusing, since it's not clear
# which word the entropy is calculated for.
matches = self.regex.search(string)
if matches and matches.group(1) == string:
output += ' ({})'.format(
round(self.calculate_shannon_entropy(string), 3),
)

return output

@contextmanager
def non_quoted_string_regex(self):
def non_quoted_string_regex(self, is_exact_match=True):
"""For certain file formats, strings need not necessarily follow the
normal convention of being denoted by single or double quotes. In these
cases, we modify the regex accordingly.

Public, because detect_secrets.core.audit needs to reference it.

:param is_exact_match: True if you need to scan the string itself.
However, if the string is a line of text, and you want to see
whether a secret exists in this line, use False.
"""
old_regex = self.regex

regex_alternative = r'^([{}]+)$'.format(re.escape(self.charset))
regex_alternative = r'([{}]+)'.format(re.escape(self.charset))
if is_exact_match:
regex_alternative = r'^' + regex_alternative + r'$'

self.regex = re.compile(regex_alternative)

try:
Expand Down
5 changes: 5 additions & 0 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ def test_scan_with_exclude_args(self, mock_baseline_initialize):
'False (2.252)',
'False',
),
(
'key: 012345678ab',
'False',
'True (3.459)',
),
],
)
def test_scan_string_basic(
Expand Down