Skip to content

Commit

Permalink
🎭 Improve the performance of line regexes
Browse files Browse the repository at this point in the history
This fixes issue #244.
Only check the line for allowlist regexes or --exclude-lines if a secret was found.
  • Loading branch information
KevinHock committed Apr 7, 2020
1 parent 5ca1d7e commit 74d3787
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 25 deletions.
3 changes: 3 additions & 0 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ def scan_diff(
at incremental differences, rather than re-scanning the codebase every time.
This function supports this, and adds information to self.data.
Note that this is only called by detect-secrets-server.
:type diff: str
:param diff: diff string.
e.g. The output of `git diff <fileA> <fileB>`
Expand Down Expand Up @@ -338,6 +340,7 @@ def _extract_secrets_from_patch(self, f, plugin, filename):
"""Extract secrets from a given patch file object.
Note that we only want to capture incoming secrets (so added lines).
Note that this is only called by detect-secrets-server.
:type f: unidiff.patch.PatchedFile
:type plugin: detect_secrets.plugins.base.BasePlugin
Expand Down
46 changes: 26 additions & 20 deletions detect_secrets/plugins/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,15 @@ def __init__(
:param false_positive_heuristics: List of fp-heuristic functions
applicable to this plugin
"""
self.exclude_lines_regex = None
if exclude_lines_regex:
self.exclude_lines_regex = re.compile(exclude_lines_regex)
self.exclude_lines_regex = (
re.compile(exclude_lines_regex)
if exclude_lines_regex
else None
)

self.should_verify = should_verify

self.false_positive_heuristics = (
false_positive_heuristics
if false_positive_heuristics
else []
)
self.false_positive_heuristics = false_positive_heuristics or []

@classproperty
def disable_flag_text(cls):
Expand All @@ -101,6 +99,19 @@ def disable_flag_text(cls):
def default_options(cls):
return {}

def _is_excluded_line(self, line):
    """Tell whether ``line`` should be ignored when reporting secrets.

    A line is excluded when any pattern in ``ALLOWLIST_REGEXES`` is found
    in it, or when this plugin has an ``exclude_lines_regex`` and that
    pattern is found in it.

    :type line: str
    :param line: the line of text to check.

    :returns: a truthy value if the line is excluded, a falsy value
        otherwise. Note that the result is not necessarily a ``bool``:
        it may be ``True``, a regex match object, or a falsy value such
        as ``None``.
    """
    # Allowlist patterns are consulted first; the first hit short-circuits.
    for pattern in ALLOWLIST_REGEXES:
        if pattern.search(line):
            return True

    # Fall back to the optional per-plugin exclusion pattern. When it is
    # unset, the falsy attribute value itself is returned.
    custom_regex = self.exclude_lines_regex
    return custom_regex and custom_regex.search(line)

def analyze(self, file, filename):
"""
:param file: The File object itself.
Expand All @@ -114,6 +125,13 @@ def analyze(self, file, filename):
file_lines = tuple(file.readlines())
for line_num, line in enumerate(file_lines, start=1):
results = self.analyze_line(line, line_num, filename)
if (
not results
or
self._is_excluded_line(line)
):
continue

if not self.should_verify:
potential_secrets.update(results)
continue
Expand Down Expand Up @@ -146,18 +164,6 @@ def analyze_line(self, string, line_num, filename):
NOTE: line_num and filename are used for PotentialSecret creation only.
"""
if (
any(
allowlist_regex.search(string) for allowlist_regex in ALLOWLIST_REGEXES
)

or (
self.exclude_lines_regex and
self.exclude_lines_regex.search(string)
)
):
return {}

return self.analyze_string_content(
string,
line_num,
Expand Down
21 changes: 16 additions & 5 deletions tests/plugins/high_entropy_strings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,18 +297,29 @@ def test_discounts_when_all_numbers(self):
)

# This makes sure discounting works.
assert self.logic.calculate_shannon_entropy('0123456789') < \
assert (
self.logic.calculate_shannon_entropy('0123456789')
<
original_scanner.calculate_shannon_entropy('0123456789')

)
# This is the goal.
assert self.logic.calculate_shannon_entropy('0123456789') < 3

# This makes sure it is length dependent.
assert self.logic.calculate_shannon_entropy('0123456789') < \
assert (
self.logic.calculate_shannon_entropy('0123456789')
<
self.logic.calculate_shannon_entropy('01234567890123456789')
)

# This makes sure it only occurs with numbers.
assert self.logic.calculate_shannon_entropy('12345a') == \
assert (
self.logic.calculate_shannon_entropy('12345a')
==
original_scanner.calculate_shannon_entropy('12345a')
assert self.logic.calculate_shannon_entropy('0') == \
)
assert (
self.logic.calculate_shannon_entropy('0')
==
original_scanner.calculate_shannon_entropy('0')
)

0 comments on commit 74d3787

Please sign in to comment.