Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix is_verified not stored in PotentialSecret #578

Merged
merged 3 commits into from
Jul 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions detect_secrets/core/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ..types import NamedIO
from ..types import SelfAwareCallable
from ..util import git
from ..util.code_snippet import CodeSnippet
from ..util.code_snippet import get_code_snippet
from ..util.inject import call_function_with_arguments
from ..util.path import get_relative_path
Expand Down Expand Up @@ -112,6 +113,7 @@ def scan_line(line: str) -> Generator[PotentialSecret, None, None]:
'detect_secrets.filters.common.is_invalid_file',
)
get_filters.cache_clear()
context = get_code_snippet(lines=[line], line_number=1)

yield from (
secret
Expand All @@ -122,17 +124,15 @@ def scan_line(line: str) -> Generator[PotentialSecret, None, None]:
line=line,
line_number=0,
enable_eager_search=True,
context=context,
)
if not _is_filtered_out(
required_filter_parameters=['context'],
filename=secret.filename,
secret=secret.secret_value,
plugin=plugin,
line=line,
context=get_code_snippet(
lines=[line],
line_number=1,
),
context=context,
)
)

Expand Down Expand Up @@ -225,18 +225,25 @@ def _scan_for_allowlisted_secrets_in_lines(
line_numbers, lines = zip(*lines)
line_content = [line.rstrip() for line in lines]
for line_number, line in zip(line_numbers, line_content):
context = get_code_snippet(line_content, line_number)
if not is_line_allowlisted(
filename,
line,
context=get_code_snippet(line_content, line_number),
filename=filename,
line=line,
context=context,
lorenzodb1 marked this conversation as resolved.
Show resolved Hide resolved
):
continue

if _is_filtered_out(required_filter_parameters=['line'], filename=filename, line=line):
continue

for plugin in get_plugins():
yield from _scan_line(plugin, filename, line, line_number)
yield from _scan_line(
plugin=plugin,
filename=filename,
line=line,
line_number=line_number,
context=context,
)


def _get_lines_from_file(filename: str) -> Generator[List[str], None, None]:
Expand Down Expand Up @@ -323,7 +330,13 @@ def _process_line_based_plugins(
yield from (
secret
for plugin in get_plugins()
for secret in _scan_line(plugin, filename, line, line_number)
for secret in _scan_line(
plugin=plugin,
filename=filename,
line=line,
line_number=line_number,
context=code_snippet,
)
if not _is_filtered_out(
required_filter_parameters=['context'],
filename=secret.filename,
Expand All @@ -340,6 +353,7 @@ def _scan_line(
filename: str,
line: str,
line_number: int,
context: CodeSnippet,
**kwargs: Any,
) -> Generator[PotentialSecret, None, None]:
# NOTE: We don't apply filter functions here yet, because we don't have any filters
Expand All @@ -349,6 +363,7 @@ def _scan_line(
filename=filename,
line=line,
line_number=line_number,
context=context,
**kwargs,
)
if not secrets:
Expand Down
21 changes: 21 additions & 0 deletions detect_secrets/plugins/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
from ..constants import VerifiedResult
from ..core.potential_secret import PotentialSecret
from ..settings import get_settings
from detect_secrets.util.code_snippet import CodeSnippet
from detect_secrets.util.inject import call_function_with_arguments


class BasePlugin(metaclass=ABCMeta):
Expand All @@ -46,17 +48,36 @@ def analyze_line(
filename: str,
line: str,
line_number: int = 0,
context: CodeSnippet = None,
**kwargs: Any
) -> Set[PotentialSecret]:
"""This examines a line and finds all possible secret values in it."""
output = set()
for match in self.analyze_string(line, **kwargs): # type: ignore
is_verified: bool = False
# If the filter is disabled it means --no-verify flag was passed
# We won't run verification in that case
if(
'detect_secrets.filters.common.is_ignored_due_to_verification_policies'
in get_settings().filters
):
try:
verified_result = call_function_with_arguments(
self.verify,
secret=match,
context=context,
)
is_verified = True if verified_result == VerifiedResult.VERIFIED_TRUE else False
except requests.exceptions.RequestException:
is_verified = False

output.add(
PotentialSecret(
type=self.secret_type,
filename=filename,
secret=match,
line_number=line_number,
is_verified=is_verified,
),
)

Expand Down
9 changes: 8 additions & 1 deletion detect_secrets/plugins/high_entropy_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from ..core.potential_secret import PotentialSecret
from .base import BasePlugin
from detect_secrets.util.code_snippet import CodeSnippet


class HighEntropyStringsPlugin(BasePlugin, metaclass=ABCMeta):
Expand Down Expand Up @@ -45,10 +46,16 @@ def analyze_line(
filename: str,
line: str,
line_number: int = 0,
context: CodeSnippet = None,
enable_eager_search: bool = False,
**kwargs: Any,
) -> Set[PotentialSecret]:
output = super().analyze_line(filename=filename, line=line, line_number=line_number)
output = super().analyze_line(
filename=filename,
line=line,
line_number=line_number,
context=context,
)
if output or not enable_eager_search:
# NOTE: We perform the limit filter at this layer (rather than analyze_string) so
# that we can surface secrets that do not meet the limit criteria when
Expand Down
3 changes: 3 additions & 0 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from ..util.filetype import determine_file_type
from ..util.filetype import FileType
from .base import BasePlugin
from detect_secrets.util.code_snippet import CodeSnippet


# Note: All values here should be lowercase
Expand Down Expand Up @@ -306,6 +307,7 @@ def analyze_line(
filename: str,
line: str,
line_number: int = 0,
context: CodeSnippet = None,
**kwargs: Any,
) -> Set[PotentialSecret]:
filetype = determine_file_type(filename)
Expand All @@ -314,6 +316,7 @@ def analyze_line(
filename=filename,
line=line,
line_number=line_number,
context=context,
denylist_regex_to_group=denylist_regex_to_group,
)

Expand Down
8 changes: 7 additions & 1 deletion tests/filters/common_filter_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
from unittest import mock

import pytest
import requests
Expand Down Expand Up @@ -44,7 +45,12 @@ def test_supports_injection_of_context():
# NOTE: This test case relies on the fact that this file contains a multi-factor
# AWS KeyPair.
with register_plugin(ContextAwareMockPlugin()):
main_module.main(['scan', 'test_data/each_secret.py'])
with mock.patch(
'detect_secrets.plugins.aws.verify_aws_secret_access_key',
return_value=False,
):

main_module.main(['scan', 'test_data/each_secret.py'])

@staticmethod
def test_handles_request_error_gracefully():
Expand Down
103 changes: 103 additions & 0 deletions tests/plugins/base_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
from typing import Generator

import pytest
import requests

from detect_secrets.constants import VerifiedResult
from detect_secrets.core.plugins.util import get_mapping_from_secret_type_to_class
from detect_secrets.plugins.base import BasePlugin
from detect_secrets.settings import get_settings
from detect_secrets.util.code_snippet import CodeSnippet
from detect_secrets.util.code_snippet import get_code_snippet


def test_ensure_all_plugins_have_unique_secret_types():
Expand All @@ -7,3 +17,96 @@ def test_ensure_all_plugins_have_unique_secret_types():
secret_types.add(plugin_type.secret_type)

assert len(secret_types) == len(get_mapping_from_secret_type_to_class())


class MockPlugin(BasePlugin):
secret_type = 'MockPlugin'

def __init__(self, verify_result: VerifiedResult):
self.verify_result = verify_result
self.verify_call_count = 0

def verify(self, secret: str, context: CodeSnippet) -> VerifiedResult:
self.verify_call_count += 1
return self.verify_result

def analyze_string(self, string: str) -> Generator[str, None, None]:
yield string


class MockExceptionRaisingPlugin(BasePlugin):
secret_type = 'MockExceptionRaisingPlugin'

def analyze_string(self, string: str) -> Generator[str, None, None]:
yield string

def verify(self, secret: str, context: CodeSnippet):
raise requests.exceptions.Timeout


class TestAnalyzeLine():
def setup(self):
self.line = 'some-secret'
self.filename = 'secrets.py'
self.context = get_code_snippet(lines=[self.line], line_number=1)

@pytest.mark.parametrize(
'verified_result ,is_verified',
[
(VerifiedResult.UNVERIFIED, False),
(VerifiedResult.VERIFIED_FALSE, False),
(VerifiedResult.VERIFIED_TRUE, True),
],
)
def test_potential_secret_constructed_correctly(self, verified_result, is_verified):
self._enable_filter()
plugin = MockPlugin(verified_result)
output = plugin.analyze_line(
filename=self.filename,
line=self.line,
line_number=1,
context=self.context,
)
secret = list(output)[0]
assert secret.secret_value == self.line
assert secret.type == plugin.secret_type
assert secret.filename == self.filename
assert secret.line_number == 1
assert secret.is_verified == is_verified

def test_no_verification_call_if_verification_filter_is_disabled(self):
self._disable_filter()
plugin = MockPlugin(VerifiedResult.VERIFIED_TRUE)
output = plugin.analyze_line(
filename=self.filename,
line=self.line,
line_number=1,
context=self.context,
)
secret = list(output)[0]
assert secret.is_verified is False
assert plugin.verify_call_count == 0

def test_handle_verify_exception_gracefully(self):
self._enable_filter()
plugin = MockExceptionRaisingPlugin()
output = plugin.analyze_line(
filename=self.filename,
line=self.line,
line_number=1,
context=self.context,
)
secret = list(output)[0]
assert secret.is_verified is False

def _enable_filter(self):
get_settings().filters[
'detect_secrets.filters.common.is_ignored_due_to_verification_policies'
] = {
'min_value': 0,
}

def _disable_filter(self):
get_settings().disable_filters(
'detect_secrets.filters.common.is_ignored_due_to_verification_policies',
)
4 changes: 4 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ tox_pip_extensions_ext_venv_update = true

[testenv]
passenv = SSH_AUTH_SOCK
# NO_PROXY is needed to call requests API within a forked process
# when using macOS and python version 3.6/3.7
setenv =
NO_PROXY = '*'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this only a testing issue or do we need to somehow enable this in a production mode as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same issue will be in production as well if the detect-secrets is ran with python 3.6/3.7 in macOS. I didn't want to add NO_PROXY in production code as it would lead to ignoring proxy configurations in the environment and the issue doesn't impact most environment. that being said detect-secrets can still be run in impacted environments by either setting NO_PROXY env variable prior to running it or adding the --no-verify flag.

I think the issue existed prior to this PR it's just that non of the tests invoked Requests APIs from a forked process.

Copy link
Member

@jpdakran jpdakran Jul 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we verify that this was existing in some sort of integration test / brute force testing? I would just like to ensure we are not introducing any new issues to the repository.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've created a draft PR with test data that should invoke Requests API from forked process, the same error occurred

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is caused by requests, could it be the case that a newer version doesn't have this issue anymore? I see we're using 2.26.0, while the latest one is 2.28.1.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like requests v2.28.0 dropped support for python 3.6, I tried requests v2.27.1 with no luck.

It's worth noting that the issue can be fixed if we use spawn context for multiprocessing but I think that would come at a performance cost.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. So python 3.8+ multiprocessing library has switched to spawn where before it was fork. This is an issue since spawn does not share any memory between the parent and child processes. So we default the multiprocessing to fork when creating a pool of processes

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we try and use v2.28.1. Python 3.6 is end of life. We can consider dropping support for it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried v2.28.1, still the same problem. I think this is more of a python issue than requests, the default context was changed to spawn because of crashes similar to this.

Since we are already using default multiprocessing (spawn) for python 3.8+ on macOS, what do you think of checking the current OS and if it's macOS we change to spawn?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, that was the initial attempt to fix it. We decided to try and stay away from being OS specific so instead of changing the default multiprocessing, we instead passed the shared settings to child processes from the parent. So in python 3.8+ we are using spawn which is the default.

deps = -rrequirements-dev.txt
whitelist_externals = coverage
commands =
Expand Down