diff --git a/CHANGELOG.md b/CHANGELOG.md index c254827ba..3687cee12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -275,7 +275,7 @@ This includes using `# pragma: allowlist secret` now for inline allowlisting. #### :telescope: Accuracy -- [Added `null` to the `FALSE_POSITIVES` tuple for the `KeywordDetector` plugin, so we do not alert off of it](https://github.com/Yelp/detect-secrets/commit/58df82ce37d64f22cb885960c2031b5f8ebe4b75) +- [Added `null` to the `FALSE_POSITIVES` tuple for the `KeywordDetector` plugin, so we don't alert off of it](https://github.com/Yelp/detect-secrets/commit/58df82ce37d64f22cb885960c2031b5f8ebe4b75) @@ -286,7 +286,7 @@ This includes using `# pragma: allowlist secret` now for inline allowlisting. - Turned the `KeywordDetector` plugin back on, with new regexes and accuracy improvements ([#86]) - Added an `AWSAccessKeyDetector` plugin ([#100]) -- Added the ability to scan `.ini` types files that do not have a header ([#106]) +- Added the ability to scan `.ini` types files that don't have a header ([#106]) [#86]: https://github.com/Yelp/detect-secrets/pull/86 [#100]: https://github.com/Yelp/detect-secrets/pull/100 diff --git a/LICENSE b/LICENSE index a6962b3b4..6db569d43 100644 --- a/LICENSE +++ b/LICENSE @@ -100,13 +100,13 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of + excluding those notices that don't pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not + within such NOTICE file, excluding those notices that don't pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or @@ -114,7 +114,7 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution + don't modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed diff --git a/README.md b/README.md index 785731efa..d9a9e0e2c 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,7 @@ committing secrets. ### Things that won't be prevented * Multi-line secrets -* Default passwords that do not trigger the `KeywordDetector` (e.g. `login = "hunter2"`) +* Default passwords that don't trigger the `KeywordDetector` (e.g. 
`login = "hunter2"`) ### Plugin Configuration diff --git a/detect_secrets/core/audit.py b/detect_secrets/core/audit.py index 949bc3434..1e5176bcd 100644 --- a/detect_secrets/core/audit.py +++ b/detect_secrets/core/audit.py @@ -41,19 +41,34 @@ class RedundantComparisonError(Exception): AUDIT_RESULT_TO_STRING = { - True: 'positive', - False: 'negative', - None: 'unknown', + True: 'true-positives', + False: 'false-positives', + None: 'unknowns', } EMPTY_PLUGIN_AUDIT_RESULT = { 'results': { - 'positive': [], - 'negative': [], - 'unknown': [], + 'true-positives': defaultdict(list), + 'false-positives': defaultdict(list), + 'unknowns': defaultdict(list), }, 'config': {}, } +EMPTY_STATS_RESULT = { + 'signal': 0, + 'true-positives': { + 'count': 0, + 'files': defaultdict(int), + }, + 'false-positives': { + 'count': 0, + 'files': defaultdict(int), + }, + 'unknowns': { + 'count': 0, + 'files': defaultdict(int), + }, +} def audit_baseline(baseline_filename): @@ -208,12 +223,30 @@ def determine_audit_results(baseline, baseline_path): Given a baseline which has been audited, returns a dictionary describing the results of each plugin in the following form: { - "results": { + "plugins": { "plugin_name1": { "results": { - "positive": [list of secrets with is_secret: true caught by this plugin], - "negative": [list of secrets with is_secret: false caught by this plugin], - "unknown": [list of secrets with no is_secret entry caught by this plugin] + "true-positives": [ + list of { + filename: { + 'line': '...', + 'plaintext':'...', + } + } for secrets with `is_secret: true` caught by this plugin], + "false-positives": [ + list of { + filename: { + 'line': '...', + 'plaintext':'...', + } + } for secrets with `is_secret: false` caught by this plugin], + "unknowns": [ + list of { + filename: { + 'line': '...', + 'plaintext':'...', + } + } for secrets with no `is_secret` entry caught by this plugin] }, "config": {configuration used for the plugin} }, @@ -228,34 +261,49 @@ def determine_audit_results(baseline, baseline_path): all_secrets = _secret_generator(baseline) audit_results = { - 'results': defaultdict(lambda: deepcopy(EMPTY_PLUGIN_AUDIT_RESULT)), + 'plugins': defaultdict(lambda: deepcopy(EMPTY_PLUGIN_AUDIT_RESULT)), + 'stats': deepcopy(EMPTY_STATS_RESULT), } secret_type_to_plugin_name = get_mapping_from_secret_type_to_class_name() + total = 0 for filename, secret in all_secrets: file_contents = _open_file_with_cache(filename) + secret_info = {} + secret_info['line'] = _get_file_line(filename, secret['line_number']) try: - secret_plaintext = get_raw_secret_value( + secret_info['plaintext'] = get_raw_secret_value( secret=secret, plugin_settings=baseline['plugins_used'], file_handle=io.StringIO(file_contents), filename=filename, ) except SecretNotFoundOnSpecifiedLineError: - secret_plaintext = _get_file_line(filename, secret['line_number']) + secret_info['plaintext'] = None plugin_name = secret_type_to_plugin_name[secret['type']] audit_result = AUDIT_RESULT_TO_STRING[secret.get('is_secret')] - audit_results['results'][plugin_name]['results'][audit_result].append(secret_plaintext) + audit_results['plugins'][plugin_name]['results'][audit_result][filename].append(secret_info) + + audit_results['stats'][audit_result]['count'] += 1 + audit_results['stats'][audit_result]['files'][filename] += 1 + total += 1 + audit_results['stats']['signal'] = str( + ( + audit_results['stats']['true-positives']['count'] + / + total + ) * 100, + )[:4] + '%' for plugin_config in baseline['plugins_used']: plugin_name = 
plugin_config['name'] - if plugin_name not in audit_results['results']: + if plugin_name not in audit_results['plugins']: continue - audit_results['results'][plugin_name]['config'].update(plugin_config) + audit_results['plugins'][plugin_name]['config'].update(plugin_config) git_repo_path = os.path.dirname(os.path.abspath(baseline_path)) git_sha = get_git_sha(git_repo_path) @@ -657,13 +705,9 @@ def get_raw_secret_value( plugin_secrets = plugin.analyze(file_handle, filename) - matching_secret = [ - plugin_secret.secret_value - for plugin_secret in plugin_secrets - if plugin_secret.secret_hash == secret['hashed_secret'] - ] - - if not matching_secret: - raise SecretNotFoundOnSpecifiedLineError(secret['line_number']) + # Return value of matching secret + for plugin_secret in plugin_secrets: + if plugin_secret.secret_hash == secret['hashed_secret']: + return plugin_secret.secret_value - return matching_secret[0] + raise SecretNotFoundOnSpecifiedLineError(secret['line_number']) diff --git a/detect_secrets/core/baseline.py b/detect_secrets/core/baseline.py index bff8da118..ccd64eb3f 100644 --- a/detect_secrets/core/baseline.py +++ b/detect_secrets/core/baseline.py @@ -18,17 +18,27 @@ def initialize( plugins, exclude_files_regex=None, exclude_lines_regex=None, + word_list_file=None, + word_list_hash=None, should_scan_all_files=False, ): """Scans the entire codebase for secrets, and returns a SecretsCollection object. + :type path: list + :type plugins: tuple of detect_secrets.plugins.base.BasePlugin :param plugins: rules to initialize the SecretsCollection with. :type exclude_files_regex: str|None :type exclude_lines_regex: str|None - :type path: list + + :type word_list_file: str|None + :param word_list_file: optional word list file for ignoring certain words. + + :type word_list_hash: str|None + :param word_list_hash: optional iterated sha1 hash of the words in the word list. 
+ :type should_scan_all_files: bool :rtype: SecretsCollection @@ -37,17 +47,21 @@ def initialize( plugins, exclude_files=exclude_files_regex, exclude_lines=exclude_lines_regex, + word_list_file=word_list_file, + word_list_hash=word_list_hash, ) files_to_scan = [] for element in path: if os.path.isdir(element): if should_scan_all_files: - files_to_scan.extend(_get_files_recursively(element)) + files_to_scan.extend( + _get_files_recursively(element), + ) else: - files = _get_git_tracked_files(element) - if files: - files_to_scan.extend(files) + files_to_scan.extend( + _get_git_tracked_files(element), + ) elif os.path.isfile(element): files_to_scan.append(element) else: @@ -65,7 +79,7 @@ def initialize( files_to_scan, ) - for file in files_to_scan: + for file in sorted(files_to_scan): output.scan_file(file) return output @@ -267,6 +281,7 @@ def _get_git_tracked_files(rootdir='.'): :rtype: set|None :returns: filepaths to files which git currently tracks (locally) """ + output = [] try: with open(os.devnull, 'w') as fnull: git_files = subprocess.check_output( @@ -277,13 +292,13 @@ def _get_git_tracked_files(rootdir='.'): ], stderr=fnull, ) - - return set([ - util.get_relative_path(rootdir, filename) - for filename in git_files.decode('utf-8').split() - ]) + for filename in git_files.decode('utf-8').split(): + relative_path = util.get_relative_path_if_in_cwd(rootdir, filename) + if relative_path: + output.append(relative_path) except subprocess.CalledProcessError: - return None + pass + return output def _get_files_recursively(rootdir): @@ -293,6 +308,7 @@ def _get_files_recursively(rootdir): output = [] for root, _, files in os.walk(rootdir): for filename in files: - output.append(util.get_relative_path(root, filename)) - + relative_path = util.get_relative_path_if_in_cwd(root, filename) + if relative_path: + output.append(relative_path) return output diff --git a/detect_secrets/core/bidirectional_iterator.py b/detect_secrets/core/bidirectional_iterator.py index f3f573bc8..b664a99cd 100644 --- a/detect_secrets/core/bidirectional_iterator.py +++ b/detect_secrets/core/bidirectional_iterator.py @@ -1,7 +1,7 @@ class BidirectionalIterator(object): def __init__(self, collection): self.collection = collection - self.index = -1 # starts on -1, as index is increased _before_ getting result + self.index = -1 # Starts on -1, as index is increased _before_ getting result self.step_back_once = False def __next__(self): diff --git a/detect_secrets/core/constants.py b/detect_secrets/core/constants.py index fe59c77db..812f9353a 100644 --- a/detect_secrets/core/constants.py +++ b/detect_secrets/core/constants.py @@ -7,29 +7,37 @@ # and look for "ASCII text", but that might be more expensive. # # Definitely something to look into, if this list gets unruly long. 
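# Annotation, not part of the patch: every entry in the rewritten set below
# gains a leading dot. A minimal sketch of the lookup this enables, assuming
# (as the new `test_skip_ignored_file_extensions` test later in this diff
# suggests) that callers compare `os.path.splitext(filename)[1]` against this
# set; `_should_skip` is a hypothetical helper name used only for illustration:
#
#     import os
#
#     def _should_skip(filename):
#         # splitext('logo.svg') returns ('logo', '.svg'), dot included,
#         # so it lines up with the dotted entries defined below.
#         return os.path.splitext(filename)[1] in IGNORED_FILE_EXTENSIONS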
-IGNORED_FILE_EXTENSIONS = { - '7z', - 'bmp', - 'bz2', - 'dmg', - 'exe', - 'gif', - 'gz', - 'ico', - 'jar', - 'jpg', - 'jpeg', - 'png', - 'rar', - 'realm', - 's7z', - 'svg', - 'tar', - 'tif', - 'tiff', - 'webp', - 'zip', -} +IGNORED_FILE_EXTENSIONS = set( + ( + '.7z', + '.bmp', + '.bz2', + '.dmg', + '.eot', + '.exe', + '.gif', + '.gz', + '.ico', + '.jar', + '.jpg', + '.jpeg', + '.mo', + '.png', + '.rar', + '.realm', + '.s7z', + '.svg', + '.tar', + '.tif', + '.tiff', + '.ttf', + '.webp', + '.woff', + '.xls', + '.xlsx', + '.zip', + ), +) class VerifiedResult(Enum): diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index 9266dd15f..8ef68ae2b 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -12,6 +12,7 @@ from detect_secrets.core.log import log from detect_secrets.core.potential_secret import PotentialSecret from detect_secrets.plugins.common import initialize +from detect_secrets.util import build_automaton class SecretsCollection(object): @@ -21,6 +22,8 @@ def __init__( plugins=(), exclude_files=None, exclude_lines=None, + word_list_file=None, + word_list_hash=None, ): """ :type plugins: tuple of detect_secrets.plugins.base.BasePlugin @@ -32,14 +35,18 @@ def __init__( :type exclude_lines: str|None :param exclude_lines: optional regex for ignored lines. - :type version: str - :param version: version of detect-secrets that SecretsCollection - is valid at. + :type word_list_file: str|None + :param word_list_file: optional word list file for ignoring certain words. + + :type word_list_hash: str|None + :param word_list_hash: optional iterated sha1 hash of the words in the word list. """ self.data = {} self.plugins = plugins self.exclude_files = exclude_files self.exclude_lines = exclude_lines + self.word_list_file = word_list_file + self.word_list_hash = word_list_hash self.version = VERSION @classmethod @@ -93,6 +100,17 @@ def load_baseline_from_dict(cls, data): result.exclude_files = data['exclude']['files'] result.exclude_lines = data['exclude']['lines'] + # In v0.12.7 the `--word-list` option got added + automaton = None + if 'word_list' in data: + result.word_list_file = data['word_list']['file'] + result.word_list_hash = data['word_list']['hash'] + + if result.word_list_file: + # Always ignore the given `data['word_list']['hash']` + # The difference will show whenever the word list changes + automaton, result.word_list_hash = build_automaton(result.word_list_file) + plugins = [] for plugin in data['plugins_used']: plugin_classname = plugin.pop('name') @@ -100,6 +118,7 @@ def load_baseline_from_dict(cls, data): initialize.from_plugin_classname( plugin_classname, exclude_lines_regex=result.exclude_lines, + automaton=automaton, should_verify_secrets=False, **plugin ), @@ -277,6 +296,10 @@ def format_for_baseline_output(self): 'files': self.exclude_files, 'lines': self.exclude_lines, }, + 'word_list': { + 'file': self.word_list_file, + 'hash': self.word_list_hash, + }, 'plugins_used': plugins_used, 'results': results, 'version': self.version, diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index c66b2cf0d..7626d6397 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -14,6 +14,18 @@ def add_exclude_lines_argument(parser): ) +def add_word_list_argument(parser): + parser.add_argument( + '--word-list', + type=str, + help=( + 'Text file with a list of words, ' + 'if a secret contains a word in the list we ignore it.' 
+ ), + dest='word_list_file', + ) + + def add_use_all_plugins_argument(parser): parser.add_argument( '--use-all-plugins', @@ -46,6 +58,7 @@ def add_pre_commit_arguments(self): self._add_filenames_argument()\ ._add_set_baseline_argument()\ ._add_exclude_lines_argument()\ + ._add_word_list_argument()\ ._add_use_all_plugins_argument()\ ._add_no_verify_flag() @@ -108,6 +121,10 @@ def _add_exclude_lines_argument(self): add_exclude_lines_argument(self.parser) return self + def _add_word_list_argument(self): + add_word_list_argument(self.parser) + return self + def _add_use_all_plugins_argument(self): add_use_all_plugins_argument(self.parser) return self @@ -143,9 +160,10 @@ def _add_initialize_baseline_argument(self): ), ) - # Pairing `--exclude-lines` to both pre-commit and `--scan` - # because it can be used for both. + # Pairing `--exclude-lines` and `--word-list` to + # both pre-commit and `--scan` because it can be used for both. add_exclude_lines_argument(self.parser) + add_word_list_argument(self.parser) # Pairing `--exclude-files` with `--scan` because it's only used for the initialization. # The pre-commit hook framework already has an `exclude` option that can @@ -401,11 +419,7 @@ def consolidate_args(args): # Consolidate related args related_args = {} for related_arg_tuple in plugin.related_args: - try: - flag_name, default_value = related_arg_tuple - except ValueError: - flag_name = related_arg_tuple - default_value = None + flag_name, default_value = related_arg_tuple arg_name = PluginOptions._convert_flag_text_to_argument_name( flag_name, diff --git a/detect_secrets/main.py b/detect_secrets/main.py index f6f0d5965..c939bfd28 100644 --- a/detect_secrets/main.py +++ b/detect_secrets/main.py @@ -13,6 +13,7 @@ from detect_secrets.core.secrets_collection import SecretsCollection from detect_secrets.core.usage import ParserBuilder from detect_secrets.plugins.common import initialize +from detect_secrets.util import build_automaton def parse_args(argv): @@ -30,11 +31,17 @@ def main(argv=None): log.set_debug_level(args.verbose) if args.action == 'scan': + automaton = None + word_list_hash = None + if args.word_list_file: + automaton, word_list_hash = build_automaton(args.word_list_file) + # Plugins are *always* rescanned with fresh settings, because # we want to get the latest updates. plugins = initialize.from_parser_builder( args.plugins, exclude_lines_regex=args.exclude_lines, + automaton=automaton, should_verify_secrets=not args.no_verify, ) if args.string: @@ -46,7 +53,12 @@ def main(argv=None): _scan_string(line, plugins) else: - baseline_dict = _perform_scan(args, plugins) + baseline_dict = _perform_scan( + args, + plugins, + automaton, + word_list_hash, + ) if args.import_filename: write_baseline_to_file( @@ -87,7 +99,7 @@ def main(argv=None): return 0 -def _get_plugin_from_baseline(old_baseline): +def _get_plugins_from_baseline(old_baseline): plugins = [] if old_baseline and 'plugins_used' in old_baseline: secrets_collection = SecretsCollection.load_baseline_from_dict(old_baseline) @@ -114,17 +126,25 @@ def _scan_string(line, plugins): print('\n'.join(sorted(output))) -def _perform_scan(args, plugins): +def _perform_scan(args, plugins, automaton, word_list_hash): """ :param args: output of `argparse.ArgumentParser.parse_args` :param plugins: tuple of initialized plugins + :type automaton: ahocorasick.Automaton|None + :param automaton: optional automaton for ignoring certain words. 
+ + :type word_list_hash: str|None + :param word_list_hash: optional iterated sha1 hash of the words in the word list. + :rtype: dict """ old_baseline = _get_existing_baseline(args.import_filename) if old_baseline: - plugins = initialize.merge_plugin_from_baseline( - _get_plugin_from_baseline(old_baseline), args, + plugins = initialize.merge_plugins_from_baseline( + _get_plugins_from_baseline(old_baseline), + args, + automaton=automaton, ) # Favors `--exclude-files` and `--exclude-lines` CLI arguments @@ -139,6 +159,12 @@ def _perform_scan(args, plugins): ): args.exclude_lines = old_baseline['exclude']['lines'] + if ( + not args.word_list_file + and old_baseline.get('word_list') + ): + args.word_list_file = old_baseline['word_list']['file'] + # If we have knowledge of an existing baseline file, we should use # that knowledge and add it to our exclude_files regex. if args.import_filename: @@ -148,6 +174,8 @@ def _perform_scan(args, plugins): plugins=plugins, exclude_files_regex=args.exclude_files, exclude_lines_regex=args.exclude_lines, + word_list_file=args.word_list_file, + word_list_hash=word_list_hash, path=args.path, should_scan_all_files=args.all_files, ).format_for_baseline_output() diff --git a/detect_secrets/plugins/artifactory.py b/detect_secrets/plugins/artifactory.py index b0a728709..bada92b97 100644 --- a/detect_secrets/plugins/artifactory.py +++ b/detect_secrets/plugins/artifactory.py @@ -10,8 +10,8 @@ class ArtifactoryDetector(RegexBasedDetector): secret_type = 'Artifactory Credentials' denylist = [ - # artifactory tokens begin with AKC - re.compile(r'(?:\s|=|:|"|^)AKC[a-zA-Z0-9]{10,}'), # api token - # artifactory encrypted passwords begin with AP[A-Z] - re.compile(r'(?:\s|=|:|"|^)AP[\dABCDEF][a-zA-Z0-9]{8,}'), # password + # Artifactory tokens begin with AKC + re.compile(r'(?:\s|=|:|"|^)AKC[a-zA-Z0-9]{10,}'), # API token + # Artifactory encrypted passwords begin with AP[A-Z] + re.compile(r'(?:\s|=|:|"|^)AP[\dABCDEF][a-zA-Z0-9]{8,}'), # Password ] diff --git a/detect_secrets/plugins/aws.py b/detect_secrets/plugins/aws.py index 84c940516..fc3cf4424 100644 --- a/detect_secrets/plugins/aws.py +++ b/detect_secrets/plugins/aws.py @@ -25,18 +25,18 @@ class AWSKeyDetector(RegexBasedDetector): ) def verify(self, token, content): - secret_access_key = get_secret_access_key(content) - if not secret_access_key: + secret_access_key_candidates = get_secret_access_keys(content) + if not secret_access_key_candidates: return VerifiedResult.UNVERIFIED - for candidate in secret_access_key: + for candidate in secret_access_key_candidates: if verify_aws_secret_access_key(token, candidate): return VerifiedResult.VERIFIED_TRUE return VerifiedResult.VERIFIED_FALSE -def get_secret_access_key(content): +def get_secret_access_keys(content): # AWS secret access keys are 40 characters long. 
regex = re.compile( r'= *([\'"]?)([%s]{40})(\1)$' % ( @@ -176,7 +176,7 @@ def verify_aws_secret_access_key(key, secret): # pragma: no cover return True -def _sign(key, message, hex=False): # pragma: no cover +def _sign(key, message, hex=False): # pragma: no cover value = hmac.new(key, message.encode('utf-8'), hashlib.sha256) if not hex: return value.digest() diff --git a/detect_secrets/plugins/base.py b/detect_secrets/plugins/base.py index 22f0ad183..2e8bc8a2c 100644 --- a/detect_secrets/plugins/base.py +++ b/detect_secrets/plugins/base.py @@ -4,7 +4,6 @@ from abc import abstractproperty from .common.constants import ALLOWLIST_REGEXES -from .common.filters import is_false_positive from detect_secrets.core.code_snippet import CodeSnippetHighlighter from detect_secrets.core.constants import VerifiedResult from detect_secrets.core.potential_secret import PotentialSecret @@ -69,7 +68,7 @@ def analyze(self, file, filename): ) is_verified = self.verify(result.secret_value, content=str(snippet)) - if is_verified != VerifiedResult.UNVERIFIED: + if is_verified == VerifiedResult.VERIFIED_TRUE: result.is_verified = True if is_verified != VerifiedResult.VERIFIED_FALSE: @@ -149,7 +148,11 @@ def adhoc_scan(self, string): : """ # TODO: Handle multiple secrets on single line. - results = self.analyze_string(string, 0, 'does_not_matter') + results = self.analyze_string( + string, + line_num=0, + filename='does_not_matter', + ) if not results: return 'False' @@ -194,7 +197,7 @@ def __dict__(self): class RegexBasedDetector(BasePlugin): - """Base class for regular-expression based detectors. + """Parent class for regular-expression based detectors. To create a new regex-based detector, subclass this and set `secret_type` with a description and `denylist` @@ -235,7 +238,4 @@ def analyze_string_content(self, string, line_num, filename): def secret_generator(self, string, *args, **kwargs): for regex in self.denylist: for match in regex.findall(string): - if is_false_positive(match): - continue - yield match diff --git a/detect_secrets/plugins/basic_auth.py b/detect_secrets/plugins/basic_auth.py index 020f131e8..29a33b151 100644 --- a/detect_secrets/plugins/basic_auth.py +++ b/detect_secrets/plugins/basic_auth.py @@ -7,7 +7,7 @@ # This list is derived from RFC 3986 Section 2.2. # -# We do not expect any of these delimiter characters to appear in +# We don't expect any of these delimiter characters to appear in # the username/password component of the URL, seeing that this would probably # result in an unexpected URL parsing (and probably won't even work). RESERVED_CHARACTERS = ':/?#[]@' diff --git a/detect_secrets/plugins/common/constants.py b/detect_secrets/plugins/common/constants.py index d96aacdf0..9efc10e22 100644 --- a/detect_secrets/plugins/common/constants.py +++ b/detect_secrets/plugins/common/constants.py @@ -23,9 +23,9 @@ ] ] -# add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily, +# Add to this mapping (and ALLOWLIST_REGEXES if applicable) lazily, # as more language specific file parsers are implemented. 
-# discussion: https://github.com/Yelp/detect-secrets/pull/105 +# Discussion: https://github.com/Yelp/detect-secrets/pull/105 ALLOWLIST_REGEX = { 'yaml': ALLOWLIST_REGEXES[0], } diff --git a/detect_secrets/plugins/common/filters.py b/detect_secrets/plugins/common/filters.py index 3a5e079db..5c80244ea 100644 --- a/detect_secrets/plugins/common/filters.py +++ b/detect_secrets/plugins/common/filters.py @@ -1,27 +1,67 @@ """ -Heuristic, false positive filters that are shared across all plugin types. +False positive heuristic filters that are shared across all plugin types. This abstraction allows for development of later ML work, or further heuristical determinations (e.g. word filter, entropy comparator). """ import string +from detect_secrets.util import is_python_2 -def is_false_positive(secret): - for func in [ - is_sequential_string, - ]: - if func(secret): - return True - return False +def is_false_positive(secret, automaton): + """ + :type secret: str + :type automaton: ahocorasick.Automaton|None + :param automaton: optional automaton for ignoring certain words. -def is_sequential_string(secret): + :rtype: bool + Returns True if any false positive heuristic function returns True. """ - Returns true if string is sequential. + return any( + func(secret, automaton) + for func in + ( + _is_found_with_aho_corasick, + _is_sequential_string, + ) + ) + + +def _is_found_with_aho_corasick(secret, automaton): + """ + :type secret: str + + :type automaton: ahocorasick.Automaton|None + :param automaton: optional automaton for ignoring certain words. + + :rtype: bool + Returns True if secret contains a word in the automaton. + """ + if not automaton: + return False + + if is_python_2(): # pragma: no cover + # Due to pyahocorasick + secret = secret.encode('utf-8') + + try: + # .lower() to make everything case-insensitive + next(automaton.iter(string=secret.lower())) + return True + except StopIteration: + return False + + +def _is_sequential_string(secret, *args): + """ + :type secret: str + + :rtype: bool + Returns True if string is sequential. """ sequences = ( - # base64 letters first + # Base64 letters first ( string.ascii_uppercase + string.ascii_uppercase + @@ -29,7 +69,7 @@ def is_sequential_string(secret): '+/' ), - # base64 numbers first + # Base64 numbers first ( string.digits + string.ascii_uppercase + @@ -41,7 +81,7 @@ def is_sequential_string(secret): # sequences, since those will happen to be caught by the # base64 checks. 
- # alphanumeric sequences + # Alphanumeric sequences (string.digits + string.ascii_uppercase) * 2, # Capturing any number sequences diff --git a/detect_secrets/plugins/common/ini_file_parser.py b/detect_secrets/plugins/common/ini_file_parser.py index e42744591..6c828cee6 100644 --- a/detect_secrets/plugins/common/ini_file_parser.py +++ b/detect_secrets/plugins/common/ini_file_parser.py @@ -38,10 +38,10 @@ def __init__(self, file, add_header=False, exclude_lines_regex=None): """ self.parser = configparser.ConfigParser() try: - # python2.7 compatible + # Python2.7 compatible self.parser.optionxform = unicode except NameError: # pragma: no cover - # python3 compatible + # Python3 compatible self.parser.optionxform = str self.exclude_lines_regex = exclude_lines_regex @@ -53,10 +53,10 @@ def __init__(self, file, add_header=False, exclude_lines_regex=None): content = '[global]\n' + content try: - # python2.7 compatible + # Python2.7 compatible self.parser.read_string(unicode(content)) except NameError: # pragma: no cover - # python3 compatible + # Python3 compatible self.parser.read_string(content) # Hacky way to keep track of line location @@ -135,7 +135,7 @@ def _get_value_and_line_offset(self, key, values): if current_value_list_index == len(values_list): if index == 0: - index = 1 # don't want to count the same line again + index = 1 # Don't want to count the same line again self.line_offset += index self.lines = self.lines[index:] lines_modified = True @@ -163,7 +163,7 @@ def _construct_values_list(values): >>> key = value0 ... value1 ... - ... # comment line here + ... # Comment line here ... value2 given that normally, either value0 is supplied, or (value1, value2), diff --git a/detect_secrets/plugins/common/initialize.py b/detect_secrets/plugins/common/initialize.py index d051c8ee3..bcb689249 100644 --- a/detect_secrets/plugins/common/initialize.py +++ b/detect_secrets/plugins/common/initialize.py @@ -19,6 +19,7 @@ def from_parser_builder( plugins_dict, exclude_lines_regex=None, + automaton=None, should_verify_secrets=False, ): """ @@ -28,16 +29,21 @@ def from_parser_builder( :type exclude_lines_regex: str|None :param exclude_lines_regex: optional regex for ignored lines. + :type automaton: ahocorasick.Automaton|None + :param automaton: optional automaton for ignoring certain words. + :type should_verify_secrets: bool :returns: tuple of initialized plugins """ output = [] + for plugin_name in plugins_dict: output.append( from_plugin_classname( plugin_name, exclude_lines_regex=exclude_lines_regex, + automaton=automaton, should_verify_secrets=should_verify_secrets, **plugins_dict[plugin_name] ), @@ -65,7 +71,7 @@ def _get_prioritized_parameters(plugins_dict, is_using_default_value_map, prefer yield plugin_name, param_name, param_value -def merge_plugin_from_baseline(baseline_plugins, args): +def merge_plugins_from_baseline(baseline_plugins, args, automaton): """ :type baseline_plugins: tuple of BasePlugin :param baseline_plugins: BasePlugin instances from baseline file @@ -73,7 +79,10 @@ def merge_plugin_from_baseline(baseline_plugins, args): :type args: dict :param args: dictionary of arguments parsed from usage - param priority: input param > baseline param > default + :type automaton: ahocorasick.Automaton|None + :param automaton: optional automaton for ignoring certain words. 
+ + param priority is input param > baseline param > default :returns: tuple of initialized plugins """ @@ -89,10 +98,10 @@ def _remove_key(d, key): # Use input plugin as starting point if args.use_all_plugins: - # input param and default param are used + # Input param and default param are used plugins_dict = dict(args.plugins) - # baseline param priority > default + # Baseline param priority > default for plugin_name, param_name, param_value in _get_prioritized_parameters( baseline_plugins_dict, args.is_using_default_value, @@ -100,15 +109,16 @@ def _remove_key(d, key): ): try: plugins_dict[plugin_name][param_name] = param_value - except KeyError: + except KeyError: # pragma: no cover log.warning( - 'Baseline contain plugin {} which is not in all plugins! Ignoring...', + 'Baseline contains plugin {} which is not in all plugins! Ignoring...', plugin_name, ) return from_parser_builder( plugins_dict, exclude_lines_regex=args.exclude_lines, + automaton=automaton, should_verify_secrets=not args.no_verify, ) @@ -120,7 +130,7 @@ def _remove_key(d, key): if plugin_name not in disabled_plugins } - # input param priority > baseline + # Input param priority > baseline input_plugins_dict = dict(args.plugins) for plugin_name, param_name, param_value in _get_prioritized_parameters( input_plugins_dict, @@ -139,12 +149,14 @@ def _remove_key(d, key): return from_parser_builder( plugins_dict, exclude_lines_regex=args.exclude_lines, + automaton=automaton, ) def from_plugin_classname( plugin_classname, exclude_lines_regex=None, + automaton=None, should_verify_secrets=False, **kwargs ): @@ -155,6 +167,11 @@ def from_plugin_classname( :type exclude_lines_regex: str|None :param exclude_lines_regex: optional regex for ignored lines. + + :type automaton: ahocorasick.Automaton|None + :param automaton: optional automaton for ignoring English-words. 
+ + :type should_verify_secrets: bool """ klass = globals()[plugin_classname] @@ -165,6 +182,7 @@ def from_plugin_classname( try: instance = klass( exclude_lines_regex=exclude_lines_regex, + automaton=automaton, should_verify=should_verify_secrets, **kwargs ) @@ -207,7 +225,10 @@ def from_secret_type(secret_type, settings): return from_plugin_classname( classname, - # `audit` doesn't need verification + # `audit` does not need to + # perform exclusion, filtering or verification + exclude_lines_regex=None, + automaton=None, should_verify_secrets=False, **plugin_init_vars diff --git a/detect_secrets/plugins/common/util.py b/detect_secrets/plugins/common/util.py index d604aa731..870273ba5 100644 --- a/detect_secrets/plugins/common/util.py +++ b/detect_secrets/plugins/common/util.py @@ -13,6 +13,7 @@ from ..high_entropy_strings import HexHighEntropyString # noqa: F401 from ..jwt import JwtTokenDetector # noqa: F401 from ..keyword import KeywordDetector # noqa: F401 +from ..mailchimp import MailchimpDetector # noqa: F401 from ..private_key import PrivateKeyDetector # noqa: F401 from ..slack import SlackDetector # noqa: F401 from ..stripe import StripeDetector # noqa: F401 diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py index 7262db607..ee3ce363d 100644 --- a/detect_secrets/plugins/high_entropy_strings.py +++ b/detect_secrets/plugins/high_entropy_strings.py @@ -6,7 +6,6 @@ import configparser import base64 import math -import os import re import string from abc import ABCMeta @@ -16,18 +15,14 @@ import yaml from .base import BasePlugin +from .common.filetype import determine_file_type +from .common.filetype import FileType from .common.filters import is_false_positive from .common.ini_file_parser import IniFileParser from .common.yaml_file_parser import YamlFileParser from detect_secrets.core.potential_secret import PotentialSecret -YAML_EXTENSIONS = ( - '.yaml', - '.yml', -) - - class HighEntropyStringsPlugin(BasePlugin): """Base class for string pattern matching""" @@ -35,7 +30,7 @@ class HighEntropyStringsPlugin(BasePlugin): secret_type = 'High Entropy String' - def __init__(self, charset, limit, exclude_lines_regex, *args): + def __init__(self, charset, limit, exclude_lines_regex, automaton, *args): if limit < 0 or limit > 8: raise ValueError( 'The limit set for HighEntropyStrings must be between 0.0 and 8.0', @@ -43,6 +38,7 @@ def __init__(self, charset, limit, exclude_lines_regex, *args): self.charset = charset self.entropy_limit = limit + self.automaton = automaton self.regex = re.compile(r'([\'"])([%s]+)(\1)' % charset) super(HighEntropyStringsPlugin, self).__init__( @@ -95,7 +91,7 @@ def analyze_string_content(self, string, line_num, filename): output = {} for result in self.secret_generator(string): - if is_false_positive(result): + if is_false_positive(result, self.automaton): continue secret = PotentialSecret(self.secret_type, filename, result, line_num) @@ -135,22 +131,16 @@ def adhoc_scan(self, string): return output @contextmanager - def non_quoted_string_regex(self, strict=True): + def non_quoted_string_regex(self): """For certain file formats, strings need not necessarily follow the normal convention of being denoted by single or double quotes. In these cases, we modify the regex accordingly. Public, because detect_secrets.core.audit needs to reference it. - - :type strict: bool - :param strict: if True, the regex will match the entire string. 
""" old_regex = self.regex - regex_alternative = r'([{}]+)'.format(re.escape(self.charset)) - if strict: - regex_alternative = r'^' + regex_alternative + r'$' - + regex_alternative = r'^([{}]+)$'.format(re.escape(self.charset)) self.regex = re.compile(regex_alternative) try: @@ -187,7 +177,7 @@ def _analyze_yaml_file(self, file, filename): """ :returns: same format as super().analyze() """ - if os.path.splitext(filename)[1] not in YAML_EXTENSIONS: + if determine_file_type(filename) != FileType.YAML: # The yaml parser is pretty powerful. It eagerly # parses things when it's not even a yaml file. Therefore, # we use this heuristic to quit early if appropriate. @@ -198,6 +188,10 @@ def _analyze_yaml_file(self, file, filename): exclude_lines_regex=self.exclude_lines_regex, ) data = parser.json() + # If the file is all comments + if not data: + raise yaml.YAMLError + ignored_lines = parser.get_ignored_lines() potential_secrets = {} @@ -206,34 +200,34 @@ def _analyze_yaml_file(self, file, filename): while len(to_search) > 0: item = to_search.pop() - try: - if '__line__' in item and item['__line__'] not in ignored_lines: - # An isinstance check doesn't work in py2 - # so we need the __is_binary__ field. - string_to_scan = self.decode_binary(item['__value__']) \ - if item['__is_binary__'] \ - else item['__value__'] - - secrets = self.analyze_string( - string_to_scan, - item['__line__'], - filename, - ) - - if item['__is_binary__']: - secrets = self._encode_yaml_binary_secrets(secrets) - - potential_secrets.update(secrets) - - if '__line__' in item: - continue - + if '__line__' not in item: for key in item: obj = item[key] if isinstance(item, dict) else key if isinstance(obj, dict): to_search.append(obj) - except TypeError: - pass + continue + + if item['__line__'] in ignored_lines: + continue + + # An isinstance check doesn't work in py2 + # so we need the __is_binary__ field. 
+ string_to_scan = ( + self.decode_binary(item['__value__']) + if item['__is_binary__'] + else item['__value__'] + ) + + secrets = self.analyze_string( + string_to_scan, + item['__line__'], + filename, + ) + + if item['__is_binary__']: + secrets = self._encode_yaml_binary_secrets(secrets) + + potential_secrets.update(secrets) return potential_secrets @@ -276,11 +270,12 @@ class HexHighEntropyString(HighEntropyStringsPlugin): secret_type = 'Hex High Entropy String' - def __init__(self, hex_limit, exclude_lines_regex=None, **kwargs): + def __init__(self, hex_limit, exclude_lines_regex=None, automaton=None, **kwargs): super(HexHighEntropyString, self).__init__( charset=string.hexdigits, limit=hex_limit, exclude_lines_regex=exclude_lines_regex, + automaton=automaton, ) @property @@ -334,11 +329,12 @@ class Base64HighEntropyString(HighEntropyStringsPlugin): secret_type = 'Base64 High Entropy String' - def __init__(self, base64_limit, exclude_lines_regex=None, **kwargs): + def __init__(self, base64_limit, exclude_lines_regex=None, automaton=None, **kwargs): super(Base64HighEntropyString, self).__init__( charset=string.ascii_letters + string.digits + '+/=', limit=base64_limit, exclude_lines_regex=exclude_lines_regex, + automaton=automaton, ) @property diff --git a/detect_secrets/plugins/jwt.py b/detect_secrets/plugins/jwt.py index 9868a01bc..5128dbef4 100644 --- a/detect_secrets/plugins/jwt.py +++ b/detect_secrets/plugins/jwt.py @@ -12,7 +12,7 @@ try: # Python 2 from future_builtins import filter -except ImportError: +except ImportError: # pragma: no cover # Python 3 pass diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index e9cca6bc7..ec3edd17b 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -31,6 +31,7 @@ from .base import BasePlugin from .common.filetype import determine_file_type from .common.filetype import FileType +from .common.filters import is_false_positive from detect_secrets.core.potential_secret import PotentialSecret @@ -253,9 +254,10 @@ class KeywordDetector(BasePlugin): secret_type = 'Secret Keyword' - def __init__(self, keyword_exclude=None, exclude_lines_regex=None, **kwargs): + def __init__(self, keyword_exclude=None, exclude_lines_regex=None, automaton=None, **kwargs): super(KeywordDetector, self).__init__( - exclude_lines_regex, + exclude_lines_regex=exclude_lines_regex, + **kwargs ) self.keyword_exclude = None @@ -265,6 +267,8 @@ def __init__(self, keyword_exclude=None, exclude_lines_regex=None, **kwargs): re.IGNORECASE, ) + self.automaton = automaton + def analyze_string_content(self, string, line_num, filename): output = {} if ( @@ -276,6 +280,8 @@ def analyze_string_content(self, string, line_num, filename): string, filetype=determine_file_type(filename), ): + if is_false_positive(identifier, self.automaton): + continue secret = PotentialSecret( self.secret_type, filename, diff --git a/detect_secrets/plugins/mailchimp.py b/detect_secrets/plugins/mailchimp.py index 9b92ea952..56489051d 100644 --- a/detect_secrets/plugins/mailchimp.py +++ b/detect_secrets/plugins/mailchimp.py @@ -4,8 +4,12 @@ from __future__ import absolute_import import re +from base64 import b64encode + +import requests from .base import RegexBasedDetector +from detect_secrets.core.constants import VerifiedResult class MailchimpDetector(RegexBasedDetector): @@ -13,6 +17,24 @@ class MailchimpDetector(RegexBasedDetector): secret_type = 'Mailchimp Access Key' denylist = ( - # Mailchimp key - 
re.compile(r'[0-9a-z]{32}(-us[0-9]{1,2})'), + re.compile(r'[0-9a-z]{32}-us[0-9]{1,2}'), ) + + def verify(self, token, **kwargs): # pragma: no cover + _, datacenter_number = token.split('-us') + + response = requests.get( + 'https://us{}.api.mailchimp.com/3.0/'.format( + datacenter_number, + ), + headers={ + 'Authorization': b'Basic ' + b64encode( + 'any_user:{}'.format(token).encode('utf-8'), + ), + }, + ) + return ( + VerifiedResult.VERIFIED_TRUE + if response.status_code == 200 + else VerifiedResult.VERIFIED_FALSE + ) diff --git a/detect_secrets/plugins/stripe.py b/detect_secrets/plugins/stripe.py index 9eda09a2d..8d9e0217e 100644 --- a/detect_secrets/plugins/stripe.py +++ b/detect_secrets/plugins/stripe.py @@ -4,8 +4,12 @@ from __future__ import absolute_import import re +from base64 import b64encode + +import requests from .base import RegexBasedDetector +from detect_secrets.core.constants import VerifiedResult class StripeDetector(RegexBasedDetector): @@ -13,6 +17,25 @@ class StripeDetector(RegexBasedDetector): secret_type = 'Stripe Access Key' denylist = ( - # stripe standard keys begin with sk_live and restricted with rk_live + # Stripe standard keys begin with sk_live and restricted with rk_live re.compile(r'(?:r|s)k_live_[0-9a-zA-Z]{24}'), ) + + def verify(self, token, **kwargs): # pragma: no cover + response = requests.get( + 'https://api.stripe.com/v1/charges', + headers={ + 'Authorization': b'Basic ' + b64encode( + '{}:'.format(token).encode('utf-8'), + ), + }, + ) + + if response.status_code == 200: + return VerifiedResult.VERIFIED_TRUE + + # Restricted keys may be limited to certain endpoints + if token.startswith('rk_live'): + return VerifiedResult.UNVERIFIED + + return VerifiedResult.VERIFIED_FALSE diff --git a/detect_secrets/pre_commit_hook.py b/detect_secrets/pre_commit_hook.py index 871be44a2..0543f02cc 100644 --- a/detect_secrets/pre_commit_hook.py +++ b/detect_secrets/pre_commit_hook.py @@ -13,6 +13,7 @@ from detect_secrets.core.secrets_collection import SecretsCollection from detect_secrets.core.usage import ParserBuilder from detect_secrets.plugins.common import initialize +from detect_secrets.util import build_automaton log = get_logger(format_string='%(message)s') @@ -37,17 +38,24 @@ def main(argv=None): # Error logs handled within logic. return 1 + automaton = None + word_list_hash = None + if args.word_list_file: + automaton, word_list_hash = build_automaton(args.word_list_file) + plugins = initialize.from_parser_builder( args.plugins, exclude_lines_regex=args.exclude_lines, + automaton=automaton, should_verify_secrets=not args.no_verify, ) - # Merge plugin from baseline + # Merge plugins from baseline if baseline_collection: - plugins = initialize.merge_plugin_from_baseline( + plugins = initialize.merge_plugins_from_baseline( baseline_collection.plugins, args, + automaton, ) baseline_collection.plugins = plugins diff --git a/detect_secrets/util.py b/detect_secrets/util.py index b11c30d54..be88daba5 100644 --- a/detect_secrets/util.py +++ b/detect_secrets/util.py @@ -1,8 +1,50 @@ +import hashlib import os import subprocess +import sys -def get_root_directory(): # pragma: no cover +def is_python_2(): + return sys.version_info[0] < 3 + + +def build_automaton(word_list): + """ + :type word_list: str + :param word_list: optional word list file for ignoring certain words. + + :rtype: (ahocorasick.Automaton, str) + :returns: an automaton, and an iterated sha1 hash of the words in the word list. 
+ """ + # Dynamic import due to optional-dependency + try: + import ahocorasick + except ImportError: # pragma: no cover + print('Please install the `pyahocorasick` package to use --word-list') + raise + + # See https://pyahocorasick.readthedocs.io/en/latest/ + # for more information. + automaton = ahocorasick.Automaton() + word_list_hash = hashlib.sha1() + + with open(word_list) as f: + for line in f: + # .lower() to make everything case-insensitive + line = line.lower().strip() + if len(line) > 3: + word_list_hash.update(line.encode('utf-8')) + automaton.add_word(line, line) + + automaton.make_automaton() + + return ( + automaton, + word_list_hash.hexdigest(), + ) + + +def get_root_directory(): # pragma: no cover return os.path.realpath( os.path.join( os.path.dirname(__file__), @@ -11,15 +53,22 @@ def get_root_directory(): # pragma: no cover ) -def get_relative_path(root, path): - """Returns relative path, after following symlinks.""" - return os.path.realpath( - os.path.join(root, path), +def get_relative_path_if_in_cwd(root, filepath): + """Returns relative path, after following symlinks, + if in current working directory. + + :rtype: str|None + """ + filepath = os.path.realpath( + os.path.join(root, filepath), )[len(os.getcwd() + '/'):] + if os.path.isfile(filepath): + return filepath + return None def get_git_sha(path): - """Returns the sha of the git checkout at the input path + """Returns the sha of the git checkout at the input path. :type path: str :param path: directory of the git checkout @@ -40,7 +89,7 @@ def get_git_sha(path): def get_git_remotes(path): """Returns a list of unique git remotes of the checkout - at the input path + at the input path. :type path: str :param path: directory of the git checkout diff --git a/requirements-dev.txt b/requirements-dev.txt index 2f6cc5934..c666ed3ff 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,6 +3,7 @@ flake8==3.5.0 mock monotonic pre-commit==1.11.2 +pyahocorasick pytest pyyaml tox>=3.8 diff --git a/setup.py b/setup.py index 76c1cda86..1fd9e5541 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,9 @@ 'future', 'functools32', ], + 'word_list': [ + 'pyahocorasick', + ], }, entry_points={ 'console_scripts': [ diff --git a/test_data/only_comments.yaml b/test_data/only_comments.yaml new file mode 100644 index 000000000..6ea3a9682 --- /dev/null +++ b/test_data/only_comments.yaml @@ -0,0 +1 @@ +# This YAML file only has comments, let's not crash on it diff --git a/test_data/word_list.txt b/test_data/word_list.txt new file mode 100644 index 000000000..01ec8ac88 --- /dev/null +++ b/test_data/word_list.txt @@ -0,0 +1 @@ +c3vwzxig diff --git a/testing/factories.py b/testing/factories.py index 6c3ea0467..db58cee6a 100644 --- a/testing/factories.py +++ b/testing/factories.py @@ -4,18 +4,29 @@ from detect_secrets.core.secrets_collection import SecretsCollection -def potential_secret_factory(type_='type', filename='filename', secret='secret', lineno=1): +def potential_secret_factory( + type_='type', + filename='filename', + secret='secret', + lineno=1, +): """This is only marginally better than creating PotentialSecret objects directly, because of the default values. """ return PotentialSecret(type_, filename, secret, lineno) -def secrets_collection_factory(secrets=None, plugins=(), exclude_files_regex=None): +def secrets_collection_factory( + secrets=None, + plugins=(), + exclude_files_regex=None, + word_list_file=None, + word_list_hash=None, +): """ :type secrets: list(dict) :param secrets: list of params to pass to add_secret. 
- E.g. [ {'secret': 'blah'}, ] + e.g. [ {'secret': 'blah'}, ] :type plugins: tuple :type exclude_files_regex: str|None @@ -25,6 +36,8 @@ def secrets_collection_factory(secrets=None, plugins=(), exclude_files_regex=Non collection = SecretsCollection( plugins, exclude_files=exclude_files_regex, + word_list_file=word_list_file, + word_list_hash=word_list_hash, ) if plugins: diff --git a/tests/core/audit_test.py b/tests/core/audit_test.py index e6e22ae81..ccc3eb306 100644 --- a/tests/core/audit_test.py +++ b/tests/core/audit_test.py @@ -529,7 +529,7 @@ def get_audited_baseline( baseline_fixture = { 'plugins_used': plugins_used, 'results': { - 'file': [ + 'mocked_file': [ { 'hashed_secret': 'a837eb90d815a852f68f56f70b1b3fab24c46c84', 'line_number': 1, @@ -540,17 +540,17 @@ def get_audited_baseline( } if is_secret is not None: - baseline_fixture['results']['file'][0]['is_secret'] = is_secret + baseline_fixture['results']['mocked_file'][0]['is_secret'] = is_secret return baseline_fixture @pytest.mark.parametrize( 'plugins_used', [ - # NOTE: The first config here needs to be + # Note: The first config here needs to be # the HexHighEntropyString config for this test to work. - [{'name': 'HexHighEntropyString'}], # plugin w/o config - [{'name': 'HexHighEntropyString', 'hex_limit': 2}], # plugin w/config + [{'name': 'HexHighEntropyString'}], # Plugin w/o config + [{'name': 'HexHighEntropyString', 'hex_limit': 2}], # Plugin w/config [ {'name': 'HexHighEntropyString'}, {'name': 'Base64HighEntropyString'}, @@ -570,15 +570,18 @@ def test_determine_audit_results_plugin_config( results = audit.determine_audit_results(baseline, '.secrets.baseline') - assert results['results']['HexHighEntropyString']['config'].items() \ - >= plugins_used[0].items() + assert ( + results['plugins']['HexHighEntropyString']['config'].items() + >= + plugins_used[0].items() + ) @pytest.mark.parametrize( 'is_secret, expected_audited_result', [ - (True, 'positive'), - (False, 'negative'), - (None, 'unknown'), + (True, 'true-positives'), + (False, 'false-positives'), + (None, 'unknowns'), ], ) def test_determine_audit_results_is_secret( @@ -595,12 +598,17 @@ def test_determine_audit_results_is_secret( results = audit.determine_audit_results(baseline, '.secrets.baseline') - for audited_result, list_of_secrets \ - in results['results']['HexHighEntropyString']['results'].items(): + for ( + audited_result, + file_to_secrets, + ) in results['plugins']['HexHighEntropyString']['results'].items(): if audited_result == expected_audited_result: - assert plaintext_secret in list_of_secrets + assert any( # pragma: no cover + secret['plaintext'] == plaintext_secret + for secret in file_to_secrets['mocked_file'] + ) else: - assert len(list_of_secrets) == 0 + assert len(file_to_secrets) == 0 @pytest.mark.parametrize( 'git_remotes, git_sha, expected_git_info', @@ -662,8 +670,10 @@ def test_determine_audit_results_secret_not_found( ): results = audit.determine_audit_results(baseline, '.secrets.baseline') - assert whole_plaintext_line in \ - results['results']['HexHighEntropyString']['results']['positive'] + hex_high_results = results['plugins']['HexHighEntropyString']['results'] + assert len(hex_high_results['true-positives']['mocked_file']) == 1 + assert hex_high_results['true-positives']['mocked_file'][0]['line'] == whole_plaintext_line + assert hex_high_results['true-positives']['mocked_file'][0]['plaintext'] is None class TestPrintAuditResults(): diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 03d7e1dc5..bf0c08d25 
100644
--- a/tests/core/baseline_test.py
+++ b/tests/core/baseline_test.py
@@ -60,6 +60,29 @@ def test_basic_usage(self, path):
         assert len(results['test_data/files/file_with_secrets.py']) == 1
         assert len(results['test_data/files/tmp/file_with_secrets.py']) == 2

+    @pytest.mark.parametrize(
+        'path',
+        [
+            [
+                './test_data/files',
+            ],
+        ],
+    )
+    def test_error_when_getting_git_tracked_files(self, path):
+        with mock_git_calls(
+            'detect_secrets.core.baseline.subprocess.check_output',
+            (
+                SubprocessMock(
+                    expected_input='git -C ./test_data/files ls-files',
+                    should_throw_exception=True,
+                    mocked_output='',
+                ),
+            ),
+        ):
+            results = self.get_results(path=['./test_data/files'])
+
+        assert not results
+
     def test_with_multiple_files(self):
         results = self.get_results(
             path=[
@@ -74,13 +97,29 @@ def test_with_multiple_files(self):
         assert 'test_data/files/tmp/file_with_secrets.py' in results

     def test_with_multiple_non_existent_files(self):
-        results = self.get_results(path=['non-existent-file.A', 'non-existent-file.B'])
-
+        with mock.patch(
+            'detect_secrets.core.baseline.util.get_relative_path_if_in_cwd',
+            return_value=None,
+        ):
+            results = self.get_results(
+                path=[
+                    'non-existent-file.A',
+                    'non-existent-file.B',
+                    # Will be non-existent due to mock.patch
+                    'test_data/files/tmp/',
+                ],
+            )
         # No expected results, because files don't exist
         assert not results

     def test_with_folders_and_files(self):
-        results = self.get_results(path=['test_data/', 'non-existent-file.B'])
+        results = self.get_results(
+            path=[
+                'non-existent-file.B',
+                'test_data/',
+                'test_data/empty_folder',
+            ],
+        )

         assert 'test_data/files/file_with_secrets.py' in results
         assert 'test_data/files/tmp/file_with_secrets.py' in results
@@ -134,6 +173,18 @@ def test_scan_all_files(self):
         )
         assert len(results.keys()) == 2

+    def test_scan_all_files_with_bad_symlinks(self):
+        with mock.patch(
+            'detect_secrets.core.baseline.util.get_relative_path_if_in_cwd',
+            return_value=None,
+        ):
+            results = self.get_results(
+                # Will be non-existent due to mock.patch
+                path=['test_data/files'],
+                scan_all_files=True,
+            )
+        assert len(results.keys()) == 0
+

 class TestGetSecretsNotInBaseline(object):
diff --git a/tests/core/secrets_collection_test.py b/tests/core/secrets_collection_test.py
index 3cf5cdd5f..5c8860ae9 100644
--- a/tests/core/secrets_collection_test.py
+++ b/tests/core/secrets_collection_test.py
@@ -1,5 +1,6 @@
 from __future__ import absolute_import

+import hashlib
 import json
 from contextlib import contextmanager
 from time import gmtime
@@ -54,6 +55,14 @@ def test_file_is_symbolic_link(self):

         assert not logic.scan_file('does_not_matter')

+    def test_skip_ignored_file_extensions(self):
+        logic = secrets_collection_factory(
+            plugins=(MockPluginFixedValue(),),
+        )
+        with mock_open('junk text here, as it does not matter'):
+            skipped_extension = '.svg'
+            assert not logic.scan_file('some' + skipped_extension)
+
     def test_error_reading_file(self, mock_log):
         logic = secrets_collection_factory()

@@ -279,13 +288,14 @@ def setup(self):
                 PrivateKeyDetector(),
             ),
             exclude_files_regex='foo',
+            word_list_file='will_be_mocked.txt',
+            word_list_hash='5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8',
         )

     def test_output(self, mock_gmtime):
         assert (
             self.logic.format_for_baseline_output()
-
-            == self.get_point_twelve_and_later_baseline_dict(mock_gmtime)
+            == self.get_point_twelve_point_seven_and_later_baseline_dict(mock_gmtime)
         )

     def test_load_baseline_from_string_with_pre_point_twelve_string(self, mock_gmtime):
@@ -304,12 +314,12 @@ def test_load_baseline_from_string_with_pre_point_twelve_string(self, mock_gmtim
         assert secrets['exclude']['lines'] is None
         assert old_original['results'] == secrets['results']

-    def test_load_baseline_from_string_with_point_twelve_and_later_string(self, mock_gmtime):
+    def test_load_baseline_from_string_with_point_twelve_to_twelve_six_string(self, mock_gmtime):
         """
         We use load_baseline_from_string as a proxy to testing load_baseline_from_dict,
         because it's the most entry into the private function.
         """
-        original = self.get_point_twelve_and_later_baseline_dict(mock_gmtime)
+        original = self.get_point_twelve_to_twelve_six_later_baseline_dict(mock_gmtime)

         secrets = SecretsCollection.load_baseline_from_string(
             json.dumps(original),
@@ -319,6 +329,43 @@ def test_load_baseline_from_string_with_point_twelve_and_later_string(self, mock
         assert secrets['exclude']['lines'] is None
         assert original['results'] == secrets['results']

+    def test_load_baseline_from_string_with_point_twelve_point_seven_and_later_string(
+        self,
+        mock_gmtime,
+    ):
+        """
+        We use load_baseline_from_string as a proxy to testing load_baseline_from_dict,
+        because it's the most entry into the private function.
+        """
+        original = self.get_point_twelve_point_seven_and_later_baseline_dict(mock_gmtime)
+
+        word_list = """
+            roller\n
+        """
+        with mock_open_base(
+            data=word_list,
+            namespace='detect_secrets.util.open',
+        ):
+            secrets = SecretsCollection.load_baseline_from_string(
+                json.dumps(original),
+            ).format_for_baseline_output()
+
+        # v0.12.7+ assertions
+        assert original['word_list']['file'] == secrets['word_list']['file']
+        # Original hash is thrown out and replaced with new word list hash
+        assert (
+            secrets['word_list']['hash']
+            ==
+            hashlib.sha1('roller'.encode('utf-8')).hexdigest()
+            !=
+            original['word_list']['hash']
+        )
+
+        # Regular assertions
+        assert original['exclude']['files'] == secrets['exclude']['files']
+        assert secrets['exclude']['lines'] is None
+        assert original['results'] == secrets['results']
+
     def test_load_baseline_without_any_valid_fields(self, mock_log):
         with pytest.raises(IOError):
             SecretsCollection.load_baseline_from_string(
@@ -338,7 +385,15 @@ def test_load_baseline_without_exclude(self, mock_log):
             )
         assert mock_log.error_messages == 'Incorrectly formatted baseline!\n'

-    def get_point_twelve_and_later_baseline_dict(self, gmtime):
+    def get_point_twelve_point_seven_and_later_baseline_dict(self, gmtime):
+        # In v0.12.7 --word-list got added
+        baseline = self.get_point_twelve_to_twelve_six_later_baseline_dict(gmtime)
+        baseline['word_list'] = {}
+        baseline['word_list']['file'] = 'will_be_mocked.txt'
+        baseline['word_list']['hash'] = '5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8'
+        return baseline
+
+    def get_point_twelve_to_twelve_six_later_baseline_dict(self, gmtime):
         # In v0.12.0 `exclude_regex` got replaced by `exclude`
         baseline = self._get_baseline_dict(gmtime)
         baseline['exclude'] = {}
diff --git a/tests/main_test.py b/tests/main_test.py
index ec69d6252..647ab8828 100644
--- a/tests/main_test.py
+++ b/tests/main_test.py
@@ -31,6 +31,8 @@ def test_scan_basic(self, mock_baseline_initialize):
             exclude_lines_regex=None,
             path='.',
             should_scan_all_files=False,
+            word_list_file=None,
+            word_list_hash=None,
         )

     def test_scan_with_rootdir(self, mock_baseline_initialize):
@@ -43,6 +45,8 @@ def test_scan_with_rootdir(self, mock_baseline_initialize):
             exclude_lines_regex=None,
             path=['test_data'],
             should_scan_all_files=False,
+            word_list_file=None,
+            word_list_hash=None,
         )

     def test_scan_with_exclude_args(self, mock_baseline_initialize):
@@ -57,6 +61,8 @@ def test_scan_with_exclude_args(self, mock_baseline_initialize):
             exclude_lines_regex='other_patt',
             path='.',
             should_scan_all_files=False,
+            word_list_file=None,
+            word_list_hash=None,
         )

     @pytest.mark.parametrize(
@@ -139,6 +145,8 @@ def test_scan_with_all_files_flag(self, mock_baseline_initialize):
             exclude_lines_regex=None,
             path='.',
             should_scan_all_files=True,
+            word_list_file=None,
+            word_list_hash=None,
         )

     def test_reads_from_stdin(self, mock_merge_baseline):
@@ -213,7 +221,7 @@ def test_old_baseline_ignored_with_update_flag(
     @pytest.mark.parametrize(
         'plugins_used, plugins_overwriten, plugins_wrote',
         [
-            (  # remove some plugins from baseline
+            (  # Remove some plugins from baseline
                 [
                     {
                         'base64_limit': 4.5,
@@ -230,7 +238,7 @@ def test_old_baseline_ignored_with_update_flag(
                    },
                ],
            ),
-            (  # all plugins
+            (  # All plugins
                 [
                     {
                         'base64_limit': 1.5,
@@ -276,7 +284,7 @@ def test_old_baseline_ignored_with_update_flag(
                    },
                ],
            ),
-            (  # remove some plugins from all plugins
+            (  # Remove some plugins from all plugins
                 [
                     {
                         'base64_limit': 4.5,
@@ -316,7 +324,7 @@ def test_old_baseline_ignored_with_update_flag(
                    },
                ],
            ),
-            (  # use same plugin list from baseline
+            (  # Use same plugin list from baseline
                 [
                     {
                         'base64_limit': 3.5,
@@ -337,7 +345,7 @@ def test_old_baseline_ignored_with_update_flag(
                    },
                ],
            ),
-            (  # overwrite base limit from CLI
+            (  # Overwrite base limit from CLI
                 [
                     {
                         'base64_limit': 3.5,
@@ -357,7 +365,7 @@ def test_old_baseline_ignored_with_update_flag(
                    },
                ],
            ),
-            (  # does not overwrite base limit from CLI if baseline not using the plugin
+            (  # Does not overwrite base limit from CLI if baseline not using the plugin
                 [
                     {
                         'name': 'PrivateKeyDetector',
@@ -370,7 +378,7 @@ def test_old_baseline_ignored_with_update_flag(
                    },
                ],
            ),
-            (  # use overwriten option from CLI only when using --use-all-plugins
+            (  # Use overwritten option from CLI only when using --use-all-plugins
                 [
                     {
                         'base64_limit': 3.5,
@@ -412,7 +420,7 @@ def test_old_baseline_ignored_with_update_flag(
                    },
                ],
            ),
-            (  # use plugin limit from baseline when using --use-all-plugins and no input limit
+            (  # Use plugin limit from baseline when using --use-all-plugins and no input limit
                 [
                     {
                         'base64_limit': 2.5,
@@ -484,8 +492,11 @@ def test_plugin_from_old_baseline_respected_with_update_flag(
             ),
         ) == 0

-        assert file_writer.call_args[1]['data']['plugins_used'] == \
+        assert (
+            file_writer.call_args[1]['data']['plugins_used']
+            ==
             plugins_wrote
+        )

     @pytest.mark.parametrize(
         'filename, expected_output',
         [
@@ -575,9 +586,14 @@ def test_audit_short_file(self, filename, expected_output):
                        'name': 'KeywordDetector',
                    },
                    'results': {
-                        'negative': [],
-                        'positive': [],
-                        'unknown': ['nothighenoughentropy'],
+                        'false-positives': {},
+                        'true-positives': {},
+                        'unknowns': {
+                            'test_data/short_files/first_line.php': [{
+                                'line': "secret = 'notHighEnoughEntropy'",
+                                'plaintext': 'nothighenoughentropy',
+                            }],
+                        },
                    },
                },
            },
@@ -600,7 +616,7 @@ def test_audit_display_results(self, filename, expected_output):
         ) as printer_shim:
             main(['audit', '--display-results', 'MOCKED'])

-        assert json.loads(uncolor(printer_shim.message))['results'] == expected_output
+        assert json.loads(uncolor(printer_shim.message))['plugins'] == expected_output

     def test_audit_diff_not_enough_files(self):
         assert main('audit --diff fileA'.split()) == 1
diff --git a/tests/plugins/aws_key_test.py b/tests/plugins/aws_key_test.py
index 1f0e3eea6..10a481f0d 100644
--- a/tests/plugins/aws_key_test.py
+++ b/tests/plugins/aws_key_test.py
@@ -8,7 +8,7 @@
 from detect_secrets.core.constants import VerifiedResult
 from detect_secrets.plugins.aws import AWSKeyDetector
-from detect_secrets.plugins.aws import get_secret_access_key
+from detect_secrets.plugins.aws import get_secret_access_keys
 from testing.mocks import mock_file_object
@@ -119,7 +119,7 @@ def counter(*args, **kwargs):
            [EXAMPLE_SECRET],
        ),

-        # multiple candidates
+        # Multiple candidates
        (
            textwrap.dedent("""
                base64_keyA = '{}'
@@ -142,4 +142,4 @@ def counter(*args, **kwargs):
    ),
)
 def test_get_secret_access_key(content, expected_output):
-    assert get_secret_access_key(content) == expected_output
+    assert get_secret_access_keys(content) == expected_output
diff --git a/tests/plugins/common/filters_test.py b/tests/plugins/common/filters_test.py
index 007d4d089..77e8c93a6 100644
--- a/tests/plugins/common/filters_test.py
+++ b/tests/plugins/common/filters_test.py
@@ -6,12 +6,10 @@


 class TestIsSequentialString:
-    # TODO: More tests should be had.
-
     @pytest.mark.parametrize(
         'secret',
         (
-            # ascii sequence
+            # ASCII sequence
             'ABCDEF',
             'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
@@ -27,13 +25,13 @@ class TestIsSequentialString:
             '0123456789abcdef',
             'abcdef0123456789',

-            # base64 sequences
+            # Base64 sequences
             'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/',
             '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/',
         ),
     )
     def test_success(self, secret):
-        assert filters.is_sequential_string(secret)
+        assert filters._is_sequential_string(secret)

     @pytest.mark.parametrize(
         'secret',
@@ -42,4 +40,4 @@ def test_success(self, secret):
         ),
     )
     def test_failure(self, secret):
-        assert not filters.is_sequential_string(secret)
+        assert not filters._is_sequential_string(secret)
diff --git a/tests/plugins/common/initialize_test.py b/tests/plugins/common/initialize_test.py
index 7b5161b06..ffdf80bfd 100644
--- a/tests/plugins/common/initialize_test.py
+++ b/tests/plugins/common/initialize_test.py
@@ -55,7 +55,7 @@ def setup(self):
     def test_success(self):
         plugin = initialize.from_secret_type(
             'Base64 High Entropy String',
-            self.settings,
+            settings=self.settings,
         )

         assert isinstance(plugin, Base64HighEntropyString)
@@ -64,5 +64,11 @@ def test_success(self):
     def test_failure(self):
         assert not initialize.from_secret_type(
             'some random secret_type',
-            self.settings,
+            settings=self.settings,
+        )
+
+    def test_secret_type_not_in_settings(self):
+        assert not initialize.from_secret_type(
+            'Base64 High Entropy String',
+            settings=[],
         )
diff --git a/tests/plugins/high_entropy_strings_test.py b/tests/plugins/high_entropy_strings_test.py
index 97dffc4ea..026798588 100644
--- a/tests/plugins/high_entropy_strings_test.py
+++ b/tests/plugins/high_entropy_strings_test.py
@@ -148,7 +148,7 @@ def setup(self):
                 base64_limit=4.5,
                 exclude_lines_regex='CanonicalUser',
             ),
-            non_secret_string='c3VwZXIgc2VjcmV0IHZhbHVl',  # too short for high entropy
+            non_secret_string='c3VwZXIgc2VjcmV0IHZhbHVl',  # Too short for high entropy
             secret_string='c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5',
         )

@@ -202,7 +202,7 @@ def test_ini_file(self, filename, secrets):

         assert count == len(secrets)

-    def test_yaml_file(self):
+    def test_yaml_files(self):
         plugin = Base64HighEntropyString(
             base64_limit=3,
             exclude_lines_regex='CanonicalUser',
@@ -220,6 +220,10 @@ def test_yaml_file(self):
                 'Location: test_data/config.yaml:15',
             )

+        with open('test_data/only_comments.yaml') as f:
+            secrets = plugin.analyze(f, 'test_data/only_comments.yaml')
+        assert not secrets.values()
+
     def test_env_file(self):
         plugin = Base64HighEntropyString(4.5)
         with open('test_data/config.env') as f:
@@ -293,3 +297,5 @@ def test_discounts_when_all_numbers(self):
         # This makes sure it only occurs with numbers.
         assert self.logic.calculate_shannon_entropy('12345a') == \
             original_scanner.calculate_shannon_entropy('12345a')
+        assert self.logic.calculate_shannon_entropy('0') == \
+            original_scanner.calculate_shannon_entropy('0')
diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py
index 3b7eb4bf0..9b8866ba2 100644
--- a/tests/plugins/keyword_test.py
+++ b/tests/plugins/keyword_test.py
@@ -1,10 +1,12 @@
 from __future__ import absolute_import
 from __future__ import unicode_literals

+import ahocorasick
 import pytest

 from detect_secrets.core.potential_secret import PotentialSecret
 from detect_secrets.plugins.keyword import KeywordDetector
+from detect_secrets.util import is_python_2
 from testing.mocks import mock_file_object
@@ -193,6 +195,28 @@ def test_analyze_standard_positives(self, file_content):
             == PotentialSecret.hash_secret('m{{h}o)p${e]nob(ody[finds>-_$#thisone}}')
         )

+    @pytest.mark.parametrize(
+        'file_content',
+        STANDARD_POSITIVES,
+    )
+    def test_analyze_standard_positives_with_automaton(self, file_content):
+        automaton = ahocorasick.Automaton()
+
+        word = 'thisone'
+        if is_python_2():  # pragma: no cover
+            # Due to pyahocorasick
+            word = word.encode('utf-8')
+        automaton.add_word(word, word)
+
+        automaton.make_automaton()
+
+        logic = KeywordDetector(automaton=automaton)
+
+        f = mock_file_object(file_content)
+        output = logic.analyze(f, 'mock_filename')
+        # All skipped due to automaton
+        assert len(output) == 0
+
     @pytest.mark.parametrize(
         'file_content',
         STANDARD_POSITIVES,
diff --git a/tests/pre_commit_hook_test.py b/tests/pre_commit_hook_test.py
index fb04c82e8..0aef962be 100644
--- a/tests/pre_commit_hook_test.py
+++ b/tests/pre_commit_hook_test.py
@@ -47,13 +47,18 @@ def test_file_with_secrets(self, mock_log):
         assert message_by_lines[3] == \
             'Location: test_data/files/file_with_secrets.py:3'

+    def test_file_with_secrets_with_word_list(self):
+        assert_commit_succeeds(
+            'test_data/files/file_with_secrets.py --word-list test_data/word_list.txt',
+        )
+
     def test_file_no_secrets(self):
         assert_commit_succeeds('test_data/files/file_with_no_secrets.py')

     @pytest.mark.parametrize(
         'has_result, use_private_key_scan, hook_command, commit_succeeds',
         [
-            # basic case
+            # Basic case
             (True, True, '--baseline will_be_mocked test_data/files/file_with_secrets.py', True),
             # test_no_overwrite_pass_when_baseline_did_not_use_scanner
             (True, False, '--baseline will_be_mocked test_data/files/private_key', True),
@@ -171,6 +176,10 @@ def test_that_baseline_gets_updated(
         assert original_baseline['exclude_regex'] == baseline_written['exclude']['files']
         assert original_baseline['results'] == baseline_written['results']

+        assert 'word_list' not in original_baseline
+        assert baseline_written['word_list']['file'] is None
+        assert baseline_written['word_list']['hash'] is None
+
         # See that we updated the plugins and version
         assert current_version == baseline_written['version']
         assert baseline_written['plugins_used'] == [
diff --git a/tests/util_test.py b/tests/util_test.py
index bd03cdd68..5b80c9dba 100644
--- a/tests/util_test.py
+++ b/tests/util_test.py
@@ -1,9 +1,12 @@
+import hashlib
 import subprocess

 import mock
 import pytest

 from detect_secrets import util
+from detect_secrets.plugins.common import filters
+from testing.mocks import mock_open

 GIT_REPO_SHA = b'cbb33d8c545ccf5c55fdcc7d5b0218078598e677'
 GIT_REMOTES_VERBOSE_ONE_URL = (
@@ -18,6 +21,26 @@
 )


+def test_build_automaton():
+    word_list = """
+        foam\n
+    """
+    with mock_open(
+        data=word_list,
+        namespace='detect_secrets.util.open',
+    ):
+        automaton, word_list_hash = util.build_automaton(word_list='will_be_mocked.txt')
+    assert word_list_hash == hashlib.sha1('foam'.encode('utf-8')).hexdigest()
+    assert filters._is_found_with_aho_corasick(
+        secret='foam_roller',
+        automaton=automaton,
+    )
+    assert not filters._is_found_with_aho_corasick(
+        secret='no_words_in_word_list',
+        automaton=automaton,
+    )
+
+
 def test_get_git_sha():
     with mock.patch.object(
         subprocess,
@@ -28,6 +51,19 @@ def test_get_git_sha():
         assert util.get_git_sha('.') == GIT_REPO_SHA.decode('utf-8')


+def test_get_relative_path_if_in_cwd():
+    with mock.patch(
+        'detect_secrets.util.os.path.isfile',
+        return_value=False,
+    ):
+        assert (
+            util.get_relative_path_if_in_cwd(
+                'test_data',
+                'config.env',
+            ) is None
+        )
+
+
 @pytest.mark.parametrize(
     'git_remotes_result, expected_urls',
     [
diff --git a/tox.ini b/tox.ini
index c73719cd8..ab6468436 100644
--- a/tox.ini
+++ b/tox.ini
@@ -11,9 +11,11 @@ deps = -rrequirements-dev.txt
 whitelist_externals = coverage
 commands =
     coverage erase
-    coverage run -m pytest tests
+    coverage run -m pytest tests -v
    coverage report --show-missing --include=tests/* --fail-under 100
-    coverage report --show-missing --include=detect_secrets/* --fail-under 98
+    # This is so that we do not regress unintentionally
+    coverage report --show-missing --include=detect_secrets/* --omit=detect_secrets/core/audit.py,detect_secrets/core/secrets_collection.py,detect_secrets/main.py,detect_secrets/plugins/common/ini_file_parser.py --fail-under 100
+    coverage report --show-missing --include=detect_secrets/core/audit.py,detect_secrets/core/secrets_collection.py,detect_secrets/main.py,detect_secrets/plugins/common/ini_file_parser.py --fail-under 96
    pre-commit run --all-files --show-diff-on-failure

[testenv:venv]
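
For reference, the word-list flow exercised by the new tests above can also be driven directly from Python. The following is a minimal sketch based only on the test code in this diff; 'my_word_list.txt' is an illustrative path, not a file shipped with the repository:

    from detect_secrets import util
    from detect_secrets.plugins.keyword import KeywordDetector

    # build_automaton reads a newline-delimited word list and returns an
    # Aho-Corasick automaton plus a SHA-1 hash derived from the listed words;
    # that hash is what gets recorded in the baseline's new `word_list` section.
    automaton, word_list_hash = util.build_automaton(word_list='my_word_list.txt')

    # Plugins accept the automaton directly; candidate secrets containing a
    # word from the list are skipped as false positives (see the keyword and
    # pre-commit hook tests above).
    detector = KeywordDetector(automaton=automaton)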