From 488334f603a102b2284707629dfb37ec37045e39 Mon Sep 17 00:00:00 2001 From: Victor Zhou Date: Fri, 11 Oct 2019 11:47:50 -0700 Subject: [PATCH] Refactor secret filtering to be an instance method --- detect_secrets/plugins/base.py | 31 ++++++++++++++- detect_secrets/plugins/common/filters.py | 38 +++++++------------ .../plugins/high_entropy_strings.py | 18 +++++---- detect_secrets/plugins/keyword.py | 11 +++++- 4 files changed, 62 insertions(+), 36 deletions(-) diff --git a/detect_secrets/plugins/base.py b/detect_secrets/plugins/base.py index 8b75c68bb..814c07b1e 100644 --- a/detect_secrets/plugins/base.py +++ b/detect_secrets/plugins/base.py @@ -53,12 +53,22 @@ class BasePlugin(object): def secret_type(self): raise NotImplementedError - def __init__(self, exclude_lines_regex=None, should_verify=False, **kwargs): + def __init__( + self, + exclude_lines_regex=None, + should_verify=False, + false_positive_heuristics=None, + **kwargs + ): """ :type exclude_lines_regex: str|None :param exclude_lines_regex: optional regex for ignored lines. :type should_verify: bool + + :type false_positive_heuristics: List[Callable]|None + :param false_positive_heuristics: List of fp-heuristic functions + applicable to this plugin """ self.exclude_lines_regex = None if exclude_lines_regex: @@ -66,6 +76,12 @@ def __init__(self, exclude_lines_regex=None, should_verify=False, **kwargs): self.should_verify = should_verify + self.false_positive_heuristics = ( + false_positive_heuristics + if false_positive_heuristics + else [] + ) + @classproperty def disable_flag_text(cls): name = cls.__name__ @@ -232,6 +248,19 @@ def verify(self, token, content=''): """ return VerifiedResult.UNVERIFIED + def is_secret_false_positive(self, token): + """ + Checks if the input secret is a false-positive according to + this plugin's heuristics.
+ + :type token: str + :param token: secret found by current plugin + """ + return any( + func(token) + for func in self.false_positive_heuristics + ) if self.false_positive_heuristics else False + @property def __dict__(self): return { diff --git a/detect_secrets/plugins/common/filters.py b/detect_secrets/plugins/common/filters.py index 15fd72d2d..e95e4a9b5 100644 --- a/detect_secrets/plugins/common/filters.py +++ b/detect_secrets/plugins/common/filters.py @@ -34,6 +34,19 @@ def is_found_with_aho_corasick(secret, automaton): return False +def get_aho_corasick_helper(automaton): + """ + Returns a function which determines if a word matches the + input automaton. + + :type automaton: ahocorasick.Automaton + """ + def fn(secret): + return is_found_with_aho_corasick(secret, automaton) + + return fn + + def is_sequential_string(secret, *args): """ :type secret: str @@ -103,12 +116,6 @@ def is_potential_uuid(secret, *args): return bool(_UUID_REGEX.search(secret)) -DEFAULT_FALSE_POSITIVE_HEURISTICS = [ - is_found_with_aho_corasick, - is_sequential_string, -] - - # NOTE: this doesn't handle multiple key-values on a line properly. # NOTE: words that end in "id" will be treated as ids _ID_DETECTOR_REGEX = re.compile(r'id[^a-z0-9]', re.IGNORECASE) @@ -136,25 +143,6 @@ def is_likely_id_string(secret, line): ] -def is_false_positive(secret, automaton, functions=DEFAULT_FALSE_POSITIVE_HEURISTICS): - """ - :type secret: str - - :type automaton: ahocorasick.Automaton|None - :param automaton: optional automaton for ignoring certain words. - - :type functions: Iterable[Callable] - :param functions: list of heuristics to use - - :rtype: bool - Returns True if any false positive heuristic function returns True. 
- """ - return any( - func(secret, automaton) - for func in functions - ) - - def is_false_positive_with_line_context( secret, line, diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py index 57345db12..e9f13416c 100644 --- a/detect_secrets/plugins/high_entropy_strings.py +++ b/detect_secrets/plugins/high_entropy_strings.py @@ -18,10 +18,10 @@ from .base import classproperty from .common.filetype import determine_file_type from .common.filetype import FileType -from .common.filters import is_false_positive +from .common.filters import get_aho_corasick_helper from .common.filters import is_false_positive_with_line_context from .common.filters import is_potential_uuid -from .common.filters import DEFAULT_FALSE_POSITIVE_HEURISTICS +from .common.filters import is_sequential_string from .common.ini_file_parser import IniFileParser from .common.yaml_file_parser import YamlFileParser from detect_secrets.core.potential_secret import PotentialSecret @@ -40,11 +40,17 @@ def __init__(self, charset, limit, exclude_lines_regex, automaton, *args): self.charset = charset self.entropy_limit = limit - self.automaton = automaton self.regex = re.compile(r'([\'"])([%s]+)(\1)' % charset) + false_positive_heuristics = [ + get_aho_corasick_helper(automaton), + is_sequential_string, + is_potential_uuid, + ] + super(HighEntropyStringsPlugin, self).__init__( exclude_lines_regex=exclude_lines_regex, + false_positive_heuristics=false_positive_heuristics, ) def analyze(self, file, filename): @@ -115,11 +121,7 @@ def analyze_string_content(self, string, line_num, filename): output = {} for result in self.secret_generator(string): - # py2+py3 compatible way of copying a list - functions = list(DEFAULT_FALSE_POSITIVE_HEURISTICS) - functions.append(is_potential_uuid) - - if is_false_positive(result, self.automaton, functions=functions): + if self.is_secret_false_positive(result): continue secret = PotentialSecret(self.secret_type, filename, 
result, line_num) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index a171d987d..db2716cca 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -32,7 +32,8 @@ from .base import classproperty from .common.filetype import determine_file_type from .common.filetype import FileType -from .common.filters import is_false_positive +from .common.filters import get_aho_corasick_helper +from .common.filters import is_sequential_string from detect_secrets.core.potential_secret import PotentialSecret @@ -273,8 +274,14 @@ def __dict__(self): return output def __init__(self, keyword_exclude=None, exclude_lines_regex=None, automaton=None, **kwargs): + false_positive_heuristics = [ + get_aho_corasick_helper(automaton), + is_sequential_string, + ] + super(KeywordDetector, self).__init__( exclude_lines_regex=exclude_lines_regex, + false_positive_heuristics=false_positive_heuristics, **kwargs ) @@ -298,7 +305,7 @@ def analyze_string_content(self, string, line_num, filename): string, filetype=determine_file_type(filename), ): - if is_false_positive(identifier, self.automaton): + if self.is_secret_false_positive(identifier): continue secret = PotentialSecret( self.secret_type,