Skip to content

Commit

Permalink
Refactor secret filtering to be an instance method
Browse files Browse the repository at this point in the history
  • Loading branch information
Victor Zhou committed Oct 11, 2019
1 parent ece342b commit 488334f
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 36 deletions.
31 changes: 30 additions & 1 deletion detect_secrets/plugins/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,35 @@ class BasePlugin(object):
def secret_type(self):
raise NotImplementedError

def __init__(self, exclude_lines_regex=None, should_verify=False, **kwargs):
def __init__(
    self,
    exclude_lines_regex=None,
    should_verify=False,
    false_positive_heuristics=None,
    **kwargs
):
    """
    Store the configuration shared by plugins.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: optional regex for ignored lines. When
        given, it is compiled here and the compiled pattern is stored;
        otherwise None is stored.
    :type should_verify: bool
    :param should_verify: stored unchanged on the instance.
    :type false_positive_heuristics: List[Callable]|None
    :param false_positive_heuristics: List of fp-heuristic functions
        applicable to this plugin. An empty list is stored when none
        (or an empty value) is supplied.
    """
    # Default first, so the attribute always exists.
    self.exclude_lines_regex = None
    if exclude_lines_regex:
        self.exclude_lines_regex = re.compile(exclude_lines_regex)

    self.should_verify = should_verify

    # A falsy value (None, empty list) collapses to a fresh empty list.
    self.false_positive_heuristics = false_positive_heuristics or []

@classproperty
def disable_flag_text(cls):
name = cls.__name__
Expand Down Expand Up @@ -232,6 +248,19 @@ def verify(self, token, content=''):
"""
return VerifiedResult.UNVERIFIED

def is_secret_false_positive(self, token):
    """
    Decide whether a detected secret should be discarded as a
    false-positive, using this plugin's heuristics.

    :type token: str
    :param token: secret found by current plugin
    :rtype: bool
    :returns: True as soon as one heuristic flags the token; False when
        none do, or when the plugin has no heuristics configured.
    """
    heuristics = self.false_positive_heuristics
    if not heuristics:
        return False

    # Heuristics run in order and stop at the first truthy verdict.
    for heuristic in heuristics:
        if heuristic(token):
            return True

    return False

@property
def __dict__(self):
return {
Expand Down
38 changes: 13 additions & 25 deletions detect_secrets/plugins/common/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ def is_found_with_aho_corasick(secret, automaton):
return False


def get_aho_corasick_helper(automaton):
    """
    Bind an automaton to is_found_with_aho_corasick, yielding a
    single-argument heuristic.

    :type automaton: ahocorasick.Automaton
    :returns: a function taking a secret and returning the result of
        is_found_with_aho_corasick(secret, automaton)
    """
    def matches_automaton(secret):
        # The automaton is captured from the enclosing call.
        return is_found_with_aho_corasick(secret, automaton)

    return matches_automaton


def is_sequential_string(secret, *args):
"""
:type secret: str
Expand Down Expand Up @@ -103,12 +116,6 @@ def is_potential_uuid(secret, *args):
return bool(_UUID_REGEX.search(secret))


# Heuristics that is_false_positive applies when the caller does not pass
# its own `functions`; each entry is invoked as func(secret, automaton).
DEFAULT_FALSE_POSITIVE_HEURISTICS = [
is_found_with_aho_corasick,
is_sequential_string,
]


# NOTE: this doesn't handle multiple key-values on a line properly.
# NOTE: words that end in "id" will be treated as ids
_ID_DETECTOR_REGEX = re.compile(r'id[^a-z0-9]', re.IGNORECASE)
Expand Down Expand Up @@ -136,25 +143,6 @@ def is_likely_id_string(secret, line):
]


def is_false_positive(secret, automaton, functions=DEFAULT_FALSE_POSITIVE_HEURISTICS):
    """
    Run a set of false-positive heuristics over a candidate secret.

    :type secret: str
    :type automaton: ahocorasick.Automaton|None
    :param automaton: optional automaton for ignoring certain words.
    :type functions: Iterable[Callable]
    :param functions: heuristics to try; each one is called as
        func(secret, automaton)
    :rtype: bool
    :returns: True as soon as one heuristic returns a truthy value,
        False if none of them do.
    """
    for heuristic in functions:
        if heuristic(secret, automaton):
            return True

    return False


def is_false_positive_with_line_context(
secret,
line,
Expand Down
18 changes: 10 additions & 8 deletions detect_secrets/plugins/high_entropy_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
from .base import classproperty
from .common.filetype import determine_file_type
from .common.filetype import FileType
from .common.filters import is_false_positive
from .common.filters import get_aho_corasick_helper
from .common.filters import is_false_positive_with_line_context
from .common.filters import is_potential_uuid
from .common.filters import DEFAULT_FALSE_POSITIVE_HEURISTICS
from .common.filters import is_sequential_string
from .common.ini_file_parser import IniFileParser
from .common.yaml_file_parser import YamlFileParser
from detect_secrets.core.potential_secret import PotentialSecret
Expand All @@ -40,11 +40,17 @@ def __init__(self, charset, limit, exclude_lines_regex, automaton, *args):

self.charset = charset
self.entropy_limit = limit
self.automaton = automaton
self.regex = re.compile(r'([\'"])([%s]+)(\1)' % charset)

false_positive_heuristics = [
get_aho_corasick_helper(automaton),
is_sequential_string,
is_potential_uuid,
]

super(HighEntropyStringsPlugin, self).__init__(
exclude_lines_regex=exclude_lines_regex,
false_positive_heuristics=false_positive_heuristics,
)

def analyze(self, file, filename):
Expand Down Expand Up @@ -115,11 +121,7 @@ def analyze_string_content(self, string, line_num, filename):
output = {}

for result in self.secret_generator(string):
# py2+py3 compatible way of copying a list
functions = list(DEFAULT_FALSE_POSITIVE_HEURISTICS)
functions.append(is_potential_uuid)

if is_false_positive(result, self.automaton, functions=functions):
if self.is_secret_false_positive(result):
continue

secret = PotentialSecret(self.secret_type, filename, result, line_num)
Expand Down
11 changes: 9 additions & 2 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
from .base import classproperty
from .common.filetype import determine_file_type
from .common.filetype import FileType
from .common.filters import is_false_positive
from .common.filters import get_aho_corasick_helper
from .common.filters import is_sequential_string
from detect_secrets.core.potential_secret import PotentialSecret


Expand Down Expand Up @@ -273,8 +274,14 @@ def __dict__(self):
return output

def __init__(self, keyword_exclude=None, exclude_lines_regex=None, automaton=None, **kwargs):
false_positive_heuristics = [
get_aho_corasick_helper(automaton),
is_sequential_string,
]

super(KeywordDetector, self).__init__(
exclude_lines_regex=exclude_lines_regex,
false_positive_heuristics=false_positive_heuristics,
**kwargs
)

Expand All @@ -298,7 +305,7 @@ def analyze_string_content(self, string, line_num, filename):
string,
filetype=determine_file_type(filename),
):
if is_false_positive(identifier, self.automaton):
if self.is_secret_false_positive(identifier):
continue
secret = PotentialSecret(
self.secret_type,
Expand Down

0 comments on commit 488334f

Please sign in to comment.