Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keyword detector optimization #396

Closed
wants to merge 10 commits into from
147 changes: 118 additions & 29 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,39 @@
# Keywords whose presence in an identifier suggests that the value assigned
# to it may be a secret.
#
# The entries are joined with `|`, without any escaping, to build
# DENYLIST_REGEX and DENYLIST_REGEX_WITH_PREFIX, so every entry must be safe
# to embed in a regular expression as-is.
# NOTE(review): Python's `re` tries alternatives left to right, so the order
# of the entries may influence which keyword is captured -- keep the existing
# order unless a reordering has been verified against the test suite.
DENYLIST = (
# "<something>key" identifiers, each with and without an underscore.
'apikey',
'api_key',
'appkey',
'app_key',
'authkey',
'auth_key',
'servicekey',
'service_key',
'applicationkey',
'application_key',
'accountkey',
'account_key',
'dbkey',
'db_key',
'databasekey',
'database_key',
'clientkey',
'client_key',
'aws_secret_access_key',
# Password-like identifiers.
'db_pass',
'password',
'passwd',
'pass',
'pwd',
# Private-key identifiers.
'private_key',
'privatekey',
'priv_key',
'privkey',
# Generic secret / token identifiers (including common misspellings).
'secret',
'secrete',
'secreto',
'keypass',
'token',
# Spanish for "password", with and without the tilde.
'contrasena',
'contraseña',
)
FALSE_POSITIVES = {
'""',
Expand All @@ -60,6 +86,8 @@
'"this',
'#pass',
'#password',
'passes',
'passing',
'$(shell',
"'\"",
"''",
Expand Down Expand Up @@ -144,17 +172,21 @@
}
# Includes ], ', " as closing
CLOSING = r'[]\'"]{0,2}'
DENYLIST_REGEX = r'|'.join(DENYLIST)
AFFIX_REGEX = r'[a-zA-Z0-9_-]*'
DENYLIST_REGEX = r'(' + r'|'.join(DENYLIST) + r')({suffix})?'.format(suffix=AFFIX_REGEX)
DENYLIST_REGEX_WITH_PREFIX = r'({prefix})('.format(prefix=AFFIX_REGEX) \
+ r'|'.join(DENYLIST) + r')({suffix})'.format(suffix=AFFIX_REGEX)
# Non-greedy match
OPTIONAL_WHITESPACE = r'\s*?'
OPTIONAL_NON_WHITESPACE = r'[^\s]{0,50}?'
QUOTE = r'[\'"]'
SECRET = r'[^\s]+'
SECRET = r'""|\'\'|[^\'"\s]+'
domanchi marked this conversation as resolved.
Show resolved Hide resolved
SECRET_WITH_QUOTES = r'""|\'\'|[^\'"\s]+[^\s]*'
SQUARE_BRACKETS = r'(\[\])'

FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX = re.compile(
# e.g. my_password := "bar" or my_password := bar
r'({denylist})({closing})?{whitespace}:=?{whitespace}({quote}?)({secret})(\3)'.format(
r'({denylist})({closing})?{whitespace}:=?{whitespace}({quote}?)({secret})'.format(
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
Expand All @@ -164,17 +196,16 @@
)
FOLLOWED_BY_COLON_REGEX = re.compile(
# e.g. api_key: foo
r'({denylist})({closing})?:{whitespace}({quote}?)({secret})(\3)'.format(
r'({denylist})({closing})?:{whitespace}({secret})'.format(
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
secret=SECRET_WITH_QUOTES,
),
)
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX = re.compile(
# e.g. api_key: "foo"
r'({denylist})({closing})?:({whitespace})({quote})({secret})(\4)'.format(
r'({denylist})({closing})?:({whitespace})({quote})({secret})({quote})'.format(
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
Expand All @@ -186,7 +217,7 @@
# e.g. my_password = "bar"
# e.g. my_password = @"bar"
# e.g. my_password[] = "bar";
r'({denylist})({square_brackets})?{optional_whitespace}={optional_whitespace}(@)?(")({secret})(\5)'.format( # noqa: E501
r'({denylist})({square_brackets})?{optional_whitespace}={optional_whitespace}(@)?(")({secret})(")'.format( # noqa: E501
denylist=DENYLIST_REGEX,
square_brackets=SQUARE_BRACKETS,
optional_whitespace=OPTIONAL_WHITESPACE,
Expand All @@ -195,59 +226,112 @@
)
FOLLOWED_BY_EQUAL_SIGNS_REGEX = re.compile(
# e.g. my_password = bar
r'({denylist})({closing})?{whitespace}={whitespace}({quote}?)({secret})(\3)'.format(
r'({denylist})({closing})?{whitespace}={whitespace}({secret})'.format(
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
secret=SECRET_WITH_QUOTES,
),
)
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX = re.compile(
# e.g. my_password = "bar"
r'({denylist})({closing})?{whitespace}={whitespace}({quote})({secret})(\3)'.format(
r'({denylist})({closing})?{whitespace}={whitespace}({quote})({secret})({quote})'.format(
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)
FOLLOWED_BY_COMPARATION_QUOTES_REQUIRED_REGEX = re.compile(
# e.g. my_password == "bar" or my_password != "bar" or my_password === "bar"
# or my_password !== "bar"
# e.g. my_password == 'bar' or my_password != 'bar' or my_password === 'bar'
# or my_password !== 'bar'
r'({denylist})({closing})?{whitespace}[!=]{{2,3}}{whitespace}({quote})({secret})({quote})'.format( # noqa: E501
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)
FOLLOWED_BY_REV_COMPARATION_QUOTES_REQUIRED_REGEX = re.compile(
# e.g. "bar" == my_password or "bar" != my_password or "bar" === my_password
# or "bar" !== my_password
# e.g. 'bar' == my_password or 'bar' != my_password or 'bar' === my_password
# or 'bar' !== my_password
r'({quote})({secret})({quote}){whitespace}[!=]{{2,3}}{whitespace}({denylist})'.format(
denylist=DENYLIST_REGEX_WITH_PREFIX,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX = re.compile(
# e.g. private_key "something";
r'({denylist}){nonWhitespace}{whitespace}({quote})({secret})(\2);'.format(
r'({denylist}){nonWhitespace}{whitespace}({quote})({secret})({quote});'.format(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this was written this way to make sure that we don't match a situation like the following, where the opening and closing quotes differ:

private_key "something'

The same goes for your other regex modifications.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This regex was updated to ensure that the closing quote is not included as part of the secret in the audit code snippet. The original SECRET regex allows the ' and " characters, so the closing quote would be included in the secret value. I have modified the SECRET regex to match only empty values ("" or '', as commented below) or any value that contains no ', ", or whitespace characters. With these changes, I have added the closing quote as a new item in this regex to make it more specific.

denylist=DENYLIST_REGEX,
nonWhitespace=OPTIONAL_NON_WHITESPACE,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)

DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_REGEX: 4,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 7,
FOLLOWED_BY_COLON_REGEX: 5,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 6,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 5,
}
GOLANG_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX: 6,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 6,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
FOLLOWED_BY_COMPARATION_QUOTES_REQUIRED_REGEX: 6,
FOLLOWED_BY_REV_COMPARATION_QUOTES_REQUIRED_REGEX: 2,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 5,
}
OBJECTIVE_C_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_SIGNS_OPTIONAL_BRACKETS_OPTIONAL_AT_SIGN_QUOTES_REQUIRED_REGEX: 6,
FOLLOWED_BY_EQUAL_SIGNS_OPTIONAL_BRACKETS_OPTIONAL_AT_SIGN_QUOTES_REQUIRED_REGEX: 8,
FOLLOWED_BY_COMPARATION_QUOTES_REQUIRED_REGEX: 6,
FOLLOWED_BY_REV_COMPARATION_QUOTES_REQUIRED_REGEX: 2,
}
YML_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 7,
FOLLOWED_BY_COLON_REGEX: 5,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 6,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 5,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
}
PROPERTIES_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 6,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 5,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
}
QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 5,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 6,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 5,
FOLLOWED_BY_COMPARATION_QUOTES_REQUIRED_REGEX: 6,
FOLLOWED_BY_REV_COMPARATION_QUOTES_REQUIRED_REGEX: 2,
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 7,
}

# File types grouped under the "quotes required" label.
# NOTE(review): presumably analyze_line selects
# QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP for these file types, so that only
# quoted values are reported as secrets; the code that consults this set is
# not visible in this view -- confirm against analyze_line.
QUOTES_REQUIRED_FILETYPES = {
FileType.CLS,
FileType.JAVA,
FileType.JAVASCRIPT,
FileType.PHP,
FileType.PYTHON,
FileType.SWIFT,
FileType.TERRAFORM,
FileType.C,
FileType.CPP,
FileType.CSHARP,
FileType.BASH,
FileType.POWERSHELL,
}


Expand Down Expand Up @@ -309,15 +393,20 @@ def analyze_line(
denylist_regex_to_group = GOLANG_DENYLIST_REGEX_TO_GROUP
elif filetype == FileType.OBJECTIVE_C:
denylist_regex_to_group = OBJECTIVE_C_DENYLIST_REGEX_TO_GROUP
elif filetype == FileType.YAML or filetype == FileType.INI:
denylist_regex_to_group = YML_DENYLIST_REGEX_TO_GROUP
elif filetype == FileType.PROPERTIES or filetype == FileType.INI:
denylist_regex_to_group = PROPERTIES_DENYLIST_REGEX_TO_GROUP
else:
denylist_regex_to_group = DENYLIST_REGEX_TO_GROUP

return super().analyze_line(
filename=filename,
line=line,
line_number=line_number,
denylist_regex_to_group=denylist_regex_to_group,
)
if filetype != FileType.XML:
return super().analyze_line(
filename=filename,
line=line,
line_number=line_number,
denylist_regex_to_group=denylist_regex_to_group,
)

def json(self) -> Dict[str, Any]:
return {
Expand Down
Loading