Skip to content

Commit

Permalink
↪️ Merge pull request #229 from Yelp/cleanup_keyword_detector
Browse files Browse the repository at this point in the history
[Keyword Plugin] Various accuracy improvements
  • Loading branch information
KevinHock committed Aug 24, 2019
2 parents cba0446 + 5de43e2 commit e1ad354
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 128 deletions.
61 changes: 35 additions & 26 deletions detect_secrets/plugins/common/filetype.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,38 @@
import os
from enum import Enum


class FileType(Enum):
CLS = 0
GO = 1
JAVA = 2
JAVASCRIPT = 3
PHP = 4
PYTHON = 5
YAML = 6
OTHER = 7
EXAMPLE = 1
GO = 2
JAVA = 3
JAVASCRIPT = 4
PHP = 5
OBJECTIVE_C = 6
PYTHON = 7
SWIFT = 8
TERRAFORM = 9
YAML = 10
OTHER = 11


# Lookup table from a file extension (leading dot included) to its FileType.
# determine_file_type() looks up the extension returned by os.path.splitext()
# here and falls back to FileType.OTHER when the extension is not listed.
# Keys are matched exactly, so the lookup is case-sensitive.
# Several extensions may share one type (e.g. .eyaml/.yaml/.yml, .py/.pyi).
EXTENSION_TO_FILETYPE = {
'.cls': FileType.CLS,
'.example': FileType.EXAMPLE,
'.eyaml': FileType.YAML,
'.go': FileType.GO,
'.java': FileType.JAVA,
'.js': FileType.JAVASCRIPT,
'.m': FileType.OBJECTIVE_C,
'.php': FileType.PHP,
'.py': FileType.PYTHON,
'.pyi': FileType.PYTHON,
'.swift': FileType.SWIFT,
'.tf': FileType.TERRAFORM,
'.yaml': FileType.YAML,
'.yml': FileType.YAML,
}


def determine_file_type(filename):
Expand All @@ -18,22 +41,8 @@ def determine_file_type(filename):
:rtype: FileType
"""
if filename.endswith('.cls'):
return FileType.CLS
elif filename.endswith('.go'):
return FileType.GO
elif filename.endswith('.java'):
return FileType.JAVA
elif filename.endswith('.js'):
return FileType.JAVASCRIPT
elif filename.endswith('.php'):
return FileType.PHP
elif filename.endswith('.py'):
return FileType.PYTHON
elif (
filename.endswith(
('.eyaml', '.yaml', '.yml'),
)
):
return FileType.YAML
return FileType.OTHER
_, file_extension = os.path.splitext(filename)
return EXTENSION_TO_FILETYPE.get(
file_extension,
FileType.OTHER,
)
109 changes: 75 additions & 34 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,9 @@
"'this",
'(nsstring',
'-default}',
'/etc/passwd:ro',
'::',
'<%=',
'<?php',
'<a',
'<aws_secret_access_key>',
'<input',
Expand All @@ -80,53 +81,81 @@
"\\k.*'",
'`cat',
'`grep',
'`sudo',
'account_password',
'api_key',
'disable',
'dummy_secret',
'dummy_value',
'false',
'false):',
'false,',
'false;',
'login_password',
'none',
'none,',
'none}',
'not',
'not_real_key',
'null',
'null,',
'null.*"',
"null.*'",
'null;',
'pass',
'pass)',
'password',
'password)',
'password))',
'password,',
'password},',
'prompt',
'redacted',
'secret',
'some_key',
'str',
'str_to_sign',
'string',
'string)',
'string,',
'string;',
'string?',
'string?)',
'string}',
'string}}',
'test',
'test-access-key',
'thisisnottherealsecret',
'todo',
'true',
'true):',
'true,',
'true;',
'undef',
'undef,',
'{',
'{{',
}
QUOTE = r'[\'"]'
# includes ], ', " as closing
# Includes ], ', " as closing
CLOSING = r'[]\'"]{0,2}'
# non-greedy match
DENYLIST_REGEX = r'|'.join(DENYLIST)
# Non-greedy match
OPTIONAL_WHITESPACE = r'\s*?'
OPTIONAL_NON_WHITESPACE = r'[^\s]*?'
QUOTE = r'[\'"]'
SECRET = r'[^\s]+'
DENYLIST_REGEX = r'|'.join(DENYLIST)
SQUARE_BRACKETS = r'(\[\])'

# Matches a denylisted keyword followed by `:` or `:=` and then a value.
# The `=` is optional in the pattern (`:=?`), so a bare colon matches too.
# Capture groups: 1 = keyword, 2 = optional closing characters,
# 3 = optional opening quote, 4 = candidate secret,
# 5 = closing quote (backreference to group 3).
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX = re.compile(
# e.g. my_password := "bar" or my_password := bar
r'({denylist})({closing})?{whitespace}:=?{whitespace}({quote}?)({secret})(\3)'.format(
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)
FOLLOWED_BY_COLON_REGEX = re.compile(
# e.g. api_key: foo
r'({denylist})({closing})?:{whitespace}({quote}?)({secret})(\3)'.format(
Expand All @@ -147,6 +176,17 @@
secret=SECRET,
),
)
# Matches a denylisted keyword, an optional `[]`, `=`, an optional `@`, and a
# value that must be wrapped in double quotes (single quotes do not match).
# Capture groups: 1 = keyword, 2 = optional `[]` wrapper group,
# 3 = the group contributed by SQUARE_BRACKETS itself, 4 = optional `@`,
# 5 = opening double quote, 6 = candidate secret,
# 7 = closing quote (backreference to group 5).
# Because SQUARE_BRACKETS carries its own parentheses, the secret lands in
# group 6; any lookup table that uses this regex must point at that index.
FOLLOWED_BY_EQUAL_SIGNS_OPTIONAL_BRACKETS_OPTIONAL_AT_SIGN_QUOTES_REQUIRED_REGEX = re.compile(
# e.g. my_password = "bar"
# e.g. my_password = @"bar"
# e.g. my_password[] = "bar";
r'({denylist})({square_brackets})?{optional_whitespace}={optional_whitespace}(@)?(")({secret})(\5)'.format( # noqa: E501
denylist=DENYLIST_REGEX,
square_brackets=SQUARE_BRACKETS,
optional_whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)
FOLLOWED_BY_EQUAL_SIGNS_REGEX = re.compile(
# e.g. my_password = bar
r'({denylist})({closing})?{whitespace}={whitespace}({quote}?)({secret})(\3)'.format(
Expand Down Expand Up @@ -178,35 +218,31 @@
secret=SECRET,
),
)
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX = re.compile(
# e.g. my_password := "bar" or my_password := bar
r'({denylist})({closing})?{whitespace}:=?{whitespace}({quote}?)({secret})(\3)'.format(
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)
DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_REGEX: 4,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
}
GOLANG_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
}
# Regex -> capture-group index table that secret_generator() selects when the
# file type is FileType.OBJECTIVE_C. Index 6 is the `({secret})` group of the
# pattern: the keyword, the two bracket groups, `@` and the opening quote
# occupy groups 1-5.
OBJECTIVE_C_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_SIGNS_OPTIONAL_BRACKETS_OPTIONAL_AT_SIGN_QUOTES_REQUIRED_REGEX: 6,
}
QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 5,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
}
GOLANG_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX: 4,
}
# File types that get quote-aware treatment.
# probably_false_positive() consults this set: a candidate starting with `$`
# is treated as a false positive only when the file type is NOT in this set.
# NOTE(review): presumably secret_generator() also uses membership here to
# pick QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP -- confirm against the full file.
QUOTES_REQUIRED_FILETYPES = {
FileType.CLS,
FileType.JAVA,
FileType.JAVASCRIPT,
FileType.PYTHON,
FileType.SWIFT,
FileType.TERRAFORM,
}


Expand Down Expand Up @@ -257,6 +293,8 @@ def secret_generator(self, string, filetype):
denylist_regex_to_group = QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP
elif filetype == FileType.GO:
denylist_regex_to_group = GOLANG_DENYLIST_REGEX_TO_GROUP
elif filetype == FileType.OBJECTIVE_C:
denylist_regex_to_group = OBJECTIVE_C_DENYLIST_REGEX_TO_GROUP
else:
denylist_regex_to_group = DENYLIST_REGEX_TO_GROUP

Expand All @@ -275,24 +313,27 @@ def secret_generator(self, string, filetype):

def probably_false_positive(lowered_secret, filetype):
if (
'fake' in lowered_secret
or 'forgot' in lowered_secret
or lowered_secret in FALSE_POSITIVES
or (
filetype == FileType.JAVASCRIPT
and (
lowered_secret.startswith('this.')
or lowered_secret.startswith('fs.read')
or lowered_secret.startswith('options.')
or lowered_secret == 'new'
any(
false_positive in lowered_secret
for false_positive in (
'/etc/',
'fake',
'forgot',
)
) or lowered_secret in FALSE_POSITIVES
# For e.g. private_key "some/dir/that/is/not/a/secret";
or lowered_secret.count('/') >= 3
# For e.g. "secret": "{secret}"
or (
lowered_secret[0] == '{'
and lowered_secret[-1] == '}'
) or (
filetype == FileType.PHP
filetype not in QUOTES_REQUIRED_FILETYPES
and lowered_secret[0] == '$'
) or (
filetype == FileType.YAML
and lowered_secret.startswith('{{')
and lowered_secret.endswith('}}')
filetype == FileType.EXAMPLE
and lowered_secret[0] == '<'
and lowered_secret[-1] == '>'
)
):
return True
Expand Down
Loading

0 comments on commit e1ad354

Please sign in to comment.