Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Keyword Plugin] Various accuracy improvements #229

Merged
merged 6 commits into from
Aug 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 35 additions & 26 deletions detect_secrets/plugins/common/filetype.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,38 @@
import os
from enum import Enum


class FileType(Enum):
CLS = 0
GO = 1
JAVA = 2
JAVASCRIPT = 3
PHP = 4
PYTHON = 5
YAML = 6
OTHER = 7
EXAMPLE = 1
GO = 2
JAVA = 3
JAVASCRIPT = 4
PHP = 5
OBJECTIVE_C = 6
PYTHON = 7
SWIFT = 8
TERRAFORM = 9
YAML = 10
OTHER = 11


EXTENSION_TO_FILETYPE = {
'.cls': FileType.CLS,
'.example': FileType.EXAMPLE,
'.eyaml': FileType.YAML,
'.go': FileType.GO,
'.java': FileType.JAVA,
'.js': FileType.JAVASCRIPT,
'.m': FileType.OBJECTIVE_C,
'.php': FileType.PHP,
'.py': FileType.PYTHON,
'.pyi': FileType.PYTHON,
'.swift': FileType.SWIFT,
'.tf': FileType.TERRAFORM,
'.yaml': FileType.YAML,
'.yml': FileType.YAML,
}


def determine_file_type(filename):
Expand All @@ -18,22 +41,8 @@ def determine_file_type(filename):

:rtype: FileType
"""
if filename.endswith('.cls'):
return FileType.CLS
elif filename.endswith('.go'):
return FileType.GO
elif filename.endswith('.java'):
return FileType.JAVA
elif filename.endswith('.js'):
return FileType.JAVASCRIPT
elif filename.endswith('.php'):
return FileType.PHP
elif filename.endswith('.py'):
return FileType.PYTHON
elif (
filename.endswith(
('.eyaml', '.yaml', '.yml'),
)
):
return FileType.YAML
return FileType.OTHER
_, file_extension = os.path.splitext(filename)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this handle my.weird.filename?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup

>>> os.path.splitext('my.weirdo.filename')
('my.weirdo', '.filename')

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right, and does this work with no file extension?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh it does, the get call in the next line has a default

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For posterity, it'll be '' and then get set to FileType.OTHER

(I had an internal πŸ€” about setdefault vs. the .get fallback but πŸ€·β€β™‚ I kinda liked the way it looked.)

return EXTENSION_TO_FILETYPE.get(
file_extension,
FileType.OTHER,
)
109 changes: 75 additions & 34 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,9 @@
"'this",
'(nsstring',
'-default}',
'/etc/passwd:ro',
'::',
'<%=',
'<?php',
'<a',
'<aws_secret_access_key>',
'<input',
Expand All @@ -80,53 +81,81 @@
"\\k.*'",
'`cat',
'`grep',
'`sudo',
'account_password',
'api_key',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this in both DENYLIST and FALSE_POSITIVES?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is for the case wherein silly code does magical_api_key: api_key, or something to that effect, normally in a test.

DENYLIST will catch the fact that it could be an API key from magical_api_key on the LHS, but then FALSE_POSITIVES will check the RHS and cause us not to alert on it.

'disable',
'dummy_secret',
'dummy_value',
'false',
'false):',
'false,',
'false;',
'login_password',
'none',
'none,',
'none}',
'not',
'not_real_key',
'null',
'null,',
'null.*"',
"null.*'",
'null;',
'pass',
'pass)',
'password',
'password)',
'password))',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to clarify, does this mean a word that contains password)) will be marked as a false-positive?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The short-answer is, only if it == password)), not contains.

The FALSE_POSITIVES set is for RHS values normally resulting in false-positives, you could make your real password password)), but it's more likely that e.g. I made some mistake in not requiring quotes for that particular filetype, or haven't made a custom regex for that language when I should, this set patches over certain things like that.

'password,',
'password},',
'prompt',
'redacted',
'secret',
'some_key',
'str',
'str_to_sign',
'string',
'string)',
'string,',
'string;',
'string?',
'string?)',
'string}',
'string}}',
'test',
'test-access-key',
'thisisnottherealsecret',
'todo',
'true',
'true):',
'true,',
'true;',
'undef',
'undef,',
'{',
'{{',
}
QUOTE = r'[\'"]'
# includes ], ', " as closing
# Includes ], ', " as closing
CLOSING = r'[]\'"]{0,2}'
# non-greedy match
DENYLIST_REGEX = r'|'.join(DENYLIST)
# Non-greedy match
OPTIONAL_WHITESPACE = r'\s*?'
OPTIONAL_NON_WHITESPACE = r'[^\s]*?'
QUOTE = r'[\'"]'
SECRET = r'[^\s]+'
DENYLIST_REGEX = r'|'.join(DENYLIST)
SQUARE_BRACKETS = r'(\[\])'

FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX = re.compile(
# e.g. my_password := "bar" or my_password := bar
r'({denylist})({closing})?{whitespace}:=?{whitespace}({quote}?)({secret})(\3)'.format(
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)
FOLLOWED_BY_COLON_REGEX = re.compile(
# e.g. api_key: foo
r'({denylist})({closing})?:{whitespace}({quote}?)({secret})(\3)'.format(
Expand All @@ -147,6 +176,17 @@
secret=SECRET,
),
)
FOLLOWED_BY_EQUAL_SIGNS_OPTIONAL_BRACKETS_OPTIONAL_AT_SIGN_QUOTES_REQUIRED_REGEX = re.compile(
# e.g. my_password = "bar"
# e.g. my_password = @"bar"
# e.g. my_password[] = "bar";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on below this is Objective-C. What does this actually do in Objective-C? (I couldn't Google it)

Copy link
Collaborator Author

@KevinHock KevinHock Aug 23, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ means it is a constant
[] means it is a C style array e.g. char myString[] = "This is a C character array";

Something I do not handle yet, but can ticket, are strings like the following

password = [str1 stringByAppendingFormat:@"World"];
NSMutableString *password = [NSMutableString stringWithString:@"This string is mutable"];

Prior to this PR, we'd capture e.g. [str1 and [NSMutableString, as secrets, which was 🀐

r'({denylist})({square_brackets})?{optional_whitespace}={optional_whitespace}(@)?(")({secret})(\5)'.format( # noqa: E501
denylist=DENYLIST_REGEX,
square_brackets=SQUARE_BRACKETS,
optional_whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)
FOLLOWED_BY_EQUAL_SIGNS_REGEX = re.compile(
# e.g. my_password = bar
r'({denylist})({closing})?{whitespace}={whitespace}({quote}?)({secret})(\3)'.format(
Expand Down Expand Up @@ -178,35 +218,31 @@
secret=SECRET,
),
)
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX = re.compile(
# e.g. my_password := "bar" or my_password := bar
r'({denylist})({closing})?{whitespace}:=?{whitespace}({quote}?)({secret})(\3)'.format(
denylist=DENYLIST_REGEX,
closing=CLOSING,
quote=QUOTE,
whitespace=OPTIONAL_WHITESPACE,
secret=SECRET,
),
)
DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_REGEX: 4,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
}
GOLANG_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
}
OBJECTIVE_C_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_SIGNS_OPTIONAL_BRACKETS_OPTIONAL_AT_SIGN_QUOTES_REQUIRED_REGEX: 6,
}
QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 5,
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
}
GOLANG_DENYLIST_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX: 4,
}
QUOTES_REQUIRED_FILETYPES = {
FileType.CLS,
FileType.JAVA,
FileType.JAVASCRIPT,
FileType.PYTHON,
FileType.SWIFT,
FileType.TERRAFORM,
}


Expand Down Expand Up @@ -257,6 +293,8 @@ def secret_generator(self, string, filetype):
denylist_regex_to_group = QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP
elif filetype == FileType.GO:
denylist_regex_to_group = GOLANG_DENYLIST_REGEX_TO_GROUP
elif filetype == FileType.OBJECTIVE_C:
denylist_regex_to_group = OBJECTIVE_C_DENYLIST_REGEX_TO_GROUP
else:
denylist_regex_to_group = DENYLIST_REGEX_TO_GROUP

Expand All @@ -275,24 +313,27 @@ def secret_generator(self, string, filetype):

def probably_false_positive(lowered_secret, filetype):
if (
'fake' in lowered_secret
or 'forgot' in lowered_secret
or lowered_secret in FALSE_POSITIVES
or (
filetype == FileType.JAVASCRIPT
and (
lowered_secret.startswith('this.')
or lowered_secret.startswith('fs.read')
or lowered_secret.startswith('options.')
or lowered_secret == 'new'
any(
false_positive in lowered_secret
for false_positive in (
'/etc/',
'fake',
'forgot',
)
) or lowered_secret in FALSE_POSITIVES
# For e.g. private_key "some/dir/that/is/not/a/secret";
or lowered_secret.count('/') >= 3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, added comment in 5de43e2

# For e.g. "secret": "{secret}"
or (
lowered_secret[0] == '{'
and lowered_secret[-1] == '}'
) or (
filetype == FileType.PHP
filetype not in QUOTES_REQUIRED_FILETYPES
and lowered_secret[0] == '$'
) or (
filetype == FileType.YAML
and lowered_secret.startswith('{{')
and lowered_secret.endswith('}}')
filetype == FileType.EXAMPLE
and lowered_secret[0] == '<'
and lowered_secret[-1] == '>'
)
):
return True
Expand Down
Loading