Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a KeywordDetector plugin #76

Merged
merged 6 commits into from
Sep 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions detect_secrets/core/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,10 +321,6 @@ def _highlight_secret(secret_line, secret, filename, plugin_settings):
plugin.secret_type,
filename,
secret=raw_secret,

# This doesn't matter, because PotentialSecret only uses
# line numbers for logging, and we're not logging it.
lineno=0,
)

# There could be more than two secrets on the same line.
Expand Down
13 changes: 8 additions & 5 deletions detect_secrets/core/potential_secret.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def __init__(
self,
typ,
filename,
lineno,
secret,
lineno=0,
is_secret=None,
):
"""
Expand All @@ -31,13 +31,13 @@ def __init__(
:type filename: str
:param filename: name of file that this secret was found

:type secret: str
:param secret: the actual secret identified

:type lineno: int
:param lineno: location of secret, within filename.
Merely used as a reference for easy triage.

:type secret: str
:param secret: the actual secret identified

:type is_secret: bool|None
:param is_secret: whether or not the secret is a true- or false- positive
"""
Expand Down Expand Up @@ -87,7 +87,10 @@ def __ne__(self, other):

def __hash__(self):
return hash(
tuple([getattr(self, x) for x in self.fields_to_compare]),
tuple(
getattr(self, x)
for x in self.fields_to_compare
),
)

def __str__(self): # pragma: no cover
Expand Down
14 changes: 7 additions & 7 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ def _load_baseline_from_dict(cls, data):
secret = PotentialSecret(
item['type'],
filename,
item['line_number'],
secret='will be replaced',
lineno=item['line_number'],
is_secret=item.get('is_secret'),
)
secret.secret_hash = item['hashed_secret']
Expand Down Expand Up @@ -204,7 +204,7 @@ def get_secret(self, filename, secret, type_=None):
if type_:
# Optimized lookup, because we know the type of secret
# (and therefore, its hash)
tmp_secret = PotentialSecret(type_, filename, 0, 'will be overriden')
tmp_secret = PotentialSecret(type_, filename, secret='will be overriden')
tmp_secret.secret_hash = secret

if tmp_secret in self.data[filename]:
Expand Down Expand Up @@ -251,18 +251,18 @@ def _results_accumulator(self, filename):
Caller is responsible for updating the dictionary with
results of plugin analysis.
"""
results = {}
file_results = {}

for plugin in self.plugins:
yield results, plugin
yield file_results, plugin

if not results:
if not file_results:
return

if filename not in self.data:
self.data[filename] = results
self.data[filename] = file_results
else:
self.data[filename].update(results)
self.data[filename].update(file_results)

def _extract_secrets_from_file(self, f, filename):
"""Extract secrets from a given file object.
Expand Down
5 changes: 5 additions & 0 deletions detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ class PluginOptions(object):
disable_flag_text='--no-basic-auth-scan',
disable_help_text='Disables scanning for Basic Auth formatted URIs.',
),
PluginDescriptor(
classname='KeywordDetector',
disable_flag_text='--no-keyword-scan',
disable_help_text='Disables scanning for secret keywords.',
),
]

def __init__(self, parser):
Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/plugins/basic_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def analyze_string(self, string, line_num, filename):
secret = PotentialSecret(
self.secret_type,
filename,
line_num,
result,
line_num,
)
output[secret] = secret

Expand Down
1 change: 1 addition & 0 deletions detect_secrets/plugins/core/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from ..basic_auth import BasicAuthDetector # noqa: F401
from ..high_entropy_strings import Base64HighEntropyString # noqa: F401
from ..high_entropy_strings import HexHighEntropyString # noqa: F401
from ..keyword import KeywordDetector # noqa: F401
from ..private_key import PrivateKeyDetector # noqa: F401
from detect_secrets.core.log import log

Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/plugins/high_entropy_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def analyze_string(self, string, line_num, filename):
for result in self.secret_generator(string):
if self.is_sequential_string(result):
continue
secret = PotentialSecret(self.secret_type, filename, line_num, result)
secret = PotentialSecret(self.secret_type, filename, result, line_num)
output[secret] = secret

return output
Expand Down
72 changes: 72 additions & 0 deletions detect_secrets/plugins/keyword.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
This code was extracted in part from
https://github.com/PyCQA/bandit. Using similar heuristic logic,
we adapted it to fit our plugin infrastructure, to create an organized,
concerted effort in detecting all types of secrets in code.

Copyright (c) 2014 Hewlett-Packard Development Company, L.P.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
from __future__ import absolute_import

from .base import BasePlugin
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.plugins.core.constants import WHITELIST_REGEX


# Keywords whose presence on a line is treated as a sign of a hard-coded
# credential.
#
# Every entry MUST be lowercase: KeywordDetector.analyze_string lowercases
# the line before secret_generator performs a plain substring test against
# these values, so an entry containing uppercase characters can never match.
BLACKLIST = (
    'pass =',
    'password',
    'passwd',
    'pwd',
    'secret',
    # NOTE(review): with substring matching, any line containing 'secrete'
    # also contains 'secret', so such a line is reported once per entry.
    # Kept as-is to preserve the reported results; confirm whether intended.
    'secrete',
    'token',
)


class KeywordDetector(BasePlugin):
    """Plugin that reports lines containing any keyword from BLACKLIST.

    The line is lowercased before it is searched, and each matching
    BLACKLIST entry is reported as its own PotentialSecret.
    """

    secret_type = 'Password'

    def analyze_string(self, string, line_num, filename):
        """Collect a PotentialSecret for every blacklisted keyword in a line.

        :type string: str
        :param string: the line of text to inspect

        :type line_num: int
        :param line_num: passed through to each PotentialSecret

        :type filename: str
        :param filename: passed through to each PotentialSecret

        :rtype: dict
        :returns: mapping of each PotentialSecret found to itself; empty
            when the line matches WHITELIST_REGEX or holds no keyword.
        """
        # Lines matching WHITELIST_REGEX are never reported.
        if WHITELIST_REGEX.search(string):
            return {}

        candidates = (
            PotentialSecret(self.secret_type, filename, keyword, line_num)
            for keyword in self.secret_generator(string.lower())
        )
        return {candidate: candidate for candidate in candidates}

    def secret_generator(self, string):
        """Yield each BLACKLIST entry that is a substring of ``string``.

        :type string: str
        :param string: text to search; analyze_string passes it lowercased
        """
        for keyword in BLACKLIST:
            if keyword in string:
                yield keyword
4 changes: 2 additions & 2 deletions detect_secrets/plugins/private_key.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
This code was extracted in part from
https://github.com/pre-commit/pre-commit-hooks. Using similar heuristic logic,
we adapt it to fit our plugin infrastructure, to create an organized,
we adapted it to fit our plugin infrastructure, to create an organized,
concerted effort in detecting all types of secrets in code.

Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys
Expand Down Expand Up @@ -55,8 +55,8 @@ def analyze_string(self, string, line_num, filename):
secret = PotentialSecret(
self.secret_type,
filename,
line_num,
identifier,
line_num,
)
output[secret] = secret

Expand Down
3 changes: 1 addition & 2 deletions test_data/files/file_with_no_secrets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/python
# Will change this later.
SUPER_SECRET_VALUE = "this is just a long string, like a user facing error message"
REGULAR_VALUE = "this is just a long string, like a user facing error message"


def main():
Expand Down
2 changes: 1 addition & 1 deletion test_data/files/file_with_secrets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/python
# Will change this later.
SUPER_SECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5'
SUPER_SEECRET_VALUE = 'c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5'
VERY_SECRET_TOO = 'f6CGV4aMM9zedoh3OUNbSakBymo7yplB' # pragma: whitelist secret


Expand Down
2 changes: 1 addition & 1 deletion test_data/short_files/first_line.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
secret = 'BEEF0123456789a'
seecret = 'BEEF0123456789a'
skipped_sequential_false_positive = '0123456789a'
print('second line')
var = 'third line'
4 changes: 2 additions & 2 deletions test_data/short_files/last_line.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[some section]
secrets_for_no_one_to_find =
secreets_for_no_one_to_find =
hunter2
password123
passsword123
BEEF0123456789a
4 changes: 2 additions & 2 deletions test_data/short_files/middle_line.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
deploy:
user: aaronloo
password:
passhword:
secure: thequickbrownfoxjumpsoverthelazydog
on:
repo: Yelp/detect-secrets
repo: Yelp/detect-sechrets
6 changes: 3 additions & 3 deletions testing/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from detect_secrets.core.secrets_collection import SecretsCollection


def potential_secret_factory(type_='type', filename='filename', lineno=1, secret='secret'):
def potential_secret_factory(type_='type', filename='filename', secret='secret', lineno=1):
"""This is only marginally better than creating PotentialSecret objects directly,
because of default values.
"""
return PotentialSecret(type_, filename, lineno, secret)
return PotentialSecret(type_, filename, secret, lineno)


def secrets_collection_factory(secrets=None, plugins=(), exclude_regex=''):
Expand Down Expand Up @@ -51,7 +51,7 @@ def _add_secret(collection, type_='type', secret='secret', filename='filename',
tmp_secret = potential_secret_factory(
type_=type_,
filename=filename,
lineno=lineno,
secret=secret,
lineno=lineno,
)
collection.data[filename][tmp_secret] = tmp_secret
4 changes: 2 additions & 2 deletions tests/core/baseline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def test_new_secret_line_old_file(self):
results = get_secrets_not_in_baseline(new_findings, baseline)

assert len(results.data['filename']) == 1
secretA = PotentialSecret('type', 'filename', 1, 'secret1')
secretA = PotentialSecret('type', 'filename', 'secret1', 1)
assert results.data['filename'][secretA].secret_hash == \
PotentialSecret.hash_secret('secret1')
assert baseline.data == backup_baseline
Expand All @@ -201,7 +201,7 @@ def test_rolled_creds(self):

assert len(results.data['filename']) == 1

secretA = PotentialSecret('type', 'filename', 1, 'secret_new')
secretA = PotentialSecret('type', 'filename', 'secret_new', 1)
assert results.data['filename'][secretA].secret_hash == \
PotentialSecret.hash_secret('secret_new')
assert baseline.data == backup_baseline
Expand Down
2 changes: 1 addition & 1 deletion tests/core/potential_secret_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
class TestPotentialSecret(object):

@pytest.mark.parametrize(
'a,b,is_equal',
'a, b, is_equal',
[
(
potential_secret_factory(lineno=1),
Expand Down
51 changes: 43 additions & 8 deletions tests/core/secrets_collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,28 @@ def test_success_multiple_plugins(self):
line_numbers = [entry.lineno for entry in logic.data['filename']]
assert set(line_numbers) == set([1, 2, 3])

def test_reporting_of_password_plugin_secrets_if_reported_already(self):
    """Scan a file with a pre-existing secret on line 3 and two mock plugins.

    After the scan, the collection is expected to hold three entries for
    the file, located on lines 2 and 3 only.
    """
    existing_secrets = [
        {
            'filename': 'filename',
            'lineno': 3,
        },
    ]
    mock_plugins = (
        MockPasswordPluginValue(),
        MockPluginFileValue(),
    )
    logic = secrets_collection_factory(
        secrets=existing_secrets,
        plugins=mock_plugins,
    )

    with mock_open('junk text here'):
        logic.scan_file('filename')

    file_secrets = logic.data['filename']
    assert len(file_secrets) == 3

    seen_line_numbers = set(entry.lineno for entry in file_secrets)
    assert seen_line_numbers == set([2, 3])

def test_unicode_decode_error(self, mock_log):
logic = secrets_collection_factory(
plugins=(MockPluginFileValue(),),
Expand Down Expand Up @@ -203,12 +225,14 @@ def test_optional_type(self, filename, secret_hash, expected_value):
)
def test_explicit_type_for_optimization(self, type_, is_none):
with self._mock_secret_hash():
logic = secrets_collection_factory(secrets=[
{
'filename': 'filename',
'type_': 'type',
},
])
logic = secrets_collection_factory(
secrets=[
{
'filename': 'filename',
'type_': 'type',
},
],
)

assert (logic.get_secret('filename', 'secret_hash', type_) is None) == is_none

Expand Down Expand Up @@ -343,7 +367,7 @@ class MockPluginFixedValue(MockBasePlugin):
def analyze(self, f, filename):
# We're not testing the plugin's ability to analyze secrets, so
# it doesn't matter what we return
secret = PotentialSecret('mock fixed value type', filename, 1, 'asdf')
secret = PotentialSecret('mock fixed value type', filename, 'asdf', 1)
return {secret: secret}


Expand All @@ -354,8 +378,19 @@ class MockPluginFileValue(MockBasePlugin):
def analyze(self, f, filename):
# We're not testing the plugin's ability to analyze secrets, so
# it doesn't matter what we return
secret = PotentialSecret('mock file value type', filename, 2, f.read().strip())
secret = PotentialSecret('mock file value type', filename, f.read().strip(), 2)
return {secret: secret}


class MockPasswordPluginValue(MockBasePlugin):
    """Mock plugin whose single finding has the 'Password' secret type."""

    secret_type = 'mock_plugin_file_value'

    def analyze(self, f, filename):
        """Report the stripped file contents as a 'Password' secret on line 2.

        :returns: dict mapping the one PotentialSecret to itself
        """
        file_contents = f.read().strip()
        finding = PotentialSecret(
            'Password',
            filename,
            file_contents,
            2,
        )
        return {finding: finding}


MockUnicodeDecodeError = UnicodeDecodeError('encoding type', b'subject', 0, 1, 'exception message')
Loading