Skip to content

Commit

Permalink
Merge pull request #13 from Yelp/handling-ini-files
Browse files Browse the repository at this point in the history
Handling ini files
  • Loading branch information
domanchi authored Apr 6, 2018
2 parents ccc60fd + 79a5cb0 commit ce9232e
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 2 deletions.
84 changes: 82 additions & 2 deletions detect_secrets/plugins/high_entropy_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@
import math
import re
import string
from contextlib import contextmanager

from future import standard_library

from .base import BasePlugin
from detect_secrets.core.potential_secret import PotentialSecret
standard_library.install_aliases()
import configparser # noqa: E402


class HighEntropyStringsPlugin(BasePlugin):
Expand All @@ -22,6 +27,14 @@ def __init__(self, charset, limit, *args):
# TODO: Update for not just python comments?
self.ignore_regex = re.compile(r'# ?pragma: ?whitelist[ -]secret')

def analyze(self, file, filename):
    """Scan ``file`` for high entropy strings, trying INI parsing first.

    The file is first handed to ``_analyze_ini_file``. If configparser
    rejects it, the file is rewound and scanned by the parent class's
    ``analyze`` instead.

    :param file: seekable file object to scan.
    :param filename: name passed through to the analyzers.
    :returns: whatever the analyzer that handled the file returns.
    """
    try:
        ini_secrets = self._analyze_ini_file(file, filename)
    except configparser.Error:
        # The failed parse attempt consumed (part of) the file; rewind so
        # the fallback scan below starts from the beginning.
        file.seek(0)
    else:
        return ini_secrets

    return super(HighEntropyStringsPlugin, self).analyze(file, filename)

def calculate_shannon_entropy(self, data):
"""Returns the entropy of a given string.
Expand Down Expand Up @@ -54,13 +67,49 @@ def analyze_string(self, string, line_num, filename):
# There may be multiple strings on the same line
results = self.regex.findall(string)
for result in results:
entropy_value = self.calculate_shannon_entropy(result[1])
# To accommodate changing self.regex, due to different filetypes
if isinstance(result, tuple):
result = result[1]

entropy_value = self.calculate_shannon_entropy(result)
if entropy_value > self.entropy_limit:
secret = PotentialSecret(self.secret_type, filename, line_num, result[1])
secret = PotentialSecret(self.secret_type, filename, line_num, result)
output[secret] = secret

return output

def _analyze_ini_file(self, file, filename):
    """Scan the values of an INI-style config file for secrets.

    The file is parsed with configparser, then every value is passed to
    ``analyze_string`` while the unquoted-string regex is active.

    :param file: seekable file object containing the config.
    :param filename: name passed through to ``analyze_string``.
    :returns: same format as super().analyze()
    :raises configparser.Error: if the file cannot be parsed as INI.
    """
    parser = configparser.ConfigParser()
    # Keep option names exactly as written. By default configparser
    # lower-cases them, and a lower-cased key can no longer be found in
    # the raw text when locating its line number below.
    parser.optionxform = lambda option: option
    parser.read_file(file)

    potential_secrets = {}

    # Hacky way to keep track of line location: re-read the raw lines and
    # search forward through them for each key/value pair in turn.
    file.seek(0)
    lines = [line.strip() for line in file.readlines()]
    line_offset = 0

    with self._non_quoted_string_regex():
        for section_name, _ in parser.items():
            for key, value in parser.items(section_name):
                offset = self._get_line_offset(key, value, lines)
                if offset is not None:
                    # +1, because we don't want to double count lines:
                    # the matched line itself is consumed as well.
                    consumed = offset + 1
                    line_offset += consumed
                    lines = lines[consumed:]
                # When the pair cannot be located in the remaining text,
                # the value is still scanned and reported at the last
                # known position rather than crashing on ``None + 1``.

                secrets = self.analyze_string(
                    value,
                    line_offset,
                    filename,
                )

                potential_secrets.update(secrets)

    return potential_secrets

@property
def __dict__(self):
output = super(HighEntropyStringsPlugin, self).__dict__
Expand All @@ -70,6 +119,37 @@ def __dict__(self):

return output

@contextmanager
def _non_quoted_string_regex(self):
    """For certain file formats, strings need not necessarily follow the
    normal convention of being denoted by single or double quotes. In these
    cases, we modify the regex accordingly.

    While the context is active, ``self.regex`` matches a whole string made
    up only of characters from ``self.charset``. The previous regex is
    restored on exit, including when the with-body raises.
    """
    old_regex = self.regex
    self.regex = re.compile(r'^([%s]+)$' % self.charset)

    try:
        yield
    finally:
        # Without the finally, an exception inside the with-body would
        # leave the temporary regex installed for every later scan.
        self.regex = old_regex

@staticmethod
def _get_line_offset(key, value, lines):
    """Returns the index of the location of key, value pair in lines.

    A line matches when it starts with ``key``, followed by one or more of
    space, ``:`` or ``=``, followed by the first line of ``value``. Both
    key and value are matched literally.

    :type key: str
    :param key: key, in config file.

    :type value: str
    :param value: value for key, in config file.

    :type lines: list
    :param lines: a collection of lines-so-far in file

    :returns: index of the first matching line, or None if no line matches.
    """
    # Only the first line of a multi-line value can sit next to its key.
    first_value_line = value.split('\n', 1)[0] if value else ''

    # Escape both parts: keys and values are arbitrary text, and characters
    # such as "+", "(" or "." would otherwise be read as regex syntax.
    regex = re.compile(
        r'^{}[ :=]+{}'.format(
            re.escape(key),
            re.escape(first_value_line),
        ),
    )
    for index, line in enumerate(lines):
        if regex.match(line):
            return index

    return None


class HexHighEntropyString(HighEntropyStringsPlugin):
"""HighEntropyStringsPlugin for hex strings"""
Expand Down
9 changes: 9 additions & 0 deletions test_data/config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[credentials]
password = 12345678901234

[parent]
[child]
key = value

[aws]
aws_secret_key = 2345678901
26 changes: 26 additions & 0 deletions tests/plugins/high_entropy_strings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,32 @@ def test_ignored_lines(self, content_to_format):

assert len(results) == 0

def test_ini_file(self):
    """Secrets found in an ini file, then a python file, are located
    on the expected lines.
    """
    # We're testing two files here, because we want to make sure that
    # the HighEntropyStrings regex is reset back to normal after
    # scanning the ini file.
    filenames = [
        'test_data/config.ini',
        'test_data/files/file_with_secrets.py',
    ]
    expected_locations = (
        'Location: test_data/config.ini:2',
        'Location: test_data/config.ini:9',
        'Location: test_data/files/file_with_secrets.py:3',
    )

    plugin = Base64HighEntropyString(3)

    accumulated_secrets = {}
    for filename in filenames:
        with open(filename) as f:
            accumulated_secrets.update(
                plugin.analyze(f, filename),
            )

    # Guard against a vacuous pass: if nothing was detected at all, the
    # loop below would never execute a single assertion.
    assert accumulated_secrets

    for secret in accumulated_secrets.values():
        # The second line of str(secret) is its "Location: ..." line.
        location = str(secret).splitlines()[1]
        assert location in expected_locations


class TestBase64HighEntropyStrings(HighEntropyStringsTest):

Expand Down

0 comments on commit ce9232e

Please sign in to comment.