diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py
index 6dafc11d5..db3b1485b 100644
--- a/detect_secrets/plugins/high_entropy_strings.py
+++ b/detect_secrets/plugins/high_entropy_strings.py
@@ -3,9 +3,14 @@
 import math
 import re
 import string
+from contextlib import contextmanager
+
+from future import standard_library
 
 from .base import BasePlugin
 from detect_secrets.core.potential_secret import PotentialSecret
+standard_library.install_aliases()
+import configparser  # noqa: E402
 
 
 class HighEntropyStringsPlugin(BasePlugin):
@@ -22,6 +27,26 @@ def __init__(self, charset, limit, *args):
         # TODO: Update for not just python comments?
         self.ignore_regex = re.compile(r'# ?pragma: ?whitelist[ -]secret')
 
+    def analyze(self, file, filename):
+        """Tries to scan file as an ini config file; if it cannot be parsed
+        as one, falls back to the scan of the parent class.
+
+        :type file: file object
+        :param file: open, seekable handle to the file being scanned.
+
+        :type filename: str
+        :param filename: name of the file, attached to reported secrets.
+
+        :returns: same format as super().analyze()
+        """
+        try:
+            return self._analyze_ini_file(file, filename)
+        except configparser.Error:
+            # Not an ini file. Rewind, as the parser consumed the file.
+            file.seek(0)
+
+        return super(HighEntropyStringsPlugin, self).analyze(file, filename)
+
     def calculate_shannon_entropy(self, data):
         """Returns the entropy of a given string.
 
@@ -54,13 +79,64 @@ def analyze_string(self, string, line_num, filename):
         # There may be multiple strings on the same line
         results = self.regex.findall(string)
         for result in results:
-            entropy_value = self.calculate_shannon_entropy(result[1])
+            # To accommodate changing self.regex, due to different filetypes
+            if isinstance(result, tuple):
+                result = result[1]
+
+            entropy_value = self.calculate_shannon_entropy(result)
             if entropy_value > self.entropy_limit:
-                secret = PotentialSecret(self.secret_type, filename, line_num, result[1])
+                secret = PotentialSecret(self.secret_type, filename, line_num, result)
                 output[secret] = secret
 
         return output
 
+    def _analyze_ini_file(self, file, filename):
+        """Scans the value of every option in an ini-style config file.
+
+        :type file: file object
+        :param file: open, seekable handle to the file being scanned.
+
+        :type filename: str
+        :param filename: name of the file, attached to reported secrets.
+
+        :returns: same format as super().analyze()
+        :raises: configparser.Error, if file cannot be parsed as ini.
+        """
+        parser = configparser.ConfigParser()
+        parser.read_file(file)
+
+        potential_secrets = {}
+
+        # Hacky way to keep track of line location: each key, value pair
+        # is looked up in the lines that were not consumed yet.
+        file.seek(0)
+        lines = [line.strip() for line in file.readlines()]
+        line_offset = 0
+
+        with self._non_quoted_string_regex():
+            for section_name, _ in parser.items():
+                # raw=True, to scan values as written in the file, rather
+                # than their interpolated form.
+                for key, value in parser.items(section_name, raw=True):
+                    offset = self._get_line_offset(key, value, lines)
+                    if offset is not None:
+                        # +1, because we don't want to double count lines
+                        offset += 1
+                        line_offset += offset
+                        lines = lines[offset:]
+
+                    # When the pair cannot be located (e.g. multi-line
+                    # value), it is reported at the last known line.
+                    secrets = self.analyze_string(
+                        value,
+                        line_offset,
+                        filename,
+                    )
+
+                    potential_secrets.update(secrets)
+
+        return potential_secrets
+
     @property
     def __dict__(self):
         output = super(HighEntropyStringsPlugin, self).__dict__
@@ -70,6 +146,49 @@ def __dict__(self):
 
         return output
 
+    @contextmanager
+    def _non_quoted_string_regex(self):
+        """For certain file formats, strings need not necessarily follow the
+        normal convention of being denoted by single or double quotes. In these
+        cases, we modify the regex accordingly.
+
+        The previous regex is restored on exit, even if the body raises.
+        """
+        old_regex = self.regex
+        self.regex = re.compile(r'^([%s]+)$' % self.charset)
+
+        try:
+            yield
+        finally:
+            self.regex = old_regex
+
+    @staticmethod
+    def _get_line_offset(key, value, lines):
+        """Returns the index of the location of key, value pair in lines.
+
+        :type key: str
+        :param key: key, in config file.
+
+        :type value: str
+        :param value: value for key, in config file.
+
+        :type lines: list
+        :param lines: a collection of lines-so-far in file
+
+        :returns: index of the first matching line; None, if no line matches.
+        """
+        # key and value are literal text, so they must be escaped. Case is
+        # ignored, because configparser lowercases keys by default.
+        regex = re.compile(
+            r'^{}[ :=]+{}'.format(re.escape(key), re.escape(value)),
+            re.IGNORECASE,
+        )
+        for index, line in enumerate(lines):
+            if regex.match(line):
+                return index
+
+        return None
+
 
 class HexHighEntropyString(HighEntropyStringsPlugin):
     """HighEntropyStringsPlugin for hex strings"""
diff --git a/test_data/config.ini b/test_data/config.ini
new file mode 100644
index 000000000..ab739d796
--- /dev/null
+++ b/test_data/config.ini
@@ -0,0 +1,9 @@
+[credentials]
+password = 12345678901234
+
+[parent]
+    [child]
+    key = value
+
+[aws]
+aws_secret_key = 2345678901
diff --git a/tests/plugins/high_entropy_strings_test.py b/tests/plugins/high_entropy_strings_test.py
index 926284b2d..f30f28cf6 100644
--- a/tests/plugins/high_entropy_strings_test.py
+++ b/tests/plugins/high_entropy_strings_test.py
@@ -104,6 +104,32 @@ def test_ignored_lines(self, content_to_format):
 
         assert len(results) == 0
 
+    def test_ini_file(self):
+        # We're testing two files here, because we want to make sure that
+        # the HighEntropyStrings regex is reset back to normal after
+        # scanning the ini file.
+        filenames = [
+            'test_data/config.ini',
+            'test_data/files/file_with_secrets.py',
+        ]
+
+        plugin = Base64HighEntropyString(3)
+
+        accumulated_secrets = {}
+        for filename in filenames:
+            with open(filename) as f:
+                accumulated_secrets.update(
+                    plugin.analyze(f, filename),
+                )
+
+        for secret in accumulated_secrets.values():
+            location = str(secret).splitlines()[1]
+            assert location in (
+                'Location: test_data/config.ini:2',
+                'Location: test_data/config.ini:9',
+                'Location: test_data/files/file_with_secrets.py:3',
+            )
+
 
 class TestBase64HighEntropyStrings(HighEntropyStringsTest):
 