From 17031b7162a75f18eb2ebfabfd2591f5ce366091 Mon Sep 17 00:00:00 2001 From: Aaron Loo Date: Wed, 4 Apr 2018 13:05:29 -0700 Subject: [PATCH 1/3] modifying HighEntropyStrings for .ini support --- .../plugins/high_entropy_strings.py | 89 ++++++++++++++++++- test_data/files/config.ini | 9 ++ tests/plugins/high_entropy_strings_test.py | 26 ++++++ 3 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 test_data/files/config.ini diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py index 6dafc11d5..27436cdea 100644 --- a/detect_secrets/plugins/high_entropy_strings.py +++ b/detect_secrets/plugins/high_entropy_strings.py @@ -3,9 +3,19 @@ import math import re import string +from contextlib import contextmanager + +from future import standard_library from .base import BasePlugin from detect_secrets.core.potential_secret import PotentialSecret +standard_library.install_aliases() +import configparser # noqa: E402 + + +INI_FILE_EXTENSIONS = ( + 'ini', +) class HighEntropyStringsPlugin(BasePlugin): @@ -22,6 +32,14 @@ def __init__(self, charset, limit, *args): # TODO: Update for not just python comments? self.ignore_regex = re.compile(r'# ?pragma: ?whitelist[ -]secret') + def analyze(self, file, filename): + # Heuristically determine whether file is an ini-formatted file. + for ext in INI_FILE_EXTENSIONS: + if filename.endswith('.{}'.format(ext)): + return self._analyze_ini_file(file, filename) + + return super(HighEntropyStringsPlugin, self).analyze(file, filename) + def calculate_shannon_entropy(self, data): """Returns the entropy of a given string. @@ -54,13 +72,49 @@ def analyze_string(self, string, line_num, filename): # There may be multiple strings on the same line results = self.regex.findall(string) for result in results: - entropy_value = self.calculate_shannon_entropy(result[1]) + # To accommodate changing self.regex, due to different filetypes + if isinstance(result, tuple): + result = result[1] + + entropy_value = self.calculate_shannon_entropy(result) if entropy_value > self.entropy_limit: - secret = PotentialSecret(self.secret_type, filename, line_num, result[1]) + secret = PotentialSecret(self.secret_type, filename, line_num, result) output[secret] = secret return output + def _analyze_ini_file(self, file, filename): + """ + :returns: same format as super().analyze() + """ + parser = configparser.ConfigParser() + parser.read_file(file) + + potential_secrets = {} + + # Hacky way to keep track of line location. + file.seek(0) + lines = list(map(lambda x: x.strip(), file.readlines())) + line_offset = 0 + + with self._non_quoted_string_regex(): + for section_name, _ in parser.items(): + for key, value in parser.items(section_name): + # +1, because we don't want to double count lines + offset = self._get_line_offset(key, value, lines) + 1 + line_offset += offset + lines = lines[offset:] + + secrets = self.analyze_string( + value, + line_offset, + filename, + ) + + potential_secrets.update(secrets) + + return potential_secrets + @property def __dict__(self): output = super(HighEntropyStringsPlugin, self).__dict__ @@ -70,6 +124,37 @@ def __dict__(self): return output + @contextmanager + def _non_quoted_string_regex(self): + """For certain file formats, strings need not necessarily follow the + normal convention of being denoted by single or double quotes. In these + cases, we modify the regex accordingly. + """ + old_regex = self.regex + self.regex = re.compile(r'^([%s]+)$' % self.charset) + + yield + + self.regex = old_regex + + @staticmethod + def _get_line_offset(key, value, lines): + """Returns the index of the location of key, value pair in lines. + + :type key: str + :param key: key, in config file. + + :type value: str + :param value: value for key, in config file. + + :type lines: list + :param lines: a collection of lines-so-far in file + """ + regex = re.compile(r'^{}[ :=]+{}'.format(key, value)) + for index, line in enumerate(lines): + if regex.match(line): + return index + class HexHighEntropyString(HighEntropyStringsPlugin): """HighEntropyStringsPlugin for hex strings""" diff --git a/test_data/files/config.ini b/test_data/files/config.ini new file mode 100644 index 000000000..ab739d796 --- /dev/null +++ b/test_data/files/config.ini @@ -0,0 +1,9 @@ +[credentials] +password = 12345678901234 + +[parent] + [child] + key = value + +[aws] +aws_secret_key = 2345678901 diff --git a/tests/plugins/high_entropy_strings_test.py b/tests/plugins/high_entropy_strings_test.py index 926284b2d..90d1f76ff 100644 --- a/tests/plugins/high_entropy_strings_test.py +++ b/tests/plugins/high_entropy_strings_test.py @@ -104,6 +104,32 @@ def test_ignored_lines(self, content_to_format): assert len(results) == 0 + def test_ini_file(self): + # We're testing two files here, because we want to make sure that + # the HighEntropyStrings regex is reset back to normal after + # scanning the ini file. + filenames = [ + 'test_data/files/config.ini', + 'test_data/files/file_with_secrets.py', + ] + + plugin = Base64HighEntropyString(3) + + accumulated_secrets = {} + for filename in filenames: + with open(filename) as f: + accumulated_secrets.update( + plugin.analyze(f, filename), + ) + + for secret in accumulated_secrets.values(): + location = str(secret).splitlines()[1] + assert location in ( + 'Location: test_data/files/config.ini:2', + 'Location: test_data/files/config.ini:9', + 'Location: test_data/files/file_with_secrets.py:3', + ) + class TestBase64HighEntropyStrings(HighEntropyStringsTest): From b7633cb60d6ec8997c901629e082e7946d9b3b9f Mon Sep 17 00:00:00 2001 From: Aaron Loo Date: Wed, 4 Apr 2018 17:12:06 -0700 Subject: [PATCH 2/3] getting tests to pass --- test_data/{files => }/config.ini | 0 tests/plugins/high_entropy_strings_test.py | 6 +++--- 2 files changed, 3 insertions(+), 3 deletions(-) rename test_data/{files => }/config.ini (100%) diff --git a/test_data/files/config.ini b/test_data/config.ini similarity index 100% rename from test_data/files/config.ini rename to test_data/config.ini diff --git a/tests/plugins/high_entropy_strings_test.py b/tests/plugins/high_entropy_strings_test.py index 90d1f76ff..f30f28cf6 100644 --- a/tests/plugins/high_entropy_strings_test.py +++ b/tests/plugins/high_entropy_strings_test.py @@ -109,7 +109,7 @@ def test_ini_file(self): # the HighEntropyStrings regex is reset back to normal after # scanning the ini file. filenames = [ - 'test_data/files/config.ini', + 'test_data/config.ini', 'test_data/files/file_with_secrets.py', ] @@ -125,8 +125,8 @@ def test_ini_file(self): for secret in accumulated_secrets.values(): location = str(secret).splitlines()[1] assert location in ( - 'Location: test_data/files/config.ini:2', - 'Location: test_data/files/config.ini:9', + 'Location: test_data/config.ini:2', + 'Location: test_data/config.ini:9', 'Location: test_data/files/file_with_secrets.py:3', ) From 79a5cb075efbf87ca4839201a63405411a42c90d Mon Sep 17 00:00:00 2001 From: Aaron Loo Date: Thu, 5 Apr 2018 12:58:33 -0700 Subject: [PATCH 3/3] more pythonic with duck typing --- detect_secrets/plugins/high_entropy_strings.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/detect_secrets/plugins/high_entropy_strings.py b/detect_secrets/plugins/high_entropy_strings.py index 27436cdea..db3b1485b 100644 --- a/detect_secrets/plugins/high_entropy_strings.py +++ b/detect_secrets/plugins/high_entropy_strings.py @@ -13,11 +13,6 @@ import configparser # noqa: E402 -INI_FILE_EXTENSIONS = ( - 'ini', -) - - class HighEntropyStringsPlugin(BasePlugin): """Base class for string pattern matching""" @@ -33,10 +28,10 @@ def __init__(self, charset, limit, *args): self.ignore_regex = re.compile(r'# ?pragma: ?whitelist[ -]secret') def analyze(self, file, filename): - # Heuristically determine whether file is an ini-formatted file. - for ext in INI_FILE_EXTENSIONS: - if filename.endswith('.{}'.format(ext)): - return self._analyze_ini_file(file, filename) + try: + return self._analyze_ini_file(file, filename) + except configparser.Error: + file.seek(0) return super(HighEntropyStringsPlugin, self).analyze(file, filename)