From 57ed5537cf1351ff307e2fc706528240b089cb29 Mon Sep 17 00:00:00 2001 From: Xianjun Zhu Date: Fri, 8 Feb 2019 23:24:38 -0500 Subject: [PATCH] fix: support unicode in markdown --- .../plugins/common/ini_file_parser.py | 27 +++++---- test_data/config.md | 10 ++++ tests/plugins/high_entropy_strings_test.py | 56 ++++++++++++------- 3 files changed, 61 insertions(+), 32 deletions(-) create mode 100644 test_data/config.md diff --git a/detect_secrets/plugins/common/ini_file_parser.py b/detect_secrets/plugins/common/ini_file_parser.py index 737b7b0a6..c7918f3fe 100644 --- a/detect_secrets/plugins/common/ini_file_parser.py +++ b/detect_secrets/plugins/common/ini_file_parser.py @@ -17,23 +17,26 @@ def __init__(self, file, add_header=False, exclude_lines_regex=None): :param exclude_lines_regex: optional regex for ignored lines. """ self.parser = configparser.ConfigParser() - self.parser.optionxform = str + try: + # python2.7 compatible + self.parser.optionxform = unicode + except NameError: + self.parser.optionxform = str self.exclude_lines_regex = exclude_lines_regex - if not add_header: - self.parser.read_file(file) - else: + content = file.read() + if add_header: # This supports environment variables, or other files that look # like config files, without a section header. 
- content = '[global]\n' + file.read() - - try: - # python2.7 compatible - self.parser.read_string(unicode(content)) - except NameError: - # python3 compatible - self.parser.read_string(content) + content = '[global]\n' + content + + try: + # python2.7 compatible + self.parser.read_string(unicode(content)) + except NameError: + # python3 compatible + self.parser.read_string(content) # Hacky way to keep track of line location file.seek(0) diff --git a/test_data/config.md b/test_data/config.md new file mode 100644 index 000000000..e3c170e71 --- /dev/null +++ b/test_data/config.md @@ -0,0 +1,10 @@ +# Sample markdown file + +[guides](http://localhost/guilds) + +Test Unicode in non ini file would not fail on python 2.7. + +╭─ diagnose +╰» ssh to server x:22324241234423414 + +key="ToCynx5Se4e2PtoZxEhW7lUJcOX15c54" diff --git a/tests/plugins/high_entropy_strings_test.py b/tests/plugins/high_entropy_strings_test.py index 7cfe57f2a..9a1474210 100644 --- a/tests/plugins/high_entropy_strings_test.py +++ b/tests/plugins/high_entropy_strings_test.py @@ -1,6 +1,7 @@ from __future__ import absolute_import from __future__ import unicode_literals +import codecs import string import pytest @@ -145,39 +146,54 @@ def setup(self): secret_string='c3VwZXIgbG9uZyBzdHJpbmcgc2hvdWxkIGNhdXNlIGVub3VnaCBlbnRyb3B5', ) - def test_ini_file(self): + @pytest.mark.parametrize( + 'filename, secrets', + [ + ( + 'test_data/config.ini', + [ + 'Location: test_data/config.ini:2', + 'Location: test_data/config.ini:6', + 'Location: test_data/config.ini:10', + 'Location: test_data/config.ini:15', + 'Location: test_data/config.ini:21', + 'Location: test_data/config.ini:22', ], + ), + ( + 'test_data/files/file_with_secrets.py', + ['Location: test_data/files/file_with_secrets.py:3', ], + ), + # Markdown files with colons and unicode characters preceding the + # colon on the line would cause the scanner to fail and exit on + # 2.7 due to the ini parser being used on non-ini files. + # This test case ensures that 
scanning can complete and find + # high entropy issues + ( + 'test_data/config.md', + ['Location: test_data/config.md:10', ], + ), + ], + ) + def test_ini_file(self, filename, secrets): # We're testing two files here, because we want to make sure that # the HighEntropyStrings regex is reset back to normal after # scanning the ini file. - filenames = [ - 'test_data/config.ini', - 'test_data/files/file_with_secrets.py', - ] plugin = Base64HighEntropyString(3) accumulated_secrets = {} - for filename in filenames: - with open(filename) as f: - accumulated_secrets.update( - plugin.analyze(f, filename), - ) + with codecs.open(filename, encoding='utf-8') as f: + accumulated_secrets.update( + plugin.analyze(f, filename), + ) count = 0 for secret in accumulated_secrets.values(): location = str(secret).splitlines()[1] - assert location in ( - 'Location: test_data/config.ini:2', - 'Location: test_data/config.ini:6', - 'Location: test_data/config.ini:10', - 'Location: test_data/config.ini:15', - 'Location: test_data/config.ini:21', - 'Location: test_data/config.ini:22', - 'Location: test_data/files/file_with_secrets.py:3', - ) + assert location in secrets count += 1 - assert count == 7 + assert count == len(secrets) def test_yaml_file(self): plugin = Base64HighEntropyString(