From 65e126c38d20e67296a4e5d2952b5209f41cd9ad Mon Sep 17 00:00:00 2001 From: Victor Zhou Date: Wed, 14 Aug 2019 15:56:21 -0700 Subject: [PATCH] Handle binary values specially in yaml file parser This is step (1) in supporting binary in both YAML and non-YAML files. This makes it so that instead of immediately converting the base64-encoded binary into a binary value in Python, we just interpret the binary as a normal string, but annotate it as such with the `__is_binary__` flag. This is needed so that plugins can scan a different value from the value hashed into baselines. --- .../plugins/common/yaml_file_parser.py | 9 +++++-- tests/plugins/common/yaml_file_parser_test.py | 26 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/detect_secrets/plugins/common/yaml_file_parser.py b/detect_secrets/plugins/common/yaml_file_parser.py index 2e6902064..6279808f3 100644 --- a/detect_secrets/plugins/common/yaml_file_parser.py +++ b/detect_secrets/plugins/common/yaml_file_parser.py @@ -16,7 +16,7 @@ class YamlFileParser(object): Therefore, we take a different approach: intercept the parsing of the yaml file to identify string values. This assumes: - 1. Secrets are strings + 1. Secrets are strings or binaries 2. Secrets are not keys Then, we calculate the entropy of those string values. 
@@ -75,7 +75,7 @@ def _tag_dict_values(self, map_node): """ new_values = [] for key, value in map_node.value: - if not value.tag.endswith(':str'): + if not value.tag.endswith(':str') and not value.tag.endswith(':binary'): new_values.append((key, value)) continue @@ -92,6 +92,11 @@ def _tag_dict_values(self, map_node): str(value.__line__), 'tag:yaml.org,2002:int', ), + self._create_key_value_pair_for_mapping_node_value( + '__is_binary__', + str(value.tag.endswith(':binary')), + 'tag:yaml.org,2002:bool', + ), ], ) diff --git a/tests/plugins/common/yaml_file_parser_test.py b/tests/plugins/common/yaml_file_parser_test.py index f514c5602..0258bea45 100644 --- a/tests/plugins/common/yaml_file_parser_test.py +++ b/tests/plugins/common/yaml_file_parser_test.py @@ -1,6 +1,9 @@ from __future__ import absolute_import from __future__ import unicode_literals +import mock +import pytest + from detect_secrets.plugins.common.yaml_file_parser import YamlFileParser from testing.mocks import mock_file_object @@ -19,3 +22,26 @@ def test_get_ignored_lines(self): ignored_lines = YamlFileParser(f).get_ignored_lines() assert ignored_lines == {2, 3} + + @pytest.mark.parametrize( + ['yaml_value', 'expected_value', 'expected_is_binary'], + [ + ('string_value', 'string_value', False), + ('!!binary YWJjZGVm', 'YWJjZGVm', True), + ], + ) + def test_possible_secret_format( + self, + yaml_value, + expected_value, + expected_is_binary, + ): + content = 'key: {yaml_value}'.format(yaml_value=yaml_value) + f = mock_file_object(content) + + result = YamlFileParser(f).json() + assert result['key'] == { + '__value__': expected_value, + '__is_binary__': expected_is_binary, + '__line__': mock.ANY, + }