Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle binary values in YAML files #223

Merged
merged 4 commits into from
Sep 4, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions detect_secrets/core/potential_secret.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,18 @@ def __init__(
self.type = typ
self.filename = filename
self.lineno = lineno
self.secret_hash = self.hash_secret(secret)
self.set_secret(secret)
self.is_secret = is_secret
self.is_verified = False

# If two PotentialSecrets have the same values for these fields,
# they are considered equal. Note that line numbers aren't included
# in this, because line numbers are subject to change.
self.fields_to_compare = ['filename', 'secret_hash', 'type']

def set_secret(self, secret):
self.secret_hash = self.hash_secret(secret)

# NOTE: Originally, we never wanted to keep the secret value in memory,
# after finding it in the codebase. However, to support verifiable
# secrets (and avoid the pain of re-scanning again), we need to
Expand All @@ -61,11 +69,6 @@ def __init__(
# in the repository.
self.secret_value = secret

# If two PotentialSecrets have the same values for these fields,
# they are considered equal. Note that line numbers aren't included
# in this, because line numbers are subject to change.
self.fields_to_compare = ['filename', 'secret_hash', 'type']

@staticmethod
def hash_secret(secret):
"""This offers a way to coherently test this class,
Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/plugins/common/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def from_plugin_classname(
klass = globals()[plugin_classname]

# Make sure the instance is a BasePlugin type, before creating it.
if not issubclass(klass, BasePlugin):
if not issubclass(klass, BasePlugin): # pragma: no cover
raise TypeError

try:
Expand Down
24 changes: 16 additions & 8 deletions detect_secrets/plugins/common/yaml_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class YamlFileParser(object):
Therefore, we take a different approach: intercept the parsing of the yaml
file to identify string values. This assumes:

1. Secrets are strings
1. Secrets are strings or binaries
2. Secrets are not keys

Then, we calculate the entropy of those string values.
Expand Down Expand Up @@ -75,22 +75,30 @@ def _tag_dict_values(self, map_node):
"""
new_values = []
for key, value in map_node.value:
if not value.tag.endswith(':str'):
if not (
value.tag.endswith(':str') or
value.tag.endswith(':binary')
):
new_values.append((key, value))
continue

augmented_string = yaml.nodes.MappingNode(
tag=map_node.tag,
value=[
self._create_key_value_pair_for_mapping_node_value(
'__value__',
value.value,
'tag:yaml.org,2002:str',
key='__value__',
value=value.value,
tag=value.tag,
),
self._create_key_value_pair_for_mapping_node_value(
key='__line__',
value=str(value.__line__),
tag='tag:yaml.org,2002:int',
),
self._create_key_value_pair_for_mapping_node_value(
'__line__',
str(value.__line__),
'tag:yaml.org,2002:int',
key='__is_binary__',
value=str(value.tag.endswith(':binary')),
tag='tag:yaml.org,2002:bool',
),
],
)
Expand Down
68 changes: 62 additions & 6 deletions detect_secrets/plugins/high_entropy_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
from backports import configparser
except ImportError: # pragma: no cover
import configparser
import base64
import math
import os
import re
import string
from abc import ABCMeta
from abc import abstractmethod
from contextlib import contextmanager

import yaml
Expand Down Expand Up @@ -206,14 +208,23 @@ def _analyze_yaml_file(self, file, filename):

try:
if '__line__' in item and item['__line__'] not in ignored_lines:
potential_secrets.update(
self.analyze_string(
item['__value__'],
item['__line__'],
filename,
),
# An isinstance check doesn't work in py2
# so we need the __is_binary__ field.
string_to_scan = self.decode_binary(item['__value__']) \
if item['__is_binary__'] \
else item['__value__']

secrets = self.analyze_string(
string_to_scan,
item['__line__'],
filename,
)

if item['__is_binary__']:
secrets = self._encode_yaml_binary_secrets(secrets)

potential_secrets.update(secrets)

if '__line__' in item:
continue

Expand All @@ -226,6 +237,39 @@ def _analyze_yaml_file(self, file, filename):

return potential_secrets

def _encode_yaml_binary_secrets(self, secrets):
    """Rewrite secrets found in YAML binary values into YAML's own encoding.

    The secrets dict format is `{PotentialSecret: PotentialSecret}`,
    where both key and value are the same object. Therefore, we can
    just mutate the potential secret once.

    :param secrets: dict of potential secrets, in the format above.
    :returns: a new dict in the same format, holding the same (now
        mutated) potential secret objects.
    """
    result = {}
    for potential_secret in secrets:
        # Serialise the secret's bytes with the YAML dumper, then drop the
        # `!!binary ` tag so only the encoded payload is kept.
        secret_in_yaml_format = yaml.dump(
            self.encode_to_binary(potential_secret.secret_value),
        ).replace(
            '!!binary ',
            '',
        )

        potential_secret.set_secret(secret_in_yaml_format)

        # Insert only after the mutation. NOTE(review): presumably the
        # object's hash depends on the secret, which is why a fresh dict is
        # built instead of reusing `secrets` -- confirm against
        # PotentialSecret.__hash__.
        result[potential_secret] = potential_secret

    return result

@abstractmethod
def decode_binary(self, bytes_object):  # pragma: no cover
    """Turn a bytes value into a string that can be checked for
    high entropy.

    Abstract hook: this placeholder does nothing and returns None.

    :param bytes_object: the bytes to convert.
    """

@abstractmethod
def encode_to_binary(self, string):  # pragma: no cover
    """Turn a string (usually a high-entropy secret) into binary.

    Usually the inverse of decode_binary. Abstract hook: this
    placeholder does nothing and returns None.

    :param string: the string to convert.
    """


class HexHighEntropyString(HighEntropyStringsPlugin):
"""HighEntropyStringsPlugin for hex encoded strings"""
Expand Down Expand Up @@ -278,6 +322,12 @@ def calculate_shannon_entropy(self, data):

return entropy

def decode_binary(self, bytes_object):
    """Return the given bytes decoded as UTF-8 text.

    :param bytes_object: the bytes to decode.
    :returns: the decoded string.
    """
    decoded_text = bytes_object.decode('utf-8')
    return decoded_text

def encode_to_binary(self, string):
    """Return the given string encoded as UTF-8 bytes.

    :param string: the text to encode.
    :returns: the encoded bytes.
    """
    encoded_bytes = string.encode('utf-8')
    return encoded_bytes


class Base64HighEntropyString(HighEntropyStringsPlugin):
"""HighEntropyStringsPlugin for base64 encoded strings"""
Expand All @@ -299,3 +349,9 @@ def __dict__(self):
})

return output

def decode_binary(self, bytes_object):
    """Return the base64 representation of the given bytes, as text.

    :param bytes_object: the bytes to convert.
    :returns: the base64-encoded payload as a string.
    """
    base64_bytes = base64.b64encode(bytes_object)
    return base64_bytes.decode('utf-8')

def encode_to_binary(self, string):
    """Return the bytes obtained by base64-decoding the given string.

    :param string: base64 text to decode.
    :returns: the decoded bytes.
    """
    raw_bytes = base64.b64decode(string)
    return raw_bytes
2 changes: 2 additions & 0 deletions test_data/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ list_of_keys:
- 234567890a

test_agent::allowlisted_api_key: 'ToCynx5Se4e2PtoZxEhW7lUJcOX15c54' # pragma: allowlist secret

high_entropy_binary_secret: !!binary MjNjcnh1IDJieXJpdXYyeXJpaTJidnl1MnI4OXkyb3UwMg==
2 changes: 2 additions & 0 deletions test_data/config2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# This is yaml.dump('2b00042f7481c7b056c4b410d28f33cf'.encode('utf-8'))
high_entropy_hex_binary_secret: !!binary MmIwMDA0MmY3NDgxYzdiMDU2YzRiNDEwZDI4ZjMzY2Y=
26 changes: 26 additions & 0 deletions tests/plugins/common/yaml_file_parser_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import absolute_import
from __future__ import unicode_literals

import mock
import pytest

from detect_secrets.plugins.common.yaml_file_parser import YamlFileParser
from testing.mocks import mock_file_object

Expand All @@ -19,3 +22,26 @@ def test_get_ignored_lines(self):
ignored_lines = YamlFileParser(f).get_ignored_lines()

assert ignored_lines == {2, 3}

@pytest.mark.parametrize(
    ['yaml_value', 'expected_value', 'expected_is_binary'],
    [
        ('string_value', 'string_value', False),
        ('!!binary YWJjZGVm', b'abcdef', True),
    ],
)
def test_possible_secret_format(
    self,
    yaml_value,
    expected_value,
    expected_is_binary,
):
    """Check the dict the parser produces for a plain string value and
    for a `!!binary` value stored under `key`.
    """
    document = mock_file_object(
        'key: {yaml_value}'.format(yaml_value=yaml_value),
    )
    parsed = YamlFileParser(document).json()

    # The line number is not under test here, so any value is accepted.
    expected_entry = {
        '__line__': mock.ANY,
        '__is_binary__': expected_is_binary,
        '__value__': expected_value,
    }
    assert parsed['key'] == expected_entry
37 changes: 31 additions & 6 deletions tests/plugins/high_entropy_strings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
from __future__ import unicode_literals

import codecs
import string

import pytest

from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString
from detect_secrets.plugins.high_entropy_strings import HexHighEntropyString
from detect_secrets.plugins.high_entropy_strings import HighEntropyStringsPlugin
from testing.mocks import mock_file_object


Expand Down Expand Up @@ -213,12 +211,13 @@ def test_yaml_file(self):
with open('test_data/config.yaml') as f:
secrets = plugin.analyze(f, 'test_data/config.yaml')

assert len(secrets.values()) == 2
assert len(secrets.values()) == 3
for secret in secrets.values():
location = str(secret).splitlines()[1]
assert location in (
'Location: test_data/config.yaml:3',
'Location: test_data/config.yaml:6',
'Location: test_data/config.yaml:15',
)

def test_env_file(self):
Expand All @@ -234,6 +233,21 @@ def test_env_file(self):
)


class HexHighEntropyStringsWithStandardEntropy(HexHighEntropyString):
    """A HexHighEntropyString variant that keeps the standard Shannon
    entropy calculation instead of HexHighEntropyString's own override.
    """

    def __init__(self, *args, **kwargs):
        parent = super(HexHighEntropyStringsWithStandardEntropy, self)
        parent.__init__(*args, **kwargs)

    def calculate_shannon_entropy(self, data):
        # Start the lookup *after* HexHighEntropyString in the MRO, so its
        # implementation of this method is bypassed.
        beyond_hex = super(HexHighEntropyString, self)
        return beyond_hex.calculate_shannon_entropy(data)


class TestHexHighEntropyStrings(HighEntropyStringsTest):

def setup(self):
Expand All @@ -247,10 +261,21 @@ def setup(self):
secret_string='2b00042f7481c7b056c4b410d28f33cf',
)

def test_yaml_file(self):
    """Scanning test_data/config2.yaml with the hex plugin yields exactly
    one secret, reported on line 2.
    """
    scanner = HexHighEntropyString(3)
    with open('test_data/config2.yaml') as yaml_file:
        found = scanner.analyze(yaml_file, 'test_data/config2.yaml')

    assert len(found.values()) == 1

    allowed_locations = (
        'Location: test_data/config2.yaml:2',
    )
    for found_secret in found.values():
        # The second line of the secret's string form is its location.
        location_line = str(found_secret).splitlines()[1]
        assert location_line in allowed_locations

def test_discounts_when_all_numbers(self):
original_scanner = HighEntropyStringsPlugin(
charset=string.hexdigits,
limit=3,
original_scanner = HexHighEntropyStringsWithStandardEntropy(
hex_limit=3,
exclude_lines_regex=None,
)

Expand Down