Skip to content

Commit

Permalink
Add support for SHA384 and SHA512 #minor
Browse files Browse the repository at this point in the history
Added support for remaining in-scope SHA-2 algorithms:
 - SHA384
 - SHA512

Dropped the idea of including checksum algorithms (namely adler32) as
user-selectable algorithms. The various benefits of secure hashes
outweigh the marginal performance benefits of checksum algorithms with
the internal data format of jfpv1.
  • Loading branch information
cobaltine committed Jan 3, 2021
1 parent 52236ea commit 73ec8e7
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 44 deletions.
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
![](https://img.shields.io/github/license/cobaltine/json-fingerprint) ![](https://img.shields.io/pypi/pyversions/json-fingerprint) ![](https://img.shields.io/github/workflow/status/cobaltine/json-fingerprint/Test%20runner/main) ![](https://img.shields.io/github/workflow/status/cobaltine/json-fingerprint/Release%20Python%20package/main?label=pypi%20release) [![](https://img.shields.io/pypi/v/json-fingerprint)](https://pypi.org/project/json-fingerprint/) ![Code Climate maintainability](https://img.shields.io/codeclimate/maintainability/cobaltine/json-fingerprint) [![Coverage Status](https://coveralls.io/repos/github/cobaltine/json-fingerprint/badge.svg?branch=main)](https://coveralls.io/github/cobaltine/json-fingerprint?branch=main)


Create consistent and comparable fingerprints (checksums/hashes) from unordered JSON data.
Create consistent and comparable fingerprints with secure hashes from unordered JSON data.

A json fingerprint consists of three parts: the version of the underlying algorithm, the hash function used and a hex digest of the hash function output. A complete example could look like this: `jfpv1$sha256$5815eb0ce6f4e5ab0a771cce2a8c5432f64222f8fd84b4cc2d38e4621fae86af`.

Expand All @@ -16,19 +16,20 @@ This is a list of high-level development and documentation tasks, which need to

- [ ] Formalize the jfpv1 specification
- [x] JSON type support
- [x] Primitive types (int/float, str, bool, None)
- [x] Arrays (list)
- [x] Objects (dict)
- [x] Primitives and literals
- [x] Arrays
- [x] Objects
- [x] Flattened "sibling-aware" internal data structure
- [x] Support nested JSON data structures with mixed types
- [ ] Support most common SHA-2 hash functions
- [x] Support most common SHA-2 hash functions
- [x] SHA256
- [ ] SHA384
- [ ] SHA512
- [x] SHA384
- [x] SHA512
- [ ] Dynamic jfpv1 fingerprint comparison function (JSON string against a fingerprint)
- [x] Performance characteristics that scale sufficiently
- [ ] Extensive verification against potential fingerprint (hash) collisions


## Installation

To install the json-fingerprint package, run `pip install json-fingerprint`.
Expand Down
32 changes: 19 additions & 13 deletions json_fingerprint/_jfpv1.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,25 @@
)


def _create_json_sha256_hash(data: Any) -> str:
def _create_json_hash(data: Any, hash_function: str) -> str:
"""Create a hash (sha256, sha384 or sha512) from json-converted data, sorted by key names."""
stringified = json.dumps(data, sort_keys=True)
m = hashlib.sha256()
if hash_function == 'sha256':
m = hashlib.sha256()
if hash_function == 'sha384':
m = hashlib.sha384()
if hash_function == 'sha512':
m = hashlib.sha512()
m.update(stringified.encode('utf-8'))

return m.hexdigest()


def _create_sorted_sha256_hash_list(data: Dict) -> List[Dict]:
def _create_sorted_hash_list(data: Dict, hash_function: str) -> List[Dict]:
"""Create a sorted list of hashes, one per item, using the given hash function."""
out = []
for obj in data:
hash = _create_json_sha256_hash(obj)
hash = _create_json_hash(obj, hash_function=hash_function)
out.append(hash)
out.sort()
return out
Expand Down Expand Up @@ -48,13 +54,13 @@ def _build_element(path: str, siblings: str, value: Any):
}


def _flatten_json(data: Dict, path: str = '', siblings: List = [], debug: bool = False) -> List:
def _flatten_json(data: Dict, hash_function: str, path: str = '', siblings: List = [], debug: bool = False) -> List:
"""Flatten json data structures into a sibling-aware data element list."""
out = []
if type(data) is dict:
for key in data.keys():
p = _build_path(key=f'{{{key}}}', base_path=path)
output = _flatten_json(data=data[key], path=p, siblings=siblings, debug=debug)
output = _flatten_json(data=data[key], hash_function=hash_function, path=p, siblings=siblings, debug=debug)
out.extend(output)
return out

Expand All @@ -64,17 +70,17 @@ def _flatten_json(data: Dict, path: str = '', siblings: List = [], debug: bool =
# Iterate and collect sibling structures, which'll be then attached to each sibling element
siblings = []
for item in data:
output = _flatten_json(data=item, path=p, debug=debug)
output = _flatten_json(data=item, hash_function=hash_function, path=p, debug=debug)
siblings.extend(output)

# Debug mode, which allows non-hashed sibling structures to be inspected and tested against
if not debug:
siblings = _create_sorted_sha256_hash_list(siblings)
siblings = _create_json_sha256_hash(siblings)
siblings = _create_sorted_hash_list(data=siblings, hash_function=hash_function)
siblings = _create_json_hash(data=siblings, hash_function=hash_function)

# Recurse with each value in list to typecheck it and eventually get the element value
for item in data:
output = _flatten_json(data=item, path=p, siblings=siblings, debug=debug)
output = _flatten_json(data=item, hash_function=hash_function, path=p, siblings=siblings, debug=debug)
out.extend(output)
return out

Expand All @@ -85,7 +91,7 @@ def _flatten_json(data: Dict, path: str = '', siblings: List = [], debug: bool =

def _create_jfpv1_fingerprint(data: Any, hash_function: str, version: int):
"""Create a jfpv1 fingerprint."""
flattened_json = _flatten_json(data=data)
sorted_hash_list = _create_sorted_sha256_hash_list(data=flattened_json)
hex_digest = _create_json_sha256_hash(data=sorted_hash_list)
flattened_json = _flatten_json(data=data, hash_function=hash_function)
sorted_hash_list = _create_sorted_hash_list(data=flattened_json, hash_function=hash_function)
hex_digest = _create_json_hash(data=sorted_hash_list, hash_function=hash_function)
return f'jfpv1${hash_function}${hex_digest}'
2 changes: 2 additions & 0 deletions json_fingerprint/_validators.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
JFPV1_HASH_FUNCTIONS = (
'sha256',
'sha384',
'sha512',
)

JSON_FINGERPRINT_VERSIONS = (
Expand Down
46 changes: 23 additions & 23 deletions json_fingerprint/tests/test_jfpv1.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class TestJfpv1(unittest.TestCase):
def test_create_json_sha256_hash(self):
def test_create_json_hash(self):
"""Test jfpv1 json sha256 hash creation.
Verify that:
Expand All @@ -15,10 +15,10 @@ def test_create_json_sha256_hash(self):
m = hashlib.sha256()
m.update(stringified.encode('utf-8'))
expected_hex_digest = m.hexdigest()
hex_digest = _jfpv1._create_json_sha256_hash(data=data)
hex_digest = _jfpv1._create_json_hash(data=data, hash_function='sha256')
self.assertEqual(hex_digest, expected_hex_digest)

def test_jfpv1_create_sorted_sha256_hash_list(self):
def test_jfpv1_create_sorted_hash_list(self):
"""Test jfpv1 hash list, used for condensing unique identifiers into an easily sortable list.
Verify that:
Expand All @@ -33,7 +33,7 @@ def test_jfpv1_create_sorted_sha256_hash_list(self):
'c',
]

output_data_hashes = _jfpv1._create_sorted_sha256_hash_list(data=input_data)
output_data_hashes = _jfpv1._create_sorted_hash_list(data=input_data, hash_function='sha256')
input_data_hashes = []
for datum in input_data:
m = hashlib.sha256()
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_jfpv1_build_path(self):
combination_path = _jfpv1._build_path(key=dict_key, base_path=list_key)
self.assertEqual(combination_path, '[5]|{foo}')

obj_out_raw = _jfpv1._flatten_json(data=[1, {'[1]|{foo}': 'bar'}, 2], debug=False)
obj_out_raw = _jfpv1._flatten_json(data=[1, {'[1]|{foo}': 'bar'}, 2], hash_function='sha256')
self.assertEqual(obj_out_raw[0]['path'], '[3]')
self.assertEqual(obj_out_raw[1]['path'], '[3]|{[1]|{foo}}')
self.assertEqual(obj_out_raw[2]['path'], '[3]')
Expand All @@ -87,7 +87,7 @@ def test_jfpv1_flattened_json_sha256_fingerprint(self):
1,
[2, 3],
]
obj_out_raw = _jfpv1._flatten_json(data=obj_in, debug=True)
obj_out_raw = _jfpv1._flatten_json(data=obj_in, hash_function='sha256', debug=True)
expected_obj_out_raw = [
{
'path': '[2]',
Expand Down Expand Up @@ -131,45 +131,45 @@ def test_jfpv1_flattened_json_sha256_fingerprint(self):
]
self.assertEqual(obj_out_raw, expected_obj_out_raw)

obj_out = _jfpv1._flatten_json(data=obj_in, debug=False)
inner_list_siblings_hash_list = _jfpv1._create_sorted_sha256_hash_list([
obj_out = _jfpv1._flatten_json(data=obj_in, hash_function='sha256')
inner_list_siblings_hash_list = _jfpv1._create_sorted_hash_list([
{'path': '[2]|[2]', 'value': 2},
{'path': '[2]|[2]', 'value': 3}
])
siblings_1 = _jfpv1._create_sorted_sha256_hash_list([
], hash_function='sha256')
siblings_1 = _jfpv1._create_sorted_hash_list([
{'path': '[2]', 'value': 1},
{
'path': '[2]|[2]',
'siblings': _jfpv1._create_json_sha256_hash(inner_list_siblings_hash_list),
'siblings': _jfpv1._create_json_hash(data=inner_list_siblings_hash_list, hash_function='sha256'),
'value': 2
},
{
'path': '[2]|[2]',
'siblings': _jfpv1._create_json_sha256_hash(inner_list_siblings_hash_list),
'siblings': _jfpv1._create_json_hash(data=inner_list_siblings_hash_list, hash_function='sha256'),
'value': 3
}
])
], hash_function='sha256')
expected_out_hash_lists = [
{
'path': '[2]',
'siblings': _jfpv1._create_json_sha256_hash(siblings_1),
'siblings': _jfpv1._create_json_hash(data=siblings_1, hash_function='sha256'),
'value': 1
},
{
'path': '[2]|[2]',
'siblings': _jfpv1._create_json_sha256_hash(inner_list_siblings_hash_list),
'siblings': _jfpv1._create_json_hash(data=inner_list_siblings_hash_list, hash_function='sha256'),
'value': 2
},
{
'path': '[2]|[2]',
'siblings': _jfpv1._create_json_sha256_hash(inner_list_siblings_hash_list),
'siblings': _jfpv1._create_json_hash(data=inner_list_siblings_hash_list, hash_function='sha256'),
'value': 3
},
]
self.assertEqual(obj_out, expected_out_hash_lists)

sorted_hash_list = _jfpv1._create_sorted_sha256_hash_list(data=obj_out)
hex_digest = _jfpv1._create_json_sha256_hash(data=sorted_hash_list)
sorted_hash_list = _jfpv1._create_sorted_hash_list(data=obj_out, hash_function='sha256')
hex_digest = _jfpv1._create_json_hash(data=sorted_hash_list, hash_function='sha256')
jfpv1_out_1 = f'jfpv1$sha256${hex_digest}'
jfpv1_out_2 = _jfpv1._create_jfpv1_fingerprint(data=obj_in, hash_function='sha256', version=1)
self.assertEqual(jfpv1_out_1, jfpv1_out_2)
Expand All @@ -181,7 +181,7 @@ def test_jfpv1_json_flattener_primitive_integer_handling(self):
- Integers are 'flattened' correctly (path, siblings and value)
- Fingerprint matches with pre-verified fingerprint"""
int_val = 123
int_out_raw = _jfpv1._flatten_json(data=int_val)
int_out_raw = _jfpv1._flatten_json(data=int_val, hash_function='sha256')
expected_int_out_raw = [{'path': '', 'value': int_val}]
self.assertEqual(int_out_raw, expected_int_out_raw)

Expand All @@ -192,7 +192,7 @@ def test_jfpv1_json_flattener_primitive_float_handling(self):
- Floats are 'flattened' correctly (path, siblings and value)
- Fingerprint matches with pre-verified fingerprint"""
float_val = 123.321
float_out_raw = _jfpv1._flatten_json(data=float_val)
float_out_raw = _jfpv1._flatten_json(data=float_val, hash_function='sha256')
expected_float_out_raw = [{'path': '', 'value': float_val}]
self.assertEqual(float_out_raw, expected_float_out_raw)

Expand All @@ -203,7 +203,7 @@ def test_jfpv1_json_flattener_primitive_string_handling(self):
- Strings are 'flattened' correctly (path, siblings and value)
- Fingerprint matches with pre-verified fingerprint"""
string_val = 'alpha 123'
string_out_raw = _jfpv1._flatten_json(data=string_val)
string_out_raw = _jfpv1._flatten_json(data=string_val, hash_function='sha256')
expected_string_out_raw = [{'path': '', 'value': string_val}]
self.assertEqual(string_out_raw, expected_string_out_raw)

Expand All @@ -214,7 +214,7 @@ def test_jfpv1_json_flattener_primitive_boolean_handling(self):
- Booleans are 'flattened' correctly (path, siblings and value)
- Fingerprint matches with pre-verified fingerprint"""
bool_val = True
bool_out_raw = _jfpv1._flatten_json(data=bool_val)
bool_out_raw = _jfpv1._flatten_json(data=bool_val, hash_function='sha256')
expected_bool_out_raw = [{'path': '', 'value': bool_val}]
self.assertEqual(bool_out_raw, expected_bool_out_raw)

Expand All @@ -225,7 +225,7 @@ def test_jfpv1_json_flattener_primitive_none_handling(self):
- None values are 'flattened' correctly (path, siblings and value)
- Fingerprint matches with pre-verified fingerprint"""
none_val = None
none_out_raw = _jfpv1._flatten_json(data=none_val)
none_out_raw = _jfpv1._flatten_json(data=none_val, hash_function='sha256')
expected_bool_out_raw = [{'path': '', 'value': none_val}]
self.assertEqual(none_out_raw, expected_bool_out_raw)

Expand Down
16 changes: 16 additions & 0 deletions json_fingerprint/tests/test_json_fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,22 @@ def test_jfpv1_sha256_output_format(self):
fp = json_fingerprint(input='{"foo": "bar"}', hash_function='sha256', version=1)
self.assertRegex(fp, '^jfpv1\\$sha256\\$[0-9a-f]{64}$')

def test_jfpv1_sha384_output_format(self):
"""Test jfpv1 output format.
Verify that:
- Complete jfpv1-sha384 output fingerprint is properly formatted"""
fp = json_fingerprint(input='{"foo": "bar"}', hash_function='sha384', version=1)
self.assertRegex(fp, '^jfpv1\\$sha384\\$[0-9a-f]{96}$')

def test_jfpv1_sha512_output_format(self):
"""Test jfpv1 output format.
Verify that:
- Complete jfpv1-sha512 output fingerprint is properly formatted"""
fp = json_fingerprint(input='{"foo": "bar"}', hash_function='sha512', version=1)
self.assertRegex(fp, '^jfpv1\\$sha512\\$[0-9a-f]{128}$')

def test_jfpv1_sha256_mixed_order(self):
"""Test jfpv1 sha256 mixed order fingerprint match.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
author='Ville Lehtinen',
author_email='ville.lehtinen@cobaltine.fi',
license='MIT',
description='Create consistent and comparable fingerprints (checksums/hashes) from unordered JSON data',
description='Create consistent and comparable fingerprints with secure hashes from unordered JSON data',
long_description=long_description,
long_description_content_type='text/markdown',
url='https://github.com/cobaltine/json-fingerprint',
Expand Down

0 comments on commit 73ec8e7

Please sign in to comment.