Skip to content

Commit

Permalink
Add new test cases #minor
Browse files Browse the repository at this point in the history
Breaking changes:
 - Rename: FINGERPRINT_HASH_FUNCTIONS -> JFPV1_HASH_FUNCTIONS
 - Rename: FINGERPRINT_VERSIONS -> JSON_FINGERPRINT_VERSIONS

Other changes:
 - Refactored path formatting/building to a separate function
 - Renamed some internal functions for clarity
 - Added missing docstrings

New test cases:
 - Tests for handling of primitive types (int, float, str, bool)
 - Tests for refactored path builder function
  • Loading branch information
cobaltine committed Dec 28, 2020
1 parent cae54a0 commit 5f06228
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 46 deletions.
70 changes: 39 additions & 31 deletions json_fingerprint/json_fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
List,
)

FINGERPRINT_HASH_FUNCTIONS = (
JFPV1_HASH_FUNCTIONS = (
'sha256',
)

FINGERPRINT_VERSIONS = (
JSON_FINGERPRINT_VERSIONS = (
1,
)

Expand All @@ -31,68 +31,76 @@ class FingerprintVersionError(Exception):
pass


def _create_hash(data) -> str:
def _create_json_sha256_hash(data) -> str:
"""Create an sha256 hash from json-converted data, sorted by key names."""
stringified = json.dumps(data, sort_keys=True)
m = hashlib.sha256()
m.update(stringified.encode('utf-8'))
return m.hexdigest()


def _create_sorted_hash_list(data: Dict) -> List[Dict]:
def _create_sorted_sha256_hash_list(data: Dict) -> List[Dict]:
"""Create a sorted sha256 hash list."""
out = []
for obj in data:
hash = _create_hash(obj)
hash = _create_json_sha256_hash(obj)
out.append(hash)
out.sort()
return out


def _build_path(key: str, base_path: str):
if base_path:
return f'{base_path}|{key}'
return key


def _flatten_json(data: Dict, path: str = '', siblings: List = [], debug: bool = False) -> List:
"""Flatten json data structures into a sibling-aware data element list."""
out = []
if type(data) is dict:
for key in data.keys():
p = f'{{{key}}}'
if path:
p = f'{path}|{p}'
out.extend(_flatten_json(data[key], path=p, siblings=siblings, debug=debug))
elif type(data) is list:
p = f'[{len(data)}]'
if path:
p = f'{path}|{p}'
p = _build_path(key=f'{{{key}}}', base_path=path)
output = _flatten_json(data=data[key], path=p, siblings=siblings, debug=debug)
out.extend(output)
return out

if type(data) is list:
p = _build_path(key=f'[{len(data)}]', base_path=path)
siblings = []
for item in data:
output = _flatten_json(item, path=p, debug=debug)
output = _flatten_json(data=item, path=p, debug=debug)
siblings.extend(output)

for item in data:
output = _flatten_json(item, path=p, siblings=siblings, debug=debug)
output = _flatten_json(data=item, path=p, siblings=siblings, debug=debug)
out.extend(output)
else:
if not debug:
siblings = _create_sorted_hash_list(siblings)
element = {
'path': path,
'siblings': siblings,
'value': data,
}
out.append(element)

return out

if not debug:
siblings = _create_sorted_sha256_hash_list(siblings)
element = {
'path': path,
'siblings': siblings,
'value': data,
}
out.append(element)
return out


def json_fingerprint(input: str, hash_function: str, version: str) -> str:
"""Create json fingerprints with the selected hash function and jfp version."""
if type(input) is not str:
err = f'Expected data type \'{type("")}\' (JSON in string format), instead got \'{type(input)}\''
raise FingerprintInputDataTypeError(err)

if hash_function not in FINGERPRINT_HASH_FUNCTIONS:
err = (f'Expected one of supported hash functions \'{FINGERPRINT_HASH_FUNCTIONS}\', '
if hash_function not in JFPV1_HASH_FUNCTIONS:
err = (f'Expected one of supported hash functions \'{JFPV1_HASH_FUNCTIONS}\', '
f'instead got \'{hash_function}\'')
raise FingerprintHashFunctionError(err)

if version not in FINGERPRINT_VERSIONS:
err = (f'Expected one of supported JSON fingerprint versions \'{FINGERPRINT_VERSIONS}\', '
if version not in JSON_FINGERPRINT_VERSIONS:
err = (f'Expected one of supported JSON fingerprint versions \'{JSON_FINGERPRINT_VERSIONS}\', '
f'instead got \'{version}\'')
raise FingerprintVersionError(err)

Expand All @@ -103,6 +111,6 @@ def json_fingerprint(input: str, hash_function: str, version: str) -> str:
raise FingerprintJSONLoadError(err) from None

flattened_json = _flatten_json(data=loaded)
sorted_hash_list = _create_sorted_hash_list(data=flattened_json)
hex_digest = _create_hash(sorted_hash_list)
sorted_hash_list = _create_sorted_sha256_hash_list(data=flattened_json)
hex_digest = _create_json_sha256_hash(sorted_hash_list)
return f'jfpv1${hash_function}${hex_digest}'
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,92 @@ def test_jfpv1_sha256_output_format(self):
Verify that:
- Complete jfpv1-sha256 output fingerprint is properly formatted"""
fp = jfp.json_fingerprint('{"foo": "bar"}', hash_function='sha256', version=1)
fp = jfp.json_fingerprint(input='{"foo": "bar"}', hash_function='sha256', version=1)
self.assertRegex(fp, '^jfpv1\\$sha256\\$[0-9a-f]{64}$')

def test_jfpv1_build_path(self):
    """Test jfpv1 raw path formatting.

    Verify that:
    - List and dict path elements keep their encapsulation ([] and {})
    - Combined paths are joined with the pipe character
    - Indicators and separators stay intact even when a dict field name itself contains
      the same characters ([]{}|)"""
    dict_element = '{foo}'  # Dict with a field named 'foo'
    list_element = '[5]'  # List with 5 elements in it
    no_parent = ''

    # Without a parent path, the element is returned unchanged
    self.assertEqual(jfp._build_path(key=dict_element, base_path=no_parent), '{foo}')
    self.assertEqual(jfp._build_path(key=list_element, base_path=no_parent), '[5]')

    # With a parent path, parent and element are pipe-separated
    self.assertEqual(jfp._build_path(key=dict_element, base_path=list_element), '[5]|{foo}')

    flattened = jfp._flatten_json(data=[1, {'[1]|{foo}': 'bar'}, 2], debug=False)
    expected_paths = ('[3]', '[3]|{[1]|{foo}}', '[3]')
    for index, expected_path in enumerate(expected_paths):
        self.assertEqual(flattened[index]['path'], expected_path)

def test_jfpv1_sha256_primitive_integer_handling(self):
    """Test jfpv1 primitive integer handling.

    Verify that:
    - A bare integer flattens into a single element (empty path, no siblings, same value)
    - Its fingerprint equals the pre-verified fingerprint"""
    value = 123

    # Flattened form of a root-level primitive
    self.assertEqual(
        jfp._flatten_json(data=value),
        [{'path': '', 'siblings': [], 'value': value}],
    )

    fingerprint = jfp.json_fingerprint(input=json.dumps(value), hash_function='sha256', version=1)
    self.assertEqual(
        fingerprint,
        'jfpv1$sha256$16096dbc64a551bd3ab7fde9935338b3575b8c1e1e371b9af7765b7d8fb5ccc5',
    )

def test_jfpv1_sha256_primitive_float_handling(self):
    """Test jfpv1 primitive float handling.

    Verify that:
    - A bare float flattens into a single element (empty path, no siblings, same value)
    - Its fingerprint equals the pre-verified fingerprint"""
    value = 123.321

    # Flattened form of a root-level primitive
    self.assertEqual(
        jfp._flatten_json(data=value),
        [{'path': '', 'siblings': [], 'value': value}],
    )

    fingerprint = jfp.json_fingerprint(input=json.dumps(value), hash_function='sha256', version=1)
    self.assertEqual(
        fingerprint,
        'jfpv1$sha256$33755cda351618d316af2661b9e9a5c87123b715898662b50837b8079135bfbb',
    )

def test_jfpv1_sha256_primitive_string_handling(self):
    """Test jfpv1 primitive string handling.

    Verify that:
    - A bare string flattens into a single element (empty path, no siblings, same value)
    - Its fingerprint equals the pre-verified fingerprint"""
    value = 'alpha 123'

    # Flattened form of a root-level primitive
    self.assertEqual(
        jfp._flatten_json(data=value),
        [{'path': '', 'siblings': [], 'value': value}],
    )

    fingerprint = jfp.json_fingerprint(input=json.dumps(value), hash_function='sha256', version=1)
    self.assertEqual(
        fingerprint,
        'jfpv1$sha256$4d1da719b6f0845aa4a4036150322f76d03f0781dadab388d858d45a881a4e24',
    )

def test_jfpv1_sha256_primitive_boolean_handling(self):
    """Test jfpv1 primitive boolean handling.

    Verify that:
    - A bare boolean flattens into a single element (empty path, no siblings, same value)
    - Its fingerprint equals the pre-verified fingerprint"""
    value = True

    # Flattened form of a root-level primitive
    self.assertEqual(
        jfp._flatten_json(data=value),
        [{'path': '', 'siblings': [], 'value': value}],
    )

    fingerprint = jfp.json_fingerprint(input=json.dumps(value), hash_function='sha256', version=1)
    self.assertEqual(
        fingerprint,
        'jfpv1$sha256$ffd2ec80a46b8035bd07c380548e62deaf730c2822c72e7c2fe690b4928f80cd',
    )

def test_jfpv1_flattened_json_sibling_format(self):
"""Test jfpv1 json flattener.
Verify that:
- jfpv1 json flattener produces expected raw output format (non-hashed siblings)
- jfpv1 json flattener produces expected raw output format (non-hashed siblings in debug mode)
- jfpv1 json flattener produces expected output format (hashed siblings)"""
obj_in = [
1,
Expand Down Expand Up @@ -94,10 +172,11 @@ def test_jfpv1_flattened_json_sibling_format(self):
self.assertEqual(obj_out, expected_obj_out)

def test_jfpv1_flattened_json_structural_distinction_1(self):
"""Test jfpv1 json flattener depth handling.
"""Test jfpv1 json flattener's structural value distinction.
Verify that:
- Identical value content in different structures (depths) don't prodcue same outputs"""
- Identical value content in identical depths, but in different structures,
don't produce identical outputs"""
obj_in_1 = [
1,
[1, [2, 2]],
Expand All @@ -114,31 +193,30 @@ def test_jfpv1_flattened_json_structural_distinction_1(self):
self.assertNotEqual(fp_1, fp_2)

def test_jfpv1_flattened_json_structural_distinction_2(self):
"""Test jfpv1 json flattener element distribution distinction.
"""Test jfpv1 json flattener's structural value distinction.
Verify that:
- Values in identical paths but different sibling values don't get matched"""
obj_in_1 = [
[1, ["x", "x"]],
[2, ["y", "y"]],
[1, ['x', 'x']],
[2, ['y', 'y']],
]
fp_1 = jfp.json_fingerprint(input=json.dumps(obj_in_1), hash_function='sha256', version=1)

obj_in_2 = [
[1, ["x", "y"]],
[2, ["x", "y"]],
[1, ['x', 'y']],
[2, ['x', 'y']],
]
fp_2 = jfp.json_fingerprint(input=json.dumps(obj_in_2), hash_function='sha256', version=1)

self.assertNotEqual(fp_1, fp_2)

def test_jfpv1_create_sorted_hash_list(self):
def test_jfpv1_create_sorted_sha256_hash_list(self):
"""Test jfpv1 hash list, used for condensing unique identifiers into an easily sortable list.
Verify that:
- The hash list produces valid SHA256 hashes from json-formatted data
- Sorts the hashes properly"""

input_data = [
# SHA256 (json-formatted): ac8d8342bbb2362d13f0a559a3621bb407011368895164b628a54f7fc33fc43c
'a',
Expand All @@ -148,8 +226,7 @@ def test_jfpv1_create_sorted_hash_list(self):
'c',
]

output_data_hashes = jfp._create_sorted_hash_list(data=input_data)

output_data_hashes = jfp._create_sorted_sha256_hash_list(data=input_data)
input_data_hashes = []
for datum in input_data:
m = hashlib.sha256()
Expand Down
3 changes: 1 addition & 2 deletions json_fingerprint/tests/test_jfpv1_testdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ def test_jfpv1_sha256_mixed_order(self):
fp_1 = jfp.json_fingerprint(self.test_obj_1, hash_function='sha256', version=1)
fp_2 = jfp.json_fingerprint(self.test_obj_2, hash_function='sha256', version=1)
self.assertEqual(fp_1, fp_2)
self.assertEqual(fp_1,
'jfpv1$sha256$ef72ce73da41ca55d727c47982f43d6955d2f33e37f0f2bbcfd569334d458e58')
self.assertEqual(fp_1, 'jfpv1$sha256$ef72ce73da41ca55d727c47982f43d6955d2f33e37f0f2bbcfd569334d458e58')


if __name__ == '__main__':
Expand Down

0 comments on commit 5f06228

Please sign in to comment.