Skip to content

Commit

Permalink
Merge 8462c48 into fe416ed
Browse files (browse the repository at this point in the history)
  • Loading branch information
cobaltine committed May 12, 2023
2 parents fe416ed + 8462c48 commit 5b3d2b2
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 11 deletions.
31 changes: 23 additions & 8 deletions json_fingerprint/_jfpv1.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def _create_json_hash(data: Any, hash_function: str) -> str:
return m.hexdigest()


def _create_sorted_hash_list(data: Dict, hash_function: str) -> List[Dict]:
def _create_sorted_hash_list(data: List, hash_function: str) -> List[str]:
"""Create a sorted sha256 hash list."""
out = []
for obj in data:
Expand All @@ -42,7 +42,7 @@ def _build_path(key: str, base_path: str):
return key


def _build_element(path: str, siblings: str, value: Any):
def _build_element(path: str, siblings: List[str], value: Any):
"""Build an element dictionary based on presence of sibling data."""
if siblings:
return {
Expand All @@ -57,20 +57,29 @@ def _build_element(path: str, siblings: str, value: Any):
}


def _flatten_json(data: Dict, hash_function: str, path: str = "", siblings: List = [], debug: bool = False) -> List:
def _flatten_json(data: Any, hash_function: str, path: str = "", siblings: List = [], debug: bool = False) -> List:
"""Flatten json data structures into a sibling-aware data element list."""
out = []
if type(data) is dict:

# Process non-empty dicts
if type(data) is dict and data:
for key in data.keys():
p = _build_path(key=f"{{{key}}}", base_path=path)
output = _flatten_json(data=data[key], hash_function=hash_function, path=p, siblings=siblings, debug=debug)
output = _flatten_json(
data=data[key],
hash_function=hash_function,
path=p,
siblings=siblings,
debug=debug,
)
out.extend(output)
return out

if type(data) is list:
# Process non-empty lists
if type(data) is list and data:
p = _build_path(key=f"[{len(data)}]", base_path=path)

# Iterate and collect sibling structures, which'll be then attached to each sibling element
# Iterate and collect sibling structures, which will be then attached to each sibling element
siblings = []
for item in data:
output = _flatten_json(data=item, hash_function=hash_function, path=p, debug=debug)
Expand All @@ -83,7 +92,13 @@ def _flatten_json(data: Dict, hash_function: str, path: str = "", siblings: List

# Recurse with each value in list to typecheck it and eventually get the element value
for item in data:
output = _flatten_json(data=item, hash_function=hash_function, path=p, siblings=siblings, debug=debug)
output = _flatten_json(
data=item,
hash_function=hash_function,
path=p,
siblings=siblings,
debug=debug,
)
out.extend(output)
return out

Expand Down
2 changes: 1 addition & 1 deletion json_fingerprint/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


def _validate_hash_function(hash_function: str, version: int):
if hash_function not in JFPV1_HASH_FUNCTIONS:
if version == 1 and hash_function not in JFPV1_HASH_FUNCTIONS:
err = f"Expected one of supported hash functions '{JFPV1_HASH_FUNCTIONS}', " f"instead got '{hash_function}'"
raise FingerprintHashFunctionError(err)

Expand Down
38 changes: 36 additions & 2 deletions json_fingerprint/tests/test_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_jfpv1_sha256_structural_distinction_1(self):
"""Test jfpv1 json flattener's structural value distinction.
Verify that:
- Identical value content in identical depths, but in different structures,
- Identical values at identical depths, but held in different data structures,
don't produce identical outputs"""
obj_in_1 = [
1,
Expand All @@ -85,7 +85,7 @@ def test_jfpv1_sha256_structural_distinction_2(self):
"""Test jfpv1 json flattener's structural value distinction.
Verify that:
- Values in identical paths/structures but different sibling values don't get matched"""
- Values in identical data structure paths, but different sibling values, don't get matched"""
obj_in_1 = [
[1, ["x", "x"]],
[2, ["y", "y"]],
Expand All @@ -100,6 +100,40 @@ def test_jfpv1_sha256_structural_distinction_2(self):

self.assertNotEqual(fp_1, fp_2)

def test_jfpv1_empty_list_as_value(self):
    """Check that an empty list stored as a value affects the jfpv1 fingerprint.

    Versions up to 0.12.2 did not acknowledge empty lists as values.
    Related issue: https://github.com/cobaltine/json-fingerprint/issues/33

    Verify that:
    - An object with an extra key holding an empty list does not produce the same
      fingerprint as the object without that key"""
    # Serialize both documents up front; they differ only by the "field2": [] entry.
    baseline_json = json.dumps({"field1": "yes"})
    extended_json = json.dumps({"field1": "yes", "field2": []})

    baseline_fp = create(input=baseline_json, hash_function="sha256", version=1)
    extended_fp = create(input=extended_json, hash_function="sha256", version=1)

    self.assertNotEqual(baseline_fp, extended_fp)

def test_jfpv1_empty_dict_as_value(self):
    """Check that an empty dict stored as a value affects the jfpv1 fingerprint.

    Versions up to 0.12.2 did not acknowledge empty dicts as values.
    Related issue: https://github.com/cobaltine/json-fingerprint/issues/33

    Verify that:
    - An object with an extra key holding an empty dict does not produce the same
      fingerprint as the object without that key"""
    # Serialize both documents up front; they differ only by the "field2": {} entry.
    baseline_json = json.dumps({"field1": "yes"})
    extended_json = json.dumps({"field1": "yes", "field2": {}})

    baseline_fp = create(input=baseline_json, hash_function="sha256", version=1)
    extended_fp = create(input=extended_json, hash_function="sha256", version=1)

    self.assertNotEqual(baseline_fp, extended_fp)


if __name__ == "__main__":
unittest.main()
28 changes: 28 additions & 0 deletions json_fingerprint/tests/test_jfpv1.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,34 @@ def test_jfpv1_json_flattener_primitive_none_handling(self):
expected_bool_out_raw = [{"path": "", "value": none_val}]
self.assertEqual(none_out_raw, expected_bool_out_raw)

def test_jfpv1_json_flattener_empty_list_handling(self):
    """Check that the jfpv1 json flattener emits an element for an empty list.

    Versions up to v0.12.2 did not acknowledge empty lists as values.
    Related issue: https://github.com/cobaltine/json-fingerprint/issues/33

    Verify that:
    - A top-level empty list is flattened into a single element with an empty path
      and the list itself as the value"""
    # The same list object is used as input and inside the expected element.
    empty_value = []
    expected_elements = [{"path": "", "value": empty_value}]

    flattened = _jfpv1._flatten_json(data=empty_value, hash_function="sha256")

    self.assertEqual(flattened, expected_elements)

def test_jfpv1_json_flattener_empty_dict_handling(self):
    """Check that the jfpv1 json flattener emits an element for an empty dict.

    Versions up to v0.12.2 did not acknowledge empty dicts as values.
    Related issue: https://github.com/cobaltine/json-fingerprint/issues/33

    Verify that:
    - A top-level empty dict is flattened into a single element with an empty path
      and the dict itself as the value"""
    # The same dict object is used as input and inside the expected element.
    empty_value = {}
    expected_elements = [{"path": "", "value": empty_value}]

    flattened = _jfpv1._flatten_json(data=empty_value, hash_function="sha256")

    self.assertEqual(flattened, expected_elements)


if __name__ == "__main__":
unittest.main()

0 comments on commit 5b3d2b2

Please sign in to comment.