Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix empty list and empty dict handling #35

Merged
merged 1 commit into from
May 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions json_fingerprint/_jfpv1.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def _create_json_hash(data: Any, hash_function: str) -> str:
return m.hexdigest()


def _create_sorted_hash_list(data: Dict, hash_function: str) -> List[Dict]:
def _create_sorted_hash_list(data: List, hash_function: str) -> List[str]:
"""Create a sorted sha256 hash list."""
out = []
for obj in data:
Expand All @@ -42,7 +42,7 @@ def _build_path(key: str, base_path: str):
return key


def _build_element(path: str, siblings: str, value: Any):
def _build_element(path: str, siblings: List[str], value: Any):
"""Build an element dictionary based on presence of sibling data."""
if siblings:
return {
Expand All @@ -57,20 +57,29 @@ def _build_element(path: str, siblings: str, value: Any):
}


def _flatten_json(data: Dict, hash_function: str, path: str = "", siblings: List = [], debug: bool = False) -> List:
def _flatten_json(data: Any, hash_function: str, path: str = "", siblings: List = [], debug: bool = False) -> List:
"""Flatten json data structures into a sibling-aware data element list."""
out = []
if type(data) is dict:

# Process non-empty dicts
if type(data) is dict and data:
for key in data.keys():
p = _build_path(key=f"{{{key}}}", base_path=path)
output = _flatten_json(data=data[key], hash_function=hash_function, path=p, siblings=siblings, debug=debug)
output = _flatten_json(
data=data[key],
hash_function=hash_function,
path=p,
siblings=siblings,
debug=debug,
)
out.extend(output)
return out

if type(data) is list:
# Process non-empty lists
if type(data) is list and data:
p = _build_path(key=f"[{len(data)}]", base_path=path)

# Iterate and collect sibling structures, which'll be then attached to each sibling element
# Iterate and collect sibling structures, which will then be attached to each sibling element
siblings = []
for item in data:
output = _flatten_json(data=item, hash_function=hash_function, path=p, debug=debug)
Expand All @@ -83,7 +92,13 @@ def _flatten_json(data: Dict, hash_function: str, path: str = "", siblings: List

# Recurse with each value in list to typecheck it and eventually get the element value
for item in data:
output = _flatten_json(data=item, hash_function=hash_function, path=p, siblings=siblings, debug=debug)
output = _flatten_json(
data=item,
hash_function=hash_function,
path=p,
siblings=siblings,
debug=debug,
)
out.extend(output)
return out

Expand Down
2 changes: 1 addition & 1 deletion json_fingerprint/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


def _validate_hash_function(hash_function: str, version: int):
if hash_function not in JFPV1_HASH_FUNCTIONS:
if version == 1 and hash_function not in JFPV1_HASH_FUNCTIONS:
err = f"Expected one of supported hash functions '{JFPV1_HASH_FUNCTIONS}', " f"instead got '{hash_function}'"
raise FingerprintHashFunctionError(err)

Expand Down
38 changes: 36 additions & 2 deletions json_fingerprint/tests/test_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_jfpv1_sha256_structural_distinction_1(self):
"""Test jfpv1 json flattener's structural value distinction.

Verify that:
- Identical value content in identical depths, but in different structures,
- Identical values at identical depths, but held in different data structures,
don't produce identical outputs"""
obj_in_1 = [
1,
Expand All @@ -85,7 +85,7 @@ def test_jfpv1_sha256_structural_distinction_2(self):
"""Test jfpv1 json flattener's structural value distinction.

Verify that:
- Values in identical paths/structures but different sibling values don't get matched"""
- Values in identical data structure paths, but different sibling values, don't get matched"""
obj_in_1 = [
[1, ["x", "x"]],
[2, ["y", "y"]],
Expand All @@ -100,6 +100,40 @@ def test_jfpv1_sha256_structural_distinction_2(self):

self.assertNotEqual(fp_1, fp_2)

def test_jfpv1_empty_list_as_value(self):
    """Test that an empty list stored under a key affects the jfpv1 fingerprint.

    Versions up to 0.12.2 did not acknowledge empty lists as values.
    Related issue: https://github.com/cobaltine/json-fingerprint/issues/33

    Verify that:
    - A document with an extra key holding an empty list does not produce
      the same fingerprint as the document without that key"""
    baseline = {"field1": "yes"}
    with_empty_list = {"field1": "yes", "field2": []}

    # Fingerprint both documents with identical settings so that only the
    # extra empty-list field can account for a difference.
    fingerprints = [
        create(input=json.dumps(document), hash_function="sha256", version=1)
        for document in (baseline, with_empty_list)
    ]

    self.assertNotEqual(fingerprints[0], fingerprints[1])

def test_jfpv1_empty_dict_as_value(self):
    """Test that an empty dict stored under a key affects the jfpv1 fingerprint.

    Versions up to 0.12.2 did not acknowledge empty dicts as values.
    Related issue: https://github.com/cobaltine/json-fingerprint/issues/33

    Verify that:
    - A document with an extra key holding an empty dict does not produce
      the same fingerprint as the document without that key"""
    baseline = {"field1": "yes"}
    with_empty_dict = {"field1": "yes", "field2": {}}

    # Fingerprint both documents with identical settings so that only the
    # extra empty-dict field can account for a difference.
    fingerprints = [
        create(input=json.dumps(document), hash_function="sha256", version=1)
        for document in (baseline, with_empty_dict)
    ]

    self.assertNotEqual(fingerprints[0], fingerprints[1])


if __name__ == "__main__":
unittest.main()
28 changes: 28 additions & 0 deletions json_fingerprint/tests/test_jfpv1.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,34 @@ def test_jfpv1_json_flattener_primitive_none_handling(self):
expected_bool_out_raw = [{"path": "", "value": none_val}]
self.assertEqual(none_out_raw, expected_bool_out_raw)

def test_jfpv1_json_flattener_empty_list_handling(self):
    """Test that the jfpv1 json flattener emits an element for an empty list.

    Versions up to v0.12.2 did not acknowledge empty lists as values.
    Related issue: https://github.com/cobaltine/json-fingerprint/issues/33

    Verify that:
    - Flattening a top-level empty list yields a single element with an
      empty path and the empty list as its value"""
    empty_list = []
    flattened = _jfpv1._flatten_json(data=empty_list, hash_function="sha256")

    self.assertEqual(flattened, [{"path": "", "value": empty_list}])

def test_jfpv1_json_flattener_empty_dict_handling(self):
    """Test that the jfpv1 json flattener emits an element for an empty dict.

    Versions up to v0.12.2 did not acknowledge empty dicts as values.
    Related issue: https://github.com/cobaltine/json-fingerprint/issues/33

    Verify that:
    - Flattening a top-level empty dict yields a single element with an
      empty path and the empty dict as its value"""
    empty_dict = {}
    flattened = _jfpv1._flatten_json(data=empty_dict, hash_function="sha256")

    self.assertEqual(flattened, [{"path": "", "value": empty_dict}])


if __name__ == "__main__":
unittest.main()