Skip to content

Commit

Permalink
Update docs #minor
Browse files Browse the repository at this point in the history
Updated README.md with generic information about the project and some
new badges. Extended the example section.

Breaking changes:
 - Changed json_fingerprint() `data` parameter to `input`
  • Loading branch information
cobaltine committed Dec 22, 2020
1 parent 3cd730f commit 71c1a44
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 22 deletions.
49 changes: 35 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,27 +1,48 @@
![](https://img.shields.io/github/license/cobaltine/json-fingerprint) ![](https://img.shields.io/pypi/pyversions/json-fingerprint) ![](https://img.shields.io/pypi/v/json-fingerprint) [![Coverage Status](https://coveralls.io/repos/github/cobaltine/json-fingerprint/badge.svg?branch=main)](https://coveralls.io/github/cobaltine/json-fingerprint?branch=main)
# json-fingerprint

The **json-fingerprint** package provides easy checksum creation ("fingerprinting") from unordered JSON data.
![](https://img.shields.io/github/license/cobaltine/json-fingerprint) ![](https://img.shields.io/pypi/pyversions/json-fingerprint) ![](https://img.shields.io/github/workflow/status/cobaltine/json-fingerprint/Test%20runner/main) ![](https://img.shields.io/github/workflow/status/cobaltine/json-fingerprint/Release%20Python%20package/main?label=pypi%20release) [![](https://img.shields.io/pypi/v/json-fingerprint)](https://pypi.org/project/json-fingerprint/) [![Coverage Status](https://coveralls.io/repos/github/cobaltine/json-fingerprint/badge.svg?branch=main)](https://coveralls.io/github/cobaltine/json-fingerprint?branch=main)

## Installation and use

To install the package, run `pip install json-fingerprint`.
Create consistent and comparable fingerprints (checksums/hashes) from unordered JSON data.

Below is a sample for creating simple fingerprints:
A JSON fingerprint consists of three parts: the version of the underlying algorithm, the hash function used, and a hex digest of the hash function output. A complete example could look like this: `jfpv1$sha256$5815eb0ce6f4e5ab0a771cce2a8c5432f64222f8fd84b4cc2d38e4621fae86af`.

The first part, `jfpv1`, indicates the algorithm version and stands for **j**son **f**inger**p**rint **v**ersion **1**. The second part, `sha256`, indicates that SHA256 is the hash function that was used. The last part, `5815eb0ce6f4e5ab0a771cce2a8c5432f64222f8fd84b4cc2d38e4621fae86af`, is a standard hex digest of the hash function output.


## Installation

To install the json-fingerprint package, run `pip install json-fingerprint`.


## Examples

The example below shows how to create and compare json fingerprints.

```python
import json
import json_fingerprint as jfp

obj_1_str = json.dumps([3, 2, 1, {"foo": "bar"}])
obj_2_str = json.dumps([2, {"foo": "bar"}, 1, 3])
fp_1 = jfp.json_fingerprint(obj_1_str, hash_function='sha256', version=1)
fp_2 = jfp.json_fingerprint(obj_2_str, hash_function='sha256', version=1)
print(f'fp_1: {fp_1}')
print(f'fp_2: {fp_2}')
obj_1_str = json.dumps([3, 2, 1, {'foo': 'bar'}])
obj_2_str = json.dumps([2, {'foo': 'bar'}, 1, 3]) # Same data in different order
fp_1 = jfp.json_fingerprint(input=obj_1_str, hash_function='sha256', version=1)
fp_2 = jfp.json_fingerprint(input=obj_2_str, hash_function='sha256', version=1)
print(f'Fingerprint 1: {fp_1}')
print(f'Fingerprint 2: {fp_2}')
```
This will output two identical fingerprints regardless of the different ordering of the json elements:
This will output two identical fingerprints even though the JSON elements are in a different order:

```
fp_1: jfpv1$sha256$5815eb0ce6f4e5ab0a771cce2a8c5432f64222f8fd84b4cc2d38e4621fae86af
fp_2: jfpv1$sha256$5815eb0ce6f4e5ab0a771cce2a8c5432f64222f8fd84b4cc2d38e4621fae86af
Fingerprint 1: jfpv1$sha256$5815eb0ce6f4e5ab0a771cce2a8c5432f64222f8fd84b4cc2d38e4621fae86af
Fingerprint 2: jfpv1$sha256$5815eb0ce6f4e5ab0a771cce2a8c5432f64222f8fd84b4cc2d38e4621fae86af
```
If the previous example was extended a bit, the objects could be easily compared:
```python
if fp_1 == fp_2:
# Do something if fingerprints match
print('Fingerprints match')
else:
# Do nothing or something else
print('Fingerprints not matching')
```
Since JSON objects with identical data content and structure will always produce identical fingerprints, the fingerprints can be used effectively for various purposes. These include finding duplicate JSON data in a larger dataset, JSON data cache validation/invalidation, and data integrity checking.
13 changes: 9 additions & 4 deletions json_fingerprint/json_fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@
# Raised by json_fingerprint() when json.loads() fails to parse the given string.
# The original parsing exception is suppressed (raised `from None`).
class FingerprintJSONLoadError(Exception):
pass


# Raised by json_fingerprint() when the JSON argument is not a str
# (the function expects JSON in string format, not an already-parsed object).
class FingerprintInputDataTypeError(Exception):
pass


# Raised when an unsupported hash function selector is passed to json_fingerprint()
# (the selector is checked against FINGERPRINT_HASH_FUNCTIONS).
class FingerprintHashFunctionError(Exception):
pass


# Raised by json_fingerprint() when an unsupported fingerprint version is requested.
class FingerprintVersionError(Exception):
pass

Expand Down Expand Up @@ -51,6 +54,7 @@ def _flatten_json(data: Dict, out: List, path: str = '') -> List:

return out


def _create_hash_list(data: Dict) -> List[Dict]:
out = []
for obj in data:
Expand All @@ -61,9 +65,10 @@ def _create_hash_list(data: Dict) -> List[Dict]:
out.sort()
return out

def json_fingerprint(data: str, hash_function: str, version: str) -> str:
if type(data) is not str:
err = f'Expected data type \'{type("")}\' (JSON in string format), instead got \'{type(data)}\''

def json_fingerprint(input: str, hash_function: str, version: str) -> str:
if type(input) is not str:
err = f'Expected data type \'{type("")}\' (JSON in string format), instead got \'{type(input)}\''
raise FingerprintInputDataTypeError(err)

if hash_function not in FINGERPRINT_HASH_FUNCTIONS:
Expand All @@ -77,7 +82,7 @@ def json_fingerprint(data: str, hash_function: str, version: str) -> str:
raise FingerprintVersionError(err)

try:
loaded = json.loads(data)
loaded = json.loads(input)
except Exception:
err = 'Unable to load JSON'
raise FingerprintJSONLoadError(err) from None
Expand Down
6 changes: 3 additions & 3 deletions json_fingerprint/tests/test_jfpv1.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,23 @@ def test_jfpv1_json_fingerprint_version_error(self):
Verify that:
- FingerprintVersionError is properly raised with unsupported version"""
with self.assertRaises(jfp.FingerprintVersionError):
jfp.json_fingerprint(data=self.test_obj_1, hash_function='sha256', version=-1)
jfp.json_fingerprint(input=self.test_obj_1, hash_function='sha256', version=-1)

def test_jfpv1_input_data_type_error(self):
"""Test jfpv1 input data type error.
Verify that:
- FingerprintInputDataTypeError is properly raised with incorrect data type input"""
with self.assertRaises(jfp.FingerprintInputDataTypeError):
jfp.json_fingerprint(data={'foo': 'bar'}, hash_function='sha256', version=1)
jfp.json_fingerprint(input={'foo': 'bar'}, hash_function='sha256', version=1)

def test_jfpv1_hash_function_error(self):
"""Test JSON fingerprint hash function selector error.
Verify that:
- FingerprintHashFunctionError is properly raised with unsupported hash function selector"""
with self.assertRaises(jfp.FingerprintHashFunctionError):
jfp.json_fingerprint(data=self.test_obj_1, hash_function='not123', version=1)
jfp.json_fingerprint(input=self.test_obj_1, hash_function='not123', version=1)

def test_jfpv1_json_load_error(self):
"""Test JSON fingerprint raw JSON string load error.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
author='Ville Lehtinen',
author_email='ville.lehtinen@cobaltine.fi',
license='MIT',
description='json fingerprinting tool for creating consistent and comparable checksums of unordered json data',
description='Create consistent and comparable fingerprints (checksums/hashes) from unordered JSON data',
long_description=long_description,
long_description_content_type='text/markdown',
url='https://github.com/cobaltine/json-fingerprint',
Expand Down

0 comments on commit 71c1a44

Please sign in to comment.