Skip to content

Commit

Permalink
refactor(tests): refactor unit tests for document hashing (#1626)
Browse files Browse the repository at this point in the history
* docs(types): fix the docstring for hashing

* refactor(tests): refactor unit tests for Document hashing
  • Loading branch information
nan-wang committed Jan 8, 2021
1 parent 9a89ab7 commit 36bb513
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 14 deletions.
2 changes: 1 addition & 1 deletion jina/types/document/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def update_content_hash(self,
:param include_fields: a tuple of field names that are included when computing content hash
.. note::
"exclude_fields" and "exclude_fields" are mutually exclusive, use one only
"exclude_fields" and "include_fields" are mutually exclusive, use one only
"""
masked_d = jina_pb2.DocumentProto()
masked_d.CopyFrom(self._document)
Expand Down
25 changes: 12 additions & 13 deletions tests/unit/types/document/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,28 +293,27 @@ def test_include_scalar():


def test_include_repeated_fields():
d1 = Document()
dd1 = Document()
d1.chunks.append(dd1)
d1.chunks[0].update_content_hash(exclude_fields=('parent_id', 'id', 'content_hash'))
d1.chunks[0].parent_id = 0
d1.update_content_hash(include_fields=('chunks',), exclude_fields=None)
def build_document(chunk=None):
d = Document()
d.chunks.append(chunk)
d.chunks[0].update_content_hash(exclude_fields=('parent_id', 'id', 'content_hash'))
d.chunks[0].parent_id = 0
d.update_content_hash(include_fields=('chunks',), exclude_fields=None)
return d

d2 = Document()
d2.chunks.append(dd1)
d2.chunks[0].update_content_hash(exclude_fields=('parent_id', 'id', 'content_hash'))
d2.chunks[0].parent_id = 0
d2.update_content_hash(include_fields=('chunks',), exclude_fields=None)
c = Document()
d1 = build_document(chunk=c)
d2 = build_document(chunk=c)

assert d1.chunks[0].content_hash == d2.chunks[0].content_hash
assert d1.content_hash == d2.content_hash

# change text should result in same harsh
# changing the text should result in the same hash
d2.text = 'world'
d2.update_content_hash(include_fields=('chunks',), exclude_fields=None)
assert d1.content_hash == d2.content_hash

# change chunks should result in diff harsh
# changing the chunks should result in a different hash
d2.chunks.clear()
d2.update_content_hash(include_fields=('chunks',), exclude_fields=None)
assert d1.content_hash != d2.content_hash

0 comments on commit 36bb513

Please sign in to comment.