From c73d938b26db55beb652ec895139cf59e612ef22 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Fri, 5 Feb 2021 23:57:41 +0100 Subject: [PATCH] fix(types): fix extend in docset (#1883) --- jina/types/document/multimodal.py | 11 +++++++++-- jina/types/sets/document.py | 3 ++- tests/unit/types/sets/test_chunkset.py | 10 +++++++++- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/jina/types/document/multimodal.py b/jina/types/document/multimodal.py index 3e6ea96b60fbb..d8b701a5dda0f 100644 --- a/jina/types/document/multimodal.py +++ b/jina/types/document/multimodal.py @@ -44,10 +44,17 @@ def __init__(self, document: Optional[DocumentSourceType] = None, super().__init__(document=document, copy=copy, **kwargs) if chunks or modality_content_map: if chunks: - granularities = [chunk.granularity for chunk in chunks] - if len(set(granularities)) != 1: + g = {c.granularity for c in chunks} + if len(g) != 1: raise BadDocType('Each chunk should have the same granularity.') self.chunks.extend(chunks) + + # in case chunks have granularity defined, override + gv = list(g)[0] + if gv != 0: + for c in self.chunks: + c.granularity = gv + elif modality_content_map: self.modality_content_map = modality_content_map self._handle_chunk_level_attributes() diff --git a/jina/types/sets/document.py b/jina/types/sets/document.py index a38110a0d3bfa..3f0aee876a5ad 100644 --- a/jina/types/sets/document.py +++ b/jina/types/sets/document.py @@ -86,7 +86,8 @@ def add(self, doc: 'Document') -> 'Document': return self.append(doc) def extend(self, iterable: Iterable['Document']) -> None: - self._docs_proto.extend(doc.proto for doc in iterable) + for doc in iterable: + self.append(doc) def clear(self): del self._docs_proto[:] diff --git a/tests/unit/types/sets/test_chunkset.py b/tests/unit/types/sets/test_chunkset.py index 96b87a3061c4c..8df4afb9db671 100644 --- a/tests/unit/types/sets/test_chunkset.py +++ b/tests/unit/types/sets/test_chunkset.py @@ -1,6 +1,6 @@ import pytest -from jina import Request +from jina import Request from jina.types.document import Document from jina.types.sets.chunk import ChunkSet @@ -49,3 +49,11 @@ def test_append_from_documents(chunkset, document_factory, reference_doc): assert rv.parent_id == reference_doc.id assert rv.granularity == reference_doc.granularity + 1 assert rv.mime_type == 'text/plain' + + +def test_doc_chunks_init(): + d = Document(chunks=[Document()], matches=[Document()]) + assert d.chunks[0].granularity == 1 + assert d.chunks[0].adjacency == 0 + assert d.matches[0].adjacency == 1 + assert d.matches[0].granularity == 0