From 4e1db7671168306d25a278e24fbcc3d0e39dee15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20H=C3=B6nicke?= Date: Fri, 5 Feb 2021 18:10:15 +0100 Subject: [PATCH] refactor: remove unique id (#1872) * refactor: remove unique id * refactor: force parent id to be string * tests: fix cache driver * test: fix cache driver * test: fix workspace * test: flow multimode * refactor: indexer * fix: better default handling of key length Co-authored-by: Maximilian Werk --- jina/clients/request/helper.py | 3 +- jina/drivers/rank/__init__.py | 4 +- jina/drivers/rank/aggregate/__init__.py | 7 +- jina/executors/indexers/__init__.py | 24 +++-- jina/executors/indexers/cache.py | 5 +- jina/executors/indexers/keyvalue.py | 5 +- jina/executors/indexers/vector.py | 4 +- jina/types/document/__init__.py | 40 ++------ jina/types/document/uid.py | 95 ------------------- jina/types/sets/document.py | 6 -- tests/integration/crud/simple/test_crud.py | 17 +++- .../integration/docidcache/test_crud_cache.py | 2 +- tests/unit/drivers/test_cache_driver.py | 8 +- tests/unit/drivers/test_concat_driver.py | 5 +- tests/unit/flow/test_flow_index.py | 5 +- tests/unit/flow/test_flow_multimode.py | 7 +- tests/unit/test_helper.py | 8 +- tests/unit/test_workspace.py | 5 +- tests/unit/types/sets/test_documentset.py | 3 +- 19 files changed, 58 insertions(+), 195 deletions(-) delete mode 100644 jina/types/document/uid.py diff --git a/jina/clients/request/helper.py b/jina/clients/request/helper.py index 9a7002cb91f92..403e92d52861d 100644 --- a/jina/clients/request/helper.py +++ b/jina/clients/request/helper.py @@ -67,4 +67,5 @@ def _add_docs_groundtruths(req, batch, data_type, _kwargs): def _add_ids(req, batch): - req.ids.extend(batch) + string_ids = (str(doc_id) for doc_id in batch) + req.ids.extend(string_ids) diff --git a/jina/drivers/rank/__init__.py b/jina/drivers/rank/__init__.py index 481ef6b084541..4ae99456f4954 100644 --- a/jina/drivers/rank/__init__.py +++ b/jina/drivers/rank/__init__.py @@ -4,7 +4,6 @@ from .. import BaseExecutableDriver from ...types.document import Document -from ...types.document.uid import UniqueId from ...types.score import NamedScore @@ -65,8 +64,7 @@ def _sort_matches_in_place(self, context_doc: 'Document', match_scores: 'np.ndar op_name = self.exec.__class__.__name__ cm = context_doc.matches cm.build() - for str_match_id, score in match_scores: - match_id = UniqueId(str_match_id) + for match_id, score in match_scores: cm[match_id].score = NamedScore(value=score, op_name=op_name, ref_id=context_doc.id) cm.sort(key=lambda x: x.score.value, reverse=True) diff --git a/jina/drivers/rank/aggregate/__init__.py b/jina/drivers/rank/aggregate/__init__.py index 9cc678d3314db..c993d265699d9 100644 --- a/jina/drivers/rank/aggregate/__init__.py +++ b/jina/drivers/rank/aggregate/__init__.py @@ -10,7 +10,6 @@ from .. import BaseRankDriver if False: - from ....types.document.uid import UniqueId from ....types.sets import DocumentSet @@ -105,9 +104,9 @@ def _apply_all(self, docs: 'DocumentSet', :return: """ - match_idx = [] # type: List[Tuple[UniqueId, UniqueId, UniqueId, float]] - query_meta = {} # type: Dict[UniqueId, Dict] - match_meta = {} # type: Dict[UniqueId, Dict] + match_idx = [] # type: List[Tuple[str, str, str, float]] + query_meta = {} # type: Dict[str, Dict] + match_meta = {} # type: Dict[str, Dict] parent_id_chunk_id_map = defaultdict(list) matches_by_id = defaultdict(Document) for chunk in docs: diff --git a/jina/executors/indexers/__init__.py b/jina/executors/indexers/__init__.py index 0d0db4d8eace6..1f377c1a5dd75 100644 --- a/jina/executors/indexers/__init__.py +++ b/jina/executors/indexers/__init__.py @@ -41,6 +41,7 @@ class BaseIndexer(BaseExecutor): def __init__(self, index_filename: str = None, + key_length: int = None, *args, **kwargs): """ @@ -51,26 +52,23 @@ def __init__(self, super().__init__(*args, **kwargs) self.index_filename = index_filename #: the file name of the stored index, no path is required self._size = 0 - self._key_length = 16 #: the default minimum length of the key, will be expanded one time on the first batch + self._key_length = key_length #: the default minimum length of the key, will be expanded one time on the first batch @property def key_length(self) -> int: return self._key_length + def _assert_key_length(self, keys): + max_key_len = max([len(k) for k in keys]) + + if self.key_length is None: + self.key_length = max(16, max_key_len) + elif max_key_len > self.key_length: + raise ValueError(f'This indexer allows only keys of length {self._key_length}, but yours is {max_key_len}.') + @key_length.setter def key_length(self, val: int): - """Set the max key length. """ - if not self._key_length or self._key_length < val: - # expand once - self._key_length = val - elif val < self._key_length: - # just padding, no big deal - self.logger.warning( - f'key padding is triggered. this indexer allows only keys at length {self._key_length}, ' - f'but your max key length is {val}.') - elif val > self._key_length: - # panic - raise ValueError(f'this indexer allows only keys at length {self._key_length}, but yours is {val}') + self._key_length = val def add(self, *args, **kwargs): """Add documents to the index. diff --git a/jina/executors/indexers/cache.py b/jina/executors/indexers/cache.py index 232425be84640..7c0e01067a51e 100644 --- a/jina/executors/indexers/cache.py +++ b/jina/executors/indexers/cache.py @@ -75,16 +75,14 @@ def add(self, doc_id: str, *args, **kwargs): self.query_handler.cache_val_to_id[data] = doc_id self._size += 1 - def query(self, data, *args, **kwargs) -> Optional[bool]: + def query(self, data: str, *args, **kwargs) -> Optional[bool]: """Check whether the data exists in the cache. :param data: either the id or the content_hash of a Document :return: status """ - return data in self.query_handler.cache_val_to_id - def update(self, keys: Iterable[str], values: Iterable[any], *args, **kwargs): """Update cached documents. :param keys: list of Document.id @@ -99,7 +97,6 @@ def update(self, keys: Iterable[str], values: Iterable[any], *args, **kwargs): del self.query_handler.cache_val_to_id[old_value] self.query_handler.cache_val_to_id[value] = key - def delete(self, keys: Iterable[str], *args, **kwargs): """Delete documents from the cache. :param keys: list of Document.id diff --git a/jina/executors/indexers/keyvalue.py b/jina/executors/indexers/keyvalue.py index 6f730bc486a84..76f60e77c3366 100644 --- a/jina/executors/indexers/keyvalue.py +++ b/jina/executors/indexers/keyvalue.py @@ -56,7 +56,6 @@ def __init__(self, *args, **kwargs): self._total_byte_len = 0 self._start = 0 self._page_size = mmap.ALLOCATIONGRANULARITY - self._key_length = 0 def add(self, keys: Iterable[str], values: Iterable[bytes], *args, **kwargs): """Add the serialized documents to the index via document ids. @@ -66,9 +65,7 @@ def add(self, keys: Iterable[str], values: Iterable[bytes], *args, **kwargs): """ if not keys: return - - max_key_len = max([len(k) for k in keys]) - self.key_length = max_key_len + self._assert_key_length(keys) for key, value in zip(keys, values): l = len(value) #: the length diff --git a/jina/executors/indexers/vector.py b/jina/executors/indexers/vector.py index 067b4adf17fee..e9b32939f2006 100644 --- a/jina/executors/indexers/vector.py +++ b/jina/executors/indexers/vector.py @@ -124,8 +124,8 @@ def add(self, keys: Iterable[str], vectors: 'np.ndarray', *args, **kwargs) -> No :param keys: a list of ``id``, i.e. ``doc.id`` in protobuf :param vectors: embeddings """ - max_key_len = max([len(k) for k in keys]) - self.key_length = max_key_len + self._assert_key_length(keys) + np_keys = np.array(keys, (np.str_, self.key_length)) self._add(np_keys, vectors) diff --git a/jina/types/document/__init__.py b/jina/types/document/__init__.py index 350f4dc838865..b9f6dda89ec03 100644 --- a/jina/types/document/__init__.py +++ b/jina/types/document/__init__.py @@ -13,7 +13,6 @@ from google.protobuf.field_mask_pb2 import FieldMask from .converters import png_to_buffer, to_datauri, guess_mime -from .uid import DIGEST_SIZE, UniqueId from ..mixin import ProtoTypeMixin from ..ndarray.generic import NdArray from ..score import NamedScore @@ -25,6 +24,7 @@ from ...proto import jina_pb2 __all__ = ['Document', 'DocumentContentType', 'DocumentSourceType'] +DIGEST_SIZE = 8 DocumentContentType = TypeVar('DocumentContentType', bytes, str, np.ndarray) DocumentSourceType = TypeVar('DocumentSourceType', @@ -261,45 +261,23 @@ def parent_id(self) -> str: @id.setter def id(self, value: Union[bytes, str, int]): - """Set document id to a string value + """Set document id to a string value. - .. note: - - Customized ``id`` is acceptable as long as - - it only contains the symbols "0"–"9" to represent values 0 to 9, - and "A"–"F" (or alternatively "a"–"f"). - - it has 16 chars described above. - - :param value: restricted string value + :param value: id as bytes, int or str :return: """ - if isinstance(value, str): - self._pb_body.id = value - else: - warnings.warn(f'expecting a string as ID, receiving {type(value)}. ' - f'Note this type will be deprecated soon', DeprecationWarning) - self._pb_body.id = UniqueId(value) + self._pb_body.id = str(value) + @parent_id.setter def parent_id(self, value: Union[bytes, str, int]): - """Set document's parent id to a string value + """Set document's parent id to a string value. - .. note: - - Customized ``id`` is acceptable as long as - - it only contains the symbols "0"–"9" to represent values 0 to 9, - and "A"–"F" (or alternatively "a"–"f"). - - it has 16 chars described above. - - :param value: restricted string value + :param value: id as bytes, int or str :return: """ - if isinstance(value, str): - self._pb_body.parent_id = value - else: - warnings.warn(f'expecting a string as ID, receiving {type(value)}. ' - f'Note this type will be deprecated soon', DeprecationWarning) - self._pb_body.parent_id = UniqueId(value) + self._pb_body.parent_id = str(value) + @property def blob(self) -> 'np.ndarray': diff --git a/jina/types/document/uid.py b/jina/types/document/uid.py deleted file mode 100644 index 2ea758cbaa4d8..0000000000000 --- a/jina/types/document/uid.py +++ /dev/null @@ -1,95 +0,0 @@ -""" -Remarks on the ``id``, we have three views for it -- ``id``: ``str`` is a hex string, for non-binary environment such as HTTP, CLI, HTML and also human-readable. it will be used as the major view. -- ``bytes``: ``bytes`` is the binary format of str, it has 8 bytes fixed length, so it can be used in the dense file storage, e.g. BinaryPbIndexer, as it requires the key has to be fixed length. -- ``int``:``int`` (formerly names ``hash``) is the integer form of bytes. This is useful when sometimes you want to use key along with other numeric values together in one ndarray, such as ranker and Numpyindexer -.. note: - Customized ``id`` is acceptable as long as - - it only contains the symbols "0"–"9" to represent values 0 to 9, - and "A"–"F" (or alternatively "a"–"f"). - - it has even length. -""" - -__copyright__ = "Copyright (c) 2020 Jina AI Limited. All rights reserved." -__license__ = "Apache-2.0" - -import re -import sys -import warnings -from binascii import unhexlify - -import numpy as np - -from ...excepts import BadDocID -from ...helper import typename - -DIGEST_SIZE = 8 -_id_regex = re.compile(r'([0-9a-fA-F][0-9a-fA-F])+') - - -def int2bytes(value: int) -> bytes: - return int(value).to_bytes(DIGEST_SIZE, sys.byteorder, signed=True) - - -def bytes2int(value: bytes) -> int: - return int.from_bytes(value, sys.byteorder, signed=True) - - -def id2bytes(value: str) -> bytes: - try: - return unhexlify(value) - except: - is_valid_id(value) - - -def bytes2id(value: bytes) -> str: - return value.hex() - - -def int2id(value: int) -> str: - return bytes2id(int2bytes(value)) - - -def id2int(value: str) -> int: - return bytes2int(id2bytes(value)) - - -def is_valid_id(value: str) -> bool: - if not isinstance(value, str) or not _id_regex.match(value): - raise BadDocID(f'{value} is not a valid id. Customized ``id`` is only acceptable when: \ - - it only contains chars "0"–"9" to represent values 0 to 9, \ - and "A"–"F" (or alternatively "a"–"f"). \ - - it has 16 chars described above.') - return True - - -class UniqueId(str): - def __new__(cls, seq): - if isinstance(seq, (int, np.integer)): - seq = int2id(int(seq)) - elif isinstance(seq, bytes): - seq = bytes2id(seq) - elif seq == '': - pass - elif isinstance(seq, str): - seq = seq - elif seq is not None: - raise BadDocID(f'{typename(seq)}: {seq} is not a valid id') - - return str.__new__(cls, seq) - - def __int__(self): - """The document id in the integer form of bytes, as 8 bytes map to int64. - This is useful when sometimes you want to use key along with other numeric values together in one ndarray, - such as ranker and Numpyindexer - """ - warnings.warn('UniqueId to int conversion is not reliable and deprecated', DeprecationWarning) - return id2int(self) - - def __bytes__(self): - """The document id in the binary format of str, it has 8 bytes fixed length, - so it can be used in the dense file storage, e.g. BinaryPbIndexer, - as it requires the key has to be fixed length. - """ - warnings.warn('UniqueId to str conversion is not reliable and deprecated', DeprecationWarning) - return id2bytes(self) diff --git a/jina/types/sets/document.py b/jina/types/sets/document.py index 2850f55aea164..a38110a0d3bfa 100644 --- a/jina/types/sets/document.py +++ b/jina/types/sets/document.py @@ -36,11 +36,8 @@ def insert(self, index: int, doc: 'Document') -> None: self._docs_proto.insert(index, doc.proto) def __setitem__(self, key, value: 'Document'): - from ..document.uid import UniqueId if isinstance(key, int): self._docs_proto[key].CopyFrom(value) - elif isinstance(key, UniqueId): - self._docs_map[str(key)].CopyFrom(value) elif isinstance(key, str): self._docs_map[key].CopyFrom(value) else: @@ -59,11 +56,8 @@ def __iter__(self): def __getitem__(self, item): from ..document import Document - from ..document.uid import UniqueId if isinstance(item, int): return Document(self._docs_proto[item]) - elif isinstance(item, UniqueId): - return Document(self._docs_map[str(item)]) elif isinstance(item, str): return Document(self._docs_map[item]) elif isinstance(item, slice): diff --git a/tests/integration/crud/simple/test_crud.py b/tests/integration/crud/simple/test_crud.py index 2dfb56ec75551..9ce473ccf5204 100644 --- a/tests/integration/crud/simple/test_crud.py +++ b/tests/integration/crud/simple/test_crud.py @@ -29,7 +29,7 @@ def config(tmpdir): def random_docs(start, end, embed_dim=10, jitter=1, has_content=True): for j in range(start, end): d = Document() - d.id = str(f'{j}' * 16) + d.id = j if has_content: d.tags['id'] = j d.text = ''.join(random.choice(string.ascii_lowercase) for _ in range(10)).encode('utf8') @@ -37,11 +37,18 @@ def random_docs(start, end, embed_dim=10, jitter=1, has_content=True): yield d +def get_ids_to_delete(start, end, as_string): + if as_string: + return (str(idx) for idx in range(start, end)) + return range(start, end) + + def validate_index_size(num_indexed_docs, compound=False): from jina.executors.compound import CompoundExecutor if compound: - path = Path(CompoundExecutor.get_component_workspace_from_compound_workspace(os.environ['JINA_TOPK_DIR'], 'chunk_indexer', 0)) + path = Path(CompoundExecutor.get_component_workspace_from_compound_workspace(os.environ['JINA_TOPK_DIR'], + 'chunk_indexer', 0)) else: path = Path(os.environ['JINA_TOPK_DIR']) bin_files = list(path.glob('*.bin')) @@ -96,8 +103,8 @@ def validate_results(resp): mock.assert_called_once() -@pytest.mark.parametrize('has_content', [True, False]) -def test_delete_kv(config, mocker, has_content): +@pytest.mark.parametrize('as_string', [True, False]) +def test_delete_kv(config, mocker, as_string): flow_file = 'flow_kv.yml' def validate_result_factory(num_matches): @@ -117,7 +124,7 @@ def validate_results(resp): mock.assert_called_once() with Flow.load_config(flow_file) as index_flow: - index_flow.delete(input_fn=[d.id for d in random_docs(0, 3, has_content=has_content)]) + index_flow.delete(input_fn=get_ids_to_delete(0, 3, as_string)) validate_index_size(7) mock = mocker.Mock() diff --git a/tests/integration/docidcache/test_crud_cache.py b/tests/integration/docidcache/test_crud_cache.py index 3301b0e07313a..9f58f4b5a290d 100644 --- a/tests/integration/docidcache/test_crud_cache.py +++ b/tests/integration/docidcache/test_crud_cache.py @@ -140,7 +140,7 @@ def check_docs(chunk_content, chunks, same_content, docs, ids_used, index_start= def check_indexers_size(chunks, nr_docs, field, tmp_path, same_content, shards, post_op): - cache_indexer_path = tmp_path / 'cache.bin' + cache_indexer_path = os.path.join(tmp_path, 'cache.bin') with BaseIndexer.load(cache_indexer_path) as cache: assert isinstance(cache, DocCache) cache_full_size = cache.size diff --git a/tests/unit/drivers/test_cache_driver.py b/tests/unit/drivers/test_cache_driver.py index a3b1b82b57772..f9277bdf3c2ac 100644 --- a/tests/unit/drivers/test_cache_driver.py +++ b/tests/unit/drivers/test_cache_driver.py @@ -10,7 +10,7 @@ from jina.executors import BaseExecutor from jina.executors.indexers.cache import DocCache, ID_KEY, CONTENT_HASH_KEY from jina.proto import jina_pb2 -from jina.types.document import Document, UniqueId +from jina.types.document import Document from tests import random_docs @@ -114,12 +114,12 @@ def exec_fn(self): def test_cache_content_driver_same_content(tmpdir, test_metas): - doc1 = Document(id=1) + doc1 = Document(id='1') doc1.text = 'blabla' doc1.update_content_hash() docs1 = DocumentSet([doc1]) - doc2 = Document(id=2) + doc2 = Document(id='2') doc2.text = 'blabla' doc2.update_content_hash() docs2 = DocumentSet([doc2]) @@ -146,7 +146,7 @@ def test_cache_content_driver_same_content(tmpdir, test_metas): doc1.text = new_string doc1.update_content_hash() with BaseExecutor.load(filename) as executor: - executor.update([UniqueId(1)], [doc1.content_hash]) + executor.update(['1'], [doc1.content_hash]) with BaseExecutor.load(filename) as executor: assert executor.query(doc1.content_hash) is True diff --git a/tests/unit/drivers/test_concat_driver.py b/tests/unit/drivers/test_concat_driver.py index dec9658ee7bbe..b1b9ae199164f 100644 --- a/tests/unit/drivers/test_concat_driver.py +++ b/tests/unit/drivers/test_concat_driver.py @@ -4,7 +4,6 @@ from jina import Document from jina.flow import Flow -from jina.types.document.uid import UniqueId from jina.types.ndarray.generic import NdArray e1 = np.random.random([7]) @@ -18,13 +17,13 @@ def input_fn(): doc1.embedding = e1 with Document() as chunk1: chunk1.embedding = e2 - chunk1.id = UniqueId(1) + chunk1.id = 1 doc1.chunks.add(chunk1) with Document() as doc2: doc2.embedding = e3 with Document() as chunk2: chunk2.embedding = e4 - chunk2.id = UniqueId(2) + chunk2.id = 2 doc2.chunks.add(chunk2) return [doc1, doc2] diff --git a/tests/unit/flow/test_flow_index.py b/tests/unit/flow/test_flow_index.py index 573f4e23c8afc..95b03f0872229 100644 --- a/tests/unit/flow/test_flow_index.py +++ b/tests/unit/flow/test_flow_index.py @@ -5,7 +5,6 @@ from jina.flow import Flow from jina.proto import jina_pb2 -from jina.types.document.uid import UniqueId from tests import random_docs, rm_files cur_dir = os.path.dirname(os.path.abspath(__file__)) @@ -14,10 +13,10 @@ def random_queries(num_docs, chunks_per_doc=5): for j in range(num_docs): d = jina_pb2.DocumentProto() - d.id = UniqueId(j) + d.id = j for k in range(chunks_per_doc): dd = d.chunks.add() - dd.id = UniqueId(num_docs + j * chunks_per_doc + k) + dd.id = num_docs + j * chunks_per_doc + k yield d diff --git a/tests/unit/flow/test_flow_multimode.py b/tests/unit/flow/test_flow_multimode.py index 260335baa4eb5..6363ffa713538 100644 --- a/tests/unit/flow/test_flow_multimode.py +++ b/tests/unit/flow/test_flow_multimode.py @@ -9,7 +9,6 @@ from jina.executors.indexers.keyvalue import BinaryPbIndexer from jina.flow import Flow from jina.proto import jina_pb2 -from jina.types.document.uid import UniqueId cur_dir = os.path.dirname(os.path.abspath(__file__)) @@ -43,15 +42,15 @@ def test_flow_with_modalities(tmpdir, restful): def input_fn(): doc1 = jina_pb2.DocumentProto() doc1.text = 'title: this is mode1 from doc1, body: this is mode2 from doc1' - doc1.id = UniqueId(1) + doc1.id = '1' doc2 = jina_pb2.DocumentProto() doc2.text = 'title: this is mode1 from doc2, body: this is mode2 from doc2' - doc2.id = UniqueId(2) + doc2.id = '2' doc3 = jina_pb2.DocumentProto() doc3.text = 'title: this is mode1 from doc3, body: this is mode2 from doc3' - doc3.id = UniqueId(3) + doc3.id = '3' return [doc1, doc2, doc3] diff --git a/tests/unit/test_helper.py b/tests/unit/test_helper.py index 3eb8ed241fb67..c1806273a3e5b 100644 --- a/tests/unit/test_helper.py +++ b/tests/unit/test_helper.py @@ -1,4 +1,3 @@ -import random import time from types import SimpleNamespace @@ -14,8 +13,8 @@ from jina.logging import default_logger from jina.logging.profile import TimeContext from jina.proto import jina_pb2 -from jina.types.document.uid import * from tests import random_docs +import numpy as np def test_cached_property(): @@ -60,11 +59,6 @@ def test_time_context(): assert tc.readable_duration == '2 seconds' -def test_np_int(): - a = random.randint(0, 100000) - assert int2bytes(np.int64(a)) == int2bytes(a) - - def test_dunder_get(): a = SimpleNamespace() a.b = {'c': 1} diff --git a/tests/unit/test_workspace.py b/tests/unit/test_workspace.py index 6aecf2d4b6f8d..b76d596b07b26 100644 --- a/tests/unit/test_workspace.py +++ b/tests/unit/test_workspace.py @@ -5,7 +5,6 @@ import pytest from jina.executors import BaseExecutor -from jina.types.document import UniqueId cur_dir = os.path.dirname(os.path.abspath(__file__)) @@ -214,11 +213,11 @@ def test_compound_indexer_rw(test_workspace): assert indexer[0] == indexer['test_meta'] assert not indexer[0].is_updated assert not indexer.is_updated - indexer[0].add([UniqueId(j), UniqueId(j * 2), UniqueId(j * 3)], [bytes(j), bytes(j * 2), bytes(j * 3)]) + indexer[0].add([str(j), str(j * 2), str(j * 3)], [bytes(j), bytes(j * 2), bytes(j * 3)]) assert indexer[0].is_updated assert indexer.is_updated assert not indexer[1].is_updated - indexer[1].add([UniqueId(j), UniqueId(j * 2)], all_vecs[(j * 2, j * 2 + 1), :]) + indexer[1].add([str(j), str(j * 2)], all_vecs[(j * 2, j * 2 + 1), :]) assert indexer[1].is_updated indexer.save() # the compound executor itself is not modified, therefore should not generate a save diff --git a/tests/unit/types/sets/test_documentset.py b/tests/unit/types/sets/test_documentset.py index 94e948454f4b2..2e32b1f395606 100644 --- a/tests/unit/types/sets/test_documentset.py +++ b/tests/unit/types/sets/test_documentset.py @@ -3,7 +3,6 @@ import pytest from jina import Document -from jina.types.document.uid import UniqueId from jina.types.sets import DocumentSet DOCUMENTS_PER_LEVEL = 1 @@ -106,7 +105,7 @@ def test_build(docset): def test_set_get_success(docset, document_factory): docset.build() doc = document_factory.create(4, 'test 4') - doc_id = str(UniqueId(2)) + doc_id = 2 docset[doc_id] = doc assert docset[doc_id].text == 'test 4' doc_0_id = docset[0].id