From 4e1db7671168306d25a278e24fbcc3d0e39dee15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20H=C3=B6nicke?= <hoenicke.florian@gmail.com>
Date: Fri, 5 Feb 2021 18:10:15 +0100
Subject: [PATCH] refactor: remove unique id (#1872)

* refactor: remove unique id

* refactor: force parent id to be string

* tests: fix cache driver

* test: fix cache driver

* test: fix workspace

* test: flow multimode

* refactor: indexer

* fix: better default handling of key length

Co-authored-by: Maximilian Werk <maximilian.werk@jina.ai>
---
 jina/clients/request/helper.py                |  3 +-
 jina/drivers/rank/__init__.py                 |  4 +-
 jina/drivers/rank/aggregate/__init__.py       |  7 +-
 jina/executors/indexers/__init__.py           | 24 +++--
 jina/executors/indexers/cache.py              |  5 +-
 jina/executors/indexers/keyvalue.py           |  5 +-
 jina/executors/indexers/vector.py             |  4 +-
 jina/types/document/__init__.py               | 40 ++------
 jina/types/document/uid.py                    | 95 -------------------
 jina/types/sets/document.py                   |  6 --
 tests/integration/crud/simple/test_crud.py    | 17 +++-
 .../integration/docidcache/test_crud_cache.py |  2 +-
 tests/unit/drivers/test_cache_driver.py       |  8 +-
 tests/unit/drivers/test_concat_driver.py      |  5 +-
 tests/unit/flow/test_flow_index.py            |  5 +-
 tests/unit/flow/test_flow_multimode.py        |  7 +-
 tests/unit/test_helper.py                     |  8 +-
 tests/unit/test_workspace.py                  |  5 +-
 tests/unit/types/sets/test_documentset.py     |  3 +-
 19 files changed, 58 insertions(+), 195 deletions(-)
 delete mode 100644 jina/types/document/uid.py

diff --git a/jina/clients/request/helper.py b/jina/clients/request/helper.py
index 9a7002cb91f92..403e92d52861d 100644
--- a/jina/clients/request/helper.py
+++ b/jina/clients/request/helper.py
@@ -67,4 +67,5 @@ def _add_docs_groundtruths(req, batch, data_type, _kwargs):
 
 
 def _add_ids(req, batch):
-    req.ids.extend(batch)
+    # stringify every id before adding it to the request, so callers may pass non-str ids
+    req.ids.extend(str(doc_id) for doc_id in batch)
diff --git a/jina/drivers/rank/__init__.py b/jina/drivers/rank/__init__.py
index 481ef6b084541..4ae99456f4954 100644
--- a/jina/drivers/rank/__init__.py
+++ b/jina/drivers/rank/__init__.py
@@ -4,7 +4,6 @@
 
 from .. import BaseExecutableDriver
 from ...types.document import Document
-from ...types.document.uid import UniqueId
 from ...types.score import NamedScore
 
 
@@ -65,8 +64,7 @@ def _sort_matches_in_place(self, context_doc: 'Document', match_scores: 'np.ndar
         op_name = self.exec.__class__.__name__
         cm = context_doc.matches
         cm.build()
-        for str_match_id, score in match_scores:
-            match_id = UniqueId(str_match_id)
+        for match_id, score in match_scores:
             cm[match_id].score = NamedScore(value=score, op_name=op_name, ref_id=context_doc.id)
 
         cm.sort(key=lambda x: x.score.value, reverse=True)
diff --git a/jina/drivers/rank/aggregate/__init__.py b/jina/drivers/rank/aggregate/__init__.py
index 9cc678d3314db..c993d265699d9 100644
--- a/jina/drivers/rank/aggregate/__init__.py
+++ b/jina/drivers/rank/aggregate/__init__.py
@@ -10,7 +10,6 @@
 from .. import BaseRankDriver
 
 if False:
-    from ....types.document.uid import UniqueId
     from ....types.sets import DocumentSet
 
 
@@ -105,9 +104,9 @@ def _apply_all(self, docs: 'DocumentSet',
         :return:
         """
 
-        match_idx = []  # type: List[Tuple[UniqueId, UniqueId, UniqueId, float]]
-        query_meta = {}  # type: Dict[UniqueId, Dict]
-        match_meta = {}  # type: Dict[UniqueId, Dict]
+        match_idx = []  # type: List[Tuple[str, str, str, float]]
+        query_meta = {}  # type: Dict[str, Dict]
+        match_meta = {}  # type: Dict[str, Dict]
         parent_id_chunk_id_map = defaultdict(list)
         matches_by_id = defaultdict(Document)
         for chunk in docs:
diff --git a/jina/executors/indexers/__init__.py b/jina/executors/indexers/__init__.py
index 0d0db4d8eace6..1f377c1a5dd75 100644
--- a/jina/executors/indexers/__init__.py
+++ b/jina/executors/indexers/__init__.py
@@ -41,6 +41,7 @@ class BaseIndexer(BaseExecutor):
 
     def __init__(self,
                  index_filename: str = None,
+                 key_length: int = None,
                  *args, **kwargs):
         """
 
@@ -51,26 +52,23 @@ def __init__(self,
         super().__init__(*args, **kwargs)
         self.index_filename = index_filename  #: the file name of the stored index, no path is required
         self._size = 0
-        self._key_length = 16  #: the default minimum length of the key, will be expanded one time on the first batch
+        self._key_length = key_length  #: the maximum key length; when ``None`` it is fixed by the first ``_assert_key_length`` call to ``max(16, longest key)``
 
     @property
     def key_length(self) -> int:
         return self._key_length
 
+    def _assert_key_length(self, keys):
+        """Fix ``key_length`` from the first non-empty batch (at least 16); raise ``ValueError`` on a longer key."""
+        max_key_len = max((len(k) for k in keys), default=None)  # ``None`` for an empty batch: nothing to check
+        if max_key_len is not None and self.key_length is None:
+            self.key_length = max(16, max_key_len)
+        elif max_key_len is not None and max_key_len > self.key_length:
+            raise ValueError(f'This indexer allows only keys of length {self._key_length}, but yours is {max_key_len}.')
+
     @key_length.setter
     def key_length(self, val: int):
-        """Set the max key length. """
-        if not self._key_length or self._key_length < val:
-            # expand once
-            self._key_length = val
-        elif val < self._key_length:
-            # just padding, no big deal
-            self.logger.warning(
-                f'key padding is triggered. this indexer allows only keys at length {self._key_length}, '
-                f'but your max key length is {val}.')
-        elif val > self._key_length:
-            # panic
-            raise ValueError(f'this indexer allows only keys at length {self._key_length}, but yours is {val}')
+        self._key_length = val
 
     def add(self, *args, **kwargs):
         """Add documents to the index.
diff --git a/jina/executors/indexers/cache.py b/jina/executors/indexers/cache.py
index 232425be84640..7c0e01067a51e 100644
--- a/jina/executors/indexers/cache.py
+++ b/jina/executors/indexers/cache.py
@@ -75,16 +75,14 @@ def add(self, doc_id: str, *args, **kwargs):
         self.query_handler.cache_val_to_id[data] = doc_id
         self._size += 1
 
-    def query(self, data, *args, **kwargs) -> Optional[bool]:
+    def query(self, data: str, *args, **kwargs) -> Optional[bool]:
         """Check whether the data exists in the cache.
 
         :param data: either the id or the content_hash of a Document
         :return: status
         """
-
         return data in self.query_handler.cache_val_to_id
 
-
     def update(self, keys: Iterable[str], values: Iterable[any], *args, **kwargs):
         """Update cached documents.
         :param keys: list of Document.id
@@ -99,7 +97,6 @@ def update(self, keys: Iterable[str], values: Iterable[any], *args, **kwargs):
                 del self.query_handler.cache_val_to_id[old_value]
                 self.query_handler.cache_val_to_id[value] = key
 
-
     def delete(self, keys: Iterable[str], *args, **kwargs):
         """Delete documents from the cache.
         :param keys: list of Document.id
diff --git a/jina/executors/indexers/keyvalue.py b/jina/executors/indexers/keyvalue.py
index 6f730bc486a84..76f60e77c3366 100644
--- a/jina/executors/indexers/keyvalue.py
+++ b/jina/executors/indexers/keyvalue.py
@@ -56,7 +56,6 @@ def __init__(self, *args, **kwargs):
         self._total_byte_len = 0
         self._start = 0
         self._page_size = mmap.ALLOCATIONGRANULARITY
-        self._key_length = 0
 
     def add(self, keys: Iterable[str], values: Iterable[bytes], *args, **kwargs):
         """Add the serialized documents to the index via document ids.
@@ -66,9 +65,7 @@ def add(self, keys: Iterable[str], values: Iterable[bytes], *args, **kwargs):
         """
         if not keys:
             return
-
-        max_key_len = max([len(k) for k in keys])
-        self.key_length = max_key_len
+        self._assert_key_length(keys)
 
         for key, value in zip(keys, values):
             l = len(value)  #: the length
diff --git a/jina/executors/indexers/vector.py b/jina/executors/indexers/vector.py
index 067b4adf17fee..e9b32939f2006 100644
--- a/jina/executors/indexers/vector.py
+++ b/jina/executors/indexers/vector.py
@@ -124,8 +124,8 @@ def add(self, keys: Iterable[str], vectors: 'np.ndarray', *args, **kwargs) -> No
         :param keys: a list of ``id``, i.e. ``doc.id`` in protobuf
         :param vectors: embeddings
         """
-        max_key_len = max([len(k) for k in keys])
-        self.key_length = max_key_len
+        self._assert_key_length(keys)
+
         np_keys = np.array(keys, (np.str_, self.key_length))
 
         self._add(np_keys, vectors)
diff --git a/jina/types/document/__init__.py b/jina/types/document/__init__.py
index 350f4dc838865..b9f6dda89ec03 100644
--- a/jina/types/document/__init__.py
+++ b/jina/types/document/__init__.py
@@ -13,7 +13,6 @@
 from google.protobuf.field_mask_pb2 import FieldMask
 
 from .converters import png_to_buffer, to_datauri, guess_mime
-from .uid import DIGEST_SIZE, UniqueId
 from ..mixin import ProtoTypeMixin
 from ..ndarray.generic import NdArray
 from ..score import NamedScore
@@ -25,6 +24,7 @@
 from ...proto import jina_pb2
 
 __all__ = ['Document', 'DocumentContentType', 'DocumentSourceType']
+DIGEST_SIZE = 8
 
 DocumentContentType = TypeVar('DocumentContentType', bytes, str, np.ndarray)
 DocumentSourceType = TypeVar('DocumentSourceType',
@@ -261,45 +261,23 @@ def parent_id(self) -> str:
 
     @id.setter
     def id(self, value: Union[bytes, str, int]):
-        """Set document id to a string value
+        """Set document id to a string value.
 
-        .. note:
-
-            Customized ``id`` is acceptable as long as
-            - it only contains the symbols "0"–"9" to represent values 0 to 9,
-            and "A"–"F" (or alternatively "a"–"f").
-            - it has 16 chars described above.
-
-        :param value: restricted string value
+        :param value: the id; any type is accepted and stored as ``str(value)`` (so ``bytes`` keep their ``b'...'`` form)
         :return:
         """
-        if isinstance(value, str):
-            self._pb_body.id = value
-        else:
-            warnings.warn(f'expecting a string as ID, receiving {type(value)}. '
-                          f'Note this type will be deprecated soon', DeprecationWarning)
-            self._pb_body.id = UniqueId(value)
+        self._pb_body.id = str(value)
+
 
     @parent_id.setter
     def parent_id(self, value: Union[bytes, str, int]):
-        """Set document's parent id to a string value
+        """Set document's parent id to a string value.
 
-        .. note:
-
-            Customized ``id`` is acceptable as long as
-            - it only contains the symbols "0"–"9" to represent values 0 to 9,
-            and "A"–"F" (or alternatively "a"–"f").
-            - it has 16 chars described above.
-
-        :param value: restricted string value
+        :param value: the parent id; any type is accepted and stored as ``str(value)`` (so ``bytes`` keep their ``b'...'`` form)
         :return:
         """
-        if isinstance(value, str):
-            self._pb_body.parent_id = value
-        else:
-            warnings.warn(f'expecting a string as ID, receiving {type(value)}. '
-                          f'Note this type will be deprecated soon', DeprecationWarning)
-            self._pb_body.parent_id = UniqueId(value)
+        self._pb_body.parent_id = str(value)
+
 
     @property
     def blob(self) -> 'np.ndarray':
diff --git a/jina/types/document/uid.py b/jina/types/document/uid.py
deleted file mode 100644
index 2ea758cbaa4d8..0000000000000
--- a/jina/types/document/uid.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""
-Remarks on the ``id``, we have three views for it
-- ``id``: ``str`` is a hex string, for non-binary environment such as HTTP, CLI, HTML and also human-readable. it will be used as the major view.
-- ``bytes``: ``bytes`` is the binary format of str, it has 8 bytes fixed length, so it can be used in the dense file storage, e.g. BinaryPbIndexer, as it requires the key has to be fixed length.
-- ``int``:``int`` (formerly names ``hash``) is the integer form of bytes. This is useful when sometimes you want to use key along with other numeric values together in one ndarray, such as ranker and Numpyindexer
-.. note:
-    Customized ``id`` is acceptable as long as
-    - it only contains the symbols "0"–"9" to represent values 0 to 9,
-    and "A"–"F" (or alternatively "a"–"f").
-    - it has even length.
-"""
-
-__copyright__ = "Copyright (c) 2020 Jina AI Limited. All rights reserved."
-__license__ = "Apache-2.0"
-
-import re
-import sys
-import warnings
-from binascii import unhexlify
-
-import numpy as np
-
-from ...excepts import BadDocID
-from ...helper import typename
-
-DIGEST_SIZE = 8
-_id_regex = re.compile(r'([0-9a-fA-F][0-9a-fA-F])+')
-
-
-def int2bytes(value: int) -> bytes:
-    return int(value).to_bytes(DIGEST_SIZE, sys.byteorder, signed=True)
-
-
-def bytes2int(value: bytes) -> int:
-    return int.from_bytes(value, sys.byteorder, signed=True)
-
-
-def id2bytes(value: str) -> bytes:
-    try:
-        return unhexlify(value)
-    except:
-        is_valid_id(value)
-
-
-def bytes2id(value: bytes) -> str:
-    return value.hex()
-
-
-def int2id(value: int) -> str:
-    return bytes2id(int2bytes(value))
-
-
-def id2int(value: str) -> int:
-    return bytes2int(id2bytes(value))
-
-
-def is_valid_id(value: str) -> bool:
-    if not isinstance(value, str) or not _id_regex.match(value):
-        raise BadDocID(f'{value} is not a valid id. Customized ``id`` is only acceptable when: \
-        - it only contains chars "0"–"9" to represent values 0 to 9, \
-        and "A"–"F" (or alternatively "a"–"f"). \
-        - it has 16 chars described above.')
-    return True
-
-
-class UniqueId(str):
-    def __new__(cls, seq):
-        if isinstance(seq, (int, np.integer)):
-            seq = int2id(int(seq))
-        elif isinstance(seq, bytes):
-            seq = bytes2id(seq)
-        elif seq == '':
-            pass
-        elif isinstance(seq, str):
-            seq = seq
-        elif seq is not None:
-            raise BadDocID(f'{typename(seq)}: {seq} is not a valid id')
-
-        return str.__new__(cls, seq)
-
-    def __int__(self):
-        """The document id in the integer form of bytes, as 8 bytes map to int64.
-        This is useful when sometimes you want to use key along with other numeric values together in one ndarray,
-        such as ranker and Numpyindexer
-        """
-        warnings.warn('UniqueId to int conversion is not reliable and deprecated', DeprecationWarning)
-        return id2int(self)
-
-    def __bytes__(self):
-        """The document id in the binary format of str, it has 8 bytes fixed length,
-        so it can be used in the dense file storage, e.g. BinaryPbIndexer,
-        as it requires the key has to be fixed length.
-        """
-        warnings.warn('UniqueId to str conversion is not reliable and deprecated', DeprecationWarning)
-        return id2bytes(self)
diff --git a/jina/types/sets/document.py b/jina/types/sets/document.py
index 2850f55aea164..a38110a0d3bfa 100644
--- a/jina/types/sets/document.py
+++ b/jina/types/sets/document.py
@@ -36,11 +36,8 @@ def insert(self, index: int, doc: 'Document') -> None:
         self._docs_proto.insert(index, doc.proto)
 
     def __setitem__(self, key, value: 'Document'):
-        from ..document.uid import UniqueId
         if isinstance(key, int):
             self._docs_proto[key].CopyFrom(value)
-        elif isinstance(key, UniqueId):
-            self._docs_map[str(key)].CopyFrom(value)
         elif isinstance(key, str):
             self._docs_map[key].CopyFrom(value)
         else:
@@ -59,11 +56,8 @@ def __iter__(self):
 
     def __getitem__(self, item):
         from ..document import Document
-        from ..document.uid import UniqueId
         if isinstance(item, int):
             return Document(self._docs_proto[item])
-        elif isinstance(item, UniqueId):
-            return Document(self._docs_map[str(item)])
         elif isinstance(item, str):
             return Document(self._docs_map[item])
         elif isinstance(item, slice):
diff --git a/tests/integration/crud/simple/test_crud.py b/tests/integration/crud/simple/test_crud.py
index 2dfb56ec75551..9ce473ccf5204 100644
--- a/tests/integration/crud/simple/test_crud.py
+++ b/tests/integration/crud/simple/test_crud.py
@@ -29,7 +29,7 @@ def config(tmpdir):
 def random_docs(start, end, embed_dim=10, jitter=1, has_content=True):
     for j in range(start, end):
         d = Document()
-        d.id = str(f'{j}' * 16)
+        d.id = j
         if has_content:
             d.tags['id'] = j
             d.text = ''.join(random.choice(string.ascii_lowercase) for _ in range(10)).encode('utf8')
@@ -37,11 +37,18 @@ def random_docs(start, end, embed_dim=10, jitter=1, has_content=True):
         yield d
 
 
+def get_ids_to_delete(start, end, as_string):
+    """Return the ids in ``[start, end)``: lazily stringified when ``as_string`` is set, else the int range."""
+    id_range = range(start, end)
+    return (str(idx) for idx in id_range) if as_string else id_range
+
+
 def validate_index_size(num_indexed_docs, compound=False):
     from jina.executors.compound import CompoundExecutor
 
     if compound:
-        path = Path(CompoundExecutor.get_component_workspace_from_compound_workspace(os.environ['JINA_TOPK_DIR'], 'chunk_indexer', 0))
+        path = Path(CompoundExecutor.get_component_workspace_from_compound_workspace(os.environ['JINA_TOPK_DIR'],
+                                                                                     'chunk_indexer', 0))
     else:
         path = Path(os.environ['JINA_TOPK_DIR'])
     bin_files = list(path.glob('*.bin'))
@@ -96,8 +103,8 @@ def validate_results(resp):
     mock.assert_called_once()
 
 
-@pytest.mark.parametrize('has_content', [True, False])
-def test_delete_kv(config, mocker, has_content):
+@pytest.mark.parametrize('as_string', [True, False])
+def test_delete_kv(config, mocker, as_string):
     flow_file = 'flow_kv.yml'
 
     def validate_result_factory(num_matches):
@@ -117,7 +124,7 @@ def validate_results(resp):
     mock.assert_called_once()
 
     with Flow.load_config(flow_file) as index_flow:
-        index_flow.delete(input_fn=[d.id for d in random_docs(0, 3, has_content=has_content)])
+        index_flow.delete(input_fn=get_ids_to_delete(0, 3, as_string))
     validate_index_size(7)
 
     mock = mocker.Mock()
diff --git a/tests/integration/docidcache/test_crud_cache.py b/tests/integration/docidcache/test_crud_cache.py
index 3301b0e07313a..9f58f4b5a290d 100644
--- a/tests/integration/docidcache/test_crud_cache.py
+++ b/tests/integration/docidcache/test_crud_cache.py
@@ -140,7 +140,7 @@ def check_docs(chunk_content, chunks, same_content, docs, ids_used, index_start=
 
 
 def check_indexers_size(chunks, nr_docs, field, tmp_path, same_content, shards, post_op):
-    cache_indexer_path = tmp_path / 'cache.bin'
+    cache_indexer_path = os.path.join(tmp_path, 'cache.bin')
     with BaseIndexer.load(cache_indexer_path) as cache:
         assert isinstance(cache, DocCache)
         cache_full_size = cache.size
diff --git a/tests/unit/drivers/test_cache_driver.py b/tests/unit/drivers/test_cache_driver.py
index a3b1b82b57772..f9277bdf3c2ac 100644
--- a/tests/unit/drivers/test_cache_driver.py
+++ b/tests/unit/drivers/test_cache_driver.py
@@ -10,7 +10,7 @@
 from jina.executors import BaseExecutor
 from jina.executors.indexers.cache import DocCache, ID_KEY, CONTENT_HASH_KEY
 from jina.proto import jina_pb2
-from jina.types.document import Document, UniqueId
+from jina.types.document import Document
 from tests import random_docs
 
 
@@ -114,12 +114,12 @@ def exec_fn(self):
 
 
 def test_cache_content_driver_same_content(tmpdir, test_metas):
-    doc1 = Document(id=1)
+    doc1 = Document(id='1')
     doc1.text = 'blabla'
     doc1.update_content_hash()
     docs1 = DocumentSet([doc1])
 
-    doc2 = Document(id=2)
+    doc2 = Document(id='2')
     doc2.text = 'blabla'
     doc2.update_content_hash()
     docs2 = DocumentSet([doc2])
@@ -146,7 +146,7 @@ def test_cache_content_driver_same_content(tmpdir, test_metas):
     doc1.text = new_string
     doc1.update_content_hash()
     with BaseExecutor.load(filename) as executor:
-        executor.update([UniqueId(1)], [doc1.content_hash])
+        executor.update(['1'], [doc1.content_hash])
 
     with BaseExecutor.load(filename) as executor:
         assert executor.query(doc1.content_hash) is True
diff --git a/tests/unit/drivers/test_concat_driver.py b/tests/unit/drivers/test_concat_driver.py
index dec9658ee7bbe..b1b9ae199164f 100644
--- a/tests/unit/drivers/test_concat_driver.py
+++ b/tests/unit/drivers/test_concat_driver.py
@@ -4,7 +4,6 @@
 
 from jina import Document
 from jina.flow import Flow
-from jina.types.document.uid import UniqueId
 from jina.types.ndarray.generic import NdArray
 
 e1 = np.random.random([7])
@@ -18,13 +17,13 @@ def input_fn():
         doc1.embedding = e1
         with Document() as chunk1:
             chunk1.embedding = e2
-            chunk1.id = UniqueId(1)
+            chunk1.id = 1
         doc1.chunks.add(chunk1)
     with Document() as doc2:
         doc2.embedding = e3
         with Document() as chunk2:
             chunk2.embedding = e4
-            chunk2.id = UniqueId(2)
+            chunk2.id = 2
         doc2.chunks.add(chunk2)
     return [doc1, doc2]
 
diff --git a/tests/unit/flow/test_flow_index.py b/tests/unit/flow/test_flow_index.py
index 573f4e23c8afc..95b03f0872229 100644
--- a/tests/unit/flow/test_flow_index.py
+++ b/tests/unit/flow/test_flow_index.py
@@ -5,7 +5,6 @@
 
 from jina.flow import Flow
 from jina.proto import jina_pb2
-from jina.types.document.uid import UniqueId
 from tests import random_docs, rm_files
 
 cur_dir = os.path.dirname(os.path.abspath(__file__))
@@ -14,10 +13,10 @@
 def random_queries(num_docs, chunks_per_doc=5):
     for j in range(num_docs):
         d = jina_pb2.DocumentProto()
-        d.id = UniqueId(j)
+        d.id = str(j)  # raw protobuf string fields reject ints (TypeError), so stringify explicitly
         for k in range(chunks_per_doc):
             dd = d.chunks.add()
-            dd.id = UniqueId(num_docs + j * chunks_per_doc + k)
+            dd.id = str(num_docs + j * chunks_per_doc + k)
         yield d
 
 
diff --git a/tests/unit/flow/test_flow_multimode.py b/tests/unit/flow/test_flow_multimode.py
index 260335baa4eb5..6363ffa713538 100644
--- a/tests/unit/flow/test_flow_multimode.py
+++ b/tests/unit/flow/test_flow_multimode.py
@@ -9,7 +9,6 @@
 from jina.executors.indexers.keyvalue import BinaryPbIndexer
 from jina.flow import Flow
 from jina.proto import jina_pb2
-from jina.types.document.uid import UniqueId
 
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 
@@ -43,15 +42,15 @@ def test_flow_with_modalities(tmpdir, restful):
     def input_fn():
         doc1 = jina_pb2.DocumentProto()
         doc1.text = 'title: this is mode1 from doc1, body: this is mode2 from doc1'
-        doc1.id = UniqueId(1)
+        doc1.id = '1'
 
         doc2 = jina_pb2.DocumentProto()
         doc2.text = 'title: this is mode1 from doc2, body: this is mode2 from doc2'
-        doc2.id = UniqueId(2)
+        doc2.id = '2'
 
         doc3 = jina_pb2.DocumentProto()
         doc3.text = 'title: this is mode1 from doc3, body: this is mode2 from doc3'
-        doc3.id = UniqueId(3)
+        doc3.id = '3'
 
         return [doc1, doc2, doc3]
 
diff --git a/tests/unit/test_helper.py b/tests/unit/test_helper.py
index 3eb8ed241fb67..c1806273a3e5b 100644
--- a/tests/unit/test_helper.py
+++ b/tests/unit/test_helper.py
@@ -1,4 +1,3 @@
-import random
 import time
 from types import SimpleNamespace
 
@@ -14,8 +13,8 @@
 from jina.logging import default_logger
 from jina.logging.profile import TimeContext
 from jina.proto import jina_pb2
-from jina.types.document.uid import *
 from tests import random_docs
+import numpy as np
 
 
 def test_cached_property():
@@ -60,11 +59,6 @@ def test_time_context():
     assert tc.readable_duration == '2 seconds'
 
 
-def test_np_int():
-    a = random.randint(0, 100000)
-    assert int2bytes(np.int64(a)) == int2bytes(a)
-
-
 def test_dunder_get():
     a = SimpleNamespace()
     a.b = {'c': 1}
diff --git a/tests/unit/test_workspace.py b/tests/unit/test_workspace.py
index 6aecf2d4b6f8d..b76d596b07b26 100644
--- a/tests/unit/test_workspace.py
+++ b/tests/unit/test_workspace.py
@@ -5,7 +5,6 @@
 import pytest
 
 from jina.executors import BaseExecutor
-from jina.types.document import UniqueId
 
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 
@@ -214,11 +213,11 @@ def test_compound_indexer_rw(test_workspace):
             assert indexer[0] == indexer['test_meta']
             assert not indexer[0].is_updated
             assert not indexer.is_updated
-            indexer[0].add([UniqueId(j), UniqueId(j * 2), UniqueId(j * 3)], [bytes(j), bytes(j * 2), bytes(j * 3)])
+            indexer[0].add([str(j), str(j * 2), str(j * 3)], [bytes(j), bytes(j * 2), bytes(j * 3)])
             assert indexer[0].is_updated
             assert indexer.is_updated
             assert not indexer[1].is_updated
-            indexer[1].add([UniqueId(j), UniqueId(j * 2)], all_vecs[(j * 2, j * 2 + 1), :])
+            indexer[1].add([str(j), str(j * 2)], all_vecs[(j * 2, j * 2 + 1), :])
             assert indexer[1].is_updated
             indexer.save()
             # the compound executor itself is not modified, therefore should not generate a save
diff --git a/tests/unit/types/sets/test_documentset.py b/tests/unit/types/sets/test_documentset.py
index 94e948454f4b2..2e32b1f395606 100644
--- a/tests/unit/types/sets/test_documentset.py
+++ b/tests/unit/types/sets/test_documentset.py
@@ -3,7 +3,6 @@
 import pytest
 
 from jina import Document
-from jina.types.document.uid import UniqueId
 from jina.types.sets import DocumentSet
 
 DOCUMENTS_PER_LEVEL = 1
@@ -106,7 +105,7 @@ def test_build(docset):
 def test_set_get_success(docset, document_factory):
     docset.build()
     doc = document_factory.create(4, 'test 4')
-    doc_id = str(UniqueId(2))
+    doc_id = 2
     docset[doc_id] = doc
     assert docset[doc_id].text == 'test 4'
     doc_0_id = docset[0].id