jina-ai · hanxiao · Feb 5, 2021 · Feb 4, 2021 · Feb 4, 2021 · Feb 4, 2021
diff --git a/jina/clients/request/helper.py b/jina/clients/request/helper.py
@@ -67,4 +67,5 @@ def _add_docs_groundtruths(req, batch, data_type, _kwargs):
 
 
 def _add_ids(req, batch):
-    req.ids.extend(batch)
+    string_ids = (str(doc_id) for doc_id in batch)
+    req.ids.extend(string_ids)
diff --git a/jina/drivers/rank/__init__.py b/jina/drivers/rank/__init__.py
@@ -4,7 +4,6 @@
 
 from .. import BaseExecutableDriver
 from ...types.document import Document
-from ...types.document.uid import UniqueId
 from ...types.score import NamedScore
 
 
@@ -66,7 +65,7 @@ def _sort_matches_in_place(self, context_doc: 'Document', match_scores: 'np.ndar
         cm = context_doc.matches
         cm.build()
         for str_match_id, score in match_scores:
-            match_id = UniqueId(str_match_id)
+            match_id = str_match_id
             cm[match_id].score = NamedScore(value=score, op_name=op_name, ref_id=context_doc.id)
 
         cm.sort(key=lambda x: x.score.value, reverse=True)
diff --git a/jina/drivers/rank/aggregate/__init__.py b/jina/drivers/rank/aggregate/__init__.py
@@ -10,7 +10,6 @@
 from .. import BaseRankDriver
 
 if False:
-    from ....types.document.uid import UniqueId
     from ....types.sets import DocumentSet
 
 
@@ -105,9 +104,9 @@ def _apply_all(self, docs: 'DocumentSet',
         :return:
         """
 
-        match_idx = []  # type: List[Tuple[UniqueId, UniqueId, UniqueId, float]]
-        query_meta = {}  # type: Dict[UniqueId, Dict]
-        match_meta = {}  # type: Dict[UniqueId, Dict]
+        match_idx = []  # type: List[Tuple[str, str, str, float]]
+        query_meta = {}  # type: Dict[str, Dict]
+        match_meta = {}  # type: Dict[str, Dict]
         parent_id_chunk_id_map = defaultdict(list)
         matches_by_id = defaultdict(Document)
         for chunk in docs:

diff --git a/jina/types/document/__init__.py b/jina/types/document/__init__.py
@@ -12,7 +12,6 @@
 from google.protobuf.field_mask_pb2 import FieldMask
 
 from .converters import png_to_buffer, to_datauri, guess_mime
-from .uid import DIGEST_SIZE, UniqueId
 from ..mixin import ProtoTypeMixin
 from ..ndarray.generic import NdArray
 from ..score import NamedScore
@@ -24,6 +23,7 @@
 from ...proto import jina_pb2
 
 __all__ = ['Document', 'DocumentContentType', 'DocumentSourceType']
+DIGEST_SIZE = 8
 
 DocumentContentType = TypeVar('DocumentContentType', bytes, str, np.ndarray)
 DocumentSourceType = TypeVar('DocumentSourceType',
@@ -226,24 +226,13 @@ def parent_id(self) -> str:
 
     @id.setter
     def id(self, value: Union[bytes, str, int]):
-        """Set document id to a string value
+        """Set document id to a string value.
 
-        .. note:
-
-            Customized ``id`` is acceptable as long as
-            - it only contains the symbols "0"–"9" to represent values 0 to 9,
-            and "A"–"F" (or alternatively "a"–"f").
-            - it has 16 chars described above.
-
-        :param value: restricted string value
+        :param value: id as bytes, int or str
         :return:
         """
-        if isinstance(value, str):
-            self._pb_body.id = value
-        else:
-            warnings.warn(f'expecting a string as ID, receiving {type(value)}. '
-                          f'Note this type will be deprecated soon', DeprecationWarning)
-            self._pb_body.id = UniqueId(value)
+        self._pb_body.id = str(value)
+
 
     @parent_id.setter
     def parent_id(self, value: Union[bytes, str, int]):
@@ -264,7 +253,7 @@ def parent_id(self, value: Union[bytes, str, int]):
         else:
             warnings.warn(f'expecting a string as ID, receiving {type(value)}. '
                           f'Note this type will be deprecated soon', DeprecationWarning)
-            self._pb_body.parent_id = UniqueId(value)
+            self._pb_body.parent_id = value
 
     @property
     def blob(self) -> 'np.ndarray':

diff --git a/jina/types/document/uid.py b/jina/types/document/uid.py
diff --git a/jina/types/sets/document.py b/jina/types/sets/document.py
@@ -36,11 +36,8 @@ def insert(self, index: int, doc: 'Document') -> None:
         self._docs_proto.insert(index, doc.proto)
 
     def __setitem__(self, key, value: 'Document'):
-        from ..document.uid import UniqueId
         if isinstance(key, int):
             self._docs_proto[key].CopyFrom(value)
-        elif isinstance(key, UniqueId):
-            self._docs_map[str(key)].CopyFrom(value)
         elif isinstance(key, str):
             self._docs_map[key].CopyFrom(value)
         else:
@@ -59,11 +56,8 @@ def __iter__(self):
 
     def __getitem__(self, item):
         from ..document import Document
-        from ..document.uid import UniqueId
         if isinstance(item, int):
             return Document(self._docs_proto[item])
-        elif isinstance(item, UniqueId):
-            return Document(self._docs_map[str(item)])
         elif isinstance(item, str):
             return Document(self._docs_map[item])
         elif isinstance(item, slice):

diff --git a/tests/integration/crud/simple/test_crud.py b/tests/integration/crud/simple/test_crud.py
@@ -29,19 +29,26 @@ def config(tmpdir):
 def random_docs(start, end, embed_dim=10, jitter=1, has_content=True):
     for j in range(start, end):
         d = Document()
-        d.id = str(f'{j}' * 16)
+        d.id = j
         if has_content:
             d.tags['id'] = j
             d.text = ''.join(random.choice(string.ascii_lowercase) for _ in range(10)).encode('utf8')
             d.embedding = np.random.random([embed_dim + np.random.randint(0, jitter)])
         yield d
 
 
+def get_ids_to_delete(start, end, as_string):
+    if as_string:
+        return (str(idx) for idx in range(start, end))
+    return range(start, end)
+
+
 def validate_index_size(num_indexed_docs, compound=False):
     from jina.executors.compound import CompoundExecutor
 
     if compound:
-        path = Path(CompoundExecutor.get_component_workspace_from_compound_workspace(os.environ['JINA_TOPK_DIR'], 'chunk_indexer', 0))
+        path = Path(CompoundExecutor.get_component_workspace_from_compound_workspace(os.environ['JINA_TOPK_DIR'],
+                                                                                     'chunk_indexer', 0))
     else:
         path = Path(os.environ['JINA_TOPK_DIR'])
     bin_files = list(path.glob('*.bin'))
@@ -96,8 +103,8 @@ def validate_results(resp):
     mock.assert_called_once()
 
 
-@pytest.mark.parametrize('has_content', [True, False])
-def test_delete_kv(config, mocker, has_content):
+@pytest.mark.parametrize('as_string', [True, False])
+def test_delete_kv(config, mocker, as_string):
     flow_file = 'flow_kv.yml'
 
     def validate_result_factory(num_matches):
@@ -117,7 +124,7 @@ def validate_results(resp):
     mock.assert_called_once()
 
     with Flow.load_config(flow_file) as index_flow:
-        index_flow.delete(input_fn=[d.id for d in random_docs(0, 3, has_content=has_content)])
+        index_flow.delete(input_fn=get_ids_to_delete(0, 3, as_string))
     validate_index_size(7)
 
     mock = mocker.Mock()

diff --git a/tests/unit/drivers/test_cache_driver.py b/tests/unit/drivers/test_cache_driver.py
@@ -10,7 +10,7 @@
 from jina.executors import BaseExecutor
 from jina.executors.indexers.cache import DocCache, ID_KEY, CONTENT_HASH_KEY
 from jina.proto import jina_pb2
-from jina.types.document import Document, UniqueId
+from jina.types.document import Document
 from tests import random_docs
 
 
@@ -146,7 +146,7 @@ def test_cache_content_driver_same_content(tmpdir, test_metas):
     doc1.text = new_string
     doc1.update_content_hash()
     with BaseExecutor.load(filename) as executor:
-        executor.update([UniqueId(1)], [doc1.content_hash])
+        executor.update([1], [doc1.content_hash])
 
     with BaseExecutor.load(filename) as executor:
         assert executor.query(doc1.content_hash) is True

diff --git a/tests/unit/drivers/test_concat_driver.py b/tests/unit/drivers/test_concat_driver.py
@@ -4,7 +4,6 @@
 
 from jina import Document
 from jina.flow import Flow
-from jina.types.document.uid import UniqueId
 from jina.types.ndarray.generic import NdArray
 
 e1 = np.random.random([7])
@@ -18,13 +17,13 @@ def input_fn():
         doc1.embedding = e1
         with Document() as chunk1:
             chunk1.embedding = e2
-            chunk1.id = UniqueId(1)
+            chunk1.id = 1
         doc1.chunks.add(chunk1)
     with Document() as doc2:
         doc2.embedding = e3
         with Document() as chunk2:
             chunk2.embedding = e4
-            chunk2.id = UniqueId(2)
+            chunk2.id = 2
         doc2.chunks.add(chunk2)
     return [doc1, doc2]
 

diff --git a/tests/unit/flow/test_flow_index.py b/tests/unit/flow/test_flow_index.py
@@ -5,7 +5,6 @@
 
 from jina.flow import Flow
 from jina.proto import jina_pb2
-from jina.types.document.uid import UniqueId
 from tests import random_docs, rm_files
 
 cur_dir = os.path.dirname(os.path.abspath(__file__))
@@ -14,10 +13,10 @@
 def random_queries(num_docs, chunks_per_doc=5):
     for j in range(num_docs):
         d = jina_pb2.DocumentProto()
-        d.id = UniqueId(j)
+        d.id = j
         for k in range(chunks_per_doc):
             dd = d.chunks.add()
-            dd.id = UniqueId(num_docs + j * chunks_per_doc + k)
+            dd.id = num_docs + j * chunks_per_doc + k
         yield d
 
 

diff --git a/tests/unit/flow/test_flow_multimode.py b/tests/unit/flow/test_flow_multimode.py
@@ -9,7 +9,6 @@
 from jina.executors.indexers.keyvalue import BinaryPbIndexer
 from jina.flow import Flow
 from jina.proto import jina_pb2
-from jina.types.document.uid import UniqueId
 
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 
@@ -43,15 +42,15 @@ def test_flow_with_modalities(tmpdir, restful):
     def input_fn():
         doc1 = jina_pb2.DocumentProto()
         doc1.text = 'title: this is mode1 from doc1, body: this is mode2 from doc1'
-        doc1.id = UniqueId(1)
+        doc1.id = 1
 
         doc2 = jina_pb2.DocumentProto()
         doc2.text = 'title: this is mode1 from doc2, body: this is mode2 from doc2'
-        doc2.id = UniqueId(2)
+        doc2.id = 2
 
         doc3 = jina_pb2.DocumentProto()
         doc3.text = 'title: this is mode1 from doc3, body: this is mode2 from doc3'
-        doc3.id = UniqueId(3)
+        doc3.id = 3
 
         return [doc1, doc2, doc3]
 

diff --git a/tests/unit/test_helper.py b/tests/unit/test_helper.py
@@ -1,4 +1,3 @@
-import random
 import time
 from types import SimpleNamespace
 
@@ -14,8 +13,8 @@
 from jina.logging import default_logger
 from jina.logging.profile import TimeContext
 from jina.proto import jina_pb2
-from jina.types.document.uid import *
 from tests import random_docs
+import numpy as np
 
 
 def test_cached_property():
@@ -60,11 +59,6 @@ def test_time_context():
     assert tc.readable_duration == '2 seconds'
 
 
-def test_np_int():
-    a = random.randint(0, 100000)
-    assert int2bytes(np.int64(a)) == int2bytes(a)
-
-
 def test_dunder_get():
     a = SimpleNamespace()
     a.b = {'c': 1}