fix: back to old nomenclature

jina-ai · Aug 24, 2020 · 128b27b
1 parent 2b6f9f1
commit 128b27b
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 19 deletions.
diff --git a/jina/drivers/rank.py b/jina/drivers/rank.py
@@ -71,7 +71,6 @@ def _apply_all(self, docs: Iterable['jina_pb2.Document'], context_doc: 'jina_pb2
 
         # np.uint32 uses 32 bits. np.float32 uses 23 bit mantissa, so integer greater than 2^23 will have their
         # least significant bits truncated.
-
         if match_idx:
             match_idx = np.array(match_idx, dtype=np.float64)
 

diff --git a/jina/executors/rankers/__init__.py b/jina/executors/rankers/__init__.py
@@ -34,42 +34,42 @@ class Chunk2DocRanker(BaseRanker):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.col_parent_id = 0
-        self.col_doc_id = 1
-        self.col_query_doc_id = 2
+        self.col_doc_id = 0
+        self.col_chunk_id = 1
+        self.col_query_chunk_id = 2
         self.col_score = 3
 
-    def score(self, match_idx: 'np.ndarray', query_doc_meta: Dict, match_doc_meta: Dict) -> 'np.ndarray':
+    def score(self, match_idx: 'np.ndarray', query_chunk_meta: Dict, match_chunk_meta: Dict) -> 'np.ndarray':
         """Translate the chunk-level top-k results into doc-level top-k results. Some score functions may leverage the
         meta information of the query, hence the meta info of the query chunks and matched chunks are given
         as arguments.
 
         :param match_idx: a [N x 4] numpy ``ndarray``, column-wise:
 
-                - ``match_idx[:, 0]``: ``parent_id`` of the matched documents, integer
-                - ``match_idx[:, 1]``: ``doc_id`` of the matched documents, integer
-                - ``match_idx[:, 2]``: ``doc_id`` of the query documents, integer
+                - ``match_idx[:, 0]``: ``doc_id`` of the matched chunks, integer
+                - ``match_idx[:, 1]``: ``chunk_id`` of the matched chunks, integer
+                - ``match_idx[:, 2]``: ``chunk_id`` of the query chunks, integer
                 - ``match_idx[:, 3]``: distance/metric/score between the query and matched chunks, float
-        :param query_doc_meta: the meta information of the query documents, where the key is query document' ``dod_id``,
+        :param query_chunk_meta: the meta information of the query chunks, where the key is query chunks' ``chunk_id``,
             the value is extracted by the ``required_keys``.
-        :param match_doc_meta: the meta information of the matched docks, where the key is matched docs'
-            ``doc_id``, the value is extracted by the ``required_keys``.
+        :param match_chunk_meta: the meta information of the matched chunks, where the key is matched chunks'
+            ``chunk_id``, the value is extracted by the ``required_keys``.
         :return: a [N x 2] numpy ``ndarray``, where the first column is the matched documents' ``doc_id`` (integer)
                 the second column is the score/distance/metric between the matched doc and the query doc (float).
         """
-        _groups = self.group_by_parent_id(match_idx)
+        _groups = self.group_by_doc_id(match_idx)
         r = []
         for _g in _groups:
-            _doc_id, _doc_score = self._get_score(_g, query_doc_meta, match_doc_meta)
+            _doc_id, _doc_score = self._get_score(_g, query_chunk_meta, match_chunk_meta)
             r.append((_doc_id, _doc_score))
         return self.sort_doc_by_score(r)
 
-    def group_by_parent_id(self, match_idx):
+    def group_by_doc_id(self, match_idx):
         """
         Group the ``match_idx`` by ``doc_id``
         :return: an iterator over the groups
         """
-        return self._group_by(match_idx, self.col_parent_id)
+        return self._group_by(match_idx, self.col_doc_id)
 
     @staticmethod
     def _group_by(match_idx, col):
@@ -79,7 +79,7 @@ def _group_by(match_idx, col):
         # group by ``col``
         return np.split(_sorted_m, np.cumsum(_doc_counts))[:-1]
 
-    def _get_score(self, match_idx, query_doc_meta, match_doc_meta, *args, **kwargs):
+    def _get_score(self, match_idx, query_chunk_meta, match_chunk_meta, *args, **kwargs):
         raise NotImplementedError
 
     @staticmethod
@@ -92,5 +92,5 @@ def sort_doc_by_score(r):
         r = r[r[:, -1].argsort()[::-1]]
         return r
 
-    def get_doc_id(self, match_with_same_parent_id):
-        return match_with_same_parent_id[0, self.col_parent_id]
+    def get_doc_id(self, match_with_same_doc_id):
+        return match_with_same_doc_id[0, self.col_doc_id]
diff --git a/tests/unit/drivers/test_chunk2doc_rank_drivers.py b/tests/unit/drivers/test_chunk2doc_rank_drivers.py
@@ -13,7 +13,7 @@ def __init__(self, *args, **kwargs):
         self.required_keys = {'length'}
 
     def _get_score(self, match_idx, query_chunk_meta, match_chunk_meta, *args, **kwargs):
-        return match_idx[0][self.col_parent_id], match_chunk_meta[match_idx[0][self.col_doc_id]]['length']
+        return match_idx[0][self.col_doc_id], match_chunk_meta[match_idx[0][self.col_chunk_id]]['length']
 
 
 class SimpleChunk2DocRankDriver(Chunk2DocRankDriver):