This repository has been archived by the owner on Aug 31, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 49
/
test_bimatchranker.py
55 lines (48 loc) · 1.78 KB
/
test_bimatchranker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import numpy as np
from jina.executors.rankers import Chunk2DocRanker
from .. import BiMatchRanker
def create_data():
    """Build the hand-written fixture used by the BiMatchRanker test.

    Two query chunks (ids 100 and 110) are mapped to their matched chunks.
    Each match records the id of its parent document, its own chunk id,
    a score and a length.

    :return: a 3-tuple ``(match_idx, query_chunk_meta, match_chunk_meta)``:

        - ``match_idx``: structured numpy array with one row per
          (query chunk, matched chunk) pair, holding the match's parent id,
          the match's chunk id, the query chunk id and the score, under the
          column names defined on ``Chunk2DocRanker``.
        - ``query_chunk_meta``: ``{query_chunk_id: {'length': n}}`` where
          ``n`` is the number of query chunks in the fixture.
        - ``match_chunk_meta``: ``{match_chunk_id: {'length': length}}``.
    """
    # NOTE(review): parent id 4294967294 does not fit a signed 32-bit int --
    # presumably chosen to exercise the int64 id columns; confirm.
    matches_by_query_chunk = {
        100: [
            {'parent_id': 1, 'id': 10, 'score': 0.4, 'length': 200},
        ],
        110: [
            {'parent_id': 1, 'id': 10, 'score': 0.3, 'length': 200},
            {'parent_id': 1, 'id': 11, 'score': 0.2, 'length': 200},
            {'parent_id': 4294967294, 'id': 20, 'score': 0.1, 'length': 300},
        ],
    }
    total_query_chunks = len(matches_by_query_chunk)

    # Every query chunk carries the same 'length': the query chunk count.
    query_chunk_meta = {
        q_id: {'length': total_query_chunks}
        for q_id in matches_by_query_chunk
    }

    # A chunk matched by several query chunks (id 10) ends up with one entry.
    match_chunk_meta = {
        hit['id']: {'length': hit['length']}
        for hits in matches_by_query_chunk.values()
        for hit in hits
    }

    # One row per (query chunk, match) pair, in fixture order.
    rows = [
        (hit['parent_id'], hit['id'], q_id, hit['score'])
        for q_id, hits in matches_by_query_chunk.items()
        for hit in hits
    ]
    row_layout = [
        (Chunk2DocRanker.COL_MATCH_PARENT_ID, np.int64),
        (Chunk2DocRanker.COL_MATCH_ID, np.int64),
        (Chunk2DocRanker.COL_DOC_CHUNK_ID, np.int64),
        (Chunk2DocRanker.COL_SCORE, np.float64),
    ]
    return np.array(rows, dtype=row_layout), query_chunk_meta, match_chunk_meta
def test_bimatchranker():
    """Score the ``create_data`` fixture with ``BiMatchRanker`` and check the result.

    The first two result rows must have strictly decreasing values in
    position 1, carry the ids 1 and 4294967294 (in that order) in
    position 0, and the result must contain exactly two rows.
    """
    bi_match = BiMatchRanker()
    ranked = bi_match.score(*create_data())
    best, second = ranked[0], ranked[1]

    # the matched docs must come back in descending order of score
    assert best[1] > second[1]
    assert second[0] == 4294967294
    assert best[0] == 1

    # exactly two docs are matched
    assert len(ranked) == 2