In [4]:
import sys
from pathlib import Path

# Make the sibling ../src directory importable from this notebook.
# The path is relative, so it is resolved against the kernel's working directory.
SRC_PATH = Path("..") / "src"

# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if str(SRC_PATH) not in sys.path:
    sys.path.append(str(SRC_PATH))

In [5]:
from metrics import recall_at_k, mrr, ndcg_at_k, hit_rate_at_k

# Toy fixture for the metric functions: one list of predicted ids per query.
# Presumably each list is ranked best-first and paired by position with `targets`.
preds = [
    [1, 2, 3, 4],  # the single relevant id (1) is in first position
    [3, 2, 1, 5],  # relevant ids 2 and 5 are in positions 2 and 4
    [6, 7, 8, 9],  # the relevant id (10) never appears
]

# Set of relevant ids for each query, in the same order as `preds`.
targets = [{1}, {2, 5}, {10}]


In [6]:
# Print each metric on the toy fixture. The trailing comments give the values
# recorded the last time this cell was run.
print("Recall@1:", recall_at_k(preds, targets, 1))   # ~0.33
print("Recall@3:", recall_at_k(preds, targets, 3))   # 0.5 (not ~0.66: the second query has only 1 of its 2 relevant ids in the top 3)
print("HitRate@1:", hit_rate_at_k(preds, targets, 1))  # ~0.33
print("MRR:", mrr(preds, targets))  # 0.5
print("nDCG@3:", ndcg_at_k(preds, targets, 3))  # ~0.54


Recall@1: 0.3333333333333333
Recall@3: 0.5
HitRate@1: 0.3333333333333333
MRR: 0.5
nDCG@3: 0.5436432457540534


In [7]:
import random
from pipeline import search


class FakeRetriever:
    """Stand-in retriever that returns random documents from a fixed corpus.

    Args:
        corpus: Sequence of documents to sample from.
        seed: Optional seed. When given, the instance draws from its own
            ``random.Random(seed)`` so its results are reproducible. When
            omitted, the module-level ``random`` generator is used.
    """

    def __init__(self, corpus, seed=None):
        self.corpus = corpus
        # A private generator is created only when a seed is requested.
        self._rng = random.Random(seed) if seed is not None else None

    def search(self, query, top_k=10):
        """Return up to ``top_k`` documents sampled without replacement.

        ``query`` is accepted but ignored. ``top_k`` is capped at the corpus
        size because ``random.sample`` raises ``ValueError`` when asked for
        more items than the population holds.
        """
        rng = self._rng if self._rng is not None else random
        sample_size = min(top_k, len(self.corpus))
        return rng.sample(self.corpus, sample_size)


class FakeReranker:
    """Stand-in reranker that gives every passage a random score."""

    def score(self, query, passages):
        """Return one ``random.random()`` draw per passage; ``query`` is unused."""
        scores = []
        for _passage in passages:
            scores.append(random.random())
        return scores


In [8]:
# Fifty synthetic documents with ids 0..49 and placeholder text.
corpus = [dict(doc_id=idx, text=f"Document {idx}") for idx in range(50)]


In [9]:
# Exercise search() end to end with both fakes plugged in.
# top_k_retrieve=20 / top_k_final=5: presumably 20 candidates are fetched and
# 5 are returned (the recorded output has 5 items).
retriever, reranker = FakeRetriever(corpus), FakeReranker()

results = search(
    query="test query", retriever=retriever, reranker=reranker,
    top_k_retrieve=20, top_k_final=5,
)

# NOTE(review): the recorded output for this cell lists doc_ids 3, 11, 20, 31, 46
# in ascending order, which would be unusual if the ordering followed
# FakeReranker's random scores. Confirm how search() orders its results.
results


[{'doc_id': 3, 'text': 'Document 3'},
 {'doc_id': 11, 'text': 'Document 11'},
 {'doc_id': 20, 'text': 'Document 20'},
 {'doc_id': 31, 'text': 'Document 31'},
 {'doc_id': 46, 'text': 'Document 46'}]

In [10]:
# Same call with reranker=None, to exercise search() without a reranker.
# Reuses `retriever` from the earlier cell and rebinds `results`.
results = search(
    query="test query", retriever=retriever, reranker=None,
    top_k_retrieve=20, top_k_final=5,
)

results


[{'doc_id': 30, 'text': 'Document 30'},
 {'doc_id': 16, 'text': 'Document 16'},
 {'doc_id': 3, 'text': 'Document 3'},
 {'doc_id': 49, 'text': 'Document 49'},
 {'doc_id': 12, 'text': 'Document 12'}]

In [11]:
from evaluate_retriever import evaluate_retriever

# Three dummy queries and, in the same order, the set of relevant doc ids for each.
queries = [
    "q1",
    "q2",
    "q3",
]
relevant_docs = [
    {1},
    {2},
    {100},  # 100 is outside the ids 0..49 of the 50-document `corpus` built earlier
]

# NOTE(review): FakeRetriever.search returns document dicts, while relevant_docs
# holds bare integer ids. Confirm that evaluate_retriever extracts "doc_id"
# before matching; if it does not, every metric is 0 regardless of what is retrieved.
res = evaluate_retriever(queries=queries, relevant_docs=relevant_docs, retriever=retriever)

res


Evaluating Retriever: 100%|██████████| 3/3 [00:00<?, ?it/s]


{'Recall@5': 0.0,
 'HitRate@5': 0.0,
 'Recall@10': 0.0,
 'HitRate@10': 0.0,
 'Recall@20': 0.0,
 'HitRate@20': 0.0,
 'MRR': 0.0,
 'nDCG@10': 0.0}