In [1]:
import os

import ir_datasets
import evaluate
from tqdm.notebook import tqdm

from irise.indexer import Indexer
from irise import INDEX_DIR

In [14]:
# Weighting scheme passed to the indexer; also stored as each row's "system" tag.
SYSTEM = "TFIDF"

indexer = Indexer(path=INDEX_DIR / "irise_index_advanced")
dataset = ir_datasets.load("beir/msmarco/test")

# Loop-invariant: look up the corpus size once instead of once per query.
max_hits = dataset.docs_count()

# One dict of parallel columns per query, in `queries_iter()` order.
# Columns: query, q0, docid, score, rank, system.
predictions = []
for query in tqdm(dataset.queries_iter(), total=dataset.queries_count()):
    results = indexer.search(query.text, limit=max_hits, weighting=SYSTEM)
    if results:
        # Rank is the 0-based position in `results.top_n`.
        rows = [
            (str(docid), score, rank)
            for rank, (score, docid) in enumerate(results.top_n)
        ]
    else:
        # Nothing retrieved: keep the query in the run with one placeholder row.
        rows = [(str(-1), -1, -1)]

    query_id = int(query.query_id)
    predictions.append(
        {
            "query": [query_id] * len(rows),
            # Same "q<query_id>" value for real hits and for the placeholder row.
            "q0": [f"q{query.query_id}"] * len(rows),
            "docid": [docid for docid, _, _ in rows],
            "score": [score for _, score, _ in rows],
            "rank": [rank for _, _, rank in rows],
            "system": [SYSTEM] * len(rows),
        }
    )

  0%|          | 0/43 [00:00<?, ?it/s]

In [15]:
# Display the per-query run dicts collected in `predictions`.
predictions

[{'query': [19335],
  'q0': ['q19335'],
  'docid': ['5501804'],
  'score': [20.19284474327607],
  'rank': [0],
  'system': ['TFIDF']},
 {'query': [47923],
  'q0': ['q47923'],
  'docid': ['2227757'],
  'score': [98.75159736109245],
  'rank': [0],
  'system': ['TFIDF']},
 {'query': [87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181,
   87181],
  'q0': ['q87181',
   'q87181',
   'q87181',
   'q87181',
  

In [16]:
# Group relevance judgments by query: one dict of parallel columns
# (query, q0, docid, rel) per query, ordered by each query's first appearance
# in `qrels_iter()`. Keying on the query id keeps a query's judgments together
# even if they are not contiguous in the iterator, and an empty iterator
# yields an empty list.
qrels_by_query = {}
for qrel in tqdm(dataset.qrels_iter(), total=dataset.qrels_count()):
    group = qrels_by_query.get(qrel.query_id)
    if group is None:
        group = {"query": [], "q0": [], "docid": [], "rel": []}
        qrels_by_query[qrel.query_id] = group
    group["query"].append(int(qrel.query_id))
    group["q0"].append(f"q{qrel.query_id}")
    group["docid"].append(qrel.doc_id)
    group["rel"].append(qrel.relevance)

qrels = list(qrels_by_query.values())

  0%|          | 0/9260 [00:00<?, ?it/s]

In [18]:
# Spot-check one grouped entry: the judgments stored at index 1 of `qrels`.
qrels[1]

{'query': [47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
  47923,
 

In [4]:
# Load the "trec_eval" metric through the `evaluate` library.
trec_eval = evaluate.load("trec_eval")

In [5]:
def _as_single_run(per_query):
    """Merge per-query column dicts into one dict of parallel columns.

    The trec_eval metric takes exactly one run and one qrel, each a single
    dict mapping column name -> list of values. `predictions` and `qrels` are
    built as lists with one such dict per query, so their columns are
    concatenated here in list order.

    Args:
        per_query: a list of dicts of equal-keyed parallel lists, or an
            already-flat dict (returned unchanged).

    Returns:
        One dict whose columns are the concatenation of every entry's columns.
    """
    if isinstance(per_query, dict):
        return per_query
    merged = {}
    for entry in per_query:
        for column, values in entry.items():
            merged.setdefault(column, []).extend(values)
    return merged


results_tfidf = trec_eval.compute(
    references=[_as_single_run(qrels)],
    predictions=[_as_single_run(predictions)],
)

  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()


In [73]:
# NOTE(review): `results` is not assigned in any cell visible in this notebook;
# the saved output reports runid 'bm25', so it presumably comes from an earlier
# BM25 run in the same kernel session — confirm, or recompute it explicitly.
results

{'runid': 'bm25',
 'num_ret': 193,
 'num_rel': 4102,
 'num_rel_ret': 91,
 'num_q': 43,
 'map': 0.026539430854207936,
 'gm_map': 0.0003330836456447731,
 'bpref': 0.031150645416553026,
 'Rprec': 0.03386240921870229,
 'recip_rank': 0.37984496124031014,
 'P@5': 0.21395348837209305,
 'P@10': 0.13488372093023254,
 'P@15': 0.11007751937984496,
 'P@20': 0.09418604651162793,
 'P@30': 0.07054263565891472,
 'P@100': 0.021162790697674416,
 'P@200': 0.010581395348837208,
 'P@500': 0.0042325581395348845,
 'P@1000': 0.0021162790697674422,
 'NDCG@5': 0.19136769935348993,
 'NDCG@10': 0.1422570019702889,
 'NDCG@15': 0.12542659497485611,
 'NDCG@20': 0.1147591332765549,
 'NDCG@30': 0.10008778327081985,
 'NDCG@100': 0.07321093743024241,
 'NDCG@200': 0.07117612916274589,
 'NDCG@500': 0.07113236503969173,
 'NDCG@1000': 0.07113236503969173}

In [84]:
# Display the TF-IDF evaluation metrics.
# NOTE(review): this cell (In [84]) and a later `results_tfidf` cell (In [6])
# carry different saved outputs, so they appear to come from different
# executions — re-run top to bottom to confirm which one is current.
results_tfidf

{'runid': 'TFIDF',
 'num_ret': 37,
 'num_rel': 296,
 'num_rel_ret': 20,
 'num_q': 4,
 'map': 0.043620841867515685,
 'gm_map': 0.0006929465911493908,
 'bpref': 0.05859653172075274,
 'Rprec': 0.06038970697605236,
 'recip_rank': 0.375,
 'P@5': 0.30000000000000004,
 'P@10': 0.22499999999999998,
 'P@15': 0.19999999999999998,
 'P@20': 0.1875,
 'P@30': 0.16666666666666666,
 'P@100': 0.049999999999999996,
 'P@200': 0.024999999999999998,
 'P@500': 0.009999999999999998,
 'P@1000': 0.004999999999999999,
 'NDCG@5': 0.2012167986481089,
 'NDCG@10': 0.15548921332871304,
 'NDCG@15': 0.13850637345559375,
 'NDCG@20': 0.14108855843244605,
 'NDCG@30': 0.12411529832289284,
 'NDCG@100': 0.08639438422761744,
 'NDCG@200': 0.08639438422761744,
 'NDCG@500': 0.08639438422761744,
 'NDCG@1000': 0.08639438422761744}

In [13]:
# Display the collected run.
# NOTE(review): the saved output below is one flat dict of columns, whereas the
# cell that builds `predictions` produces a list of per-query dicts —
# presumably stale output from an earlier version; re-run to confirm.
predictions

{'query': [19335,
  47923,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87181,
  87452,
  87452,
  87452,
  87452,
  87452,
  87452,
  87452,
  87452,
  87452,
  87452,
  87452,
  87452,
  87452,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  104861,
  130510,
  131843,
  131843,
  131843,
  146187,
  

In [6]:
# Display the TF-IDF evaluation metrics returned by `trec_eval.compute`.
# NOTE(review): the saved output shows num_rel_ret = 0 across 43 queries —
# possibly a docid mismatch between run and qrels; verify after a clean re-run.
results_tfidf

{'runid': 'TFIDF',
 'num_ret': 309,
 'num_rel': 4102,
 'num_rel_ret': 0,
 'num_q': 43,
 'map': 0.0,
 'gm_map': 9.99999999999998e-06,
 'bpref': 0.0,
 'Rprec': 0.0,
 'recip_rank': 0.0,
 'P@5': 0.0,
 'P@10': 0.0,
 'P@15': 0.0,
 'P@20': 0.0,
 'P@30': 0.0,
 'P@100': 0.0,
 'P@200': 0.0,
 'P@500': 0.0,
 'P@1000': 0.0,
 'NDCG@5': 0.0,
 'NDCG@10': 0.0,
 'NDCG@15': 0.0,
 'NDCG@20': 0.0,
 'NDCG@30': 0.0,
 'NDCG@100': 0.0,
 'NDCG@200': 0.0,
 'NDCG@500': 0.0,
 'NDCG@1000': 0.0}

In [22]:
# Manual sanity check: per-query precision and recall over the full returned
# list, averaged across queries. Entries are paired by position.
assert len(predictions) == len(qrels)
qps, qrs = [], []
for pred, qrel in zip(predictions, qrels):
    # Positional pairing is only meaningful when both entries refer to the same query.
    if pred["query"] and qrel["query"]:
        assert pred["query"][0] == qrel["query"][0], "predictions and qrels are not aligned by query"

    # Only judgments with a non-zero relevance label count as relevant.
    relevant = {docid for docid, rel in zip(qrel["docid"], qrel["rel"]) if rel != 0}

    # Look the retrieved ids up in the relevant set. Iterating `pred` itself
    # would walk the column names ("query", "q0", ...), not the doc ids.
    retrieved = pred["docid"]
    hits = sum(docid in relevant for docid in retrieved)
    num_rel = len(relevant)

    # Guard the ratios against queries with nothing retrieved / nothing relevant.
    qp = hits / len(retrieved) if retrieved else 0.0
    qr = hits / num_rel if num_rel else 0.0
    qps.append(qp)
    qrs.append(qr)

# Labels are kept as-is; the values are means of full-list precision and
# recall, not metrics at a cutoff of 1.
print("AP @ 1: ", sum(qps) / max(len(qps), 1))
print("AR @ 1: ", sum(qrs) / max(len(qrs), 1))

AP @ 1:  0.0
AR @ 1:  0.0
