In [1]:
import pandas as pd
import pyterrier as pt
import yaml
import os
from src.load_index import load_index, load_topics, load_qrels, tag
from src.extend_runs import extend_run_full
import sqlite3
from repro_eval.Evaluator import RpdEvaluator
import pytrec_eval
import numpy as np

from repro_eval.util import arp, arp_scores

if not pt.started():
    pt.init(boot_packages=["com.github.terrierteam:terrier-prf:-SNAPSHOT"])

  from .autonotebook import tqdm as notebook_tqdm
PyTerrier 0.10.1 has loaded Terrier 5.9 (built by craigm on 2024-05-02 17:40) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:
with open("data/LongEval/metadata.yml", "r") as yamlfile:
    config = yaml.load(yamlfile, Loader=yaml.FullLoader)

results_path = "data/results/filter_not_rel/"

index = load_index("t3")
topics = load_topics("t3", "test")

>>> Loaded index with  2049729 documents.


### Create baseline with 1500 docs per query

In [3]:
# Extended baseine for filtering
# BM25 = pt.BatchRetrieve(index, wmodel="BM25", verbose=True, num_results=1500)

# pt.io.write_results(
#     BM25(topics), results_path + f"/CIR_BM25_D-t3_T-t3-long"
# )

### Extend with docids and qids

In [4]:
# Extend the run
# extend_run_full(results_path + f"CIR_BM25_D-t3_T-t3-long")

### Extend with qrels

In [5]:
run = pd.read_csv(results_path + f"CIR_BM25_D-t3_T-t3-l_extended.t3", sep=" ")

  run = pd.read_csv(results_path + f"CIR_BM25_D-t3_T-t3-l_extended.t3", sep=" ")


In [6]:
conn = sqlite3.connect("data/database.db")
query = "SELECT * FROM qrel"
qrels = pd.read_sql_query(query, conn)

In [7]:
qrels["key"] = qrels["queryid"]+qrels["docid"]

In [8]:
qmap=qrels[["key", "relevance"]].set_index("key").to_dict()["relevance"]

In [9]:
def get_qrel(row, subcollection):
    query_id = row[f"queryid_{subcollection}"]
    doc_id = row[f"docid_{subcollection}"]
    if isinstance(query_id, str) and isinstance(doc_id, str):
        return qmap.get(row[f"queryid_{subcollection}"] + row[f"docid_{subcollection}"], None)
    else:
        return None
       
for subcollection in ["t0", "t1", "t2", "t3"]:
    run[f"qrel_{subcollection}"] = run.apply(get_qrel, subcollection=subcollection, axis=1)


This adds between 450 to 600 qrels per sub-collection

# Filter not rel

In [10]:
# for subcollection in ["t0", "t1", "t2"]:
history = ["t2", "t1", "t0"]
reranked = run.copy()

for subcollection in history:
    reranked = reranked[reranked[f"qrel_{subcollection}"] != 0.0]

In [11]:
reranked = reranked.sort_values(['queryid','score'], ascending=False).groupby('queryid').head(1000)
reranked['rank'] = reranked.groupby('queryid')['score'].rank(ascending=False).astype(int)


In [99]:
run_name = f"CIR_BM25_D-t3_T-t3-filter-notrel-{"".join(history)}"
run_reranked_path = os.path.join(results_path, run_name)

In [100]:
pt.io.write_results(reranked[["queryid", "0", "docid", "score", "rank", "run"]].rename(columns={"queryid": "qid", "docid": "docno"}), run_reranked_path, format='trec', run_name=run_name)

In [101]:
base_path = "data"

with open(os.path.join(base_path, config["subcollections"]["t3"]["qrels"]["test"]), "r") as f_qrels:
    qrels = pytrec_eval.parse_qrel(f_qrels)
evaluator = pytrec_eval.RelevanceEvaluator(qrels, pytrec_eval.supported_measures)

In [102]:
with open(run_reranked_path) as run_reranked:
    r = pytrec_eval.parse_run(run_reranked)
    scores = evaluator.evaluate(r)
    print( "|", 
          ", ".join(history), "|",
        str(round(arp_scores(scores)["P_10"], 4)), "|",
        str(round(arp_scores(scores)["bpref"], 4)), "|",
        str(round(arp_scores(scores)["ndcg"], 4)), "|"
        )

| t3 | 0.1798 | 0.7784 | 0.3851 |


# Qrel Boost

In [50]:
def qrel_boost(run, history, _lambda=0.5, mu=2):
    reranking = run.copy()
    
    # min max normalization per topic
    reranking['score'] = reranking.groupby('queryid')['score'].transform(lambda x : x / x.max())
    
    for subcollection in history:
        # Relevant
        reranking.loc[reranking[f"qrel_{subcollection}"] == 1, 'score'] = reranking.loc[reranking[f"qrel_{subcollection}"] > 0, 'score'] * _lambda ** 2
        reranking.loc[reranking[f"qrel_{subcollection}"] > 0, 'score'] = reranking.loc[reranking[f"qrel_{subcollection}"] > 0, 'score'] * (_lambda ** 2) * mu
        
        # All Not Relevant
        reranking.loc[(reranking[f"qrel_{subcollection}"] == 0) | (reranking[f"qrel_{subcollection}"].isna()), 'score'] = reranking.loc[(reranking[f"qrel_{subcollection}"] == 0) | (reranking[f"qrel_{subcollection}"].isna()), 'score'] *(1-_lambda) ** 2

        
    reranking = reranking.sort_values(['queryid','score'], ascending=False).groupby('queryid').head(1000)
    reranking['rank'] = reranking.groupby('queryid')['score'].rank(ascending=False).astype(int)
    
    return reranking

In [51]:
base_path = "data"
with open(os.path.join(base_path, config["subcollections"]["t3"]["qrels"]["test"]), "r") as f_qrels:
    qrels = pytrec_eval.parse_qrel(f_qrels)
evaluator = pytrec_eval.RelevanceEvaluator(qrels, pytrec_eval.supported_measures)


for _lambda in np.arange(0.1, 1, 0.1):
    reranked = qrel_boost(run, history, _lambda)
    run_name = f"CIR_BM25_D-t3_T-t3-filter-notrel-{"".join(history)}-l{_lambda}"
    run_reranked_path = os.path.join(results_path, run_name)

    pt.io.write_results(reranked[["queryid", "0", "docid", "score", "rank", "run"]].rename(columns={"queryid": "qid", "docid": "docno"}), run_reranked_path, format='trec', run_name=run_name)

    with open(run_reranked_path) as run_reranked:
        r = pytrec_eval.parse_run(run_reranked)
        scores = evaluator.evaluate(r)
        print( "|", 
              ", ".join(history), "|",
                      str(_lambda), "|",
            str(round(arp_scores(scores)["P_10"], 4)), "|",
            str(round(arp_scores(scores)["bpref"], 4)), "|",
            str(round(arp_scores(scores)["ndcg"], 4)), "|"
            )

| t2 | 0.1 | 0.154 | 0.4231 | 0.349 |
| t2 | 0.2 | 0.154 | 0.4231 | 0.349 |
| t2 | 0.30000000000000004 | 0.154 | 0.4234 | 0.3492 |
| t2 | 0.4 | 0.1543 | 0.4256 | 0.351 |
| t2 | 0.5 | 0.1585 | 0.4304 | 0.3622 |
| t2 | 0.6 | 0.1776 | 0.4498 | 0.3828 |
| t2 | 0.7000000000000001 | 0.1788 | 0.4505 | 0.3837 |
| t2 | 0.8 | 0.1788 | 0.4505 | 0.3837 |
| t2 | 0.9 | 0.1788 | 0.4505 | 0.3835 |


In [53]:
base_path = "data"
with open(os.path.join(base_path, config["subcollections"]["t3"]["qrels"]["test"]), "r") as f_qrels:
    qrels = pytrec_eval.parse_qrel(f_qrels)
evaluator = pytrec_eval.RelevanceEvaluator(qrels, pytrec_eval.supported_measures)

history = ["t2", "t1"]
for _lambda in np.arange(0.1, 1, 0.1):
    reranked = qrel_boost(run, history, _lambda)
    run_name = f"CIR_BM25_D-t3_T-t3-filter-notrel-{"".join(history)}-l{_lambda}"
    run_reranked_path = os.path.join(results_path, run_name)

    pt.io.write_results(reranked[["queryid", "0", "docid", "score", "rank", "run"]].rename(columns={"queryid": "qid", "docid": "docno"}), run_reranked_path, format='trec', run_name=run_name)

    with open(run_reranked_path) as run_reranked:
        r = pytrec_eval.parse_run(run_reranked)
        scores = evaluator.evaluate(r)
        print( "|", 
              ", ".join(history), "|",
                      str(_lambda), "|",
            str(round(arp_scores(scores)["P_10"], 4)), "|",
            str(round(arp_scores(scores)["bpref"], 4)), "|",
            str(round(arp_scores(scores)["ndcg"], 4)), "|"
            )

| t2, t1 | 0.1 | 0.1513 | 0.4164 | 0.3442 |
| t2, t1 | 0.2 | 0.1513 | 0.4164 | 0.3442 |
| t2, t1 | 0.30000000000000004 | 0.1513 | 0.4165 | 0.3443 |
| t2, t1 | 0.4 | 0.1517 | 0.4185 | 0.3464 |
| t2, t1 | 0.5 | 0.1574 | 0.4256 | 0.3598 |
| t2, t1 | 0.6 | 0.1828 | 0.4527 | 0.3884 |
| t2, t1 | 0.7000000000000001 | 0.1858 | 0.4533 | 0.3909 |
| t2, t1 | 0.8 | 0.1858 | 0.4533 | 0.3909 |
| t2, t1 | 0.9 | 0.1858 | 0.4533 | 0.3906 |


In [54]:
base_path = "data"
with open(os.path.join(base_path, config["subcollections"]["t3"]["qrels"]["test"]), "r") as f_qrels:
    qrels = pytrec_eval.parse_qrel(f_qrels)
evaluator = pytrec_eval.RelevanceEvaluator(qrels, pytrec_eval.supported_measures)

history = ["t2", "t1", "t0"]
for _lambda in np.arange(0.1, 1, 0.1):
    reranked = qrel_boost(run, history, _lambda)
    run_name = f"CIR_BM25_D-t3_T-t3-filter-notrel-{"".join(history)}-l{_lambda}"
    run_reranked_path = os.path.join(results_path, run_name)

    pt.io.write_results(reranked[["queryid", "0", "docid", "score", "rank", "run"]].rename(columns={"queryid": "qid", "docid": "docno"}), run_reranked_path, format='trec', run_name=run_name)

    with open(run_reranked_path) as run_reranked:
        r = pytrec_eval.parse_run(run_reranked)
        scores = evaluator.evaluate(r)
        print( "|", 
              ", ".join(history), "|",
                      str(_lambda), "|",
            str(round(arp_scores(scores)["P_10"], 4)), "|",
            str(round(arp_scores(scores)["bpref"], 4)), "|",
            str(round(arp_scores(scores)["ndcg"], 4)), "|"
            )

| t2, t1, t0 | 0.1 | 0.1492 | 0.4119 | 0.3407 |
| t2, t1, t0 | 0.2 | 0.1492 | 0.4119 | 0.3407 |
| t2, t1, t0 | 0.30000000000000004 | 0.1492 | 0.4122 | 0.3409 |
| t2, t1, t0 | 0.4 | 0.1495 | 0.4148 | 0.343 |
| t2, t1, t0 | 0.5 | 0.1559 | 0.4219 | 0.3571 |
| t2, t1, t0 | 0.6 | 0.1858 | 0.4534 | 0.3901 |
| t2, t1, t0 | 0.7000000000000001 | 0.1891 | 0.4542 | 0.3928 |
| t2, t1, t0 | 0.8 | 0.1891 | 0.4542 | 0.3928 |
| t2, t1, t0 | 0.9 | 0.1891 | 0.4541 | 0.3927 |


In [13]:
history = ["t2"]
_lambda = 1 + 2
reranked = direct_boost(run, history, _lambda=_lambda)

In [208]:
run_name = f"CIR_BM25_D-t3_T-t3-rr-rel-{"".join(history)}"
run_reranked_path = os.path.join(results_path, run_name)

In [209]:
pt.io.write_results(reranked[["queryid", "0", "docid", "score", "rank", "run"]].rename(columns={"queryid": "qid", "docid": "docno"}), run_reranked_path, format='trec', run_name=run_name)

base_path = "data"

with open(os.path.join(base_path, config["subcollections"]["t3"]["qrels"]["test"]), "r") as f_qrels:
    qrels = pytrec_eval.parse_qrel(f_qrels)
evaluator = pytrec_eval.RelevanceEvaluator(qrels, pytrec_eval.supported_measures)

In [210]:
with open(run_reranked_path) as run_reranked:
    r = pytrec_eval.parse_run(run_reranked)
    scores = evaluator.evaluate(r)
    print( "|", 
          str(_lambda), "|",
          ", ".join(history), "|",
        str(round(arp_scores(scores)["P_10"], 4)), "|",
        str(round(arp_scores(scores)["bpref"], 4)), "|",
        str(round(arp_scores(scores)["ndcg"], 4)), "|"
        )

| 3 | t2 | 0.1788 | 0.4493 | 0.3822 |
