In [1]:
import os

import numpy as np
import pandas as pd  # type: ignore
import pyterrier as pt  # type: ignore
import yaml  # type: ignore

from src.exp_logger import logger  # type: ignore
from src.load_index import setup_system

# Load the notebook settings from settings.yml into `config`
# (later cells read config["top_runs"] to select the evaluated topic ids).
# NOTE(review): FullLoader is used here; if settings.yml only contains plain
# scalars/lists/mappings, yaml.safe_load would be the safer choice — confirm.
with open("settings.yml", "r") as yamlfile:
    config = yaml.load(yamlfile, Loader=yaml.FullLoader)

PyTerrier 0.9.2 has loaded Terrier 5.7 (built by craigm on 2022-11-10 18:30) and terrier-helper 0.0.7

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:
# Obtain the index, the topics and the relevance judgments (qrels) from the
# project helper imported from src.load_index.
# NOTE(review): "WT" presumably selects the sub-collection to load — confirm
# against setup_system.
index, topics, qrels = setup_system("WT")

Loaded index with  1570734 documents.


In [3]:
def get_qrels(train_topics, validation_topics, test_topics, qrels):
    """Restrict the relevance judgments to each of the three topic splits.

    Args:
        train_topics: Topics of the training split; needs a ``qid`` column.
        validation_topics: Topics of the validation split; needs a ``qid`` column.
        test_topics: Topics of the test split; needs a ``qid`` column.
        qrels: Relevance judgments with a ``qid`` column.

    Returns:
        A tuple ``(train_qrels, validation_qrels, test_qrels)``. Each element
        holds the rows of ``qrels`` whose ``qid`` occurs in the matching split,
        in their original order and with their original index.
    """
    def filter_ids(topics):
        # Topics without any judgment simply contribute no rows.
        return qrels[qrels["qid"].isin(topics["qid"].unique())]

    return tuple(
        filter_ids(split)
        for split in (train_topics, validation_topics, test_topics)
    )

In [4]:
# Contiguous 60 % / 20 % / 20 % split of the topics in their existing row
# order (no shuffling). Plain iloc slices are used instead of np.split, which
# depends on the deprecated DataFrame.swapaxes when handed a DataFrame.
n_topics = len(topics)
train_end = int(0.6 * n_topics)
validation_end = int(0.8 * n_topics)

train_topics = topics.iloc[:train_end]
validation_topics = topics.iloc[train_end:validation_end]
test_topics = topics.iloc[validation_end:]

# Keep only the relevance judgments that belong to each split.
train_qrels, validation_qrels, test_qrels = get_qrels(
    train_topics, validation_topics, test_topics, qrels
)

In [12]:
def get_runs(run_names: list, runs_dir: str = "results/trec"):
    """Load run files with ``pt.io.read_results``.

    Args:
        run_names: File names of the runs to load, relative to ``runs_dir``.
        runs_dir: Directory that contains the run files.

    Returns:
        A ``(runs, names)`` tuple. ``runs`` holds the object returned by
        ``pt.io.read_results`` for every file, and ``names`` holds the
        corresponding file names in the same order.
    """
    # Materialise once so that one-shot iterables can be both loaded and returned.
    names = list(run_names)
    runs = [pt.io.read_results(os.path.join(runs_dir, name)) for name in names]
    return runs, names

# os.listdir returns directory entries in arbitrary order; sort them so that
# the order of the systems (and thus the row index of `results`) is the same
# on every machine and every re-run.
runs_train = sorted(os.listdir("results/trec"))

runs, names = get_runs(runs_train)

# Evaluate every run file on the topics whose qid is listed in
# config["top_runs"], against the full set of relevance judgments.
# No `highlight` is requested, so `results` stays a plain DataFrame.
results = pt.Experiment(
    runs,
    topics[topics["qid"].isin(config["top_runs"])],
    qrels,
    eval_metrics=["ndcg", "P_20", "map", "ndcg_cut_20", "recip_rank", "bpref"],
    names=names,
    verbose=True,
    round=4,
)

pt.Experiment: 100%|██████████| 18/18 [00:11<00:00,  1.58system/s]


In [13]:
# Show the metrics table ordered by the `name` column (the run file names).
results.sort_values(by="name")

Unnamed: 0,name,ndcg,P_20,map,ndcg_cut_20,recip_rank,bpref
14,IRCologne-BM25.WT,0.3367,0.1232,0.173,0.2319,0.3628,0.4104
3,IRCologne-BM25_Bo1.WT,0.3462,0.1283,0.1756,0.2323,0.3386,0.4367
16,IRCologne-BM25_RM3.WT,0.3363,0.129,0.1719,0.2283,0.3307,0.4265
12,IRCologne-BM25_axio.WT,0.3462,0.1283,0.1756,0.2323,0.3386,0.4367
15,IRCologne-DPH.WT,0.348,0.1261,0.1754,0.2374,0.3662,0.4166
11,IRCologne-PL2.WT,0.3478,0.1261,0.1782,0.2392,0.3719,0.4224
13,IRCologne-RRF(BBBXP).WT,0.3618,0.1275,0.1802,0.2371,0.3561,0.4402
10,IRCologne-RRF(BBXP).WT,0.3673,0.1297,0.185,0.2461,0.3659,0.4412
6,IRCologne-RRF(BRXP).WT,0.3661,0.129,0.1851,0.2426,0.3718,0.4469
7,IRCologne-RRF(BXP).WT,0.3532,0.1246,0.1771,0.2349,0.3622,0.4275


In [6]:
# Read two monoT5 run files from results/trec for a side-by-side evaluation.
# NOTE(review): both `monot5` and `monot5_WT_train` are reassigned to other
# files by later cells, so the evaluated runs depend on cell execution order.
monot5 = pt.io.read_results("results/trec/IRCologne-monoT5.WT")
monot5_WT_train = pt.io.read_results("results/trec/IRCologne-monoT5_WT-train.WT")

In [10]:

# Score both monoT5 runs against the training-split judgments, restricted to
# the topics whose qid appears in config["top_runs"].
results = pt.Experiment(
    [monot5, monot5_WT_train],
    topics=topics[topics["qid"].isin(config["top_runs"])],
    qrels=train_qrels,
    names=["monot5", "monot5_WT_train"],
    eval_metrics=["ndcg", "P_20", "map", "ndcg_cut_20", "recip_rank", "bpref"],
    round=4,
    highlight="bold",
    verbose=True,
)

pt.Experiment: 100%|██████████| 2/2 [00:01<00:00,  1.36system/s]


In [7]:
# Passage-level monoT5 runs.
# NOTE(review): `monot5_WT` holds the file without "_WT" in its name and is
# labelled "monot5" below — confirm the variable name is intended.
monot5_WT = pt.io.read_results(
    "results/trec/IRCologne-monoT5-passages-rel.WT"
)
monot5_WT_train = pt.io.read_results(
    "results/trec/IRCologne-monoT5_WT-passages-rel-train.WT"
)

# Score both runs against the test-split judgments.
results = pt.Experiment(
    [monot5_WT, monot5_WT_train],
    topics=topics[topics["qid"].isin(config["top_runs"])],
    qrels=test_qrels,
    names=["monot5", "monot5_WT_train"],
    eval_metrics=["ndcg", "P_20", "map", "ndcg_cut_20", "recip_rank", "bpref"],
    filter_by_topics=True,
    round=4,
    highlight="bold",
    verbose=True,
)

pt.Experiment: 100%|██████████| 2/2 [00:00<00:00, 12.68system/s]


In [8]:
# Display the metrics table assigned to `results` by the experiment cell above.
results

Unnamed: 0,name,ndcg,P_20,map,ndcg_cut_20,recip_rank,bpref
0,monot5,0.3674,0.1378,0.1943,0.2626,0.3922,0.4186
1,monot5_WT_train,0.2522,0.0511,0.0708,0.1099,0.2208,0.4413


In [6]:
# Display `results` again.
# NOTE(review): same code as the previous cell, but the execution count is
# out of order and the table differs — this output apparently stems from a
# different run of the notebook; confirm or remove the cell.
results

Unnamed: 0,name,ndcg,P_20,map,ndcg_cut_20,recip_rank,bpref
0,monot5,0.3674,0.1378,0.1943,0.2626,0.3922,0.4186
1,monot5_WT_train,0.1901,0.0222,0.0339,0.0347,0.0777,0.5369


In [8]:
# Display `results` once more.
# NOTE(review): third consecutive display cell with yet another set of
# numbers; the execution counts are non-sequential, so it is unclear which
# experiment produced this table — confirm or remove the cell.
results

Unnamed: 0,name,ndcg,P_20,map,ndcg_cut_20,recip_rank,bpref
0,monot5,0.4219,0.2083,0.2839,0.3803,0.5074,0.4245
1,monot5_WT_train,0.2609,0.075,0.1054,0.1392,0.2686,0.4094


In [5]:
# Passage-level monoT5 runs; the second file name carries the "_WT" marker.
monot5 = pt.io.read_results(
    "results/trec/IRCologne-monoT5-passages-rel.WT"
)
monot5_WT = pt.io.read_results(
    "results/trec/IRCologne-monoT5_WT-passages-rel.WT"
)

# Score both runs against the test-split judgments.
# NOTE(review): `monot5_WT` is reported under the label "monot5_WT_train" —
# confirm that this label is intended.
results = pt.Experiment(
    [monot5, monot5_WT],
    topics=topics[topics["qid"].isin(config["top_runs"])],
    qrels=test_qrels,
    names=["monot5", "monot5_WT_train"],
    eval_metrics=["ndcg", "P_20", "map", "ndcg_cut_20", "recip_rank", "bpref"],
    filter_by_topics=True,
    round=4,
    highlight="bold",
    verbose=True,
)

pt.Experiment: 100%|██████████| 2/2 [00:00<00:00, 12.59system/s]


In [6]:
# Display the metrics table assigned to `results` by the experiment cell above.
results

Unnamed: 0,name,ndcg,P_20,map,ndcg_cut_20,recip_rank,bpref
0,monot5,0.4219,0.2083,0.2839,0.3803,0.5074,0.4245
1,monot5_WT_train,0.2562,0.075,0.1002,0.1416,0.3333,0.4669


In [6]:
# Passage-level monoT5 runs: the plain file, the "_WT" file and the
# "_WT_10-3" file.
monot5 = pt.io.read_results(
    "results/trec/IRCologne-monoT5-passages-rel.WT"
)
monot5_WT = pt.io.read_results(
    "results/trec/IRCologne-monoT5_WT-passages-rel.WT"
)
monot5_WT_fast = pt.io.read_results(
    "results/trec/IRCologne-monoT5_WT_10-3-passages-rel.WT"
)

# Score the three runs against the test-split judgments.
# NOTE(review): `monot5_WT` is reported under the label "monot5_WT_train" —
# confirm that this label is intended.
results = pt.Experiment(
    [monot5, monot5_WT, monot5_WT_fast],
    topics=topics[topics["qid"].isin(config["top_runs"])],
    qrels=test_qrels,
    names=["monot5", "monot5_WT_train", "monot5_WT_fast"],
    eval_metrics=["ndcg", "P_20", "map", "ndcg_cut_20", "recip_rank", "bpref"],
    filter_by_topics=True,
    round=4,
    highlight="bold",
    verbose=True,
)

pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 12.62system/s]


In [7]:
# Display the metrics table assigned to `results` by the experiment cell above.
results

Unnamed: 0,name,ndcg,P_20,map,ndcg_cut_20,recip_rank,bpref
0,monot5,0.4219,0.2083,0.2839,0.3803,0.5074,0.4245
1,monot5_WT_train,0.2562,0.075,0.1002,0.1416,0.3333,0.4669
2,monot5_WT_fast,0.1464,0.0,0.0088,0.0,0.0064,0.4246
