In [2]:
from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run
# Called before `import pyterrier` below; keep this order.
# NOTE(review): presumably prepares PyTerrier for the TIRA environment — confirm against the tira docs.
ensure_pyterrier_is_loaded()

import pyterrier as pt
import os
import sys
import pandas as pd
from tqdm import tqdm
# Add the parent directory to the module search path before the local import that follows
# (presumably that is where deepct_query_reduction lives — verify against the repository layout).
sys.path.append('../')
from deepct_query_reduction import DeepCTQueryReduction


# Index reference built from the absolute path of the index's data.properties file;
# used by the retrieval step further down.
index = pt.IndexRef.of(os.path.abspath('../../pyterrier-index/index/data.properties'))

In [3]:
# Configuration string handed verbatim to DeepCTQueryReduction.
# NOTE(review): looks like '<model checkpoint>;<threshold>;<flag>' — confirm the
# meaning of each field against deepct_query_reduction.
pipeline = '/deepct-models/deepct-main-01/output/model.ckpt-22503;0.70;True'

# The BM25 retriever depends only on the index, so it is built once and shared
# by all datasets instead of being re-created on every loop iteration.
bm25 = pt.BatchRetrieve(index, wmodel="BM25")

for d in ['tot-test', 'tot-train', 'tot-dev']:
    # Pure-Python replacement for the `!mkdir -p {d}` shell escape: creates the
    # output directory if needed and is a no-op when it already exists.
    os.makedirs(d, exist_ok=True)

    queries = pt.io.read_topics(f'../../{d}/queries.xml', 'trecxml')
    deep_ct_query_reduction = DeepCTQueryReduction(f'../{d}-deepct-predictions/predictions.json', pipeline).as_transformer()

    # Compose the two stages: the output of the DeepCT query-reduction
    # transformer is fed into BM25.
    retrieval_pipeline = deep_ct_query_reduction >> bm25

    run = retrieval_pipeline(queries)
    # Write the run for this dataset to `<dataset>/run.txt` under the system name 'webis-bm25r-1'.
    persist_and_normalize_run(run, 'webis-bm25r-1', f'{d}/run.txt')

### Final test: verify that the scores are identical to those obtained during hyperparameter tuning.

In [4]:
def report_effectiveness(dataset, eval_metrics=('ndcg_cut_10', 'recip_rank', 'P_10', 'recall_100', 'recall_1000', 'recall_5000')):
    """Evaluate the persisted run of one dataset and return its scores.

    Reads the topics from ``../../{dataset}/queries.xml``, the run from
    ``{dataset}/run.txt`` and the qrels from ``../../{dataset}/qrels.txt``,
    then evaluates the run with ``pt.Experiment``.

    Args:
        dataset: Name of the dataset directory, e.g. ``'tot-dev'``.
        eval_metrics: Names of the measures to compute and report. The same
            sequence drives both the experiment and the returned dict, so the
            two can no longer get out of sync. Defaults to the six measures
            this notebook has always reported.

    Returns:
        A dict mapping each requested metric name to its value in the first
        row of the experiment result (only one system is evaluated).
    """
    # Materialise once so any iterable works and it can be traversed twice.
    metrics = list(eval_metrics)

    queries = pt.io.read_topics(f'../../{dataset}/queries.xml', 'trecxml')
    # Wrap the stored results so they can be evaluated like a retrieval system.
    run = pt.Transformer.from_df(pt.io.read_results(f'{dataset}/run.txt'))

    ret = pt.Experiment([run], queries, pt.io.read_qrels(f'../../{dataset}/qrels.txt'), eval_metrics=metrics)

    first_row = ret.iloc[0]
    return {metric: first_row[metric] for metric in metrics}

def report_pipeline():
    """Collect the train and dev effectiveness scores into a one-row DataFrame.

    Calls ``report_effectiveness`` for ``'tot-train'`` and ``'tot-dev'`` and
    prefixes every metric name with the part of the dataset name after the
    hyphen (``train_`` / ``dev_``).

    Returns:
        A ``pandas.DataFrame`` with a single row and one column per
        ``<split>_<metric>`` combination, train columns first.
    """
    summary_row = {
        split_name.split('-')[1] + '_' + metric: score
        for split_name in ('tot-train', 'tot-dev')
        for metric, score in report_effectiveness(split_name).items()
    }
    return pd.DataFrame([summary_row])

# Last expression of the cell, so the one-row summary frame is rendered as the cell output.
report_pipeline()

Unnamed: 0,train_ndcg_cut_10,train_recip_rank,train_P_10,train_recall_100,train_recall_1000,train_recall_5000,dev_ndcg_cut_10,dev_recip_rank,dev_P_10,dev_recall_100,dev_recall_1000,dev_recall_5000
0,0.098709,0.092952,0.013333,0.293333,0.513333,0.513333,0.095243,0.091492,0.012667,0.273333,0.493333,0.493333
