In [1]:
# Point pyjnius at the Anserini fat jar (path is relative to the working directory).
# Keep this ahead of the `jnius` import below: pyjnius is expected to pick up the
# classpath when `jnius` is first imported -- confirm against the pyjnius docs.
import jnius_config
jnius_config.set_classpath("target/anserini-0.2.1-SNAPSHOT-fatjar.jar")

# Python-side proxies for the Java classes used throughout the notebook.
from jnius import autoclass
JString = autoclass('java.lang.String')
JSearcher = autoclass('io.anserini.search.SimpleSearcher')

# Shared searcher instance; later cells reconfigure it in place (similarity / reranker setters).
# NOTE(review): absolute, machine-specific index path -- adjust when running elsewhere.
searcher = JSearcher(JString('/tuna1/indexes/lucene-index.robust04.pos+docvectors+rawdocs'))


In [2]:
import shlex
import subprocess
import sys
def get_qid2text(fn_topics="src/main/resources/topics-and-qrels/topics.robust04.301-450.601-700.txt"):
    """Parse a TREC-style topics file into a ``{qid: title}`` dict.

    A topic id is read from each line starting with ``<num> Number:`` and is
    paired with the next ``<title>`` entry. The title text may follow the
    ``<title>`` tag on the same line or sit on the line right after it.

    Args:
        fn_topics: Path of the topics file. Defaults to the Robust04 topics
            file that the original hard-coded.

    Returns:
        dict mapping the integer topic id to its stripped title string.

    Raises:
        ValueError: if a title appears before any ``<num> Number:`` line, or
            if the text after ``<num> Number:`` is not an integer.
    """
    qid_prefix = "<num> Number:"
    title_prefix = "<title>"
    qid2text = {}
    qid = None
    seeTitle = False  # True when a bare "<title>" was seen and the text is on the next line

    def _store(text):
        # Fail with a clear message instead of an unbound-variable error.
        if qid is None:
            raise ValueError("found a title before any '{}' line in {}".format(qid_prefix, fn_topics))
        qid2text[qid] = text

    # `with` guarantees the topics file is closed, even if parsing raises.
    with open(fn_topics) as f:
        for l in f:
            if l.startswith(qid_prefix):
                qid = int(l[len(qid_prefix):])
            if seeTitle:
                seeTitle = False
                _store(l.strip())
            elif l.startswith(title_prefix):
                title = l[len(title_prefix):].strip()
                if len(title) == 0:
                    seeTitle = True
                else:
                    _store(title)
    return qid2text

In [9]:
# Parse the Robust04 topics file into a {topic id: title text} dict.
# Note: the per-fold loops further down rebind this same `qid2text` name.
qid2text = get_qid2text()

## 5-fold evaluation on Robust04

In [40]:
import json
from tqdm import tqdm

def cal_score_new(fn_qrels="src/main/resources/topics-and-qrels/qrels.robust2004.txt", prediction="score.txt"):
    """Run ``run_eval_new.sh`` on a prediction file and return its five metrics.

    The script is invoked as ``/bin/sh run_eval_new.sh <prediction> <fn_qrels>``
    from the current working directory. The last whitespace-separated token of
    each of the first five stdout lines is parsed as a float. Those values are
    named MAP, MRR, P30, P20 and NDCG20 in that order; this ordering is assumed
    to match what ``run_eval_new.sh`` emits, so verify it against the script.

    Args:
        fn_qrels: Path of the relevance-judgement (qrels) file.
        prediction: Path of the run file to evaluate.

    Returns:
        Tuple ``(Map, Mrr, P30, P20, NDCG20)`` of floats.

    Raises:
        RuntimeError: if the script does not print five parseable metric
            lines. The message carries the exit code and captured stderr.
    """
    cmd = "/bin/sh run_eval_new.sh {} {}".format(prediction, fn_qrels)
    print("running {}".format(cmd))
    # Pass argv as a list so paths with spaces or shell metacharacters reach the
    # script as single arguments; `cmd` is kept only for the log line.
    # universal_newlines=True yields text on both Python 2 and 3.
    p = subprocess.Popen(
        ["/bin/sh", "run_eval_new.sh", prediction, fn_qrels],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    pout, perr = p.communicate()
    try:
        Map, Mrr, P30, P20, NDCG20 = [
            float(line.split()[-1]) for line in pout.split('\n')[:5]
        ]
    except (IndexError, ValueError):
        # Fewer than five lines, an empty line, or a non-numeric last token.
        raise RuntimeError(
            "could not parse metrics from '{}' (exit code {}); stderr:\n{}".format(
                cmd, p.returncode, perr
            )
        )
    print(Map)
    print(Mrr)
    print(P30)
    print(P20)
    print(NDCG20)
    return Map, Mrr, P30, P20, NDCG20

def search_new(searcher, prediction_fn, qid2text, K = 1000, method=None):
    """Run every query through ``searcher`` and write the hits to a run file.

    One line is written per hit, in the form
    ``"<qid> 0 <docid> 0 <score> <method>"``.

    Args:
        searcher: Object exposing ``search(query, K)`` whose hits have
            ``docid`` and ``score`` attributes.
        prediction_fn: Path of the output file (overwritten).
        qid2text: Mapping from query id to query text.
        K: Number of hits requested per query.
        method: Run tag written as the last column. When omitted, the
            notebook-level ``method`` variable is used, which is how the
            existing cells call this function.

    Raises:
        NameError: if ``method`` is not passed and no notebook-level
            ``method`` variable exists.
    """
    if method is None:
        # Backward compatibility: callers used to set a global `method` first.
        try:
            method = globals()["method"]
        except KeyError:
            raise NameError("name 'method' is not defined; pass method=... to search_new")
    # `with` closes the run file even if a search call raises midway.
    with open(prediction_fn, "w") as f:
        for qid in qid2text:
            hits = searcher.search(JString(qid2text[qid]), K)
            for hit in hits:
                f.write("{} 0 {} 0 {} {}\n".format(qid, hit.docid, hit.score, method))
    
def get_qid2text_new(data):
    """Map each question's ``"id"`` to its ``"body"`` for the entries of ``data['questions']``.

    If an id occurs more than once, the last entry wins.
    """
    return {question["id"]: question["body"] for question in data['questions']}

In [42]:
# BM25 + RM3
# One parameter triple per split, unpacked into searcher.setRM3ReRanker(...).
# Presumably (fbTerms, fbDocs, originalQueryWeight) -- confirm against SimpleSearcher.
best_rm3_parameters = [[45, 8, 0.2], [45, 6, 0.3], [50, 10, 0.2], [50, 12, 0.2], [45, 8, 0.2]]
fold_path_template = "src/main/resources/fine_tuning/drr_folds/rob04.{}.s{}.json"

for split in range(1, 6):
    # Context managers close the three fold files as soon as they are parsed.
    with open(fold_path_template.format("train", split)) as f_train, \
            open(fold_path_template.format("dev", split)) as f_dev, \
            open(fold_path_template.format("test", split)) as f_test:
        ftrain, fdev, ftest = json.load(f_train), json.load(f_dev), json.load(f_test)
    # Both depend only on the split, so compute them once per split.
    rm3_parameters = best_rm3_parameters[split-1]
    # `method` is also the run tag that search_new writes into each output line.
    method = "BM25_0.9_0.5_RM3_{}_{}_{}".format(*rm3_parameters)
    for mode, data in [("train", ftrain), ("dev", fdev), ("test", ftest)]:
        qid2text = get_qid2text_new(data)
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        searcher.setBM25Similarity(0.9, 0.5)
        searcher.setRM3ReRanker(*rm3_parameters)
        search_new(searcher, prediction_fn, qid2text)
        cal_score_new(prediction=prediction_fn)
        print()

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_45_8_0.2_robust04_split1_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.3053
0.6668
0.4037
0.3567
0.4552

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_45_8_0.2_robust04_split1_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2941
0.724
0.3816
0.349
0.4414

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_45_8_0.2_robust04_split1_test.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2973
0.6487
0.388
0.344
0.4468

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_45_6_0.3_robust04_split2_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.3263
0.6837
0.4187
0.3662
0.4696

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_45_6_0.3_robust04_split2_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2435
0.5995
0.354
0.318
0.41

In [45]:
# BM25
fold_path_template = "src/main/resources/fine_tuning/drr_folds/rob04.{}.s{}.json"
# Constant across all splits and modes; also the run tag that search_new writes.
method = "BM25_0.9_0.5"

for split in range(1, 6):
    # Context managers close the three fold files as soon as they are parsed.
    with open(fold_path_template.format("train", split)) as f_train, \
            open(fold_path_template.format("dev", split)) as f_dev, \
            open(fold_path_template.format("test", split)) as f_test:
        ftrain, fdev, ftest = json.load(f_train), json.load(f_dev), json.load(f_test)
    for mode, data in [("train", ftrain), ("dev", fdev), ("test", ftest)]:
        qid2text = get_qid2text_new(data)
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        searcher.setBM25Similarity(0.9, 0.5)
        # Replaces whatever reranker an earlier cell installed on the shared searcher.
        searcher.setNormalReRanker()
        search_new(searcher, prediction_fn, qid2text)
        cal_score_new(prediction=prediction_fn)
        print()

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split1_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2494
0.7004
0.365
0.3109
0.4265

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split1_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2307
0.6391
0.3378
0.2939
0.3926

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split1_test.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2603
0.6423
0.356
0.3173
0.4252

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split2_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2645
0.6914
0.3723
0.3193
0.4314

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split2_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2151
0.6694
0.334
0.292
0.4104

running /bin/sh run_eval_new.sh predictions_tuned/predict_B

In [44]:
# QL + RM3
# One parameter triple per split, unpacked into searcher.setRM3ReRanker(...).
# Presumably (fbTerms, fbDocs, originalQueryWeight) -- confirm against SimpleSearcher.
best_rm3_parameters = [[45, 8, 0.2], [45, 6, 0.3], [50, 10, 0.2], [50, 12, 0.2], [45, 8, 0.2]]
fold_path_template = "src/main/resources/fine_tuning/drr_folds/rob04.{}.s{}.json"

for split in range(1, 6):
    # Context managers close the three fold files as soon as they are parsed.
    with open(fold_path_template.format("train", split)) as f_train, \
            open(fold_path_template.format("dev", split)) as f_dev, \
            open(fold_path_template.format("test", split)) as f_test:
        ftrain, fdev, ftest = json.load(f_train), json.load(f_dev), json.load(f_test)
    # Both depend only on the split, so compute them once per split.
    rm3_parameters = best_rm3_parameters[split-1]
    # `method` is also the run tag that search_new writes into each output line.
    method = "QL_1000_RM3_{}_{}_{}".format(*rm3_parameters)
    for mode, data in [("train", ftrain), ("dev", fdev), ("test", ftest)]:
        qid2text = get_qid2text_new(data)
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        searcher.setLMDirichletSimilarity(1000)
        searcher.setRM3ReRanker(*rm3_parameters)
        search_new(searcher, prediction_fn, qid2text)
        cal_score_new(prediction=prediction_fn)
        print()

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_RM3_45_8_0.2_robust04_split1_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2854
0.6187
0.379
0.3333
0.4282

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_RM3_45_8_0.2_robust04_split1_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2491
0.6548
0.3378
0.2973
0.3869

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_RM3_45_8_0.2_robust04_split1_test.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2893
0.6318
0.37
0.3247
0.4291

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_RM3_45_6_0.3_robust04_split2_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.305
0.6497
0.3973
0.3491
0.4445

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_RM3_45_6_0.3_robust04_split2_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2157
0.5483
0.316
0.2753
0.3797

running /bin/sh run_

In [46]:
# QL
fold_path_template = "src/main/resources/fine_tuning/drr_folds/rob04.{}.s{}.json"
# Constant across all splits and modes; also the run tag that search_new writes.
# (The original called .format() with no arguments here, which was a no-op.)
method = "QL_1000"

for split in range(1, 6):
    # Context managers close the three fold files as soon as they are parsed.
    with open(fold_path_template.format("train", split)) as f_train, \
            open(fold_path_template.format("dev", split)) as f_dev, \
            open(fold_path_template.format("test", split)) as f_test:
        ftrain, fdev, ftest = json.load(f_train), json.load(f_dev), json.load(f_test)
    for mode, data in [("train", ftrain), ("dev", fdev), ("test", ftest)]:
        qid2text = get_qid2text_new(data)
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        searcher.setLMDirichletSimilarity(1000.0)
        # Replaces whatever reranker an earlier cell installed on the shared searcher.
        searcher.setNormalReRanker()
        search_new(searcher, prediction_fn, qid2text)
        cal_score_new(prediction=prediction_fn)
        print()

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split1_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2491
0.6872
0.361
0.3151
0.4174

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split1_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2179
0.6462
0.3143
0.2748
0.3672

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split1_test.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2685
0.6514
0.364
0.3193
0.4298

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split2_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2706
0.7029
0.3763
0.3276
0.4327

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split2_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2041
0.6044
0.318
0.282
0.384

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split2_tes