In [2]:
import shlex
import subprocess
import sys
import json
from tqdm import tqdm
import string

import jnius_config

# target_old: lucene6 for thesis
# target_old: lucene7 
# The JVM classpath must be configured through jnius_config before `jnius`
# itself is imported below, so this call has to stay ahead of that import.
jnius_config.set_classpath("target/anserini-0.3.1-SNAPSHOT-fatjar.jar")

from jnius import autoclass
# Java classes made available to Python through pyjnius.
JString = autoclass('java.lang.String')
JSearcher = autoclass('io.anserini.search.SimpleSearcher')

# Single searcher over the Robust04 index; the experiment cells below reuse
# this object and reconfigure its similarity / reranker before each run.
searcher = JSearcher(JString('./lucene-index.robust04.pos+docvectors+rawdocs'))

## 5-fold test on Robust04

In [3]:
def cal_score_new(fn_qrels="src/main/resources/topics-and-qrels/qrels.robust2004.txt", prediction="score.txt"):
    """Evaluate a run file with ``run_eval_new.sh`` and return the parsed metrics.

    The script is invoked as ``/bin/sh run_eval_new.sh <prediction> <fn_qrels>``
    relative to the current working directory. The last whitespace-separated
    token of each of the first five stdout lines is parsed as a float.
    The metric names below follow the variable names used in this function;
    the script itself is not visible here, so its exact output is assumed.

    Args:
        fn_qrels: Path to the qrels file handed to the script.
        prediction: Path to the run/prediction file handed to the script.

    Returns:
        Tuple ``(Map, Mrr, P30, P20, NDCG20)``. Note that P30 comes *before*
        P20 even though the script is read as emitting P20 first; the order
        is kept for backward compatibility with existing callers.

    Raises:
        RuntimeError: If the script output does not contain five parsable
            metric lines (e.g. the script failed). The message includes the
            exit code and captured stdout/stderr.
    """
    cmd = "/bin/sh run_eval_new.sh {} {}".format(prediction, fn_qrels)
    # Announce the command before launching it so a hang or crash is attributable.
    print("running {}".format(cmd))
    p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    pout, perr = p.communicate()
    # Decode once instead of branching on the interpreter version.
    out_text = pout.decode("utf-8", "replace")
    err_text = perr.decode("utf-8", "replace")
    lines = out_text.splitlines()
    try:
        Map, Mrr, P20, P30, NDCG20 = [float(line.split()[-1]) for line in lines[:5]]
    except (IndexError, ValueError):
        # Surface the script's own diagnostics instead of a bare parsing error.
        raise RuntimeError(
            "could not parse five metrics from `{}` (exit code {}): stdout={!r} stderr={!r}".format(
                cmd, p.returncode, out_text, err_text))
    print(Map)
    print(Mrr)
    print(P30)
    print(P20)
    print(NDCG20)
    return Map, Mrr, P30, P20, NDCG20


def get_qid2reldocids(fqrel):
    """Read a qrels file and collect the relevant document ids per query.

    Each non-blank line must hold four whitespace-separated fields:
    ``qid <ignored> docid score``. A document counts as relevant when
    ``int(score) > 0``.

    Args:
        fqrel: Path to the qrels file.

    Returns:
        Dict mapping qid (str) to the set of relevant docids (str). Queries
        with no relevant document do not appear in the dict.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields or
            its score is not an integer.
    """
    qid2reldocids = {}
    # Context manager so the file handle is released even if parsing fails.
    with open(fqrel) as f:
        for l in f:
            fields = l.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            qid, _, docid, score = fields
            if int(score) > 0:
                qid2reldocids.setdefault(qid, set()).add(docid)
    return qid2reldocids

def search_new(searcher, prediction_fn, qid2text, output_fn, qid2reldocids, K=1000, run_tag=None):
    """Run every query through ``searcher`` and write a run file plus a labelled pair file.

    For each hit two rows are written:

    * ``prediction_fn``: ``"<qid> 0 <docno> 0 <score> <run_tag>"``
    * ``output_fn``: tab-separated ``label, query text, document text, qid, docno``,
      where ``label`` is 1 if ``docno`` is in ``qid2reldocids[qid]`` and 0 otherwise,
      and the document text is ``parse_doc_from_index(hit.content)``.

    Args:
        searcher: Object exposing ``search(JString, K)`` whose hits have
            ``score``, ``docid`` and ``content`` attributes.
        prediction_fn: Path of the run file to (over)write.
        qid2text: Dict mapping qid to query text.
        output_fn: Path of the labelled pair file to (over)write.
        qid2reldocids: Dict mapping qid to a collection of relevant docids.
        K: Maximum number of hits requested per query.
        run_tag: Tag written in the last column of the run file. When omitted,
            the notebook-level global ``method`` is used, which is what this
            function implicitly relied on before the parameter existed.

    Raises:
        NameError: If ``run_tag`` is not given and no global ``method`` is defined.
    """
    if run_tag is None:
        # Backward compatibility: existing cells set a global `method` before calling.
        run_tag = globals().get("method")
        if run_tag is None:
            raise NameError("search_new needs a run tag: pass run_tag=... or define a global `method`")
    # Context managers guarantee both files are closed (and flushed) even if
    # the search is interrupted or raises midway.
    with open(prediction_fn, "w") as f, open(output_fn, "w") as out:
        for qid in qid2text:
            a = qid2text[qid]
            hits = searcher.search(JString(a), K)
            relevant = qid2reldocids.get(qid, ())
            for i in range(len(hits)):
                sim = hits[i].score
                docno = hits[i].docid
                label = 1 if docno in relevant else 0
                b = parse_doc_from_index(hits[i].content)
                f.write("{} 0 {} 0 {} {}\n".format(qid, docno, sim, run_tag))
                out.write("{}\t{}\t{}\t{}\t{}\n".format(label, a, b, qid, docno))
            # Flush once per query so progress stays visible on disk during long runs.
            out.flush()
    
def get_qid2text_new(data):
    """Map each question's ``"id"`` to its ``"body"``.

    Args:
        data: Dict with a ``"questions"`` list whose items are dicts carrying
            ``"id"`` and ``"body"`` keys.

    Returns:
        Dict from question id to question body; on duplicate ids the last
        occurrence wins.
    """
    return {question["id"]: question["body"] for question in data['questions']}


def parse_doc_from_index(content):
    """Extract the body of the first ``<TEXT>`` section as a single line.

    Lines are stripped of surrounding whitespace. Collection starts after the
    first line containing ``<TEXT>`` and stops at the first line containing
    ``</TEXT>``; lines that are exactly ``<P>`` or ``</P>`` are dropped. Any
    text sharing a line with the ``<TEXT>`` marker itself is not collected.

    Args:
        content: Raw document text with newline-separated lines.

    Returns:
        The collected lines joined by single spaces, stripped at both ends
        (empty string if nothing was collected).
    """
    pieces = []
    inside = False
    for raw in content.split("\n"):
        line = raw.strip()
        if "<TEXT>" in line:
            inside = True
            continue
        if "</TEXT>" in line:
            break
        if inside and line not in ("<P>", "</P>"):
            pieces.append(line)
    return " ".join(pieces).strip()


In [4]:
# Build the qid -> relevant-docid lookup once; the experiment cells below pass
# it to search_new to label every retrieved document.
fqrel = "src/main/resources/topics-and-qrels/qrels.robust2004.txt"
qid2reldocids = get_qid2reldocids(fqrel)

In [6]:
# BM25 + RM3
# best_rm3_parameters = [[45, 8, 0.2], [45, 6, 0.3], [50, 10, 0.2], [50, 12, 0.2], [45, 8, 0.2]]
# One parameter triple per split, passed positionally to searcher.setRM3Reranker.
# NOTE(review): presumably (feedback terms, feedback docs, original query weight) -- confirm against Anserini.
best_rm3_parameters = [[47, 9, 0.3], [47, 9, 0.3], [47, 9, 0.3], [47, 9, 0.3], [26, 8, 0.3]]
for split in range(1, 6):
    # Load the three partitions of this split, closing each file right after parsing.
    fold_data = {}
    for mode in ("train", "dev", "test"):
        with open("src/main/resources/fine_tuning/drr_folds/rob04.{}.s{}.json".format(mode, split)) as fin:
            fold_data[mode] = json.load(fin)
    ftrain, fdev, ftest = fold_data["train"], fold_data["dev"], fold_data["test"]

    # Everything below depends only on the split, so configure once per split.
    rm3_params = best_rm3_parameters[split-1]
    # `method` must stay a notebook-level name: search_new reads it as the run tag.
    method = "BM25_0.9_0.5_RM3_{}_{}_{}".format(*rm3_params)
    searcher.setBM25Similarity(0.9, 0.5)
    searcher.setRM3Reranker(*rm3_params)

    for mode, data in [("train", ftrain), ("dev", fdev), ("test", ftest)]:
        qid2text = get_qid2text_new(data)
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        output_fn = "src/main/python/rerank/MatchZoo/data/robust04/Robust04Corpus/split{}_{}_{}.txt".format(split, mode, method)
        search_new(searcher, prediction_fn, qid2text, output_fn, qid2reldocids)
        cal_score_new(prediction=prediction_fn)
        print()

KeyboardInterrupt: 

In [None]:
# BM25
# Run tag and searcher configuration are the same for every split and mode,
# so set them once up front.
# `method` must stay a notebook-level name: search_new reads it as the run tag.
method = "BM25_0.9_0.5"
searcher.setBM25Similarity(0.9, 0.5)
searcher.setDefaultReranker()
for split in range(1, 6):
    # Load the three partitions of this split, closing each file right after parsing.
    fold_data = {}
    for mode in ("train", "dev", "test"):
        with open("src/main/resources/fine_tuning/drr_folds/rob04.{}.s{}.json".format(mode, split)) as fin:
            fold_data[mode] = json.load(fin)
    ftrain, fdev, ftest = fold_data["train"], fold_data["dev"], fold_data["test"]

    for mode, data in [("train", ftrain), ("dev", fdev), ("test", ftest)]:
        qid2text = get_qid2text_new(data)
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        output_fn = "src/main/python/rerank/MatchZoo/data/robust04/Robust04Corpus/split{}_{}_{}.txt".format(split, mode, method)
        search_new(searcher, prediction_fn, qid2text, output_fn, qid2reldocids)
        cal_score_new(prediction=prediction_fn)
        print()

In [None]:
# QL + RM3
# One parameter triple per split, passed positionally to searcher.setRM3Reranker.
# NOTE(review): presumably (feedback terms, feedback docs, original query weight) -- confirm against Anserini.
best_rm3_parameters = [[45, 8, 0.2], [45, 6, 0.3], [50, 10, 0.2], [50, 12, 0.2], [45, 8, 0.2]]

for split in range(1, 6):
    # Load the three partitions of this split, closing each file right after parsing.
    fold_data = {}
    for mode in ("train", "dev", "test"):
        with open("src/main/resources/fine_tuning/drr_folds/rob04.{}.s{}.json".format(mode, split)) as fin:
            fold_data[mode] = json.load(fin)
    ftrain, fdev, ftest = fold_data["train"], fold_data["dev"], fold_data["test"]

    # Everything below depends only on the split, so configure once per split.
    rm3_params = best_rm3_parameters[split-1]
    # `method` must stay a notebook-level name: search_new reads it as the run tag.
    method = "QL_1000_RM3_{}_{}_{}".format(*rm3_params)
    searcher.setLMDirichletSimilarity(1000)
    searcher.setRM3Reranker(*rm3_params)

    for mode, data in [("train", ftrain), ("dev", fdev), ("test", ftest)]:
        qid2text = get_qid2text_new(data)
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        output_fn = "src/main/python/rerank/MatchZoo/data/robust04/Robust04Corpus/split{}_{}_{}.txt".format(split, mode, method)
        search_new(searcher, prediction_fn, qid2text, output_fn, qid2reldocids)
        cal_score_new(prediction=prediction_fn)
        print()

In [9]:
# QL
# Run tag and searcher configuration are the same for every split and mode,
# so set them once up front.
# `method` must stay a notebook-level name: search_new reads it as the run tag.
method = "QL_1000"
searcher.setLMDirichletSimilarity(1000.0)
searcher.setDefaultReranker()
for split in range(1, 6):
    # Load the three partitions of this split, closing each file right after parsing.
    fold_data = {}
    for mode in ("train", "dev", "test"):
        with open("src/main/resources/fine_tuning/drr_folds/rob04.{}.s{}.json".format(mode, split)) as fin:
            fold_data[mode] = json.load(fin)
    ftrain, fdev, ftest = fold_data["train"], fold_data["dev"], fold_data["test"]

    for mode, data in [("train", ftrain), ("dev", fdev), ("test", ftest)]:
        qid2text = get_qid2text_new(data)
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        # NOTE(review): unlike the other 5-fold cells, this path carries no method
        # suffix (split{}_{}.txt). Left unchanged in case downstream code reads this
        # exact name -- confirm whether that is intentional.
        output_fn = "src/main/python/rerank/MatchZoo/data/robust04/Robust04Corpus/split{}_{}.txt".format(split, mode)
        search_new(searcher, prediction_fn, qid2text, output_fn, qid2reldocids)
        cal_score_new(prediction=prediction_fn)
        print()

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split1_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2486
0.6999
0.3147
0.3587
0.4182

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split1_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2175
0.6579
0.2769
0.3122
0.3677

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split1_test.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2696
0.6607
0.318
0.365
0.4334

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split2_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2712
0.7149
0.3276
0.376
0.4347

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split2_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2019
0.6158
0.2793
0.313
0.384

running /bin/sh run_eval_new.sh predictions_tuned/predict_QL_1000_robust04_split2_tes

## 2-fold test on Robust04

In [27]:
def get_qid2text(fn, qid2text, split):
    """Build train/dev/test query dicts for one direction of a 2-fold split.

    ``fn`` is a JSON file holding exactly two lists of query ids. For
    ``split == 0`` the first list is used for train/dev and the second for
    test; for any other ``split`` the roles are swapped. The train/dev list
    is cut in file order: the first ``int(len * 0.8)`` ids form train and
    the remainder forms dev.

    Args:
        fn: Path to the JSON file with the two folds.
        qid2text: Dict mapping every query id in the folds to its text.
        split: 0 to tune on the first fold, anything else to tune on the second.

    Returns:
        Tuple ``(qid2text_train, qid2text_dev, qid2text_test)`` of dicts.

    Raises:
        KeyError: If a fold contains a query id missing from ``qid2text``.
    """
    # Context manager so the folds file is closed right after parsing.
    with open(fn) as fin:
        qid_fold0, qid_fold1 = json.load(fin)

    if split == 0:
        tuning_fold, qids_test = qid_fold0, qid_fold1
    else:
        tuning_fold, qids_test = qid_fold1, qid_fold0

    train_num = int(len(tuning_fold) * 0.8)
    qids_train = tuning_fold[:train_num]
    qids_dev = tuning_fold[train_num:]

    qid2text_train = {qid: qid2text[qid] for qid in qids_train}
    qid2text_dev = {qid: qid2text[qid] for qid in qids_dev}
    qid2text_test = {qid: qid2text[qid] for qid in qids_test}
    return qid2text_train, qid2text_dev, qid2text_test

def get_qid2query(ftopic, query_tag="title"):
    """Parse a topics file into a dict of query id -> query text.

    The file is scanned line by line. A line containing ``"Number: "`` sets
    the current query id (the text after the marker is parsed as an int and
    stored as its decimal string). A line containing ``<query_tag>`` supplies
    the query text: whatever follows the tag on that line or, if that is
    empty, the whole next line.

    Args:
        ftopic: Path to the topics file.
        query_tag: Name of the tag (without angle brackets) holding the query.

    Returns:
        Dict mapping query id (str) to stripped query text.

    Raises:
        ValueError: If a query tag appears before any ``"Number: "`` line, or
            the text after ``"Number: "`` is not an integer.
    """
    open_tag = "<{}>".format(query_tag)
    number_marker = "Number: "
    qid2query = {}
    qid = None
    take_next_line = False
    # Context manager so the topics file is closed once parsing ends.
    with open(ftopic) as f:
        for l in f:
            if take_next_line:
                # The previous line held only the tag; the query is this line.
                qid2query[qid] = l.strip()
                take_next_line = False
            ind = l.find(number_marker)
            if ind >= 0:
                # int() tolerates surrounding whitespace, including the newline.
                qid = str(int(l[ind + len(number_marker):]))
            ind = l.find(open_tag)
            if ind >= 0:
                if qid is None:
                    raise ValueError("found {} before any 'Number: ' line in {}".format(open_tag, ftopic))
                # Skip exactly the tag and strip, rather than assuming a fixed
                # offset and a trailing newline; this keeps the first character
                # of "<title>Foo" and the last character of an unterminated last line.
                query = l[ind + len(open_tag):].strip()
                if query:
                    qid2query[qid] = query
                else:
                    take_next_line = True
    return qid2query

# fqrel = "src/main/resources/topics-and-qrels/qrels.robust2004.txt"
# qid2reldocids = get_qid2reldocids(fqrel)

In [31]:
# BM25
ftopic = "src/main/resources/topics-and-qrels/topics.robust04.301-450.601-700.txt"
qid2text_all = get_qid2query(ftopic)
# Run tag and searcher configuration are the same for every split and mode,
# so set them once up front.
# `method` must stay a notebook-level name: search_new reads it as the run tag.
method = "BM25_0.9_0.5"
searcher.setBM25Similarity(0.9, 0.5)
searcher.setDefaultReranker()
for split in range(2):
    qids_train, qids_dev, qids_test = get_qid2text(
        "src/main/resources/fine_tuning/robust04-paper1-folds.json", qid2text_all, split)
    for mode, qid2text in [("train", qids_train), ("dev", qids_dev), ("test", qids_test)]:
        # NOTE(review): unlike output_fn, this path has no "2fold" marker, so for
        # split 1 it is the same file name the 5-fold BM25 cell writes and the two
        # runs overwrite each other. Left unchanged -- confirm before renaming.
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        output_fn = "src/main/python/rerank/MatchZoo/data/robust04/Robust04Corpus/split{}_2fold_{}_{}.txt".format(split, mode, method)
        search_new(searcher, prediction_fn, qid2text, output_fn, qid2reldocids)
        cal_score_new(prediction=prediction_fn)
        print()

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split0_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2557
0.6747
0.2967
0.3355
0.4175

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split0_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2548
0.7325
0.2958
0.3583
0.4031

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split0_test.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2477
0.6827
0.3259
0.3824
0.4308

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split1_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2299
0.6651
0.3267
0.385
0.4281

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_robust04_split1_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.3187
0.7532
0.3227
0.372
0.4417

running /bin/sh run_eval_new.sh predictions_tuned/predict

In [38]:
# BM25+RM3
# One parameter triple per split (index 0 and 1), passed positionally to searcher.setRM3Reranker.
# NOTE(review): presumably (feedback terms, feedback docs, original query weight) -- confirm against Anserini.
best_rm3_parameters = [[26, 8, 0.3], [50, 17, 0.2]]
ftopic = "src/main/resources/topics-and-qrels/topics.robust04.301-450.601-700.txt"
qid2text_all = get_qid2query(ftopic)
for split in range(2):
    qids_train, qids_dev, qids_test = get_qid2text(
        "src/main/resources/fine_tuning/robust04-paper1-folds.json", qid2text_all, split)

    # Everything below depends only on the split, so configure once per split.
    rm3_params = best_rm3_parameters[split]
    # `method` must stay a notebook-level name: search_new reads it as the run tag.
    method = "BM25_0.9_0.5_RM3_{}_{}_{}".format(*rm3_params)
    searcher.setBM25Similarity(0.9, 0.5)
    searcher.setRM3Reranker(*rm3_params)

    for mode, qid2text in [("train", qids_train), ("dev", qids_dev), ("test", qids_test)]:
        # NOTE(review): prediction_fn has no "2fold" marker (output_fn does); it is
        # only told apart from the 5-fold run files by the RM3 parameters in `method`.
        prediction_fn = "predictions_tuned/predict_{}_robust04_split{}_{}.txt".format(method, split, mode)
        output_fn = "src/main/python/rerank/MatchZoo/data/robust04/Robust04Corpus/split{}_2fold_{}_{}.txt".format(split, mode, method)
        search_new(searcher, prediction_fn, qid2text, output_fn, qid2reldocids)
        cal_score_new(prediction=prediction_fn)
        print()

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_26_8_0.3_robust04_split0_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.3005
0.6804
0.34
0.3825
0.4556

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_26_8_0.3_robust04_split0_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.3596
0.7213
0.3819
0.4396
0.4819

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_26_8_0.3_robust04_split0_test.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2953
0.6525
0.3523
0.4008
0.4434

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_50_17_0.2_robust04_split1_train.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.2875
0.6463
0.3663
0.415
0.4532

running /bin/sh run_eval_new.sh predictions_tuned/predict_BM25_0.9_0.5_RM3_50_17_0.2_robust04_split1_dev.txt src/main/resources/topics-and-qrels/qrels.robust2004.txt
0.3595
0.7182
0.3467
0.402


In [35]:
# Inspect the test-fold queries. `qids_test` is not defined in this cell: it is
# whatever the last iteration of a 2-fold loop above left behind, so the value
# shown depends on which of those cells ran most recently.
qids_test

{'302': 'Poliomyelitis and Post-Polio',
 '304': 'Endangered Species (Mammals)',
 '306': 'African Civilian Deaths',
 '308': 'Implant Dentistry',
 '310': 'Radio Waves and Brain Cancer',
 '312': 'Hydroponics',
 '314': 'Marine Vegetation',
 '316': 'Polygamy Polyandry Polygyny',
 '318': 'Best Retirement Country',
 '320': 'Undersea Fiber Optic Cable',
 '322': 'International Art Crime',
 '324': 'Argentine/British Relations',
 '326': 'Ferry Sinkings',
 '328': 'Pope Beatifications',
 '330': 'Iran-Iraq Cooperation',
 '332': 'Income Tax Evasion',
 '334': 'Export Controls Cryptography',
 '336': 'Black Bear Attacks',
 '338': 'Risk of Aspirin',
 '340': 'Land Mine Ban',
 '342': 'Diplomatic Expulsion',
 '344': 'Abuses of E-Mail',
 '346': 'Educational Standards',
 '348': 'Agoraphobia',
 '350': 'Health and Computer Terminals',
 '352': 'British Chunnel impact',
 '354': 'journalist risks',
 '356': 'postmenopausal estrogen Britain',
 '358': 'blood-alcohol fatalities',
 '360': 'drug legalization benefits',
