relevancy_metrics.py
import os
import subprocess
import time


def get_map_mrr(qids, predictions, labels, device=0, keep_results=False):
    """
    Compute MAP and MRR with the trec_eval utility.
    qids, predictions, and labels must all have the same length.
    :param qids: query ids corresponding to the predictions and labels
    :param predictions: iterable of relevance scores produced by the model
    :param labels: iterable of ground-truth relevance labels from the dataset
    :param device: device index (GPU index, or -1 for CPU); used only to avoid
        filename collisions when calling this function concurrently from
        different threads of execution
    :param keep_results: if True, keep the generated qrel and results files
        instead of deleting them
    :return: a (mean_average_precision, mean_reciprocal_rank) tuple
    """
    timestamp = time.time()
    qrel_fname = 'trecqa_{}_{}.qrel'.format(timestamp, device)
    results_fname = 'trecqa_{}_{}.results'.format(timestamp, device)
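    # TREC formats: qrel lines are '<qid> <iter> <docno> <rel>' and results
    # lines are '<qid> <Q0> <docno> <rank> <sim> <run_id>'; trec_eval ignores
    # the iter, Q0, and rank fields, so constant placeholders suffice here.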
qrel_template = '{qid} 0 {docno} {rel}\n'
results_template = '{qid} 0 {docno} 0 {sim} castor-model\n'
with open(qrel_fname, 'w') as f1, open(results_fname, 'w') as f2:
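        # assign each (prediction, label) pair a unique synthetic document number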
docnos = range(len(qids))
for qid, docno, predicted, actual in zip(qids, docnos, predictions, labels):
f1.write(qrel_template.format(qid=qid, docno=docno, rel=actual))
f2.write(results_template.format(qid=qid, docno=docno, sim=predicted))
trec_eval_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'trec_eval-9.0.5/trec_eval')
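    # restrict trec_eval's report to MAP and MRR; each measure is printed on
    # its own line as tab-separated '<measure> <qid> <value>', so the score
    # is the last tab-separated field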
trec_out = subprocess.check_output([trec_eval_path, '-m', 'map', '-m', 'recip_rank', qrel_fname, results_fname])
trec_out_lines = str(trec_out, 'utf-8').split('\n')
mean_average_precision = float(trec_out_lines[0].split('\t')[-1])
mean_reciprocal_rank = float(trec_out_lines[1].split('\t')[-1])
if keep_results:
print("Saving prediction file to {}".format(results_fname))
print("Saving qrel file to {}".format(qrel_fname))
else:
os.remove(results_fname)
os.remove(qrel_fname)
return mean_average_precision, mean_reciprocal_rank
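

if __name__ == '__main__':
    # Minimal usage sketch with hypothetical toy data: two queries with three
    # candidate answers each. Assumes the trec_eval binary has been built at
    # trec_eval-9.0.5/trec_eval next to this file, as get_map_mrr expects.
    toy_qids = [1, 1, 1, 2, 2, 2]
    toy_predictions = [0.9, 0.2, 0.6, 0.1, 0.8, 0.3]
    toy_labels = [1, 0, 0, 0, 1, 1]
    map_score, mrr_score = get_map_mrr(toy_qids, toy_predictions, toy_labels)
    print('MAP: {:.4f}, MRR: {:.4f}'.format(map_score, mrr_score))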