-
Notifications
You must be signed in to change notification settings - Fork 11
/
run_retrieve.sh
88 lines (80 loc) · 3.52 KB
/
run_retrieve.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Retrieval and evaluation sweep over MS MARCO.
#
# For every combination of split (dev, test), corpus (doc, passage) and
# number of PQ subvectors m (96 64 48 32 24 16) this script:
#   1. runs `python -m jpq.run_retrieval` with the index and query encoder
#      found under ./data/<corpus>/,
#   2. scores the resulting ranking against ./data/<corpus>/preprocess/<split>-qrel.tsv,
#   3. maps ids back to official ids with `python -m jpq.cvt_back`,
#   4. scores the converted ranking against the official qrels files.
#
# All paths are relative to the current working directory.
# The script stays POSIX-sh compatible (no arrays, no [[ ]]) because it has no
# shebang and may be launched with either `sh` or `bash`.

# Abort on the first failing command so a broken run is not silently scored.
set -e

batch_size=64
trec_eval="./data/trec_eval-9.0.7/trec_eval"

# printf is used wherever a trailing blank line is wanted: bash's builtin echo
# prints "\n" literally unless -e is given, while other shells expand it.
printf '%s\n' "Parameters: Batch size is set to $batch_size"
printf '%s\n\n' "Begin Evaluation"

for mode in dev test; do
  printf '%s\n\n' "Evaluating on msmarco ${mode} set"
  for dataset in doc passage; do
    for m in 96 64 48 32 24 16; do
      echo "Running inference for msmarco-${dataset} ${mode} dataset"
      echo "Parameters: Number of subvectors for each ${dataset} is set to ${m}"

      index_path="./data/${dataset}/download_jpq_index/OPQ${m},IVF1,PQ${m}x8.index"
      query_encoder_path="./data/${dataset}/download_query_encoder/m${m}"
      preprocess_dir="./data/${dataset}/preprocess"
      run_dir="./data/${dataset}/run_retrieve/${mode}"
      output_path="${run_dir}/run.${mode}.m${m}.rank"
      official_id_rank_path="${run_dir}/official.run.${mode}.m${m}.rank"

      # Make sure the destination exists before anything writes into it;
      # a no-op when the directory is already there.
      mkdir -p "$run_dir"

      # m >= 56 searches on CPU, smaller m adds --gpu_search.
      # NOTE(review): presumably the GPU search path cannot handle the larger
      # code sizes - confirm against jpq.run_retrieval.
      if [ "$m" -ge 56 ]; then
        echo "Use cpu search"
        gpu_flag=""
      else
        echo "Use gpu search"
        gpu_flag="--gpu_search"
      fi

      # ${gpu_flag:+...} expands to no word at all (not an empty argument)
      # when gpu_flag is empty, so the CPU call gets no extra argument.
      python -m jpq.run_retrieval \
        --preprocess_dir "$preprocess_dir" \
        --index_path "$index_path" \
        --mode "$mode" \
        --query_encoder_dir "$query_encoder_path" \
        --output_path "$output_path" \
        --batch_size "$batch_size" \
        ${gpu_flag:+"$gpu_flag"}

      # Evaluation against the preprocessed qrels.
      label_path="./data/${dataset}/preprocess/${mode}-qrel.tsv"
      if [ "$mode" = "dev" ]; then
        if [ "$dataset" = "passage" ]; then
          python ./msmarco_eval.py "$label_path" "$output_path"
        else
          # The trailing 100 is passed through unchanged; its meaning is
          # defined by msmarco_eval.py.
          python ./msmarco_eval.py "$label_path" "$output_path" 100
        fi
      else
        "$trec_eval" -c -mrecall.100 -mndcg_cut.10 "$label_path" "$output_path"
      fi

      echo "Convert qids and pids to official ids"
      python -m jpq.cvt_back \
        --input_path "$output_path" \
        --preprocess_dir "$preprocess_dir" \
        --mode "$mode" \
        --output_path "$official_id_rank_path" \
        --dataset "$dataset"

      echo "Use official qrels files to compute metrics"
      if [ "$mode" = "dev" ]; then
        # Evaluate MSMARCO Dev set
        if [ "$dataset" = "passage" ]; then
          python ./msmarco_eval.py ./data/passage/dataset/qrels.dev.small.tsv "$official_id_rank_path"
        else
          # The trailing "doc" is passed through unchanged; its meaning is
          # defined by msmarco_eval.py.
          python ./msmarco_eval.py ./data/doc/dataset/msmarco-docdev-qrels.tsv "$official_id_rank_path" doc
        fi
      else
        # Evaluate TREC Test
        if [ "$dataset" = "passage" ]; then
          "$trec_eval" -c -mndcg_cut.10 -mrecall.100 ./data/passage/dataset/2019qrels-pass.txt "$official_id_rank_path"
        else
          "$trec_eval" -c -mndcg_cut.10 -mrecall.100 ./data/doc/dataset/2019qrels-docs.txt "$official_id_rank_path"
        fi
      fi

      echo "End experiment for m=$m"
      printf '%s\n\n' "***************************"
    done
  done
done