Add 2CR for Contriever on BEIR (#1446)
Cathrineee committed Mar 6, 2023
1 parent fa3181d commit dfae4bb
Showing 9 changed files with 1,585 additions and 37 deletions.
1,018 changes: 989 additions & 29 deletions docs/2cr/beir.html

Large diffs are not rendered by default.

379 changes: 379 additions & 0 deletions pyserini/prebuilt_index_info.py

Large diffs are not rendered by default.

148 changes: 148 additions & 0 deletions pyserini/resources/beir.yaml
@@ -443,3 +443,151 @@ conditions:
          - nDCG@10: 0.6992
            R@100: 0.9270
            R@1000: 0.9767
  - name: contriever
    command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/contriever --index beir-v1.0.0-${dataset}.contriever --topics beir-v1.0.0-${dataset}-test --output $output --batch 128 --threads 16 --hits 1000 --remove-query
    datasets:
      - dataset: trec-covid
        scores:
          - nDCG@10: 0.2732
            R@100: 0.0368
            R@1000: 0.1675
      - dataset: bioasq
        scores:
          - nDCG@10: 0.3016
            R@100: 0.5412
            R@1000: 0.7396
      - dataset: nfcorpus
        scores:
          - nDCG@10: 0.3173
            R@100: 0.2943
            R@1000: 0.6232
      - dataset: nq
        scores:
          - nDCG@10: 0.2536
            R@100: 0.7712
            R@1000: 0.9286
      - dataset: hotpotqa
        scores:
          - nDCG@10: 0.4807
            R@100: 0.7046
            R@1000: 0.8294
      - dataset: fiqa
        scores:
          - nDCG@10: 0.2449
            R@100: 0.5619
            R@1000: 0.8215
      - dataset: signal1m
        scores:
          - nDCG@10: 0.2338
            R@100: 0.2568
            R@1000: 0.4757
      - dataset: trec-news
        scores:
          - nDCG@10: 0.3484
            R@100: 0.4234
            R@1000: 0.7389
      - dataset: robust04
        scores:
          - nDCG@10: 0.3155
            R@100: 0.2757
            R@1000: 0.5097
      - dataset: arguana
        scores:
          - nDCG@10: 0.3791
            R@100: 0.9011
            R@1000: 0.9851
      - dataset: webis-touche2020
        scores:
          - nDCG@10: 0.1668
            R@100: 0.3736
            R@1000: 0.7144
      - dataset: cqadupstack-android
        scores:
          - nDCG@10: 0.3771
            R@100: 0.7436
            R@1000: 0.9173
      - dataset: cqadupstack-english
        scores:
          - nDCG@10: 0.3571
            R@100: 0.6442
            R@1000: 0.8042
      - dataset: cqadupstack-gaming
        scores:
          - nDCG@10: 0.4597
            R@100: 0.8092
            R@1000: 0.9354
      - dataset: cqadupstack-gis
        scores:
          - nDCG@10: 0.2411
            R@100: 0.5792
            R@1000: 0.8018
      - dataset: cqadupstack-mathematica
        scores:
          - nDCG@10: 0.1841
            R@100: 0.5127
            R@1000: 0.7757
      - dataset: cqadupstack-physics
        scores:
          - nDCG@10: 0.3430
            R@100: 0.7013
            R@1000: 0.8980
      - dataset: cqadupstack-programmers
        scores:
          - nDCG@10: 0.3029
            R@100: 0.6402
            R@1000: 0.8434
      - dataset: cqadupstack-stats
        scores:
          - nDCG@10: 0.2483
            R@100: 0.5269
            R@1000: 0.7417
      - dataset: cqadupstack-tex
        scores:
          - nDCG@10: 0.1540
            R@100: 0.4333
            R@1000: 0.6870
      - dataset: cqadupstack-unix
        scores:
          - nDCG@10: 0.2636
            R@100: 0.5879
            R@1000: 0.8212
      - dataset: cqadupstack-webmasters
        scores:
          - nDCG@10: 0.2878
            R@100: 0.6485
            R@1000: 0.8800
      - dataset: cqadupstack-wordpress
        scores:
          - nDCG@10: 0.1914
            R@100: 0.5364
            R@1000: 0.7551
      - dataset: quora
        scores:
          - nDCG@10: 0.8349
            R@100: 0.9871
            R@1000: 0.9981
      - dataset: dbpedia-entity
        scores:
          - nDCG@10: 0.2916
            R@100: 0.4529
            R@1000: 0.7142
      - dataset: scidocs
        scores:
          - nDCG@10: 0.1491
            R@100: 0.3601
            R@1000: 0.6105
      - dataset: fever
        scores:
          - nDCG@10: 0.6821
            R@100: 0.9356
            R@1000: 0.9655
      - dataset: climate-fever
        scores:
          - nDCG@10: 0.1550
            R@100: 0.4422
            R@1000: 0.7232
      - dataset: scifact
        scores:
          - nDCG@10: 0.6493
            R@100: 0.9260
            R@1000: 0.9967
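
For orientation, the ${dataset} and $output placeholders in the command above are filled in per dataset by the reproduction scripts. Below is a minimal sketch of that expansion, assuming PyYAML is installed; the loop and the run-file naming are illustrative, not the exact logic of the 2CR driver.

# Illustrative expansion of the contriever condition's command template per dataset.
# Assumes PyYAML; the output path below is a placeholder, not the 2CR driver's convention.
import yaml

with open('pyserini/resources/beir.yaml') as f:
    conditions = yaml.safe_load(f)['conditions']

contriever = next(c for c in conditions if c['name'] == 'contriever')
for entry in contriever['datasets']:
    dataset = entry['dataset']
    output = f'runs/run.beir-v1.0.0-{dataset}.contriever.txt'  # placeholder run-file name
    cmd = contriever['command'].replace('${dataset}', dataset).replace('$output', output)
    print(cmd)
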
20 changes: 20 additions & 0 deletions scripts/beir/gather_beir_index_stats.py
@@ -74,3 +74,23 @@
    print(f' "unique_terms": {stats["unique_terms"]},')
    print(f' "downloaded": False')
    print(f' }},')

# Stats for "contriever" indexes
for key in beir_keys:
    index_reader = IndexReader(f'indexes/faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}')
    stats = index_reader.stats()
    md5 = compute_md5(f'indexes/faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.tar.gz')
    size = os.path.getsize(f'indexes/faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.tar.gz')
    print(f' "beir-v1.0.0-{key}.contriever": {{')
    print(f' "description": "Faiss index for BEIR v1.0.0 ({beir_keys[key]}) corpus encoded by Contriever encoder.",')
    print(f' "filename": "faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.tar.gz",')
    print(f' "readme": "faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.README.md",')
    print(f' "urls": [')
    print(f' "https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-indexes/faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.tar.gz"')
    print(f' ],')
    print(f' "md5": "{md5}",')
    print(f' "size compressed (bytes)": {size},')
    print(f' "documents": {stats["documents"]},')
    print(f' "downloaded": False,')
    print(f' "texts": "beir-v1.0.0-{key}.flat"')
    print(f' }},')
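
Each loop iteration prints an entry meant to be pasted into pyserini/prebuilt_index_info.py (the 379-line addition above). The fragment below sketches the shape of one such entry; <date>, <commitid>, <md5>, <size>, <num-docs>, and the dataset description are placeholders for values computed at generation time, not real metadata.

# Illustrative shape of one printed entry (placeholder values only):
"beir-v1.0.0-scifact.contriever": {
    "description": "Faiss index for BEIR v1.0.0 (<dataset description>) corpus encoded by Contriever encoder.",
    "filename": "faiss.beir-v1.0.0-scifact.contriever.<date>.<commitid>.tar.gz",
    "readme": "faiss.beir-v1.0.0-scifact.contriever.<date>.<commitid>.README.md",
    "urls": [
        "https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-indexes/faiss.beir-v1.0.0-scifact.contriever.<date>.<commitid>.tar.gz"
    ],
    "md5": "<md5>",
    "size compressed (bytes)": <size>,
    "documents": <num-docs>,
    "downloaded": False,
    "texts": "beir-v1.0.0-scifact.flat"
},
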
13 changes: 13 additions & 0 deletions scripts/beir/run_beir_baselines.py
@@ -92,3 +92,16 @@
    os.system(cmd)
    cmd = f'python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100,1000 beir-v1.0.0-{key}-test runs/run.beir-v1.0.0-{key}-splade_distil_cocodenser_medium.trec'
    os.system(cmd)

# Runs on Contriever index
for key in beir_keys:
    cmd = f'python -m pyserini.search.faiss \
            --encoder-class contriever --encoder facebook/contriever \
            --index beir-v1.0.0-{key}.contriever \
            --topics beir-v1.0.0-{key}-test \
            --output runs/run.beir.contriever.{key}.txt \
            --batch 128 --threads 16 \
            --remove-query --hits 1000'
    os.system(cmd)
    cmd = f'python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100,1000 beir-v1.0.0-{key}-test runs/run.beir.contriever.{key}.txt'
    os.system(cmd)
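
Besides printing the evaluation output, it can be handy to capture the scores programmatically, for example to compare them with the values recorded in beir.yaml. Below is a minimal sketch, assuming the standard trec_eval output format of one "metric all value" line per requested metric; the helper name and regex are illustrative.

# Sketch: capture nDCG@10 / R@100 / R@1000 from trec_eval instead of only printing them.
# Assumes standard trec_eval output lines such as "ndcg_cut_10   all   0.6493".
import re
import subprocess

def collect_metrics(qrels, run):
    out = subprocess.run(
        ['python', '-m', 'pyserini.eval.trec_eval', '-c',
         '-m', 'ndcg_cut.10', '-m', 'recall.100,1000', qrels, run],
        capture_output=True, text=True).stdout
    return {metric: float(value)
            for metric, value in re.findall(r'^(\S+)\s+all\s+(\S+)\s*$', out, re.MULTILINE)}
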
3 changes: 3 additions & 0 deletions scripts/repro_matrix/beir_html.template
@@ -149,6 +149,7 @@ pre[class*="prettyprint"] {
<th class="headertop" colspan="3"><b>BM25 Flat</b></th>
<th class="headertop" colspan="3"><b>BM25 Multifield</b></th>
<th class="headertop" colspan="2"><b>SPLADE</b></th>
<th class="headertop" colspan="2"><b>Contriever</b></th>
</tr>
<tr>
<th class="headerbottom" scope="col"></th>
@@ -161,6 +162,8 @@
<th class="headerbottom" scope="col"></th>
<th class="headerbottom" scope="col">nDCG@10</th>
<th class="headerbottom" scope="col">R@100</th>
<th class="headerbottom" scope="col">nDCG@10</th>
<th class="headerbottom" scope="col">R@100</th>
</tr>
</thead>
<tbody>
20 changes: 19 additions & 1 deletion scripts/repro_matrix/beir_html_row.template
@@ -10,10 +10,12 @@
<td></td>
<td>$s5</td>
<td>$s6</td>
<td>$s7</td>
<td>$s8</td>
</tr>
<tr class="hide-table-padding">
<td></td>
-<td colspan="9">
+<td colspan="11">
<div id="collapse${row_cnt}" class="collapse in p-3">

<!-- Tabs navs -->
@@ -27,6 +29,9 @@
<li class="nav-item" role="presentation">
<a class="nav-link" id="row${row_cnt}-tab3-header" data-mdb-toggle="tab" href="#row${row_cnt}-tab3" role="tab" aria-controls="row${row_cnt}-tab3" aria-selected="false" style="text-transform:none">SPLADE</a>
</li>
<li class="nav-item" role="presentation">
<a class="nav-link" id="row${row_cnt}-tab4-header" data-mdb-toggle="tab" href="#row${row_cnt}-tab4" role="tab" aria-controls="row${row_cnt}-tab4" aria-selected="false" style="text-transform:none">Contriever</a>
</li>
</ul>
<!-- Tabs navs -->

@@ -70,6 +75,19 @@ Evaluation commands:
<pre><code>${eval_cmd3}</code></pre>
</blockquote>

</div>
<div class="tab-pane fade" id="row${row_cnt}-tab4" role="tabpanel" aria-labelledby="row${row_cnt}-tab4">
Command to generate run:

<blockquote class="mycode">
<pre><code>$cmd4
</code></pre></blockquote>
Evaluation commands:

<blockquote class="mycode">
<pre><code>${eval_cmd4}</code></pre>
</blockquote>

</div>
</div>
<!-- Tabs content -->
5 changes: 5 additions & 0 deletions scripts/repro_matrix/generate_html_beir.py
100644 → 100755
@@ -25,6 +25,7 @@
def format_run_command(raw):
    return raw.replace('--topics', '\\\n --topics')\
        .replace('--index', '\\\n --index')\
        .replace('--encoder-class', '\\\n --encoder-class')\
        .replace('--output ', '\\\n --output ')\
        .replace('--output-format trec', '\\\n --output-format trec \\\n ') \
        .replace('--hits ', '\\\n --hits ')
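
As a quick illustration of what format_run_command produces (hypothetical input; the exact continuation spacing comes from the replacement strings above):

# Hypothetical usage of format_run_command on a one-line command string.
cmd = ('python -m pyserini.search.faiss --encoder-class contriever '
       '--encoder facebook/contriever --index beir-v1.0.0-scifact.contriever '
       '--topics beir-v1.0.0-scifact-test --output run.txt --hits 1000')
print(format_run_command(cmd))
# Each matched flag now starts a new, backslash-continued line, roughly:
#   python -m pyserini.search.faiss \
#    --encoder-class contriever --encoder facebook/contriever \
#    --index beir-v1.0.0-scifact.contriever \
#    --topics beir-v1.0.0-scifact-test \
#    --output run.txt \
#    --hits 1000
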
@@ -84,12 +85,16 @@ def read_file(f):
            s4=f'{table[dataset]["multifield"]["R@100"]:8.4f}',
            s5=f'{table[dataset]["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}',
            s6=f'{table[dataset]["splade-distil-cocodenser-medium"]["R@100"]:8.4f}',
            s7=f'{table[dataset]["contriever"]["nDCG@10"]:8.4f}',
            s8=f'{table[dataset]["contriever"]["R@100"]:8.4f}',
            cmd1=commands[dataset]["flat"],
            cmd2=commands[dataset]["multifield"],
            cmd3=commands[dataset]["splade-distil-cocodenser-medium"],
            cmd4=commands[dataset]["contriever"],
            eval_cmd1=eval_commands[dataset]["flat"].rstrip(),
            eval_cmd2=eval_commands[dataset]["multifield"].rstrip(),
            eval_cmd3=eval_commands[dataset]["splade-distil-cocodenser-medium"].rstrip(),
            eval_cmd4=eval_commands[dataset]["contriever"].rstrip(),
        )

        html_rows.append(s)
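
The keyword arguments above fill the $s7/$s8 score cells and the $cmd4/${eval_cmd4} blocks added to beir_html_row.template, presumably via Python's string.Template (an assumption based on the $-style placeholders). A toy sketch of that substitution:

# Toy stand-in for the row template; the real one is scripts/repro_matrix/beir_html_row.template.
from string import Template

row_template = '<td>$s7</td>\n<td>$s8</td>\n<pre><code>$cmd4</code></pre>'
s = Template(row_template).substitute(
    s7=f'{0.6493:8.4f}',   # Contriever nDCG@10 on scifact, from beir.yaml above
    s8=f'{0.9260:8.4f}',   # Contriever R@100 on scifact
    cmd4='python -m pyserini.search.faiss ...',
)
print(s)
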
16 changes: 9 additions & 7 deletions scripts/repro_matrix/run_all_beir.py
@@ -96,18 +96,20 @@
         final_score = (top_level_sums[model][metric] + cqa_score) / 18
         final_scores[model][metric] = final_score
 
-    print(' ' * 30 + 'BM25-flat' + ' ' * 10 + 'BM25-mf' + ' ' * 11 + 'SPLADE')
+    print(' ' * 30 + 'BM25-flat' + ' ' * 10 + 'BM25-mf' + ' ' * 11 + 'SPLADE' + ' ' * 11 + 'Contriever')
     print(' ' * 26 + 'nDCG@10 R@100 ' * 3)
-    print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14)
+    print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14)
     for dataset in beir_keys:
         print(f'{dataset:25}' +
               f'{table[dataset]["bm25-flat"]["nDCG@10"]:8.4f}{table[dataset]["bm25-flat"]["R@100"]:8.4f} ' +
               f'{table[dataset]["bm25-multifield"]["nDCG@10"]:8.4f}{table[dataset]["bm25-multifield"]["R@100"]:8.4f} ' +
-              f'{table[dataset]["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{table[dataset]["splade-distil-cocodenser-medium"]["R@100"]:8.4f}')
-    print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14)
-    print('avg' + ' ' * 22 + f'{final_scores["bm25-flat"]["nDCG@10"]:8.4f}{final_scores["bm25-flat"]["R@100"]:8.4f} ' +
-          f'{final_scores["bm25-multifield"]["nDCG@10"]:8.4f}{final_scores["bm25-multifield"]["R@100"]:8.4f} ' +
-          f'{final_scores["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{final_scores["splade-distil-cocodenser-medium"]["R@100"]:8.4f} ')
+              f'{table[dataset]["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{table[dataset]["splade-distil-cocodenser-medium"]["R@100"]:8.4f}' +
+              f'{table[dataset]["contriever"]["nDCG@10"]:8.4f}{table[dataset]["contriever"]["R@100"]:8.4f} ')
+    print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14)
+    print('avg' + ' ' * 22 + f'{final_scores["bm25-flat"]["nDCG@10"]:8.4f}{final_scores["bm25-flat"]["R@100"]:8.4f} ' +
+          f'{final_scores["bm25-multifield"]["nDCG@10"]:8.4f}{final_scores["bm25-multifield"]["R@100"]:8.4f} ' +
+          f'{final_scores["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{final_scores["splade-distil-cocodenser-medium"]["R@100"]:8.4f} ' +
+          f'{final_scores["contriever"]["nDCG@10"]:8.4f}{final_scores["contriever"]["R@100"]:8.4f} ')

end = time.time()

