Skip to content

Commit

Permalink
Repackage prebuilt indexes for MS MARCO V2 doc (#1873)
Browse files: browse the repository at this point in the history
  • Loading branch information
lintool authored Apr 30, 2024
1 parent 6ed6c45 commit d4d9368
Show file tree
Hide file tree
Showing 26 changed files with 270 additions and 409 deletions.
80 changes: 40 additions & 40 deletions docs/2cr/msmarco-v2-doc.html

Large diffs are not rendered by default.

180 changes: 146 additions & 34 deletions docs/prebuilt-indexes.md

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions pyserini/2cr/msmarco-v2-doc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ conditions:
display: BM25 w/ doc2query-T5 doc (k1=0.9, b=0.4)
display-html: BM25 w/ doc2query-T5 doc (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
display-row: (2a)
- command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-d2q-t5 --topics $topics --output $output --bm25
+ command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc.d2q-t5 --topics $topics --output $output --bm25
topics:
- topic_key: msmarco-v2-doc-dev
eval_key: msmarco-v2-doc-dev
Expand Down Expand Up @@ -173,7 +173,7 @@ conditions:
display: BM25 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
display-html: BM25 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
display-row: (2b)
- command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented-d2q-t5 --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
+ command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.d2q-t5 --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-v2-doc-dev
eval_key: msmarco-v2-doc-dev
Expand Down Expand Up @@ -207,7 +207,7 @@ conditions:
display: BM25+RM3 w/ doc2query-T5 doc (k1=0.9, b=0.4)
display-html: BM25+RM3 w/ doc2query-T5 doc (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
display-row: (2c)
- command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3
+ command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc.d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3
topics:
- topic_key: msmarco-v2-doc-dev
eval_key: msmarco-v2-doc-dev
Expand Down Expand Up @@ -241,7 +241,7 @@ conditions:
display: BM25+RM3 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
display-html: BM25+RM3 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)
display-row: (2d)
- command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
+ command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-v2-doc-dev
eval_key: msmarco-v2-doc-dev
Expand Down Expand Up @@ -275,7 +275,7 @@ conditions:
display: "uniCOIL (noexp): pre-encoded"
display-html: "uniCOIL (noexp): pre-encoded queries"
display-row: (3a)
- command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented-unicoil-noexp-0shot --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
+ command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.unicoil-noexp-0shot --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-v2-doc-dev-unicoil-noexp
eval_key: msmarco-v2-doc-dev
Expand Down Expand Up @@ -309,7 +309,7 @@ conditions:
display: "uniCOIL (w/ doc2query-T5): pre-encoded"
display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries"
display-row: (3b)
- command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented-unicoil-0shot --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
+ command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.unicoil-0shot --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-v2-doc-dev-unicoil
eval_key: msmarco-v2-doc-dev
Expand Down Expand Up @@ -342,7 +342,7 @@ conditions:
- name: unicoil-noexp-otf
display: "uniCOIL (noexp): query inference with PyTorch"
display-html: "uniCOIL (noexp): query inference with PyTorch"
- command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented-unicoil-noexp-0shot --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
+ command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.unicoil-noexp-0shot --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-v2-doc-dev
eval_key: msmarco-v2-doc-dev
Expand Down Expand Up @@ -375,7 +375,7 @@ conditions:
- name: unicoil-otf
display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch"
display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch"
- command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented-unicoil-0shot --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
+ command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.unicoil-0shot --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-v2-doc-dev
eval_key: msmarco-v2-doc-dev
Expand Down
Loading

0 comments on commit d4d9368

Please sign in to comment.