From fa3181dc24a5eab55ba84f1b5c015b0633f8d4ee Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Sat, 4 Mar 2023 08:50:10 -0500 Subject: [PATCH] Rename BEIR Lucene prebuilt tf indexes (#1455) Change schema from beir-v1.0.0-dataset-model to beir-v1.0.0-dataset.model (i.e., dash to dot) for "flat" and "multifield". --- pyserini/prebuilt_index_info.py | 696 ++++++++++++------------ pyserini/resources/beir.yaml | 8 +- scripts/beir/gather_beir_index_stats.py | 76 +-- scripts/repro_matrix/run_all_beir.py | 12 +- 4 files changed, 366 insertions(+), 426 deletions(-) diff --git a/pyserini/prebuilt_index_info.py b/pyserini/prebuilt_index_info.py index 9c9d5bdcb..5745d3c2b 100644 --- a/pyserini/prebuilt_index_info.py +++ b/pyserini/prebuilt_index_info.py @@ -1072,407 +1072,407 @@ }, # BEIR (v1.0.0) flat indexes - "beir-v1.0.0-trec-covid-flat": { + "beir-v1.0.0-trec-covid.flat": { "description": "Lucene flat index of BEIR (v1.0.0): TREC-COVID", - "filename": "lucene-index.beir-v1.0.0-trec-covid-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-trec-covid.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-trec-covid.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-covid-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-covid.flat.20221116.505594.tar.gz" ], - "md5": "9ae06c30a7c352f18a5a8e75b88b9106", - "size compressed (bytes)": 226268661, + "md5": "57b812594b11d064a23123137ae7dade", + "size compressed (bytes)": 226268665, "total_terms": 20822821, "documents": 171331, "unique_terms": 202648, "downloaded": False }, - "beir-v1.0.0-bioasq-flat": { + "beir-v1.0.0-bioasq.flat": { "description": "Lucene flat index of BEIR (v1.0.0): BioASQ", - "filename": "lucene-index.beir-v1.0.0-bioasq-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-bioasq.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-bioasq.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-bioasq-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-bioasq.flat.20221116.505594.tar.gz" ], - "md5": "d9b098a7e127a79f390285290a7c0ba8", - "size compressed (bytes)": 24821933182, + "md5": "cf8d4804b06bb8678d30b1375b46a0b3", + "size compressed (bytes)": 24821933356, "total_terms": 2257541758, "documents": 14914603, "unique_terms": 4960004, "downloaded": False }, - "beir-v1.0.0-nfcorpus-flat": { + "beir-v1.0.0-nfcorpus.flat": { "description": "Lucene flat index of BEIR (v1.0.0): NFCorpus", - "filename": "lucene-index.beir-v1.0.0-nfcorpus-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-nfcorpus.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-nfcorpus.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nfcorpus-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nfcorpus.flat.20221116.505594.tar.gz" ], - "md5": "12ad00c0f58393b9a6c473183b4ff55a", - "size compressed (bytes)": 6509693, + "md5": "34c0b11ad13a4715a78d025902061d37", + "size compressed (bytes)": 6509700, "total_terms": 637485, "documents": 3633, "unique_terms": 22111, "downloaded": False }, - "beir-v1.0.0-nq-flat": { + "beir-v1.0.0-nq.flat": { "description": "Lucene flat index of BEIR (v1.0.0): NQ", - "filename": "lucene-index.beir-v1.0.0-nq-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-nq.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-nq.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nq-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nq.flat.20221116.505594.tar.gz" ], - "md5": "fefe2c93019b2eb899d875a90861b9f4", - "size compressed (bytes)": 1645453710, + "md5": "a2c5db4dd3780fff3c7c6bfea1dd08e8", + "size compressed (bytes)": 1645453748, "total_terms": 151249294, "documents": 2681468, "unique_terms": 997027, "downloaded": False }, - "beir-v1.0.0-hotpotqa-flat": { + "beir-v1.0.0-hotpotqa.flat": { "description": "Lucene flat index of BEIR (v1.0.0): HotpotQA", - "filename": "lucene-index.beir-v1.0.0-hotpotqa-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-hotpotqa.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-hotpotqa.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-hotpotqa-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-hotpotqa.flat.20221116.505594.tar.gz" ], - "md5": "29723f2d1ea53880720765dd558fb264", - "size compressed (bytes)": 2019085858, + "md5": "3be2875f93537369641287dcdf25add9", + "size compressed (bytes)": 2019081888, "total_terms": 172477066, "documents": 5233329, "unique_terms": 2644892, "downloaded": False }, - "beir-v1.0.0-fiqa-flat": { + "beir-v1.0.0-fiqa.flat": { "description": "Lucene flat index of BEIR (v1.0.0): FiQA-2018", - "filename": "lucene-index.beir-v1.0.0-fiqa-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-fiqa.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-fiqa.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fiqa-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fiqa.flat.20221116.505594.tar.gz" ], - "md5": "7ead89ae57ca09a0f6f39f0c621feba8", - "size compressed (bytes)": 55982529, + "md5": "409b779e8a39813d2fbdfd1ea2f009e9", + "size compressed (bytes)": 55982536, "total_terms": 5288635, "documents": 57600, "unique_terms": 66977, "downloaded": False }, - "beir-v1.0.0-signal1m-flat": { + "beir-v1.0.0-signal1m.flat": { "description": "Lucene flat index of BEIR (v1.0.0): Signal-1M", - "filename": "lucene-index.beir-v1.0.0-signal1m-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-signal1m.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-signal1m.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-signal1m-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-signal1m.flat.20221116.505594.tar.gz" ], - "md5": "a1c952e55f7dd3383c9140c7b446e044", - "size compressed (bytes)": 496596590, + "md5": "d0828b92a3df814bfa4b73bddeb25da7", + "size compressed (bytes)": 496596576, "total_terms": 32240069, "documents": 2866315, "unique_terms": 796647, "downloaded": False }, - "beir-v1.0.0-trec-news-flat": { + "beir-v1.0.0-trec-news.flat": { "description": "Lucene flat index of BEIR (v1.0.0): TREC-NEWS", - "filename": "lucene-index.beir-v1.0.0-trec-news-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-trec-news.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-trec-news.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-news-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-news.flat.20221116.505594.tar.gz" ], - "md5": "599aca58ee57e9593d40953b12a1bd69", - "size compressed (bytes)": 2623577554, + "md5": "98df3de34b4b76a4390520c606817ec4", + "size compressed (bytes)": 2623576957, "total_terms": 275651967, "documents": 594589, "unique_terms": 729872, "downloaded": False }, - "beir-v1.0.0-robust04-flat": { + "beir-v1.0.0-robust04.flat": { "description": "Lucene flat index of BEIR (v1.0.0): Robust04", - "filename": "lucene-index.beir-v1.0.0-robust04-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-robust04.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-robust04.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-robust04-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-robust04.flat.20221116.505594.tar.gz" ], - "md5": "717325e5c282f45d2a8e189e9ef89388", - "size compressed (bytes)": 1728446672, + "md5": "89dfcb7297c12a772d1bfd7917df908d", + "size compressed (bytes)": 1728446730, "total_terms": 174384263, "documents": 528036, "unique_terms": 923466, "downloaded": False }, - "beir-v1.0.0-arguana-flat": { + "beir-v1.0.0-arguana.flat": { "description": "Lucene flat index of BEIR (v1.0.0): ArguAna", - "filename": "lucene-index.beir-v1.0.0-arguana-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-arguana.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-arguana.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-arguana-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-arguana.flat.20221116.505594.tar.gz" ], - "md5": "5989163bb5c7e0a29f8241f3fae95c02", - "size compressed (bytes)": 10563484, + "md5": "d6c005689a9e7e91f3b1a7fbc74063e1", + "size compressed (bytes)": 10563485, "total_terms": 969528, "documents": 8674, "unique_terms": 23895, "downloaded": False }, - "beir-v1.0.0-webis-touche2020-flat": { + "beir-v1.0.0-webis-touche2020.flat": { "description": "Lucene flat index of BEIR (v1.0.0): Webis-Touche2020", - "filename": "lucene-index.beir-v1.0.0-webis-touche2020-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-webis-touche2020.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-webis-touche2020.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-webis-touche2020-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-webis-touche2020.flat.20221116.505594.tar.gz" ], - "md5": "5f54ca552b6075b8dff6d9ae9cd138e6", - "size compressed (bytes)": 750400909, + "md5": "20c6e9f29461eea1a520cd1abead709a", + "size compressed (bytes)": 750400932, "total_terms": 76082209, "documents": 382545, "unique_terms": 525540, "downloaded": False }, - "beir-v1.0.0-cqadupstack-android-flat": { + "beir-v1.0.0-cqadupstack-android.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-android", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-android-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-android.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-android.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-android-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-android.flat.20221116.505594.tar.gz" ], - "md5": "14451ca82955bc7ce3b1df73091d5f0e", - "size compressed (bytes)": 17423304, + "md5": "9f9f35e34f76336bc6e516599cbaf75b", + "size compressed (bytes)": 17423320, "total_terms": 1760762, "documents": 22998, "unique_terms": 41456, "downloaded": False }, - "beir-v1.0.0-cqadupstack-english-flat": { + "beir-v1.0.0-cqadupstack-english.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-english", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-english-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-english.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-english.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-english-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-english.flat.20221116.505594.tar.gz" ], - "md5": "0b66a52f9f67b4ddd163590bb968efee", - "size compressed (bytes)": 24949592, + "md5": "7d887497d32eedd92c314c93feaca28e", + "size compressed (bytes)": 24949578, "total_terms": 2236655, "documents": 40221, "unique_terms": 62517, "downloaded": False }, - "beir-v1.0.0-cqadupstack-gaming-flat": { + "beir-v1.0.0-cqadupstack-gaming.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-gaming", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-gaming-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gaming.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-gaming.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming.flat.20221116.505594.tar.gz" ], - "md5": "a369444e1296c54beb6d7eae19f30f3b", - "size compressed (bytes)": 29156968, + "md5": "140e16ee86a69c8fd4d16a83a6d51591", + "size compressed (bytes)": 29156970, "total_terms": 2827717, "documents": 45301, "unique_terms": 60070, "downloaded": False }, - "beir-v1.0.0-cqadupstack-gis-flat": { + "beir-v1.0.0-cqadupstack-gis.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-gis", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-gis-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gis.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-gis.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gis.flat.20221116.505594.tar.gz" ], - "md5": "85ad4715eb06b2a2079385c74504da15", - "size compressed (bytes)": 43396160, + "md5": "4bd93695f28af0a11172f387ef41fee6", + "size compressed (bytes)": 43396154, "total_terms": 4048584, "documents": 37637, "unique_terms": 184133, "downloaded": False }, - "beir-v1.0.0-cqadupstack-mathematica-flat": { + "beir-v1.0.0-cqadupstack-mathematica.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-mathematica", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-mathematica-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-mathematica.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-mathematica.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica.flat.20221116.505594.tar.gz" ], - "md5": "5b8bd6d8f8f37d449856e0bc14eb16fc", - "size compressed (bytes)": 21589911, + "md5": "5b5b7ab3d0437428e29a5a1431de1ca5", + "size compressed (bytes)": 21589909, "total_terms": 2332642, "documents": 16705, "unique_terms": 111611, "downloaded": False }, - "beir-v1.0.0-cqadupstack-physics-flat": { + "beir-v1.0.0-cqadupstack-physics.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-physics", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-physics-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-physics.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-physics.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-physics.flat.20221116.505594.tar.gz" ], - "md5": "e4a1dda9c0940277d8bbc2d5e9278eb7", - "size compressed (bytes)": 37956223, + "md5": "6864144bca1bb169a452321e14ef12e0", + "size compressed (bytes)": 37956215, "total_terms": 3785483, "documents": 38316, "unique_terms": 55950, "downloaded": False }, - "beir-v1.0.0-cqadupstack-programmers-flat": { + "beir-v1.0.0-cqadupstack-programmers.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-programmers", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-programmers-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-programmers.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-programmers.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers.flat.20221116.505594.tar.gz" ], - "md5": "f8b124fb052d2f14e908f20c47792cc3", - "size compressed (bytes)": 40297081, + "md5": "7b7d2bbf7cc5d53924d09c3b781dba8a", + "size compressed (bytes)": 40297069, "total_terms": 3905694, "documents": 32176, "unique_terms": 74195, "downloaded": False }, - "beir-v1.0.0-cqadupstack-stats-flat": { + "beir-v1.0.0-cqadupstack-stats.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-stats", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-stats-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-stats.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-stats.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-stats.flat.20221116.505594.tar.gz" ], - "md5": "a622bd3c4d6b413c50c1855b6fa85e64", - "size compressed (bytes)": 52212616, + "md5": "0b09b7bee2b60df0ff73710a93a79218", + "size compressed (bytes)": 52212599, "total_terms": 5356042, "documents": 42269, "unique_terms": 183358, "downloaded": False }, - "beir-v1.0.0-cqadupstack-tex-flat": { + "beir-v1.0.0-cqadupstack-tex.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-tex", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-tex-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-tex.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-tex.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-tex.flat.20221116.505594.tar.gz" ], - "md5": "939c69cdf1ab563729697bc936f45e6a", - "size compressed (bytes)": 91818976, + "md5": "48a2541bd7d1adec06f053486655e815", + "size compressed (bytes)": 91819025, "total_terms": 9556423, "documents": 68184, "unique_terms": 288088, "downloaded": False }, - "beir-v1.0.0-cqadupstack-unix-flat": { + "beir-v1.0.0-cqadupstack-unix.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-unix", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-unix-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-unix.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-unix.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-unix.flat.20221116.505594.tar.gz" ], - "md5": "34913c5e811c0399c6ee3c03c68861d1", - "size compressed (bytes)": 53802802, + "md5": "a6cc0a867f6210ad44755c0a36fd682a", + "size compressed (bytes)": 53802808, "total_terms": 5767374, "documents": 47382, "unique_terms": 206323, "downloaded": False }, - "beir-v1.0.0-cqadupstack-webmasters-flat": { + "beir-v1.0.0-cqadupstack-webmasters.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-webmasters", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-webmasters-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-webmasters.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-webmasters.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters.flat.20221116.505594.tar.gz" ], - "md5": "41f91e1dc4c4d7ee4da89ef2e7210dfb", - "size compressed (bytes)": 15174810, + "md5": "a04f65d575b4233a151c4960b82815b9", + "size compressed (bytes)": 15174811, "total_terms": 1482585, "documents": 17405, "unique_terms": 40547, "downloaded": False }, - "beir-v1.0.0-cqadupstack-wordpress-flat": { + "beir-v1.0.0-cqadupstack-wordpress.flat": { "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-wordpress", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-wordpress-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-wordpress.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-wordpress.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress.flat.20221116.505594.tar.gz" ], - "md5": "c10a130177b7ca65ae47640cffc4a5ec", - "size compressed (bytes)": 54807600, + "md5": "4ab079b9f7d0463955ce073b5d53e64d", + "size compressed (bytes)": 54807597, "total_terms": 5463472, "documents": 48605, "unique_terms": 125727, "downloaded": False }, - "beir-v1.0.0-quora-flat": { + "beir-v1.0.0-quora.flat": { "description": "Lucene flat index of BEIR (v1.0.0): Quora", - "filename": "lucene-index.beir-v1.0.0-quora-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-quora.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-quora.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-quora-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-quora.flat.20221116.505594.tar.gz" ], - "md5": "20cc429516848d0f314ac593c94bf226", - "size compressed (bytes)": 52698681, + "md5": "53fa2bd0667d23a50f95adaf169b87a1", + "size compressed (bytes)": 52698691, "total_terms": 4390852, "documents": 522931, "unique_terms": 69597, "downloaded": False }, - "beir-v1.0.0-dbpedia-entity-flat": { + "beir-v1.0.0-dbpedia-entity.flat": { "description": "Lucene flat index of BEIR (v1.0.0): DBPedia", - "filename": "lucene-index.beir-v1.0.0-dbpedia-entity-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-dbpedia-entity.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-dbpedia-entity.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-dbpedia-entity-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-dbpedia-entity.flat.20221116.505594.tar.gz" ], - "md5": "8aabf2456b0cccc1a1d013059125b6ef", - "size compressed (bytes)": 2085472968, + "md5": "6bc15a920e262d12ec3842401755e934", + "size compressed (bytes)": 2085473498, "total_terms": 164794982, "documents": 4635922, "unique_terms": 3351459, "downloaded": False }, - "beir-v1.0.0-scidocs-flat": { + "beir-v1.0.0-scidocs.flat": { "description": "Lucene flat index of BEIR (v1.0.0): SCIDOCS", - "filename": "lucene-index.beir-v1.0.0-scidocs-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-scidocs.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-scidocs.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scidocs-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scidocs.flat.20221116.505594.tar.gz" ], - "md5": "a25663e4bce0d588814efb3091a3a53f", - "size compressed (bytes)": 186572828, + "md5": "f1fba96a71a62bc567ecbd167de3794b", + "size compressed (bytes)": 186572809, "total_terms": 3266767, "documents": 25657, "unique_terms": 63604, "downloaded": False }, - "beir-v1.0.0-fever-flat": { + "beir-v1.0.0-fever.flat": { "description": "Lucene flat index of BEIR (v1.0.0): FEVER", - "filename": "lucene-index.beir-v1.0.0-fever-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-fever.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-fever.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fever-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fever.flat.20221116.505594.tar.gz" ], - "md5": "ce620e8c11b44528a498360912d5fe46", - "size compressed (bytes)": 3880155518, + "md5": "1b06f43ea36e2ed450d1b1d90099ae67", + "size compressed (bytes)": 3880155553, "total_terms": 325179165, "documents": 5416568, "unique_terms": 3293639, "downloaded": False }, - "beir-v1.0.0-climate-fever-flat": { + "beir-v1.0.0-climate-fever.flat": { "description": "Lucene flat index of BEIR (v1.0.0): Climate-FEVER", - "filename": "lucene-index.beir-v1.0.0-climate-fever-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-climate-fever.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-climate-fever.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-climate-fever-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-climate-fever.flat.20221116.505594.tar.gz" ], - "md5": "50adaaac662f7fadaf5494808e50f9d6", - "size compressed (bytes)": 3880208210, + "md5": "68811e2614b3bac9e1b879c883fc722e", + "size compressed (bytes)": 3880208200, "total_terms": 325185072, "documents": 5416593, "unique_terms": 3293621, "downloaded": False }, - "beir-v1.0.0-scifact-flat": { + "beir-v1.0.0-scifact.flat": { "description": "Lucene flat index of BEIR (v1.0.0): SciFact", - "filename": "lucene-index.beir-v1.0.0-scifact-flat.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-scifact.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-scifact.flat.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scifact-flat.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scifact.flat.20221116.505594.tar.gz" ], - "md5": "5fee55eb99327b38a5ca623e68e25edf", - "size compressed (bytes)": 8851172, + "md5": "6f6e55f1cf80c362f86bee65529b71de", + "size compressed (bytes)": 8851173, "total_terms": 838128, "documents": 5183, "unique_terms": 28865, @@ -1480,407 +1480,407 @@ }, # BEIR (v1.0.0) multifield indexes - "beir-v1.0.0-trec-covid-multifield": { + "beir-v1.0.0-trec-covid.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): TREC-COVID", - "filename": "lucene-index.beir-v1.0.0-trec-covid-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-trec-covid.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-trec-covid.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-covid-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-covid.multifield.20221116.505594.tar.gz" ], - "md5": "ec260e10c9897e736820c22476c3574f", - "size compressed (bytes)": 222831948, + "md5": "7501a330a0c9246e6350413c3f6ced7c", + "size compressed (bytes)": 222831983, "total_terms": 19060122, "documents": 129192, "unique_terms": 193851, "downloaded": False }, - "beir-v1.0.0-bioasq-multifield": { + "beir-v1.0.0-bioasq.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): BioASQ", - "filename": "lucene-index.beir-v1.0.0-bioasq-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-bioasq.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-bioasq.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-bioasq-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-bioasq.multifield.20221116.505594.tar.gz" ], - "md5": "d754a05c01fd1c3e34d883e4fa912d63", - "size compressed (bytes)": 25346354708, + "md5": "cc01ab450cac0b8865bd1e70e2a58596", + "size compressed (bytes)": 25346354679, "total_terms": 2099554307, "documents": 14914602, "unique_terms": 4889053, "downloaded": False }, - "beir-v1.0.0-nfcorpus-multifield": { + "beir-v1.0.0-nfcorpus.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): NFCorpus", - "filename": "lucene-index.beir-v1.0.0-nfcorpus-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-nfcorpus.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-nfcorpus.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nfcorpus-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nfcorpus.multifield.20221116.505594.tar.gz" ], - "md5": "6e5fc4f35cc1fa8dc98b86b2385c4a0b", - "size compressed (bytes)": 6645574, + "md5": "904e53b80fe04b3844b97847bc77a772", + "size compressed (bytes)": 6645576, "total_terms": 601950, "documents": 3633, "unique_terms": 21819, "downloaded": False }, - "beir-v1.0.0-nq-multifield": { + "beir-v1.0.0-nq.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): NQ", - "filename": "lucene-index.beir-v1.0.0-nq-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-nq.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-nq.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nq-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nq.multifield.20221116.505594.tar.gz" ], - "md5": "db80e0851c81c082688add60b57bd9f1", - "size compressed (bytes)": 1642708222, + "md5": "693ca315de9fbbbf7f664be313a03847", + "size compressed (bytes)": 1642708204, "total_terms": 144050891, "documents": 2680961, "unique_terms": 996653, "downloaded": False }, - "beir-v1.0.0-hotpotqa-multifield": { + "beir-v1.0.0-hotpotqa.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): HotpotQA", - "filename": "lucene-index.beir-v1.0.0-hotpotqa-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-hotpotqa.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-hotpotqa.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-hotpotqa-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-hotpotqa.multifield.20221116.505594.tar.gz" ], - "md5": "95c657338468ddacfae5874cbd09d7eb", - "size compressed (bytes)": 2083441352, + "md5": "ef8c2f40097e652eec99e6bf25e151cd", + "size compressed (bytes)": 2083441492, "total_terms": 158180692, "documents": 5233235, "unique_terms": 2627639, "downloaded": False }, - "beir-v1.0.0-fiqa-multifield": { + "beir-v1.0.0-fiqa.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): FiQA-2018", - "filename": "lucene-index.beir-v1.0.0-fiqa-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-fiqa.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-fiqa.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fiqa-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fiqa.multifield.20221116.505594.tar.gz" ], - "md5": "ba7f197331d1a84582c5485536b0d2aa", - "size compressed (bytes)": 55984431, + "md5": "073f3f19a94689e5fac511af49316fe1", + "size compressed (bytes)": 55984419, "total_terms": 5288635, "documents": 57600, "unique_terms": 66977, "downloaded": False }, - "beir-v1.0.0-signal1m-multifield": { + "beir-v1.0.0-signal1m.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): Signal-1M", - "filename": "lucene-index.beir-v1.0.0-signal1m-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-signal1m.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-signal1m.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-signal1m-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-signal1m.multifield.20221116.505594.tar.gz" ], - "md5": "6f5c151ade6efc6567d2b98016a97041", - "size compressed (bytes)": 496603097, + "md5": "4482ae02f18e8336c0a95ea33b5b6ede", + "size compressed (bytes)": 496603092, "total_terms": 32240069, "documents": 2866315, "unique_terms": 796647, "downloaded": False }, - "beir-v1.0.0-trec-news-multifield": { + "beir-v1.0.0-trec-news.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): TREC-NEWS", - "filename": "lucene-index.beir-v1.0.0-trec-news-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-trec-news.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-trec-news.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-news-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-news.multifield.20221116.505594.tar.gz" ], - "md5": "b9e935edb604edb04524970c40ca107a", - "size compressed (bytes)": 2633899051, + "md5": "3151122da3cf081a0c8894af7b75be43", + "size compressed (bytes)": 2633899363, "total_terms": 270886723, "documents": 578605, "unique_terms": 727856, "downloaded": False }, - "beir-v1.0.0-robust04-multifield": { + "beir-v1.0.0-robust04.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): Robust04", - "filename": "lucene-index.beir-v1.0.0-robust04-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-robust04.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-robust04.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-robust04-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-robust04.multifield.20221116.505594.tar.gz" ], - "md5": "bb2939df4ca76dec5d6e5f366d08fe00", - "size compressed (bytes)": 1728446264, + "md5": "fdf741a75efe089d0451de5720b52c3a", + "size compressed (bytes)": 1728446303, "total_terms": 174384263, "documents": 528036, "unique_terms": 923466, "downloaded": False }, - "beir-v1.0.0-arguana-multifield": { + "beir-v1.0.0-arguana.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): ArguAna", - "filename": "lucene-index.beir-v1.0.0-arguana-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-arguana.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-arguana.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-arguana-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-arguana.multifield.20221116.505594.tar.gz" ], - "md5": "3b548e373d85835f478199cdcebd7b0b", - "size compressed (bytes)": 10524119, + "md5": "a8201952860d31c56ea8a54c31e88b51", + "size compressed (bytes)": 10524118, "total_terms": 944123, "documents": 8674, "unique_terms": 23867, "downloaded": False }, - "beir-v1.0.0-webis-touche2020-multifield": { + "beir-v1.0.0-webis-touche2020.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): Webis-Touche2020", - "filename": "lucene-index.beir-v1.0.0-webis-touche2020-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-webis-touche2020.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-webis-touche2020.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-webis-touche2020-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-webis-touche2020.multifield.20221116.505594.tar.gz" ], - "md5": "2a1549bc27a34a140914e42b709f1a19", - "size compressed (bytes)": 750724431, + "md5": "e160ea813990cff4dbdb9f50d509f8ea", + "size compressed (bytes)": 750724439, "total_terms": 74066724, "documents": 382545, "unique_terms": 524665, "downloaded": False }, - "beir-v1.0.0-cqadupstack-android-multifield": { + "beir-v1.0.0-cqadupstack-android.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-android", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-android-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-android.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-android.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-android-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-android.multifield.20221116.505594.tar.gz" ], - "md5": "858ed389f7a924cd90a8c9c8882954ed", - "size compressed (bytes)": 17887744, + "md5": "de85f92a018d83a7ea496d9ef955b8c5", + "size compressed (bytes)": 17887736, "total_terms": 1591285, "documents": 22998, "unique_terms": 40824, "downloaded": False }, - "beir-v1.0.0-cqadupstack-english-multifield": { + "beir-v1.0.0-cqadupstack-english.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-english", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-english-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-english.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-english.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-english-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-english.multifield.20221116.505594.tar.gz" ], - "md5": "40e87436b956a78ba312751d90116c27", - "size compressed (bytes)": 25558892, + "md5": "71c5d3db04586283772f6069668f5bfa", + "size compressed (bytes)": 25558901, "total_terms": 2006983, "documents": 40221, "unique_terms": 61530, "downloaded": False }, - "beir-v1.0.0-cqadupstack-gaming-multifield": { + "beir-v1.0.0-cqadupstack-gaming.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-gaming", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-gaming-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gaming.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-gaming.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming.multifield.20221116.505594.tar.gz" ], - "md5": "eed5e2778ed6e22fca99eec8dae8f77f", - "size compressed (bytes)": 29992461, + "md5": "ff7c628b568f916c3bc3f7bf2af831eb", + "size compressed (bytes)": 29992453, "total_terms": 2510477, "documents": 45300, "unique_terms": 59113, "downloaded": False }, - "beir-v1.0.0-cqadupstack-gis-multifield": { + "beir-v1.0.0-cqadupstack-gis.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-gis", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-gis-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gis.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-gis.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gis.multifield.20221116.505594.tar.gz" ], - "md5": "add51b3adeb981dfb7749b01edc9b4e5", - "size compressed (bytes)": 44188649, + "md5": "4083830da4922d1294b3fb38873ba5a2", + "size compressed (bytes)": 44188661, "total_terms": 3789161, "documents": 37637, "unique_terms": 183298, "downloaded": False }, - "beir-v1.0.0-cqadupstack-mathematica-multifield": { + "beir-v1.0.0-cqadupstack-mathematica.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-mathematica", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-mathematica-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-mathematica.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-mathematica.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica.multifield.20221116.505594.tar.gz" ], - "md5": "90e592f86ab093bbc1d77b8de65dc9ba", - "size compressed (bytes)": 21911907, + "md5": "baa9414c385db88eaafffa95d5ec7d48", + "size compressed (bytes)": 21911919, "total_terms": 2234369, "documents": 16705, "unique_terms": 111306, "downloaded": False }, - "beir-v1.0.0-cqadupstack-physics-multifield": { + "beir-v1.0.0-cqadupstack-physics.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-physics", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-physics-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-physics.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-physics.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-physics.multifield.20221116.505594.tar.gz" ], - "md5": "a8abde1bced888a706d0a6311c3e13ad", - "size compressed (bytes)": 38736490, + "md5": "342b105462067b87e78730921dd7288d", + "size compressed (bytes)": 38736492, "total_terms": 3542078, "documents": 38316, "unique_terms": 55229, "downloaded": False }, - "beir-v1.0.0-cqadupstack-programmers-multifield": { + "beir-v1.0.0-cqadupstack-programmers.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-programmers", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-programmers-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-programmers.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-programmers.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers.multifield.20221116.505594.tar.gz" ], - "md5": "2f99a6e167475ecdb2265846381e7171", - "size compressed (bytes)": 40982054, + "md5": "2e95b82caf156d0f0b109c62e0011eab", + "size compressed (bytes)": 40982052, "total_terms": 3682227, "documents": 32176, "unique_terms": 73765, "downloaded": False }, - "beir-v1.0.0-cqadupstack-stats-multifield": { + "beir-v1.0.0-cqadupstack-stats.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-stats", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-stats-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-stats.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-stats.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-stats.multifield.20221116.505594.tar.gz" ], - "md5": "4b4a103decd723566f1935134434c9a9", - "size compressed (bytes)": 53094503, + "md5": "87c53df624baed7921672286beb94f9c", + "size compressed (bytes)": 53094508, "total_terms": 5073873, "documents": 42269, "unique_terms": 182933, "downloaded": False }, - "beir-v1.0.0-cqadupstack-tex-multifield": { + "beir-v1.0.0-cqadupstack-tex.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-tex", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-tex-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-tex.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-tex.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-tex.multifield.20221116.505594.tar.gz" ], - "md5": "93e0cb6018cc7fcf042a2119e3b73d5d", - "size compressed (bytes)": 93081213, + "md5": "86407171e4ff305ecb173afdd49eef7c", + "size compressed (bytes)": 93081190, "total_terms": 9155405, "documents": 68184, "unique_terms": 287393, "downloaded": False }, - "beir-v1.0.0-cqadupstack-unix-multifield": { + "beir-v1.0.0-cqadupstack-unix.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-unix", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-unix-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-unix.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-unix.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-unix.multifield.20221116.505594.tar.gz" ], - "md5": "c7d4cebf819aa19c6a0da6df7aed93fe", - "size compressed (bytes)": 54758840, + "md5": "acb0cc50cccb9e8dfca0ed599df0cfaa", + "size compressed (bytes)": 54758816, "total_terms": 5449726, "documents": 47382, "unique_terms": 205471, "downloaded": False }, - "beir-v1.0.0-cqadupstack-webmasters-multifield": { + "beir-v1.0.0-cqadupstack-webmasters.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-webmasters", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-webmasters-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-webmasters.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-webmasters.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters.multifield.20221116.505594.tar.gz" ], - "md5": "1ff00797eb6fb43bef05b054281ca016", - "size compressed (bytes)": 15524401, + "md5": "7701f016b6fc643c30630742f7712bbd", + "size compressed (bytes)": 15524400, "total_terms": 1358292, "documents": 17405, "unique_terms": 40073, "downloaded": False }, - "beir-v1.0.0-cqadupstack-wordpress-multifield": { + "beir-v1.0.0-cqadupstack-wordpress.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-wordpress", - "filename": "lucene-index.beir-v1.0.0-cqadupstack-wordpress-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz" ], - "md5": "5e571c3ca3d28cb53e2b280fd05dfaec", - "size compressed (bytes)": 55738645, + "md5": "d791cf8449a18ebe698d404f526375ee", + "size compressed (bytes)": 55738636, "total_terms": 5151575, "documents": 48605, "unique_terms": 125110, "downloaded": False }, - "beir-v1.0.0-quora-multifield": { + "beir-v1.0.0-quora.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): Quora", - "filename": "lucene-index.beir-v1.0.0-quora-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-quora.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-quora.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-quora-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-quora.multifield.20221116.505594.tar.gz" ], - "md5": "1e033c875d3fdda2f096928003d6f977", - "size compressed (bytes)": 52703125, + "md5": "2d92b46f715df08ce146167ed1b12079", + "size compressed (bytes)": 52703122, "total_terms": 4390852, "documents": 522931, "unique_terms": 69597, "downloaded": False }, - "beir-v1.0.0-dbpedia-entity-multifield": { + "beir-v1.0.0-dbpedia-entity.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): DBPedia", - "filename": "lucene-index.beir-v1.0.0-dbpedia-entity-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-dbpedia-entity.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-dbpedia-entity.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-dbpedia-entity-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-dbpedia-entity.multifield.20221116.505594.tar.gz" ], - "md5": "107ac2e15140ae889315920148e73ce6", - "size compressed (bytes)": 2144410442, + "md5": "b3f6b64bfd7903ff25ca2fa01a288392", + "size compressed (bytes)": 2144410289, "total_terms": 152205479, "documents": 4635922, "unique_terms": 3338476, "downloaded": False }, - "beir-v1.0.0-scidocs-multifield": { + "beir-v1.0.0-scidocs.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): SCIDOCS", - "filename": "lucene-index.beir-v1.0.0-scidocs-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-scidocs.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-scidocs.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scidocs-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scidocs.multifield.20221116.505594.tar.gz" ], - "md5": "e97467dff3cf15b20471421c4ef7e106", - "size compressed (bytes)": 175887258, + "md5": "04c1e9aad3751dc552027d8bc3491323", + "size compressed (bytes)": 175887267, "total_terms": 3065828, "documents": 25313, "unique_terms": 62562, "downloaded": False }, - "beir-v1.0.0-fever-multifield": { + "beir-v1.0.0-fever.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): FEVER", - "filename": "lucene-index.beir-v1.0.0-fever-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-fever.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-fever.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fever-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fever.multifield.20221116.505594.tar.gz" ], - "md5": "7a10971ad193dddaf604240e54965305", - "size compressed (bytes)": 3947213520, + "md5": "28ea09308760235ea2ec72d6f9b2f432", + "size compressed (bytes)": 3947213444, "total_terms": 310655699, "documents": 5396138, "unique_terms": 3275057, "downloaded": False }, - "beir-v1.0.0-climate-fever-multifield": { + "beir-v1.0.0-climate-fever.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): Climate-FEVER", - "filename": "lucene-index.beir-v1.0.0-climate-fever-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-climate-fever.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-climate-fever.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-climate-fever-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-climate-fever.multifield.20221116.505594.tar.gz" ], - "md5": "a940bc3477e67f3231e891863ebfe393", - "size compressed (bytes)": 3947277760, + "md5": "827f2759cdfc45c47bbb67835cfcb1f2", + "size compressed (bytes)": 3947277939, "total_terms": 310661477, "documents": 5396163, "unique_terms": 3275068, "downloaded": False }, - "beir-v1.0.0-scifact-multifield": { + "beir-v1.0.0-scifact.multifield": { "description": "Lucene multifield index of BEIR (v1.0.0): SciFact", - "filename": "lucene-index.beir-v1.0.0-scifact-multifield.20221116.505594.tar.gz", - "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "filename": "lucene-index.beir-v1.0.0-scifact.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-scifact.multifield.20221116.505594.README.md", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scifact-multifield.20221116.505594.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scifact.multifield.20221116.505594.tar.gz" ], - "md5": "1a918170d342b0e208b28c9f02d626b3", - "size compressed (bytes)": 9078031, + "md5": "efbafbc3e4909a026fe80bf8b1444b08", + "size compressed (bytes)": 9078032, "total_terms": 784591, "documents": 5183, "unique_terms": 28581, diff --git a/pyserini/resources/beir.yaml b/pyserini/resources/beir.yaml index 57baf978d..3247305d9 100644 --- a/pyserini/resources/beir.yaml +++ b/pyserini/resources/beir.yaml @@ -1,6 +1,6 @@ conditions: - - name: flat - command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}-flat --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query + - name: bm25-flat + command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}.flat --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query datasets: - dataset: trec-covid scores: @@ -147,8 +147,8 @@ conditions: - nDCG@10: 0.6789 R@100: 0.9253 R@1000: 0.9767 - - name: multifield - command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}-multifield --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query --fields contents=1.0 title=1.0 + - name: bm25-multifield + command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}.multifield --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query --fields contents=1.0 title=1.0 datasets: - dataset: trec-covid scores: diff --git a/scripts/beir/gather_beir_index_stats.py b/scripts/beir/gather_beir_index_stats.py index dcd0eed23..bb7f62413 100644 --- a/scripts/beir/gather_beir_index_stats.py +++ b/scripts/beir/gather_beir_index_stats.py @@ -53,79 +53,19 @@ commitid = '505594' date = '20221116' +type = 'flat' -# We want to generate entries that look like this: -# -# "msmarco-v2-doc-per-passage-unicoil-noexp-0shot": { -# "description": "Lucene impact index of the MS MARCO V2 document corpus per passage encoded by uniCOIL (zero-shot, no expansions) (deprecated; msmarco-v2-doc-segmented-unicoil-noexp-0shot).", -# "filename": "lucene-index.msmarco-v2-doc-per-passage.unicoil-noexp-0shot.20211012.58d286.tar.gz", -# "readme": "lucene-index.msmarco-v2-doc-per-passage.unicoil-noexp-0shot.20211012.58d286.readme.txt", -# "urls": [ -# "https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-indexes/lucene-index.msmarco-v2-doc-per-passage.unicoil-noexp-0shot.20211012.58d286.tar.gz", -# "https://vault.cs.uwaterloo.ca/s/BSrJmAFJywsRYXo/download" -# ], -# "md5": "1980db886d969c3393e4da20190eaa8f", -# "size compressed (bytes)": 29229949764, -# "total_terms": 805830282591, -# "documents": 124131404, -# "unique_terms": 29172, -# "downloaded": False -# } - -# Stats for "flat" indexes for key in beir_keys: - index_reader = IndexReader(f'indexes/lucene-index.beir-v1.0.0-{key}-flat.{date}.{commitid}') + index_reader = IndexReader(f'indexes/lucene-index.beir-v1.0.0-{key}.{type}.{date}.{commitid}') stats = index_reader.stats() - md5 = compute_md5(f'indexes/lucene-index.beir-v1.0.0-{key}-flat.{date}.{commitid}.tar.gz') - size = os.path.getsize(f'indexes/lucene-index.beir-v1.0.0-{key}-flat.{date}.{commitid}.tar.gz') - print(f' "beir-v1.0.0-{key}-flat": {{') + md5 = compute_md5(f'indexes/lucene-index.beir-v1.0.0-{key}.{type}.{date}.{commitid}.tar.gz') + size = os.path.getsize(f'indexes/lucene-index.beir-v1.0.0-{key}.{type}.{date}.{commitid}.tar.gz') + print(f' "beir-v1.0.0-{key}.{type}": {{') print(f' "description": "Lucene flat index of BEIR (v1.0.0): {beir_keys[key]}",') - print(f' "filename": "lucene-index.beir-v1.0.0-{key}-flat.{date}.{commitid}.tar.gz",') - print(f' "readme": "lucene-index.beir-v1.0.0-{key}-flat.{date}.{commitid}.README.md",') - print(f' "urls": [') - print(f' "https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-indexes/lucene-index.beir-v1.0.0-{key}-flat.{date}.{commitid}.tar.gz"') - print(f' ],') - print(f' "md5": "{md5}",') - print(f' "size compressed (bytes)": {size},') - print(f' "total_terms": {stats["total_terms"]},') - print(f' "documents": {stats["documents"]},') - print(f' "unique_terms": {stats["unique_terms"]},') - print(f' "downloaded": False') - print(f' }},') - -# Stats for "multifield" indexes -for key in beir_keys: - index_reader = IndexReader(f'indexes/lucene-index.beir-v1.0.0-{key}-multifield.{date}.{commitid}') - stats = index_reader.stats() - md5 = compute_md5(f'indexes/lucene-index.beir-v1.0.0-{key}-multifield.{date}.{commitid}.tar.gz') - size = os.path.getsize(f'indexes/lucene-index.beir-v1.0.0-{key}-multifield.{date}.{commitid}.tar.gz') - print(f' "beir-v1.0.0-{key}-multifield": {{') - print(f' "description": "Lucene multifield index of BEIR (v1.0.0): {beir_keys[key]}",') - print(f' "filename": "lucene-index.beir-v1.0.0-{key}-multifield.{date}.{commitid}.tar.gz",') - print(f' "readme": "lucene-index.beir-v1.0.0-{key}-multifield.{date}.{commitid}.README.md",') - print(f' "urls": [') - print(f' "https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-indexes/lucene-index.beir-v1.0.0-{key}-multifield.{date}.{commitid}.tar.gz"') - print(f' ],') - print(f' "md5": "{md5}",') - print(f' "size compressed (bytes)": {size},') - print(f' "total_terms": {stats["total_terms"]},') - print(f' "documents": {stats["documents"]},') - print(f' "unique_terms": {stats["unique_terms"]},') - print(f' "downloaded": False') - print(f' }},') - -# Stats for SPLADE-distill CoCodenser-medium indexes -for key in beir_keys: - index_reader = IndexReader(f'indexes/lucene-index.beir-v1.0.0-{key}-splade_distil_cocodenser_medium.{date}.{commitid}') - stats = index_reader.stats() - md5 = compute_md5(f'indexes/lucene-index.beir-v1.0.0-{key}-splade_distil_cocodenser_medium.{date}.{commitid}.tar.gz') - size = os.path.getsize(f'indexes/lucene-index.beir-v1.0.0-{key}-splade_distil_cocodenser_medium.{date}.{commitid}.tar.gz') - print(f' "beir-v1.0.0-{key}-splade_distil_cocodenser_medium": {{') - print(f' "description": "Lucene impact index of BEIR (v1.0.0): {beir_keys[key]} encoded by SPLADE-distill CoCodenser-medium",') - print(f' "filename": "lucene-index.beir-v1.0.0-{key}-splade_distil_cocodenser_medium.{date}.{commitid}.tar.gz",') - print(f' "readme": "lucene-index.beir-v1.0.0-{key}-splade_distil_cocodenser_medium.{date}.{commitid}.README.md",') + print(f' "filename": "lucene-index.beir-v1.0.0-{key}.{type}.{date}.{commitid}.tar.gz",') + print(f' "readme": "lucene-index.beir-v1.0.0-{key}.{type}.{date}.{commitid}.README.md",') print(f' "urls": [') - print(f' "https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-indexes/lucene-index.beir-v1.0.0-{key}-splade_distil_cocodenser_medium.{date}.{commitid}.tar.gz"') + print(f' "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-{key}.{type}.{date}.{commitid}.tar.gz"') print(f' ],') print(f' "md5": "{md5}",') print(f' "size compressed (bytes)": {size},') diff --git a/scripts/repro_matrix/run_all_beir.py b/scripts/repro_matrix/run_all_beir.py index f620f52e8..146ee5d9e 100644 --- a/scripts/repro_matrix/run_all_beir.py +++ b/scripts/repro_matrix/run_all_beir.py @@ -48,7 +48,7 @@ print(f' - dataset: {dataset}') - runfile = f'runs/run.beir-{name}.{dataset}.txt' + runfile = f'runs/run.beir.{name}.{dataset}.txt' cmd = Template(cmd_template).substitute(dataset=dataset, output=runfile) if not os.path.exists(runfile): @@ -70,7 +70,7 @@ print('') - models = ['flat', 'multifield', 'splade-distil-cocodenser-medium'] + models = ['bm25-flat', 'bm25-multifield', 'splade-distil-cocodenser-medium'] metrics = ['nDCG@10', 'R@100', 'R@1000'] top_level_sums = defaultdict(lambda: defaultdict(float)) @@ -101,12 +101,12 @@ print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14) for dataset in beir_keys: print(f'{dataset:25}' + - f'{table[dataset]["flat"]["nDCG@10"]:8.4f}{table[dataset]["flat"]["R@100"]:8.4f} ' + - f'{table[dataset]["multifield"]["nDCG@10"]:8.4f}{table[dataset]["multifield"]["R@100"]:8.4f} ' + + f'{table[dataset]["bm25-flat"]["nDCG@10"]:8.4f}{table[dataset]["bm25-flat"]["R@100"]:8.4f} ' + + f'{table[dataset]["bm25-multifield"]["nDCG@10"]:8.4f}{table[dataset]["bm25-multifield"]["R@100"]:8.4f} ' + f'{table[dataset]["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{table[dataset]["splade-distil-cocodenser-medium"]["R@100"]:8.4f}') print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14) - print('avg' + ' ' * 22 + f'{final_scores["flat"]["nDCG@10"]:8.4f}{final_scores["flat"]["R@100"]:8.4f} ' + - f'{final_scores["multifield"]["nDCG@10"]:8.4f}{final_scores["multifield"]["R@100"]:8.4f} ' + + print('avg' + ' ' * 22 + f'{final_scores["bm25-flat"]["nDCG@10"]:8.4f}{final_scores["bm25-flat"]["R@100"]:8.4f} ' + + f'{final_scores["bm25-multifield"]["nDCG@10"]:8.4f}{final_scores["bm25-multifield"]["R@100"]:8.4f} ' + f'{final_scores["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{final_scores["splade-distil-cocodenser-medium"]["R@100"]:8.4f} ') end = time.time()