Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to enable Lucene 8 index compatibility #1953

Merged
merged 50 commits into from
Aug 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
11031a1
no args constructor for FakeWordsEncoderAnalyzer
tteofili Mar 22, 2020
1d09456
Merge branch 'master' of github.com:castorini/anserini
tteofili Mar 22, 2020
7e69299
Merge branch 'master' of github.com:castorini/anserini
tteofili Mar 23, 2020
3bb78ae
Merge branch 'master' of github.com:castorini/anserini
tteofili Apr 1, 2020
3b0f7e4
Merge branch 'master' of github.com:castorini/anserini
tteofili Apr 4, 2020
0347890
Merge branch 'master' of github.com:castorini/anserini
tteofili Apr 5, 2020
30226c5
Merge branch 'master' of github.com:castorini/anserini
tteofili Apr 5, 2020
961064a
Merge branch 'master' of github.com:castorini/anserini
tteofili Apr 17, 2020
6964f45
Merge branch 'master' of github.com:castorini/anserini
tteofili Apr 17, 2020
507ea04
Merge branch 'master' of github.com:castorini/anserini
tteofili Apr 18, 2020
9342dba
Merge branch 'master' of github.com:castorini/anserini
tteofili Apr 29, 2020
d82c087
Merge branch 'master' of github.com:castorini/anserini
tteofili Apr 29, 2020
85c136d
Merge branch 'master' of github.com:castorini/anserini
tteofili May 4, 2020
9335c13
Merge branch 'master' of github.com:castorini/anserini
tteofili Jun 8, 2020
e3e351c
Merge branch 'master' of github.com:tteofili/anserini
tteofili Jun 10, 2020
236b4d5
Merge branch 'master' of github.com:castorini/anserini
tteofili Jun 10, 2020
647b89e
Merge branch 'master' of github.com:castorini/anserini
tteofili Jul 9, 2020
0efeebb
Merge branch 'master' of github.com:castorini/anserini
tteofili Oct 7, 2020
685402f
Merge branch 'master' of github.com:castorini/anserini
tteofili Nov 7, 2020
c65a71e
Merge branch 'master' of github.com:castorini/anserini
tteofili Nov 11, 2020
eb375b1
Merge branch 'master' of github.com:tteofili/anserini
tteofili Nov 11, 2020
24dd74b
Merge branch 'master' of github.com:castorini/anserini
tteofili Nov 24, 2020
56424c8
Merge branch 'master' of github.com:tteofili/anserini
tteofili Nov 24, 2020
c6b979c
Merge branch 'master' of github.com:castorini/anserini
tteofili Dec 3, 2020
f600abf
Merge branch 'master' of github.com:castorini/anserini
tteofili Jan 5, 2021
681b251
Merge branch 'master' of github.com:castorini/anserini
tteofili Jan 13, 2021
8948fa7
merge
tteofili Jan 14, 2021
6e09bfb
avoid loading vectors in a map
tteofili Jan 25, 2021
f8f841f
Merge branch 'master' of github.com:castorini/anserini into ann-nomap
tteofili Jan 25, 2021
4a1f88d
Merge branch 'master' into ann-nomap
tteofili Jan 26, 2021
fdedaeb
Merge branch 'master' of github.com:tteofili/anserini into ann-nomap
tteofili Jan 26, 2021
26395c1
Merge branch 'ann-nomap' of github.com:tteofili/anserini into ann-nomap
tteofili Jan 26, 2021
933d44a
Merge branch 'ann-nomap'
tteofili Jan 26, 2021
614422e
Merge branch 'master' of github.com:castorini/anserini
tteofili Jan 29, 2021
4628c01
Merge branch 'master' of github.com:castorini/anserini
tteofili May 2, 2022
a914ee3
upgrade lucene and solr to 9.0 versions
tteofili May 19, 2022
09a7dba
upgrade solr twitter conf to 90
tteofili May 19, 2022
9a1ccaf
Merge branch 'master' of github.com:castorini/anserini into lucene90
tteofili May 19, 2022
4e24559
merged upstream changes
tteofili May 19, 2022
41c475c
comparing covid19 query generated query at object level
tteofili May 19, 2022
1d24ac5
Merge branch 'master' into tteofili-lucene90
lintool Jul 27, 2022
9f298ab
Fix to enable successful Lucene 9 upgrade.
lintool Jul 27, 2022
1498d13
Merge branch 'master' into solr-es-removal
lintool Aug 1, 2022
7257291
Removal of Solr and ES code paths.
lintool Aug 1, 2022
1c576c1
Merge branch 'master' into solr-es-removal
lintool Aug 1, 2022
dedfbbb
Reordered.
lintool Aug 1, 2022
3dd6c34
Bumped up to Lucene 9.
lintool Aug 1, 2022
c3f0ba2
Added backward codecs.
lintool Aug 1, 2022
be33a26
Fix Lucene 8/9 index compat issue
lintool Aug 1, 2022
9a3e282
Merge branch 'master' into lucene8-compat
lintool Aug 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/main/java/io/anserini/search/SearchArgs.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ public class SearchArgs {
@Option(name = "-topicreader", required = true, usage = "TopicReader to use.")
public String topicReader;

@Option(name = "-lucene8", usage = "Enable Lucene 8 index compatibility.")
public Boolean lucene8 = false;

// optional arguments
@Option(name = "-querygenerator", usage = "QueryGenerator to use.")
public String queryGenerator = "BagOfWordsQueryGenerator";
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/io/anserini/search/SearchCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,12 @@ public SearchCollection(SearchArgs args) throws IOException {
loadQrels(args.rf_qrels);
}

// See https://github.com/castorini/anserini/issues/1952
// The solution to the issue described above is to turn off deterministic tie-breaking.
if (args.lucene8) {
args.arbitraryScoreTieBreak = true;
args.axiom_deterministic = false;
}
}

@Override
Expand Down
16 changes: 14 additions & 2 deletions src/main/python/run_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ def is_close(a, b, rel_tol=1e-09, abs_tol=0.0):
return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)


def is_close_lucene8(a, b, abs_tol=0.001):
    """Return True when ``a`` and ``b`` differ by at most ``abs_tol``.

    This is a purely absolute comparison (no relative component), so it is
    much looser than a relative check with a tiny tolerance when the values
    being compared are small scores.

    Args:
        a: First numeric value.
        b: Second numeric value.
        abs_tol: Maximum allowed absolute difference, inclusive.
            Defaults to 0.001.

    Returns:
        bool: ``abs(a - b) <= abs_tol``. A NaN difference compares as False.
    """
    return abs(a - b) <= abs_tol


def check_output(command):
# Python 2.6 compatible subprocess.check_output
process = Popen(command, shell=True, stdout=PIPE)
Expand Down Expand Up @@ -131,6 +135,7 @@ def construct_search_commands(yaml_data):
'-topicreader', topic_set['topic_reader'] if 'topic_reader' in topic_set and topic_set['topic_reader'] else yaml_data['topic_reader'],
'-output', construct_runfile_path(yaml_data['corpus'], topic_set['id'], model['name']),
model['params'],
'-lucene8' if args.lucene8 else ''
]
for (model, topic_set) in list(itertools.product(yaml_data['models'], yaml_data['topics']))
]
Expand All @@ -154,6 +159,7 @@ def construct_convert_commands(yaml_data):
def evaluate_and_verify(yaml_data, dry_run):
fail_str = '\033[91m[FAIL]\033[0m '
ok_str = ' [OK] '
okish_str = ' \033[94m[OK*]\033[0m '
failures = False

logger.info('='*10 + ' Verifying Results: ' + yaml_data['corpus'] + ' ' + '='*10)
Expand Down Expand Up @@ -181,8 +187,11 @@ def evaluate_and_verify(yaml_data, dry_run):
if is_close(expected, actual):
logger.info(ok_str + result_str)
else:
logger.error(fail_str + result_str)
failures = True
if args.lucene8 and is_close_lucene8(expected, actual):
logger.info(okish_str + result_str)
else:
logger.error(fail_str + result_str)
failures = True

if not dry_run:
if failures:
Expand Down Expand Up @@ -280,6 +289,7 @@ def download_url(url, save_dir, local_filename=None, md5=None, force=False, verb
help='Number of converting runs to execute in parallel.')
parser.add_argument('--dry-run', dest='dry_run', action='store_true',
help='Output commands without actual execution.')
parser.add_argument('--lucene8', dest='lucene8', action='store_true', help='Enable Lucene 8 index compatibility.')
args = parser.parse_args()

with open('src/main/resources/regression/{}.yaml'.format(args.regression)) as f:
Expand Down Expand Up @@ -340,6 +350,8 @@ def download_url(url, save_dir, local_filename=None, md5=None, force=False, verb
# Search and verify results.
if args.search:
logger.info('='*10 + ' Ranking ' + '='*10)
if args.lucene8:
logger.info('Enabling Lucene 8 index compatibility.')
search_cmds = construct_search_commands(yaml_data)
if args.dry_run:
for cmd in search_cmds:
Expand Down