Skip to content

Commit

Permalink
Align XCombinedFieldQuery with latest changes (#75483)
Browse files Browse the repository at this point in the history
In #74678 we released an early fix for a Lucene bug around `combined_fields`
queries with missing fields. This PR brings our fix up-to-date with what was
actually committed to Lucene.
  • Loading branch information
jtibshirani committed Jul 20, 2021
1 parent 5643c4f commit 4407fe5
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
*/
package org.apache.lucene.search;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
Expand Down Expand Up @@ -69,6 +71,9 @@
* compute norms the same way as {@link SimilarityBase#computeNorm}, which includes {@link
* BM25Similarity} and {@link DFRSimilarity}. Per-field similarities are not supported.
*
* <p>The query also requires that either all fields or no fields have norms enabled. Having only
* some fields with norms enabled can result in errors.
*
* <p>The scoring is based on BM25F's simple formula described in:
* http://www.staff.city.ac.uk/~sb317/papers/foundations_bm25_review.pdf. This query implements the
* same approach but allows other similarities besides {@link
Expand Down Expand Up @@ -264,6 +269,7 @@ private BooleanQuery rewriteToBoolean() {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
validateConsistentNorms(searcher.getIndexReader());
if (scoreMode.needsScores()) {
return new CombinedFieldWeight(this, searcher, scoreMode, boost);
} else {
Expand All @@ -273,6 +279,29 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
}
}

private void validateConsistentNorms(IndexReader reader) {
boolean allFieldsHaveNorms = true;
boolean noFieldsHaveNorms = true;

for (LeafReaderContext context : reader.leaves()) {
FieldInfos fieldInfos = context.reader().getFieldInfos();
for (String field : fieldAndWeights.keySet()) {
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
if (fieldInfo != null) {
allFieldsHaveNorms &= fieldInfo.hasNorms();
noFieldsHaveNorms &= fieldInfo.omitsNorms();
}
}
}

if (allFieldsHaveNorms == false && noFieldsHaveNorms == false) {
throw new IllegalArgumentException(
getClass().getSimpleName()
+ " requires norms to be consistent across fields: some fields cannot "
+ " have norms enabled, while others have norms disabled");
}
}

class CombinedFieldWeight extends Weight {
private final IndexSearcher searcher;
private final TermStates termStates[];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@
* Copy of {@link MultiNormsLeafSimScorer} that contains a fix for LUCENE-9999.
* TODO: remove once LUCENE-9999 is fixed and integrated
*
* <p>This scorer requires that either all fields or no fields have norms enabled. It will throw an
* error if some fields have norms enabled, while others have norms disabled.
* <p>For all fields, norms must be encoded using {@link SmallFloat#intToByte4}. This scorer also
* requires that either all fields or no fields have norms enabled. Having only some fields with
* norms enabled can result in errors or undefined behavior.
*/
final class XMultiNormsLeafSimScorer {
/** Cache of decoded norms. */
Expand Down Expand Up @@ -69,13 +70,6 @@ final class XMultiNormsLeafSimScorer {
}
}

if (normsList.isEmpty() == false && normsList.size() != normFields.size()) {
throw new IllegalArgumentException(
getClass().getSimpleName()
+ " requires norms to be consistent across fields: some fields cannot"
+ " have norms enabled, while others have norms disabled");
}

if (normsList.isEmpty()) {
norms = null;
} else if (normsList.size() == 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ public void testNormsDisabled() throws IOException {
doc.add(new StringField("b", "value", Store.NO));
doc.add(new TextField("c", "value", Store.NO));
w.addDocument(doc);
w.commit();

doc = new Document();
doc.add(new StringField("a", "value", Store.NO));
doc.add(new TextField("c", "value", Store.NO));
w.addDocument(doc);

IndexReader reader = w.getReader();
IndexSearcher searcher = newSearcher(reader);
Expand All @@ -105,19 +111,26 @@ public void testNormsDisabled() throws IOException {
searcher.setSimilarity(searchSimilarity);
TopScoreDocCollector collector = TopScoreDocCollector.create(10, null, 10);

XCombinedFieldQuery query = new XCombinedFieldQuery.Builder().addField("a", 1.0f)
.addField("b", 1.0f)
.addTerm(new BytesRef("value"))
.build();
XCombinedFieldQuery query =
new XCombinedFieldQuery.Builder()
.addField("a", 1.0f)
.addField("b", 1.0f)
.addTerm(new BytesRef("value"))
.build();
searcher.search(query, collector);
TopDocs topDocs = collector.topDocs();
assertEquals(new TotalHits(1, TotalHits.Relation.EQUAL_TO), topDocs.totalHits);
assertEquals(new TotalHits(2, TotalHits.Relation.EQUAL_TO), topDocs.totalHits);

XCombinedFieldQuery invalidQuery = new XCombinedFieldQuery.Builder().addField("b", 1.0f)
.addField("c", 1.0f)
.addTerm(new BytesRef("value"))
.build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> searcher.search(invalidQuery, collector));
TopScoreDocCollector invalidCollector = TopScoreDocCollector.create(10, null, 10);
XCombinedFieldQuery invalidQuery =
new XCombinedFieldQuery.Builder()
.addField("b", 1.0f)
.addField("c", 1.0f)
.addTerm(new BytesRef("value"))
.build();
IllegalArgumentException e =
expectThrows(
IllegalArgumentException.class, () -> searcher.search(invalidQuery, invalidCollector));
assertTrue(e.getMessage().contains("requires norms to be consistent across fields"));

reader.close();
Expand Down

0 comments on commit 4407fe5

Please sign in to comment.