Skip to content

Commit

Permalink
Fix position increment gap on phrase/prefix analyzers (#70096)
Browse files Browse the repository at this point in the history
Custom position increments are handled by wrapping analyzers
with a NamedAnalyzer and passing the custom increment through
to its constructor. However, phrase and prefix analyzers use
delegating analyzer wrappers to add extra filtering to their parent
analyzers, and we can't wrap analyzers multiple times because this
wrecks reuse strategies, so we unwrap the parent before passing
it to phrase and prefix builders. This unwrapping means that we
lose the custom position increments; in particular, we can
end up with a position increment gap of -1, which is the
sentinel value for the unset parameter — and that in turn causes
index-time exceptions for backwards-moving positions on fields
with multiple values.

This commit removes the sentinel value and uses standard parameter
defaults and the isConfigured() method instead, plus it adds some
more comprehensive testing for position increments when combined
with phrase/prefix index options on text fields.

Fixes #70049
  • Loading branch information
romseygeek committed Mar 9, 2021
1 parent e7ad8c3 commit 6fd96d1
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ public AnnotatedTextFieldMapper build(ContentPath contentPath) {
if (fieldType.indexOptions() == IndexOptions.NONE ) {
throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields must be indexed");
}
if (analyzers.positionIncrementGap.get() != TextParams.POSITION_INCREMENT_GAP_USE_ANALYZER) {
if (analyzers.positionIncrementGap.isConfigured()) {
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot set position_increment_gap on field ["
+ name + "] without positions enabled");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ protected List<Parameter<?>> getParameters() {
private TextFieldType buildFieldType(FieldType fieldType, ContentPath contentPath) {
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
if (analyzers.positionIncrementGap.get() != TextParams.POSITION_INCREMENT_GAP_USE_ANALYZER) {
if (analyzers.positionIncrementGap.isConfigured()) {
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot set position_increment_gap on field ["
+ name + "] without positions enabled");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
*/
public final class TextParams {

public static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;

private TextParams() {}

public static final class Analyzers {
Expand Down Expand Up @@ -66,9 +64,9 @@ public Analyzers(IndexAnalyzers indexAnalyzers,
})
.setValidator(a -> a.checkAllowedInMode(AnalysisMode.SEARCH_TIME));
this.positionIncrementGap = Parameter.intParam("position_increment_gap", false,
m -> analyzerInitFunction.apply(m).positionIncrementGap.get(), POSITION_INCREMENT_GAP_USE_ANALYZER)
m -> analyzerInitFunction.apply(m).positionIncrementGap.get(), TextFieldMapper.Defaults.POSITION_INCREMENT_GAP)
.setValidator(v -> {
if (v != POSITION_INCREMENT_GAP_USE_ANALYZER && v < 0) {
if (v < 0) {
throw new MapperParsingException("[position_increment_gap] must be positive, got [" + v + "]");
}
});
Expand All @@ -88,7 +86,7 @@ public NamedAnalyzer getSearchQuoteAnalyzer() {
}

private NamedAnalyzer wrapAnalyzer(NamedAnalyzer a) {
if (positionIncrementGap.get() == POSITION_INCREMENT_GAP_USE_ANALYZER) {
if (positionIncrementGap.isConfigured() == false) {
return a;
}
return new NamedAnalyzer(a, positionIncrementGap.get());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
Expand Down Expand Up @@ -344,6 +346,29 @@ public void testDefaultPositionIncrementGap() throws IOException {
});
}

public void testDefaultPositionIncrementGapOnSubfields() throws IOException {
    // A text field with both phrase and prefix subfields enabled, leaving
    // position_increment_gap unset so the default gap applies.
    MapperService mapperService = createMapperService(fieldMapping(b -> {
        b.field("type", "text");
        b.field("index_phrases", true);
        b.startObject("index_prefixes").endObject();
    }));

    // Two values: positions of the second value must be shifted by the default gap.
    ParsedDocument parsed = mapperService.documentMapper()
        .parse(source(b -> b.array("field", "aargle bargle", "foo bar")));
    withLuceneIndex(mapperService, iw -> iw.addDocument(parsed.rootDoc()), reader -> {
        // Phrase subfield emits shingles; "foo bar" starts one past the gap
        // because the first value contributed two single-position tokens.
        TermsEnum phraseIterator = getOnlyLeafReader(reader).terms("field._index_phrase").iterator();
        assertTrue(phraseIterator.seekExact(new BytesRef("foo bar")));
        PostingsEnum phrasePositions = phraseIterator.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, phrasePositions.nextDoc());
        assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 1, phrasePositions.nextPosition());

        // Prefix subfield: "foo" is the first token of the second value, two
        // positions past the gap start.
        TermsEnum prefixIterator = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
        assertTrue(prefixIterator.seekExact(new BytesRef("foo")));
        PostingsEnum prefixPositions = prefixIterator.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, prefixPositions.nextDoc());
        assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, prefixPositions.nextPosition());
    });
}

public void testPositionIncrementGap() throws IOException {
final int positionIncrementGap = randomIntBetween(1, 1000);
MapperService mapperService = createMapperService(
Expand All @@ -365,6 +390,30 @@ public void testPositionIncrementGap() throws IOException {
});
}

public void testPositionIncrementGapOnSubfields() throws IOException {
    // An explicitly configured gap must propagate to the phrase and prefix
    // subfield analyzers as well, not just the parent field.
    final int gap = 10;
    MapperService mapperService = createMapperService(fieldMapping(b -> {
        b.field("type", "text");
        b.field("position_increment_gap", gap);
        b.field("index_phrases", true);
        b.startObject("index_prefixes").endObject();
    }));

    ParsedDocument parsed = mapperService.documentMapper()
        .parse(source(b -> b.array("field", "aargle bargle", "foo bar")));
    withLuceneIndex(mapperService, iw -> iw.addDocument(parsed.rootDoc()), reader -> {
        // Phrase shingle "foo bar" sits at gap + 1: one token position consumed
        // by the first value before the configured gap is applied.
        TermsEnum phraseIterator = getOnlyLeafReader(reader).terms("field._index_phrase").iterator();
        assertTrue(phraseIterator.seekExact(new BytesRef("foo bar")));
        PostingsEnum phrasePositions = phraseIterator.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, phrasePositions.nextDoc());
        assertEquals(gap + 1, phrasePositions.nextPosition());

        // Prefix token "foo" lands at gap + 2 for the same reason, offset by
        // the two tokens of the first value.
        TermsEnum prefixIterator = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
        assertTrue(prefixIterator.seekExact(new BytesRef("foo")));
        PostingsEnum prefixPositions = prefixIterator.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, prefixPositions.nextDoc());
        assertEquals(gap + 2, prefixPositions.nextPosition());
    });
}

public void testSearchAnalyzerSerialization() throws IOException {
XContentBuilder mapping = fieldMapping(
b -> b.field("type", "text").field("analyzer", "standard").field("search_analyzer", "keyword")
Expand Down Expand Up @@ -738,10 +787,10 @@ protected TokenStreamComponents createComponents(String fieldName) {
.build(), BooleanClause.Occur.SHOULD).build()));

ParsedDocument doc = mapperService.documentMapper()
.parse(source(b -> b.field("field", "Some English text that is going to be very useful")));
.parse(source(b -> b.array("field", "Some English text that is going to be very useful", "bad", "Prio 1")));

IndexableField[] fields = doc.rootDoc().getFields("field._index_phrase");
assertEquals(1, fields.length);
assertEquals(3, fields.length);

try (TokenStream ts = fields[0].tokenStream(mapperService.indexAnalyzer(fields[0].name(), f -> null), null)) {
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
Expand All @@ -750,6 +799,13 @@ protected TokenStreamComponents createComponents(String fieldName) {
assertEquals("Some English", termAtt.toString());
}

withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), ir -> {
IndexSearcher searcher = new IndexSearcher(ir);
MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "Prio 1");
TopDocs td = searcher.search(queryBuilder.toQuery(searchExecutionContext), 1);
assertEquals(1, td.totalHits.value);
});

Exception e = expectThrows(
MapperParsingException.class,
() -> createMapperService(fieldMapping(b -> b.field("type", "text").field("index", "false").field("index_phrases", true)))
Expand Down

0 comments on commit 6fd96d1

Please sign in to comment.