Skip to content

Commit

Permalink
Fix position increment gap on phrase/prefix analyzers (#70096)
Browse files Browse the repository at this point in the history
Custom position increments are handled by wrapping analyzers
with a NamedAnalyzer and passing the custom increment through
to its constructor. However, phrase and prefix analyzers use
delegating analyzer wrappers to add extra filtering to their parent
analyzers, and we can't wrap analyzers multiple times because this
wrecks reuse strategies, so we unwrap the parent before passing
it to phrase and prefix builders. This unwrapping means that we
lose the custom position increments; in particular, we can
end up with a position increment gap of -1, which is the
sentinel value for the unset parameter — and that in turn causes
index-time exceptions for backwards-moving positions on fields
with multiple values.

This commit removes the sentinel value and uses standard parameter
defaults and the isConfigured() method instead, plus it adds some
more comprehensive testing for position increments when combined
with phrase/prefix index options on text fields.

Fixes #70049
  • Loading branch information
romseygeek committed Mar 9, 2021
1 parent e7ad8c3 commit 6fd96d1
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ public AnnotatedTextFieldMapper build(ContentPath contentPath) {
if (fieldType.indexOptions() == IndexOptions.NONE ) {
throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields must be indexed");
}
if (analyzers.positionIncrementGap.get() != TextParams.POSITION_INCREMENT_GAP_USE_ANALYZER) {
if (analyzers.positionIncrementGap.isConfigured()) {
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot set position_increment_gap on field ["
+ name + "] without positions enabled");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ protected List<Parameter<?>> getParameters() {
private TextFieldType buildFieldType(FieldType fieldType, ContentPath contentPath) {
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
if (analyzers.positionIncrementGap.get() != TextParams.POSITION_INCREMENT_GAP_USE_ANALYZER) {
if (analyzers.positionIncrementGap.isConfigured()) {
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot set position_increment_gap on field ["
+ name + "] without positions enabled");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
*/
public final class TextParams {

public static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;

private TextParams() {}

public static final class Analyzers {
Expand Down Expand Up @@ -66,9 +64,9 @@ public Analyzers(IndexAnalyzers indexAnalyzers,
})
.setValidator(a -> a.checkAllowedInMode(AnalysisMode.SEARCH_TIME));
this.positionIncrementGap = Parameter.intParam("position_increment_gap", false,
m -> analyzerInitFunction.apply(m).positionIncrementGap.get(), POSITION_INCREMENT_GAP_USE_ANALYZER)
m -> analyzerInitFunction.apply(m).positionIncrementGap.get(), TextFieldMapper.Defaults.POSITION_INCREMENT_GAP)
.setValidator(v -> {
if (v != POSITION_INCREMENT_GAP_USE_ANALYZER && v < 0) {
if (v < 0) {
throw new MapperParsingException("[position_increment_gap] must be positive, got [" + v + "]");
}
});
Expand All @@ -88,7 +86,7 @@ public NamedAnalyzer getSearchQuoteAnalyzer() {
}

private NamedAnalyzer wrapAnalyzer(NamedAnalyzer a) {
if (positionIncrementGap.get() == POSITION_INCREMENT_GAP_USE_ANALYZER) {
if (positionIncrementGap.isConfigured() == false) {
return a;
}
return new NamedAnalyzer(a, positionIncrementGap.get());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
Expand Down Expand Up @@ -344,6 +346,29 @@ public void testDefaultPositionIncrementGap() throws IOException {
});
}

public void testDefaultPositionIncrementGapOnSubfields() throws IOException {
    // A text field with both phrase and prefix subfields enabled, leaving
    // position_increment_gap unset so the default gap applies.
    MapperService mapperService = createMapperService(fieldMapping(b -> {
        b.field("type", "text");
        b.field("index_phrases", true);
        b.startObject("index_prefixes").endObject();
    }));

    // Two values: positions of the second value must be shifted by the default gap.
    ParsedDocument parsed = mapperService.documentMapper()
        .parse(source(b -> b.array("field", "aargle bargle", "foo bar")));
    withLuceneIndex(mapperService, iw -> iw.addDocument(parsed.rootDoc()), reader -> {
        // Phrase subfield emits shingles; "foo bar" starts one past the gap
        // because the first value contributed two single-position tokens.
        TermsEnum phraseIterator = getOnlyLeafReader(reader).terms("field._index_phrase").iterator();
        assertTrue(phraseIterator.seekExact(new BytesRef("foo bar")));
        PostingsEnum phrasePositions = phraseIterator.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, phrasePositions.nextDoc());
        assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 1, phrasePositions.nextPosition());

        // Prefix subfield: "foo" is the first token of the second value, two
        // positions past the gap start.
        TermsEnum prefixIterator = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
        assertTrue(prefixIterator.seekExact(new BytesRef("foo")));
        PostingsEnum prefixPositions = prefixIterator.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, prefixPositions.nextDoc());
        assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, prefixPositions.nextPosition());
    });
}

public void testPositionIncrementGap() throws IOException {
final int positionIncrementGap = randomIntBetween(1, 1000);
MapperService mapperService = createMapperService(
Expand All @@ -365,6 +390,30 @@ public void testPositionIncrementGap() throws IOException {
});
}

public void testPositionIncrementGapOnSubfields() throws IOException {
    // An explicitly configured gap must propagate to the phrase and prefix
    // subfield analyzers as well, not just the parent field.
    final int gap = 10;
    MapperService mapperService = createMapperService(fieldMapping(b -> {
        b.field("type", "text");
        b.field("position_increment_gap", gap);
        b.field("index_phrases", true);
        b.startObject("index_prefixes").endObject();
    }));

    ParsedDocument parsed = mapperService.documentMapper()
        .parse(source(b -> b.array("field", "aargle bargle", "foo bar")));
    withLuceneIndex(mapperService, iw -> iw.addDocument(parsed.rootDoc()), reader -> {
        // Phrase shingle "foo bar" sits at gap + 1: one token position consumed
        // by the first value before the configured gap is applied.
        TermsEnum phraseIterator = getOnlyLeafReader(reader).terms("field._index_phrase").iterator();
        assertTrue(phraseIterator.seekExact(new BytesRef("foo bar")));
        PostingsEnum phrasePositions = phraseIterator.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, phrasePositions.nextDoc());
        assertEquals(gap + 1, phrasePositions.nextPosition());

        // Prefix token "foo" lands at gap + 2 for the same reason, offset by
        // the two tokens of the first value.
        TermsEnum prefixIterator = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
        assertTrue(prefixIterator.seekExact(new BytesRef("foo")));
        PostingsEnum prefixPositions = prefixIterator.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, prefixPositions.nextDoc());
        assertEquals(gap + 2, prefixPositions.nextPosition());
    });
}

public void testSearchAnalyzerSerialization() throws IOException {
XContentBuilder mapping = fieldMapping(
b -> b.field("type", "text").field("analyzer", "standard").field("search_analyzer", "keyword")
Expand Down Expand Up @@ -738,10 +787,10 @@ protected TokenStreamComponents createComponents(String fieldName) {
.build(), BooleanClause.Occur.SHOULD).build()));

ParsedDocument doc = mapperService.documentMapper()
.parse(source(b -> b.field("field", "Some English text that is going to be very useful")));
.parse(source(b -> b.array("field", "Some English text that is going to be very useful", "bad", "Prio 1")));

IndexableField[] fields = doc.rootDoc().getFields("field._index_phrase");
assertEquals(1, fields.length);
assertEquals(3, fields.length);

try (TokenStream ts = fields[0].tokenStream(mapperService.indexAnalyzer(fields[0].name(), f -> null), null)) {
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
Expand All @@ -750,6 +799,13 @@ protected TokenStreamComponents createComponents(String fieldName) {
assertEquals("Some English", termAtt.toString());
}

withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), ir -> {
IndexSearcher searcher = new IndexSearcher(ir);
MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "Prio 1");
TopDocs td = searcher.search(queryBuilder.toQuery(searchExecutionContext), 1);
assertEquals(1, td.totalHits.value);
});

Exception e = expectThrows(
MapperParsingException.class,
() -> createMapperService(fieldMapping(b -> b.field("type", "text").field("index", "false").field("index_phrases", true)))
Expand Down

0 comments on commit 6fd96d1

Please sign in to comment.