From d60ba6a1687566efdcee1f29f1666f4ec9d4fba5 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Fri, 25 May 2018 17:53:02 -0400
Subject: [PATCH 1/3] initial move of CompletionTokenStream to
 ConcatenateGraphFilter

---
 .../ConcatenateGraphFilter.java}              | 130 ++++++++++--------
 .../ConcatenateGraphFilterFactory.java        |  61 ++++++++
 .../miscellaneous/FingerprintFilter.java      |   4 +-
 ...he.lucene.analysis.util.TokenFilterFactory |   1 +
 .../TestConcatenateGraphFilter.java}          |  95 ++++++++-----
 .../TestConcatenateGraphFilterFactory.java    |  55 ++++++++
 .../miscellaneous/TestFingerprintFilter.java  |   9 ++
 .../analysis/TokenStreamToAutomaton.java      |   2 +
 .../suggest/document/CompletionAnalyzer.java  |  23 +---
 .../suggest/document/CompletionQuery.java     |   4 +-
 .../search/suggest/document/ContextQuery.java |  11 +-
 .../suggest/document/ContextSuggestField.java |  24 ++--
 .../document/FuzzyCompletionQuery.java        |   3 +-
 .../suggest/document/NRTSuggesterBuilder.java |   4 +
 .../document/PrefixCompletionQuery.java       |   3 +-
 .../search/suggest/document/SuggestField.java |  15 +-
 .../document/TestContextSuggestField.java     |  13 +-
 .../suggest/document/TestSuggestField.java    |  29 +++-
 18 files changed, 342 insertions(+), 144 deletions(-)
 rename lucene/{suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java => analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java} (67%)
 create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java
 rename lucene/{suggest/src/test/org/apache/lucene/search/suggest/document/CompletionTokenStreamTest.java => analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilter.java} (69%)
 create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilterFactory.java

diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java
similarity index 67%
rename from lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java
rename to lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java
index 7308e65acc9c..e222d6b680d1 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java
@@ -14,14 +14,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.search.suggest.document;
+package org.apache.lucene.analysis.miscellaneous;
 
 import java.io.IOException;
 
+import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
@@ -31,55 +33,80 @@
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.FiniteStringsIterator;
 import org.apache.lucene.util.automaton.LimitedFiniteStringsIterator;
 import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
 import org.apache.lucene.util.automaton.Transition;
 import org.apache.lucene.util.fst.Util;
 
-import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.DEFAULT_MAX_GRAPH_EXPANSIONS;
-import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.DEFAULT_PRESERVE_POSITION_INCREMENTS;
-import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.DEFAULT_PRESERVE_SEP;
-import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.SEP_LABEL;
-
 /**
- * Token stream which converts a provided token stream to an automaton.
- * The accepted strings enumeration from the automaton are available through the
- * {@link org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute} attribute
- * The token stream uses a {@link org.apache.lucene.analysis.tokenattributes.PayloadAttribute} to store
- * a completion's payload (see {@link CompletionTokenStream#setPayload(org.apache.lucene.util.BytesRef)})
+ * Concatenates/Joins every incoming token with a separator into one output token for every path through the
+ * token stream (which is a graph).  In simple cases this yields one token, but in the presence of any tokens with
+ * a zero positionIncrmeent (e.g. synonyms) it will be more.  This filter uses the token bytes, position increment,
+ * and position length of the incoming stream.  Other attributes are not used or manipulated.
  *
  * @lucene.experimental
  */
-public final class CompletionTokenStream extends TokenStream {
+public final class ConcatenateGraphFilter extends TokenFilter {
+
+  /*
+   * Token stream which converts a provided token stream to an automaton.
+   * The accepted strings enumeration from the automaton are available through the
+   * {@link org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute} attribute
+   * The token stream uses a {@link org.apache.lucene.analysis.tokenattributes.PayloadAttribute} to store
+   * a completion's payload (see {@link ConcatenateGraphFilter#setPayload(org.apache.lucene.util.BytesRef)})
+   */
+
+  /**
+   * Represents the separation between tokens, if
+   * <code>preserveSep</code> is <code>true</code>.
+   */
+  public final static char SEP_CHAR = '\u001F';
+  public final static int DEFAULT_MAX_GRAPH_EXPANSIONS = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+  public final static boolean DEFAULT_PRESERVE_SEP = true;
+  public final static boolean DEFAULT_PRESERVE_POSITION_INCREMENTS = true;
 
   private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);
   private final BytesRefBuilderTermAttribute bytesAtt = addAttribute(BytesRefBuilderTermAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
 
-  final TokenStream inputTokenStream;
-  final boolean preserveSep;
-  final boolean preservePositionIncrements;
-  final int maxGraphExpansions;
+  //nocommit add getters for these instead?  Accessor: org.apache.lucene.search.suggest.document.ContextSuggestField.wrapTokenStream
+  public final boolean preserveSep;
+  public final boolean preservePositionIncrements;
+  public final int maxGraphExpansions;
+  public TokenStream getInput() { return super.input; } // nocommit remove? see ContextSuggestField.wrapTokenStream
 
-  private FiniteStringsIterator finiteStrings;
+  private LimitedFiniteStringsIterator finiteStrings;
   private BytesRef payload;
   private CharTermAttribute charTermAttribute;
+  private State endState;
 
   /**
    * Creates a token stream to convert <code>input</code> to a token stream
-   * of accepted strings by its automaton.
+   * of accepted strings by its token stream graph.
    * <p>
-   * The token stream <code>input</code> is converted to an automaton
-   * with the default settings of {@link org.apache.lucene.search.suggest.document.CompletionAnalyzer}
+   * This constructor uses the default settings of the constants in this class.
    */
-  CompletionTokenStream(TokenStream inputTokenStream) {
+  public ConcatenateGraphFilter(TokenStream inputTokenStream) {
     this(inputTokenStream, DEFAULT_PRESERVE_SEP, DEFAULT_PRESERVE_POSITION_INCREMENTS, DEFAULT_MAX_GRAPH_EXPANSIONS);
   }
 
-  CompletionTokenStream(TokenStream inputTokenStream, boolean preserveSep, boolean preservePositionIncrements, int maxGraphExpansions) {
-    // Don't call the super(input) ctor - this is a true delegate and has a new attribute source since we consume
-    // the input stream entirely in the first call to incrementToken
-    this.inputTokenStream = inputTokenStream;
+  /**
+   * Creates a token stream to convert <code>input</code> to a token stream
+   * of accepted strings by its token stream graph.
+   *
+   * @param inputTokenStream The input/incoming TokenStream
+   * @param preserveSep
+   * @param preservePositionIncrements Whether to generate holes in the automaton for missing positions
+   * @param maxGraphExpansions If the tokenStream graph has more than this many possible paths through, then we'll throw
+   *                           {@link TooComplexToDeterminizeException} to preserve the stability and memory of the
+   *                           machine.
+   * @throws TooComplexToDeterminizeException if the tokenStream graph has more than {@code maxGraphExpansions}
+   *         expansions
+   *
+   */
+  public ConcatenateGraphFilter(TokenStream inputTokenStream, boolean preserveSep, boolean preservePositionIncrements, int maxGraphExpansions) {
+    super(inputTokenStream);
     this.preserveSep = preserveSep;
     this.preservePositionIncrements = preservePositionIncrements;
     this.maxGraphExpansions = maxGraphExpansions;
@@ -87,6 +114,7 @@ public final class CompletionTokenStream extends TokenStream {
 
   /**
    * Sets a payload available throughout successive token stream enumeration
+   * @lucene.internal
    */
   public void setPayload(BytesRef payload) {
     this.payload = payload;
@@ -94,17 +122,17 @@ public void setPayload(BytesRef payload) {
 
   @Override
   public boolean incrementToken() throws IOException {
-    clearAttributes();
-    if (finiteStrings == null) {
-      Automaton automaton = toAutomaton();
-      finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
-    }
-
     IntsRef string = finiteStrings.next();
     if (string == null) {
       return false;
     }
 
+    clearAttributes();
+
+    if (finiteStrings.size() > 1) { // if number of iterated strings so far is more than one...
+      posIncrAtt.setPositionIncrement(0); // stacked
+    }
+
     Util.toBytesRef(string, bytesAtt.builder()); // now we have UTF-8
     if (charTermAttribute != null) {
       charTermAttribute.setLength(0);
@@ -119,27 +147,19 @@ public boolean incrementToken() throws IOException {
 
   @Override
   public void end() throws IOException {
-    super.end();
-    if (finiteStrings == null) {
-      inputTokenStream.end();
-    }
-  }
-
-  @Override
-  public void close() throws IOException {
-    if (finiteStrings == null) {
-      inputTokenStream.close();
-    }
+    restoreState(endState);
   }
 
+  //nocommit move method to before incrementToken
   @Override
   public void reset() throws IOException {
-    super.reset();
-    if (hasAttribute(CharTermAttribute.class)) {
-      // we only create this if we really need it to safe the UTF-8 to UTF-16 conversion
-      charTermAttribute = getAttribute(CharTermAttribute.class);
+    Automaton automaton = toAutomaton(); // calls reset(), incrementToken() repeatedly, then end()
+    endState = captureState();
+    finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
+    if (charTermAttribute == null) {
+      // we only capture this if we really need it to save the UTF-8 to UTF-16 conversion
+      charTermAttribute = getAttribute(CharTermAttribute.class); // null if none; it's okay
     }
-    finiteStrings = null;
   }
 
   /**
@@ -163,7 +183,7 @@ public Automaton toAutomaton(boolean unicodeAware) throws IOException {
       // separator between tokens:
       final TokenStreamToAutomaton tsta;
       if (preserveSep) {
-        tsta = new EscapingTokenStreamToAutomaton((char) SEP_LABEL);
+        tsta = new EscapingTokenStreamToAutomaton(SEP_CHAR);
       } else {
         // When we're not preserving sep, we don't steal 0xff
         // byte, so we don't need to do any escaping:
@@ -172,14 +192,14 @@ public Automaton toAutomaton(boolean unicodeAware) throws IOException {
       tsta.setPreservePositionIncrements(preservePositionIncrements);
       tsta.setUnicodeArcs(unicodeAware);
 
-      automaton = tsta.toAutomaton(inputTokenStream);
+      automaton = tsta.toAutomaton(input);
     } finally {
-      IOUtils.closeWhileHandlingException(inputTokenStream);
+      IOUtils.closeWhileHandlingException(input);
     }
 
     // TODO: we can optimize this somewhat by determinizing
     // while we convert
-    automaton = replaceSep(automaton, preserveSep, SEP_LABEL);
+    automaton = replaceSep(automaton, preserveSep, SEP_CHAR);
     // This automaton should not blow up during determinize:
     return Operations.determinize(automaton, maxGraphExpansions);
   }
@@ -240,7 +260,7 @@ private static Automaton replaceSep(Automaton a, boolean preserveSep, int sepLab
         if (t.min == TokenStreamToAutomaton.POS_SEP) {
           assert t.max == TokenStreamToAutomaton.POS_SEP;
           if (preserveSep) {
-            // Remap to SEP_LABEL:
+            // Remap to SEP_CHAR:
             result.addTransition(state, t.dest, sepLabel);
           } else {
             result.addEpsilon(state, t.dest);
@@ -269,6 +289,7 @@ private static Automaton replaceSep(Automaton a, boolean preserveSep, int sepLab
 
   /**
    * Attribute providing access to the term builder and UTF-16 conversion
+   * @lucene.internal
    */
   public interface BytesRefBuilderTermAttribute extends TermToBytesRefAttribute {
     /**
@@ -283,7 +304,8 @@ public interface BytesRefBuilderTermAttribute extends TermToBytesRefAttribute {
   }
 
   /**
-   * Custom attribute implementation for completion token stream
+   * Implementation of {@link BytesRefBuilderTermAttribute}
+   * @lucene.internal
    */
   public static final class BytesRefBuilderTermAttributeImpl extends AttributeImpl implements BytesRefBuilderTermAttribute, TermToBytesRefAttribute {
     private final BytesRefBuilder bytes = new BytesRefBuilder();
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java
new file mode 100644
index 000000000000..d8d103944010
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java
@@ -0,0 +1,61 @@
+/*
+ This software was produced for the U. S. Government
+ under Contract No. W15P7T-11-C-F600, and is
+ subject to the Rights in Noncommercial Computer Software
+ and Noncommercial Computer Software Documentation
+ Clause 252.227-7014 (JUN 1995)
+
+ Copyright 2013 The MITRE Corporation. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link ConcatenateGraphFilter}.
+ *
+ * <pre class="prettyprint">
+ * nocommit update
+ * </pre>
+ * @see ConcatenateGraphFilter
+ * @since 7.4.0
+ */
+public class ConcatenateGraphFilterFactory extends TokenFilterFactory {
+
+  private boolean preserveSep;
+  private boolean preservePositionIncrements;
+  private int maxGraphExpansions;
+
+  public ConcatenateGraphFilterFactory(Map<String, String> args) {
+    super(args);
+
+    preserveSep = getBoolean(args, "preserveSep", ConcatenateGraphFilter.DEFAULT_PRESERVE_SEP);
+    preservePositionIncrements = getBoolean(args, "preservePositionIncrements", ConcatenateGraphFilter.DEFAULT_PRESERVE_POSITION_INCREMENTS);
+    maxGraphExpansions = getInt(args, "maxGraphExpansions", ConcatenateGraphFilter.DEFAULT_MAX_GRAPH_EXPANSIONS);
+
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new ConcatenateGraphFilter(input, preserveSep, preservePositionIncrements, maxGraphExpansions);
+  }
+}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
index dfe06c88fbf2..71dab429191b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
@@ -81,8 +81,7 @@ public FingerprintFilter(TokenStream input, int maxOutputTokenSize,
 
   @Override
   public final boolean incrementToken() throws IOException {
-    if (uniqueTerms != null) {
-      // We have already built the single output token - there's no more 
+    if (inputEnded) {
       return false;
     }
     boolean result = buildSingleOutputToken();
@@ -177,6 +176,7 @@ public int compare(Object o1, Object o2) {
       }
     });
 
+    //TODO lets append directly to termAttribute?
     StringBuilder sb = new StringBuilder();
     for (Object item : items) {
       if (sb.length() >= 1) {
diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
index 181192021797..801ac71c0cae 100644
--- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
+++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
@@ -63,6 +63,7 @@ org.apache.lucene.analysis.lv.LatvianStemFilterFactory
 org.apache.lucene.analysis.minhash.MinHashFilterFactory
 org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory
 org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory
+org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilterFactory
 org.apache.lucene.analysis.miscellaneous.CodepointCountFilterFactory
 org.apache.lucene.analysis.miscellaneous.DateRecognizerFilterFactory
 org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/CompletionTokenStreamTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilter.java
similarity index 69%
rename from lucene/suggest/src/test/org/apache/lucene/search/suggest/document/CompletionTokenStreamTest.java
rename to lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilter.java
index 6f558d1985d9..c4cb0d5aa259 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/CompletionTokenStreamTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilter.java
@@ -14,27 +14,27 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.search.suggest.document;
+package org.apache.lucene.analysis.miscellaneous;
 
 import java.io.IOException;
 import java.io.StringReader;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.synonym.SynonymFilter;
 import org.apache.lucene.analysis.synonym.SynonymMap;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.junit.Test;
 
-public class CompletionTokenStreamTest extends BaseTokenStreamTestCase {
+public class TestConcatenateGraphFilter extends BaseTokenStreamTestCase {
 
   @Test
   public void testBasic() throws Exception {
@@ -42,9 +42,9 @@ public void testBasic() throws Exception {
     String input = "mykeyword";
     BytesRef payload = new BytesRef("payload");
     tokenStream.setReader(new StringReader(input));
-    CompletionTokenStream completionTokenStream = new CompletionTokenStream(tokenStream);
-    completionTokenStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);
+    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenStream);
+    concatStream.setPayload(payload);
+    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
     assertTokenStreamContents(stream, new String[] {input}, null, null, new String[] {payload.utf8ToString()}, new int[] { 1 }, null, null);
   }
 
@@ -54,9 +54,9 @@ public void testWithNoPreserveSep() throws Exception {
     String input = "mykeyword another keyword";
     BytesRef payload = new BytesRef("payload");
     tokenStream.setReader(new StringReader(input));
-    CompletionTokenStream completionTokenStream = new CompletionTokenStream(tokenStream, false, false, 100);
-    completionTokenStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);
+    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenStream, false, false, 100);
+    concatStream.setPayload(payload);
+    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
     assertTokenStreamContents(stream, new String[] {"mykeywordanotherkeyword"}, null, null, new String[] {payload.utf8ToString()}, new int[] { 1 }, null, null);
   }
 
@@ -66,14 +66,14 @@ public void testWithMultipleTokens() throws Exception {
     String input = "mykeyword another keyword";
     tokenStream.setReader(new StringReader(input));
     BytesRef payload = new BytesRef("payload");
-    CompletionTokenStream completionTokenStream = new CompletionTokenStream(tokenStream);
-    completionTokenStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);
+    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenStream);
+    concatStream.setPayload(payload);
+    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
     CharsRefBuilder builder = new CharsRefBuilder();
     builder.append("mykeyword");
-    builder.append(((char) CompletionAnalyzer.SEP_LABEL));
+    builder.append((ConcatenateGraphFilter.SEP_CHAR));
     builder.append("another");
-    builder.append(((char) CompletionAnalyzer.SEP_LABEL));
+    builder.append((ConcatenateGraphFilter.SEP_CHAR));
     builder.append("keyword");
     assertTokenStreamContents(stream, new String[]{builder.toCharsRef().toString()}, null, null, new String[]{payload.utf8ToString()}, new int[]{1}, null, null);
   }
@@ -85,11 +85,11 @@ public void testWithSynonym() throws Exception {
     Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     tokenizer.setReader(new StringReader("mykeyword"));
     SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
-    CompletionTokenStream completionTokenStream = new CompletionTokenStream(filter);
+    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(filter);
     BytesRef payload = new BytesRef("payload");
-    completionTokenStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);
-    assertTokenStreamContents(stream, new String[] {"mykeyword", "mysynonym"}, null, null, new String[] {payload.utf8ToString(), payload.utf8ToString()}, new int[] { 1, 1 }, null, null);
+    concatStream.setPayload(payload);
+    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
+    assertTokenStreamContents(stream, new String[] {"mykeyword", "mysynonym"}, null, null, new String[] {payload.utf8ToString(), payload.utf8ToString()}, new int[] { 1, 0 }, null, null);
   }
 
   @Test
@@ -101,25 +101,50 @@ public void testWithSynonyms() throws Exception {
     tokenStream.setReader(new StringReader(input));
     SynonymFilter filter = new SynonymFilter(tokenStream, builder.build(), true);
     BytesRef payload = new BytesRef("payload");
-    CompletionTokenStream completionTokenStream = new CompletionTokenStream(filter, true, false, 100);
-    completionTokenStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);
+    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(filter, true, false, 100);
+    concatStream.setPayload(payload);
+    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
     String[] expectedOutputs = new String[2];
     CharsRefBuilder expectedOutput = new CharsRefBuilder();
     expectedOutput.append("mykeyword");
-    expectedOutput.append(((char) CompletionAnalyzer.SEP_LABEL));
+    expectedOutput.append(ConcatenateGraphFilter.SEP_CHAR);
     expectedOutput.append("another");
-    expectedOutput.append(((char) CompletionAnalyzer.SEP_LABEL));
+    expectedOutput.append(ConcatenateGraphFilter.SEP_CHAR);
     expectedOutput.append("keyword");
     expectedOutputs[0] = expectedOutput.toCharsRef().toString();
     expectedOutput.clear();
     expectedOutput.append("mysynonym");
-    expectedOutput.append(((char) CompletionAnalyzer.SEP_LABEL));
+    expectedOutput.append(ConcatenateGraphFilter.SEP_CHAR);
     expectedOutput.append("another");
-    expectedOutput.append(((char) CompletionAnalyzer.SEP_LABEL));
+    expectedOutput.append(ConcatenateGraphFilter.SEP_CHAR);
     expectedOutput.append("keyword");
     expectedOutputs[1] = expectedOutput.toCharsRef().toString();
-    assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 1}, null, null);
+    assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 0}, null, null);
+  }
+
+  @Test
+  public void testWithStopword() throws Exception {
+    for (boolean preservePosInc : new boolean[]{true, false}) {
+      Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true);
+      String input = "a mykeyword a keyword a";
+      tokenStream.setReader(new StringReader(input));
+      TokenFilter tokenFilter = new StopFilter(tokenStream, StopFilter.makeStopSet("a"));
+      ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenFilter, true, preservePosInc, 10);
+      CharsRefBuilder builder = new CharsRefBuilder();
+      if (preservePosInc) {
+        builder.append((ConcatenateGraphFilter.SEP_CHAR));
+      }
+      builder.append("mykeyword");
+      builder.append((ConcatenateGraphFilter.SEP_CHAR));
+      if (preservePosInc) {
+        builder.append((ConcatenateGraphFilter.SEP_CHAR));
+      }
+      builder.append("keyword");
+      if (preservePosInc) {
+        builder.append((ConcatenateGraphFilter.SEP_CHAR));
+      }
+      assertTokenStreamContents(concatStream, new String[]{builder.toCharsRef().toString()});
+    }
   }
 
   @Test
@@ -137,23 +162,25 @@ public void testValidNumberOfExpansions() throws IOException {
     tokenizer.setReader(new StringReader(valueBuilder.toString()));
     SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
 
-    CompletionTokenStream completionTokenStream = new CompletionTokenStream(filter);
-    completionTokenStream.setPayload(new BytesRef());
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);
+    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(filter);
+    concatStream.setPayload(new BytesRef());
+    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
     stream.reset();
-    CompletionTokenStream.BytesRefBuilderTermAttribute attr = stream.addAttribute(CompletionTokenStream.BytesRefBuilderTermAttribute.class);
-    PositionIncrementAttribute posAttr = stream.addAttribute(PositionIncrementAttribute.class);
-    int maxPos = 0;
+    ConcatenateGraphFilter.BytesRefBuilderTermAttribute attr = stream.addAttribute(ConcatenateGraphFilter.BytesRefBuilderTermAttribute.class);
     int count = 0;
     while(stream.incrementToken()) {
       count++;
       assertNotNull(attr.getBytesRef());
       assertTrue(attr.getBytesRef().length > 0);
-      maxPos += posAttr.getPositionIncrement();
     }
     stream.close();
     assertEquals(count, 256);
-    assertEquals(count, maxPos);
+  }
+
+  public void testEmpty() throws IOException {
+    Tokenizer tokenizer = whitespaceMockTokenizer("");
+    ConcatenateGraphFilter filter = new ConcatenateGraphFilter(tokenizer);
+    assertTokenStreamContents(filter, new String[0]);
   }
 
   public final static class PayloadAttrToTypeAttrFilter extends TokenFilter {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilterFactory.java
new file mode 100644
index 000000000000..5b3ebacf5495
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilterFactory.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+public class TestConcatenateGraphFilterFactory extends BaseTokenStreamFactoryTestCase {
+  public void test() throws Exception {
+    for (final boolean consumeAll : new boolean[]{true, false}) {
+      final String input = "A1 B2 A1 D4 C3";
+      Reader reader = new StringReader(input);
+      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+      tokenizer.setReader(reader);
+      tokenizer.setEnableChecks(consumeAll);
+      TokenStream stream = tokenizer;
+      stream = tokenFilterFactory("Concatenate"//nocommit test an arg
+      ).create(stream);
+      assertTokenStreamContents(stream, new String[]{input.replace(' ', ConcatenateGraphFilter.SEP_CHAR)});
+    }
+  }
+
+  public void testRequired() throws Exception {
+    // no params are required
+    tokenFilterFactory("Concatenate");
+  }
+
+  /**
+   * Test that bogus arguments result in exception
+   */
+  public void testBogusArguments() throws Exception {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () ->
+        tokenFilterFactory("Concatenate", "bogusArg", "bogusValue"));
+    assertTrue(expected.getMessage().contains("Unknown parameters"));
+  }
+}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestFingerprintFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestFingerprintFilter.java
index 450447ac9bab..76bd617f4088 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestFingerprintFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestFingerprintFilter.java
@@ -69,4 +69,13 @@ public void testSingleToken() throws Exception {
     }
   }
 
+  public void testEmpty() throws Exception {
+    for (final boolean consumeAll : new boolean[] { true, false }) {
+      MockTokenizer tokenizer = whitespaceMockTokenizer("");
+      tokenizer.setEnableChecks(consumeAll);
+      TokenStream stream = new FingerprintFilter(tokenizer);
+      assertTokenStreamContents(stream, new String[0]);
+    }
+  }
+
 }
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
index 0675abe94a36..881366a93dc5 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
@@ -214,6 +214,8 @@ public Automaton toAutomaton(TokenStream in) throws IOException {
 
     if (endPosInc == 0 && finalOffsetGapAsHole && offsetAtt.endOffset() > maxOffset) {
       endPosInc = 1;
+    } else if (endPosInc > 0 && preservePositionIncrements==false) {
+      endPosInc = 0;
     }
     
     if (endPosInc > 0) {
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
index 13bd392aa9df..cdb49de2062f 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
@@ -19,7 +19,7 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.AnalyzerWrapper;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
-import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 
 /**
  * Wraps an {@link org.apache.lucene.analysis.Analyzer}
@@ -37,24 +37,11 @@
  */
 public final class CompletionAnalyzer extends AnalyzerWrapper {
 
-  /**
-   * Represents the separation between tokens, if
-   * <code>preserveSep</code> is <code>true</code>
-   * <p>
-   * Same label is used as a delimiter in the {@link org.apache.lucene.search.suggest.document.CompletionTokenStream}
-   * payload
-   */
-  final static int SEP_LABEL = NRTSuggesterBuilder.PAYLOAD_SEP;
-
   /**
    * Represent a hole character, inserted by {@link org.apache.lucene.analysis.TokenStreamToAutomaton}
    */
   final static int HOLE_CHARACTER = TokenStreamToAutomaton.HOLE;
 
-  final static int DEFAULT_MAX_GRAPH_EXPANSIONS = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
-  final static boolean DEFAULT_PRESERVE_SEP = true;
-  final static boolean DEFAULT_PRESERVE_POSITION_INCREMENTS = true;
-
   private final Analyzer analyzer;
 
   /**
@@ -101,7 +88,7 @@ public CompletionAnalyzer(Analyzer analyzer, boolean preserveSep, boolean preser
    * preserving token separation, position increments and no limit on graph expansions
    */
   public CompletionAnalyzer(Analyzer analyzer) {
-    this(analyzer, DEFAULT_PRESERVE_SEP, DEFAULT_PRESERVE_POSITION_INCREMENTS, DEFAULT_MAX_GRAPH_EXPANSIONS);
+    this(analyzer, ConcatenateGraphFilter.DEFAULT_PRESERVE_SEP, ConcatenateGraphFilter.DEFAULT_PRESERVE_POSITION_INCREMENTS, ConcatenateGraphFilter.DEFAULT_MAX_GRAPH_EXPANSIONS);
   }
 
   /**
@@ -109,7 +96,7 @@ public CompletionAnalyzer(Analyzer analyzer) {
    * with no limit on graph expansions
    */
   public CompletionAnalyzer(Analyzer analyzer, boolean preserveSep, boolean preservePositionIncrements) {
-    this(analyzer, preserveSep, preservePositionIncrements, DEFAULT_MAX_GRAPH_EXPANSIONS);
+    this(analyzer, preserveSep, preservePositionIncrements, ConcatenateGraphFilter.DEFAULT_MAX_GRAPH_EXPANSIONS);
   }
 
   /**
@@ -117,7 +104,7 @@ public CompletionAnalyzer(Analyzer analyzer, boolean preserveSep, boolean preser
    * preserving token separation and position increments
    */
   public CompletionAnalyzer(Analyzer analyzer, int maxGraphExpansions) {
-    this(analyzer, DEFAULT_PRESERVE_SEP, DEFAULT_PRESERVE_POSITION_INCREMENTS, maxGraphExpansions);
+    this(analyzer, ConcatenateGraphFilter.DEFAULT_PRESERVE_SEP, ConcatenateGraphFilter.DEFAULT_PRESERVE_POSITION_INCREMENTS, maxGraphExpansions);
   }
 
   /**
@@ -143,7 +130,7 @@ protected Analyzer getWrappedAnalyzer(String fieldName) {
 
   @Override
   protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-    CompletionTokenStream tokenStream = new CompletionTokenStream(components.getTokenStream(),
+    ConcatenateGraphFilter tokenStream = new ConcatenateGraphFilter(components.getTokenStream(),
         preserveSep, preservePositionIncrements, maxGraphExpansions);
     return new TokenStreamComponents(components.getTokenizer(), tokenStream);
   }
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
index 49fe7d08dff1..1ed13734d88a 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
@@ -27,7 +27,7 @@
 import org.apache.lucene.search.suggest.BitsProducer;
 
 import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.HOLE_CHARACTER;
-import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.SEP_LABEL;
+import static org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter.SEP_CHAR;
 
 /**
  * Abstract {@link Query} that match documents containing terms with a specified prefix
@@ -158,7 +158,7 @@ private void validate(String termText) {
         case HOLE_CHARACTER:
           throw new IllegalArgumentException(
               "Term text cannot contain HOLE character U+001E; this character is reserved");
-        case SEP_LABEL:
+        case SEP_CHAR:
           throw new IllegalArgumentException(
               "Term text cannot contain unit separator character U+001F; this character is reserved");
         default:
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java
index 6217ca38f85f..5d56795c4b84 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java
@@ -22,6 +22,7 @@
 import java.util.Map;
 import java.util.TreeSet;
 
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Weight;
@@ -175,10 +176,10 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
       return new CompletionWeight(this, innerAutomaton);
     }
 
-    // if separators are preserved the fst contains a SEP_LABEL
+    // if separators are preserved the fst contains a SEP_CHAR
     // behind each gap. To have a matching automaton, we need to
-    // include the SEP_LABEL in the query as well
-    Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
+    // include the SEP_CHAR in the query as well
+    Automaton optionalSepLabel = Operations.optional(Automata.makeChar(ConcatenateGraphFilter.SEP_CHAR));
     Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerAutomaton);
     Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
     contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
@@ -302,9 +303,9 @@ private void setInnerWeight(IntsRef ref, int offset) {
           }
           ref.offset = ++i;
           assert ref.offset < ref.length : "input should not end with the context separator";
-          if (ref.ints[i] == CompletionAnalyzer.SEP_LABEL) {
+          if (ref.ints[i] == ConcatenateGraphFilter.SEP_CHAR) {
             ref.offset++;
-            assert ref.offset < ref.length : "input should not end with a context separator followed by SEP_LABEL";
+            assert ref.offset < ref.length : "input should not end with a context separator followed by SEP_CHAR";
           }
           ref.length = ref.length - ref.offset;
           refBuilder.copyInts(ref.ints, ref.offset, ref.length);
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java
index 4cb91b8053c5..84a99c07c658 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java
@@ -24,6 +24,7 @@
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
@@ -83,23 +84,24 @@ protected Iterable<CharSequence> contexts() {
   }
 
   @Override
-  protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
+  protected ConcatenateGraphFilter wrapTokenStream(TokenStream stream) {
     final Iterable<CharSequence> contexts = contexts();
     for (CharSequence context : contexts) {
       validate(context);
     }
-    CompletionTokenStream completionTokenStream;
-    if (stream instanceof CompletionTokenStream) {
-      completionTokenStream = (CompletionTokenStream) stream;
-      PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter(completionTokenStream.inputTokenStream, (char) CONTEXT_SEPARATOR, contexts);
-      completionTokenStream = new CompletionTokenStream(prefixTokenFilter,
-          completionTokenStream.preserveSep,
-          completionTokenStream.preservePositionIncrements,
-          completionTokenStream.maxGraphExpansions);
+    ConcatenateGraphFilter concatStream;
+    if (stream instanceof ConcatenateGraphFilter) {
+      //nocommit this is awkward; is there a better way avoiding re-creating the chain?
+      concatStream = (ConcatenateGraphFilter) stream;
+      PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter(concatStream.getInput(), (char) CONTEXT_SEPARATOR, contexts);
+      concatStream = new ConcatenateGraphFilter(prefixTokenFilter,
+          concatStream.preserveSep,
+          concatStream.preservePositionIncrements,
+          concatStream.maxGraphExpansions);
     } else {
-      completionTokenStream = new CompletionTokenStream(new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts));
+      concatStream = new ConcatenateGraphFilter(new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts));
     }
-    return completionTokenStream;
+    return concatStream;
   }
 
   @Override
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java
index b243f4ede83d..b5754e8d765c 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java
@@ -23,6 +23,7 @@
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreMode;
@@ -144,7 +145,7 @@ public FuzzyCompletionQuery(Analyzer analyzer, Term term, BitsProducer filter, i
 
   @Override
   public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
-    CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text());
+    ConcatenateGraphFilter stream = (ConcatenateGraphFilter) analyzer.tokenStream(getField(), getTerm().text());
     Set<IntsRef> refs = new HashSet<>();
     Automaton automaton = toLevenshteinAutomata(stream.toAutomaton(unicodeAware), refs);
     if (unicodeAware) {
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java
index 270463175d79..1dd2b1f7ae49 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java
@@ -19,6 +19,7 @@
 import java.io.IOException;
 import java.util.PriorityQueue;
 
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -43,6 +44,9 @@ final class NRTSuggesterBuilder {
    * in the output
    */
   public static final int PAYLOAD_SEP = '\u001F';
+  static {
+    assert PAYLOAD_SEP == ConcatenateGraphFilter.SEP_CHAR;
+  }
 
   /**
    * Marks end of the analyzed input and start of dedup
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java
index 7bb75e9261ca..2cffc761f69f 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java
@@ -19,6 +19,7 @@
 import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreMode;
@@ -68,7 +69,7 @@ public PrefixCompletionQuery(Analyzer analyzer, Term term, BitsProducer filter)
 
   @Override
   public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
-    CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text());
+    ConcatenateGraphFilter stream = (ConcatenateGraphFilter) analyzer.tokenStream(getField(), getTerm().text());
     return new CompletionWeight(this, stream.toAutomaton());
   }
 
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
index 7f06328ee1b0..0ee338f4e8c2 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
@@ -21,6 +21,7 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexOptions;
@@ -100,21 +101,21 @@ public SuggestField(String name, String value, int weight) {
 
   @Override
   public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
-    CompletionTokenStream completionStream = wrapTokenStream(super.tokenStream(analyzer, reuse));
+    ConcatenateGraphFilter completionStream = wrapTokenStream(super.tokenStream(analyzer, reuse));
     completionStream.setPayload(buildSuggestPayload());
     return completionStream;
   }
 
   /**
-   * Wraps a <code>stream</code> with a CompletionTokenStream.
+   * Wraps a <code>stream</code> with a ConcatenateGraphFilter.
    *
    * Subclasses can override this method to change the indexing pipeline.
    */
-  protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
-    if (stream instanceof CompletionTokenStream) {
-      return (CompletionTokenStream) stream;
+  protected ConcatenateGraphFilter wrapTokenStream(TokenStream stream) {
+    if (stream instanceof ConcatenateGraphFilter) {
+      return (ConcatenateGraphFilter) stream;
     } else {
-      return new CompletionTokenStream(stream);
+      return new ConcatenateGraphFilter(stream);
     }
   }
 
@@ -140,7 +141,7 @@ private BytesRef buildSuggestPayload() {
 
   private boolean isReserved(char c) {
     switch (c) {
-      case CompletionAnalyzer.SEP_LABEL:
+      case ConcatenateGraphFilter.SEP_CHAR:
       case CompletionAnalyzer.HOLE_CHARACTER:
       case NRTSuggesterBuilder.END_BYTE:
         return true;
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
index 0c3b254c132e..23b1267a19a1 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
@@ -21,6 +21,7 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
@@ -109,21 +110,21 @@ public void testTokenStream() throws Exception {
     CharsRefBuilder builder = new CharsRefBuilder();
     builder.append("context1");
     builder.append(((char) ContextSuggestField.CONTEXT_SEPARATOR));
-    builder.append(((char) CompletionAnalyzer.SEP_LABEL));
+    builder.append(ConcatenateGraphFilter.SEP_CHAR);
     builder.append("input");
     expectedOutputs[0] = builder.toCharsRef().toString();
     builder.clear();
     builder.append("context2");
     builder.append(((char) ContextSuggestField.CONTEXT_SEPARATOR));
-    builder.append(((char) CompletionAnalyzer.SEP_LABEL));
+    builder.append(ConcatenateGraphFilter.SEP_CHAR);
     builder.append("input");
     expectedOutputs[1] = builder.toCharsRef().toString();
-    TokenStream stream = new CompletionTokenStreamTest.PayloadAttrToTypeAttrFilter(field.tokenStream(analyzer, null));
-    assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 1}, null, null);
+    TokenStream stream = new TestSuggestField.PayloadAttrToTypeAttrFilter(field.tokenStream(analyzer, null));
+    assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 0}, null, null);
 
     CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer);
-    stream = new CompletionTokenStreamTest.PayloadAttrToTypeAttrFilter(field.tokenStream(completionAnalyzer, null));
-    assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 1}, null, null);
+    stream = new TestSuggestField.PayloadAttrToTypeAttrFilter(field.tokenStream(completionAnalyzer, null));
+    assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 0}, null, null);
   }
 
   @Test
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
index a6659e082d5d..a083535917ac 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
@@ -32,7 +32,11 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.lucene70.Lucene70Codec;
@@ -99,7 +103,7 @@ public void testNegativeWeight() throws Exception {
   public void testReservedChars() throws Exception {
     CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
     charsRefBuilder.append("sugg");
-    charsRefBuilder.setCharAt(2, (char) CompletionAnalyzer.SEP_LABEL);
+    charsRefBuilder.setCharAt(2, ConcatenateGraphFilter.SEP_CHAR);
     IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
       new SuggestField("name", charsRefBuilder.toString(), 1);
     });
@@ -144,11 +148,11 @@ public void testTokenStream() throws Exception {
       output.writeByte(SuggestField.TYPE);
     }
     BytesRef payload = new BytesRef(byteArrayOutputStream.toByteArray());
-    TokenStream stream = new CompletionTokenStreamTest.PayloadAttrToTypeAttrFilter(suggestField.tokenStream(analyzer, null));
+    TokenStream stream = new PayloadAttrToTypeAttrFilter(suggestField.tokenStream(analyzer, null));
     assertTokenStreamContents(stream, new String[] {"input"}, null, null, new String[]{payload.utf8ToString()}, new int[]{1}, null, null);
 
     CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer);
-    stream = new CompletionTokenStreamTest.PayloadAttrToTypeAttrFilter(suggestField.tokenStream(completionAnalyzer, null));
+    stream = new PayloadAttrToTypeAttrFilter(suggestField.tokenStream(completionAnalyzer, null));
     assertTokenStreamContents(stream, new String[] {"input"}, null, null, new String[]{payload.utf8ToString()}, new int[]{1}, null, null);
   }
 
@@ -894,4 +898,23 @@ public PostingsFormat getPostingsFormatForField(String field) {
     iwc.setCodec(filterCodec);
     return iwc;
   }
+
+  public final static class PayloadAttrToTypeAttrFilter extends TokenFilter {
+    private PayloadAttribute payload = addAttribute(PayloadAttribute.class);
+    private TypeAttribute type = addAttribute(TypeAttribute.class);
+
+    protected PayloadAttrToTypeAttrFilter(TokenStream input) {
+      super(input);
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+      if (input.incrementToken()) {
+        // we move them over so we can assert them more easily in the tests
+        type.setType(payload.getPayload().utf8ToString());
+        return true;
+      }
+      return false;
+    }
+  }
 }

From d813eca1353d32c4187970141beefe7e784bd2a3 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Thu, 31 May 2018 16:39:09 -0400
Subject: [PATCH 2/3] * ConcatenateGraphFilter now extends TokenStream and
 calls toAutomaton in incrementToken() * ConcatenateGraphFilter SEP_CHAR put
 back to SEP_LABEL (int) * ConcatenateGraphFilter end() like was before; no
 captureState * Added a CompletionTokenStream wrapper to
 ConcatenateGraphFilter to ** provide the NRT suggester access to state it
 wants (now is private in CGF) ** setPayload (now removed from CGF). * Fixed
 CGF Factory license header to be ASF

---
 .../miscellaneous/ConcatenateGraphFilter.java | 108 ++++++++++--------
 .../ConcatenateGraphFilterFactory.java        |  55 +++++----
 .../analysis/core/TestRandomChains.java       |   7 +-
 .../TestConcatenateGraphFilter.java           |  98 ++++++----------
 .../TestConcatenateGraphFilterFactory.java    |  38 +++++-
 .../suggest/document/CompletionAnalyzer.java  |   2 +-
 .../suggest/document/CompletionQuery.java     |   4 +-
 .../document/CompletionTokenStream.java       |  82 +++++++++++++
 .../search/suggest/document/ContextQuery.java |  10 +-
 .../suggest/document/ContextSuggestField.java |  17 ++-
 .../document/FuzzyCompletionQuery.java        |   3 +-
 .../suggest/document/NRTSuggesterBuilder.java |   5 +-
 .../document/PrefixCompletionQuery.java       |   3 +-
 .../search/suggest/document/SuggestField.java |  12 +-
 .../document/TestContextSuggestField.java     |   4 +-
 .../suggest/document/TestSuggestField.java    |   2 +-
 16 files changed, 271 insertions(+), 179 deletions(-)
 create mode 100644 lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java
index e222d6b680d1..b90ed45260a0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java
@@ -18,11 +18,9 @@
 
 import java.io.IOException;
 
-import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.util.AttributeImpl;
@@ -47,7 +45,7 @@
  *
  * @lucene.experimental
  */
-public final class ConcatenateGraphFilter extends TokenFilter {
+public final class ConcatenateGraphFilter extends TokenStream {
 
   /*
    * Token stream which converts a provided token stream to an automaton.
@@ -61,25 +59,22 @@ public final class ConcatenateGraphFilter extends TokenFilter {
    * Represents the separation between tokens, if
    * <code>preserveSep</code> is <code>true</code>.
    */
-  public final static char SEP_CHAR = '\u001F';
+  public final static int SEP_LABEL = TokenStreamToAutomaton.POS_SEP;
   public final static int DEFAULT_MAX_GRAPH_EXPANSIONS = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
   public final static boolean DEFAULT_PRESERVE_SEP = true;
   public final static boolean DEFAULT_PRESERVE_POSITION_INCREMENTS = true;
 
-  private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);
   private final BytesRefBuilderTermAttribute bytesAtt = addAttribute(BytesRefBuilderTermAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
 
-  //nocommit add getters for these instead?  Accessor: org.apache.lucene.search.suggest.document.ContextSuggestField.wrapTokenStream
-  public final boolean preserveSep;
-  public final boolean preservePositionIncrements;
-  public final int maxGraphExpansions;
-  public TokenStream getInput() { return super.input; } // nocommit remove? see ContextSuggestField.wrapTokenStream
+  private final TokenStream inputTokenStream;
+  private final boolean preserveSep;
+  private final boolean preservePositionIncrements;
+  private final int maxGraphExpansions;
 
   private LimitedFiniteStringsIterator finiteStrings;
-  private BytesRef payload;
   private CharTermAttribute charTermAttribute;
-  private State endState;
+  private boolean wasReset = false;
 
   /**
    * Creates a token stream to convert <code>input</code> to a token stream
@@ -96,8 +91,11 @@ public ConcatenateGraphFilter(TokenStream inputTokenStream) {
    * of accepted strings by its token stream graph.
    *
    * @param inputTokenStream The input/incoming TokenStream
-   * @param preserveSep
-   * @param preservePositionIncrements Whether to generate holes in the automaton for missing positions
+   * @param preserveSep Whether {@link #SEP_LABEL} should separate the input tokens in the concatenated token
+   * @param preservePositionIncrements Whether to add an empty token for missing positions.
+   *                                   The effect is a consecutive {@link #SEP_LABEL}.
+   *                                   When false, it's as if there were no missing positions
+   *                                     (we pretend the surrounding tokens were adjacent).
    * @param maxGraphExpansions If the tokenStream graph has more than this many possible paths through, then we'll throw
    *                           {@link TooComplexToDeterminizeException} to preserve the stability and memory of the
    *                           machine.
@@ -106,22 +104,34 @@ public ConcatenateGraphFilter(TokenStream inputTokenStream) {
    *
    */
   public ConcatenateGraphFilter(TokenStream inputTokenStream, boolean preserveSep, boolean preservePositionIncrements, int maxGraphExpansions) {
-    super(inputTokenStream);
+    // Don't call the super(input) ctor - this is a true delegate and has a new attribute source since we consume
+    // the input stream entirely in the first call to incrementToken
+    this.inputTokenStream = inputTokenStream;
     this.preserveSep = preserveSep;
     this.preservePositionIncrements = preservePositionIncrements;
     this.maxGraphExpansions = maxGraphExpansions;
   }
 
-  /**
-   * Sets a payload available throughout successive token stream enumeration
-   * @lucene.internal
-   */
-  public void setPayload(BytesRef payload) {
-    this.payload = payload;
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    // we only capture this if we really need it to save the UTF-8 to UTF-16 conversion
+    charTermAttribute = getAttribute(CharTermAttribute.class); // may return null
+    finiteStrings = null;
+    wasReset = true;
   }
 
   @Override
   public boolean incrementToken() throws IOException {
+    if (finiteStrings == null) {
+      if (wasReset == false) {
+        throw new IllegalStateException("reset() missing before incrementToken");
+      }
+      // lazy init/consume
+      Automaton automaton = toAutomaton(); // calls reset(), incrementToken() repeatedly, end(), and close() on inputTokenStream
+      finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
+    }
+
     IntsRef string = finiteStrings.next();
     if (string == null) {
       return false;
@@ -138,52 +148,53 @@ public boolean incrementToken() throws IOException {
       charTermAttribute.setLength(0);
       charTermAttribute.append(bytesAtt.toUTF16());
     }
-    if (payload != null) {
-      payloadAttr.setPayload(this.payload);
-    }
 
     return true;
   }
 
   @Override
   public void end() throws IOException {
-    restoreState(endState);
+    super.end();
+    //nocommit I think we don't need to delegate this under any condition
+    if (finiteStrings == null) {
+      //toAutomaton not called yet, so delegate lifecycle
+      inputTokenStream.end();
+    }
   }
 
-  //nocommit move method to before incrementToken
   @Override
-  public void reset() throws IOException {
-    Automaton automaton = toAutomaton(); // calls reset(), incrementToken() repeatedly, then end()
-    endState = captureState();
-    finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
-    if (charTermAttribute == null) {
-      // we only capture this if we really need it to save the UTF-8 to UTF-16 conversion
-      charTermAttribute = getAttribute(CharTermAttribute.class); // null if none; it's okay
+  public void close() throws IOException {
+    super.close();
+    if (finiteStrings == null) {
+      //toAutomaton not called yet, so delegate lifecycle
+      inputTokenStream.close();
+    } else {
+      finiteStrings = null;
     }
+    wasReset = false;//reset
   }
 
   /**
-   * Converts the token stream to an automaton,
-   * treating the transition labels as utf-8
+   * Converts the tokenStream to an automaton, treating the transition labels as utf-8.  Closes it.
    */
   public Automaton toAutomaton() throws IOException {
     return toAutomaton(false);
   }
 
   /**
-   * Converts the tokenStream to an automaton
+   * Converts the tokenStream to an automaton.  Closes it.
    */
   public Automaton toAutomaton(boolean unicodeAware) throws IOException {
     // TODO refactor this
     // maybe we could hook up a modified automaton from TermAutomatonQuery here?
-    Automaton automaton = null;
+    Automaton automaton;
     try {
       // Create corresponding automaton: labels are bytes
       // from each analyzed token, with byte 0 used as
       // separator between tokens:
       final TokenStreamToAutomaton tsta;
       if (preserveSep) {
-        tsta = new EscapingTokenStreamToAutomaton(SEP_CHAR);
+        tsta = new EscapingTokenStreamToAutomaton(SEP_LABEL);
       } else {
         // When we're not preserving sep, we don't steal 0xff
         // byte, so we don't need to do any escaping:
@@ -192,28 +203,29 @@ public Automaton toAutomaton(boolean unicodeAware) throws IOException {
       tsta.setPreservePositionIncrements(preservePositionIncrements);
       tsta.setUnicodeArcs(unicodeAware);
 
-      automaton = tsta.toAutomaton(input);
+      automaton = tsta.toAutomaton(inputTokenStream);
     } finally {
-      IOUtils.closeWhileHandlingException(input);
+      IOUtils.closeWhileHandlingException(inputTokenStream);
     }
 
     // TODO: we can optimize this somewhat by determinizing
     // while we convert
-    automaton = replaceSep(automaton, preserveSep, SEP_CHAR);
+    automaton = replaceSep(automaton, preserveSep, SEP_LABEL);
     // This automaton should not blow up during determinize:
     return Operations.determinize(automaton, maxGraphExpansions);
   }
 
   /**
-   * Just escapes the 0xff byte (which we still for SEP).
+   * Just escapes the {@link #SEP_LABEL} byte with an extra.
    */
   private static final class EscapingTokenStreamToAutomaton extends TokenStreamToAutomaton {
 
     final BytesRefBuilder spare = new BytesRefBuilder();
-    private char sepLabel;
+    final byte sepLabel;
 
-    public EscapingTokenStreamToAutomaton(char sepLabel) {
-      this.sepLabel = sepLabel;
+    public EscapingTokenStreamToAutomaton(int sepLabel) {
+      assert sepLabel <= Byte.MAX_VALUE;
+      this.sepLabel = (byte) sepLabel;
     }
 
     @Override
@@ -221,9 +233,9 @@ protected BytesRef changeToken(BytesRef in) {
       int upto = 0;
       for (int i = 0; i < in.length; i++) {
         byte b = in.bytes[in.offset + i];
-        if (b == (byte) sepLabel) {
+        if (b == sepLabel) {
           spare.grow(upto + 2);
-          spare.setByteAt(upto++, (byte) sepLabel);
+          spare.setByteAt(upto++, sepLabel);
           spare.setByteAt(upto++, b);
         } else {
           spare.grow(upto + 1);
@@ -260,7 +272,7 @@ private static Automaton replaceSep(Automaton a, boolean preserveSep, int sepLab
         if (t.min == TokenStreamToAutomaton.POS_SEP) {
           assert t.max == TokenStreamToAutomaton.POS_SEP;
           if (preserveSep) {
-            // Remap to SEP_CHAR:
+            // Remap to SEP_LABEL:
             result.addTransition(state, t.dest, sepLabel);
           } else {
             result.addEpsilon(state, t.dest);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java
index d8d103944010..5d8ccbacf3d9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilterFactory.java
@@ -1,38 +1,47 @@
 /*
- This software was produced for the U. S. Government
- under Contract No. W15P7T-11-C-F600, and is
- subject to the Rights in Noncommercial Computer Software
- and Noncommercial Computer Software Documentation
- Clause 252.227-7014 (JUN 1995)
-
- Copyright 2013 The MITRE Corporation. All Rights Reserved.
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
-
 package org.apache.lucene.analysis.miscellaneous;
 
 import java.util.Map;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
 
 /**
  * Factory for {@link ConcatenateGraphFilter}.
  *
- * <pre class="prettyprint">
- * nocommit update
- * </pre>
+ * <ul>
+ *   <li><tt>preserveSep</tt>:
+ *                            Whether {@link ConcatenateGraphFilter#SEP_LABEL}
+ *                            should separate the input tokens in the concatenated token
+ *                            </li>
+ *   <li><tt>preservePositionIncrements</tt>:
+ *                                       Whether to add an empty token for missing positions.
+ *                                       The effect is a consecutive {@link ConcatenateGraphFilter#SEP_LABEL}.
+ *                                       When false, it's as if there were no missing positions
+ *                                         (we pretend the surrounding tokens were adjacent).
+ *                                       </li>
+ *   <li><tt>maxGraphExpansions</tt>:
+ *                            If the tokenStream graph has more than this many possible paths through, then we'll throw
+ *                            {@link TooComplexToDeterminizeException} to preserve the stability and memory of the
+ *                            machine.
+ *                            </li>
+ * </ul>
  * @see ConcatenateGraphFilter
  * @since 7.4.0
  */
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
index 8cb159129ef6..d94b39607bb8 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
@@ -72,6 +72,7 @@
 import org.apache.lucene.analysis.hunspell.Dictionary;
 import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
 import org.apache.lucene.analysis.minhash.MinHashFilter;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
@@ -119,10 +120,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
 
   private static final Set<Class<?>> avoidConditionals = new HashSet<>();
   static {
-    // Fingerprint filter needs to consume the whole tokenstream, so conditionals don't make sense here
+    // These filters needs to consume the whole tokenstream, so conditionals don't make sense here
     avoidConditionals.add(FingerprintFilter.class);
-    // Ditto MinHashFilter
     avoidConditionals.add(MinHashFilter.class);
+    avoidConditionals.add(ConcatenateGraphFilter.class);
   }
 
   private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>();
@@ -156,7 +157,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
               return !((Boolean) args[2]); // args are broken if consumeAllTokens is false
           });
       for (Class<?> c : Arrays.<Class<?>>asList(
-          // doesn't actual reset itself!
+          // doesn't actual reset itself!  TODO this statement is probably obsolete as of LUCENE-6121 ?
           CachingTokenFilter.class,
           // LUCENE-8092: doesn't handle graph inputs
           CJKBigramFilter.class,
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilter.java
index c4cb0d5aa259..925d5405e2a6 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilter.java
@@ -23,12 +23,9 @@
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.synonym.SynonymFilter;
 import org.apache.lucene.analysis.synonym.SynonymMap;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;
@@ -36,28 +33,24 @@
 
 public class TestConcatenateGraphFilter extends BaseTokenStreamTestCase {
 
+  private static final char SEP_LABEL = (char) ConcatenateGraphFilter.SEP_LABEL;
+  
   @Test
   public void testBasic() throws Exception {
     Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     String input = "mykeyword";
-    BytesRef payload = new BytesRef("payload");
     tokenStream.setReader(new StringReader(input));
-    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenStream);
-    concatStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
-    assertTokenStreamContents(stream, new String[] {input}, null, null, new String[] {payload.utf8ToString()}, new int[] { 1 }, null, null);
+    ConcatenateGraphFilter stream = new ConcatenateGraphFilter(tokenStream);
+    assertTokenStreamContents(stream, new String[] {input}, null, null, new int[] { 1 });
   }
 
   @Test
   public void testWithNoPreserveSep() throws Exception {
     Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     String input = "mykeyword another keyword";
-    BytesRef payload = new BytesRef("payload");
     tokenStream.setReader(new StringReader(input));
-    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenStream, false, false, 100);
-    concatStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
-    assertTokenStreamContents(stream, new String[] {"mykeywordanotherkeyword"}, null, null, new String[] {payload.utf8ToString()}, new int[] { 1 }, null, null);
+    ConcatenateGraphFilter stream = new ConcatenateGraphFilter(tokenStream, false, false, 100);
+    assertTokenStreamContents(stream, new String[] {"mykeywordanotherkeyword"}, null, null, new int[] { 1 });
   }
 
   @Test
@@ -65,17 +58,14 @@ public void testWithMultipleTokens() throws Exception {
     Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     String input = "mykeyword another keyword";
     tokenStream.setReader(new StringReader(input));
-    BytesRef payload = new BytesRef("payload");
-    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenStream);
-    concatStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
+    ConcatenateGraphFilter stream = new ConcatenateGraphFilter(tokenStream);
     CharsRefBuilder builder = new CharsRefBuilder();
     builder.append("mykeyword");
-    builder.append((ConcatenateGraphFilter.SEP_CHAR));
+    builder.append(SEP_LABEL);
     builder.append("another");
-    builder.append((ConcatenateGraphFilter.SEP_CHAR));
+    builder.append(SEP_LABEL);
     builder.append("keyword");
-    assertTokenStreamContents(stream, new String[]{builder.toCharsRef().toString()}, null, null, new String[]{payload.utf8ToString()}, new int[]{1}, null, null);
+    assertTokenStreamContents(stream, new String[]{builder.toCharsRef().toString()}, null, null, new int[]{1});
   }
 
   @Test
@@ -85,11 +75,8 @@ public void testWithSynonym() throws Exception {
     Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     tokenizer.setReader(new StringReader("mykeyword"));
     SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
-    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(filter);
-    BytesRef payload = new BytesRef("payload");
-    concatStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
-    assertTokenStreamContents(stream, new String[] {"mykeyword", "mysynonym"}, null, null, new String[] {payload.utf8ToString(), payload.utf8ToString()}, new int[] { 1, 0 }, null, null);
+    ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter);
+    assertTokenStreamContents(stream, new String[] {"mykeyword", "mysynonym"}, null, null, new int[] { 1, 0 });
   }
 
   @Test
@@ -101,25 +88,23 @@ public void testWithSynonyms() throws Exception {
     tokenStream.setReader(new StringReader(input));
     SynonymFilter filter = new SynonymFilter(tokenStream, builder.build(), true);
     BytesRef payload = new BytesRef("payload");
-    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(filter, true, false, 100);
-    concatStream.setPayload(payload);
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
+    ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter, true, false, 100);
     String[] expectedOutputs = new String[2];
     CharsRefBuilder expectedOutput = new CharsRefBuilder();
     expectedOutput.append("mykeyword");
-    expectedOutput.append(ConcatenateGraphFilter.SEP_CHAR);
+    expectedOutput.append(SEP_LABEL);
     expectedOutput.append("another");
-    expectedOutput.append(ConcatenateGraphFilter.SEP_CHAR);
+    expectedOutput.append(SEP_LABEL);
     expectedOutput.append("keyword");
     expectedOutputs[0] = expectedOutput.toCharsRef().toString();
     expectedOutput.clear();
     expectedOutput.append("mysynonym");
-    expectedOutput.append(ConcatenateGraphFilter.SEP_CHAR);
+    expectedOutput.append(SEP_LABEL);
     expectedOutput.append("another");
-    expectedOutput.append(ConcatenateGraphFilter.SEP_CHAR);
+    expectedOutput.append(SEP_LABEL);
     expectedOutput.append("keyword");
     expectedOutputs[1] = expectedOutput.toCharsRef().toString();
-    assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 0}, null, null);
+    assertTokenStreamContents(stream, expectedOutputs, null, null, new int[]{1, 0});
   }
 
   @Test
@@ -132,16 +117,16 @@ public void testWithStopword() throws Exception {
       ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenFilter, true, preservePosInc, 10);
       CharsRefBuilder builder = new CharsRefBuilder();
       if (preservePosInc) {
-        builder.append((ConcatenateGraphFilter.SEP_CHAR));
+        builder.append(SEP_LABEL);
       }
       builder.append("mykeyword");
-      builder.append((ConcatenateGraphFilter.SEP_CHAR));
+      builder.append(SEP_LABEL);
       if (preservePosInc) {
-        builder.append((ConcatenateGraphFilter.SEP_CHAR));
+        builder.append(SEP_LABEL);
       }
       builder.append("keyword");
       if (preservePosInc) {
-        builder.append((ConcatenateGraphFilter.SEP_CHAR));
+        builder.append(SEP_LABEL);
       }
       assertTokenStreamContents(concatStream, new String[]{builder.toCharsRef().toString()});
     }
@@ -162,18 +147,17 @@ public void testValidNumberOfExpansions() throws IOException {
     tokenizer.setReader(new StringReader(valueBuilder.toString()));
     SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
 
-    ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(filter);
-    concatStream.setPayload(new BytesRef());
-    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(concatStream);
-    stream.reset();
-    ConcatenateGraphFilter.BytesRefBuilderTermAttribute attr = stream.addAttribute(ConcatenateGraphFilter.BytesRefBuilderTermAttribute.class);
-    int count = 0;
-    while(stream.incrementToken()) {
-      count++;
-      assertNotNull(attr.getBytesRef());
-      assertTrue(attr.getBytesRef().length > 0);
+    int count;
+    try (ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter)) {
+      stream.reset();
+      ConcatenateGraphFilter.BytesRefBuilderTermAttribute attr = stream.addAttribute(ConcatenateGraphFilter.BytesRefBuilderTermAttribute.class);
+      count = 0;
+      while (stream.incrementToken()) {
+        count++;
+        assertNotNull(attr.getBytesRef());
+        assertTrue(attr.getBytesRef().length > 0);
+      }
     }
-    stream.close();
     assertEquals(count, 256);
   }
 
@@ -183,22 +167,4 @@ public void testEmpty() throws IOException {
     assertTokenStreamContents(filter, new String[0]);
   }
 
-  public final static class PayloadAttrToTypeAttrFilter extends TokenFilter {
-    private PayloadAttribute payload = addAttribute(PayloadAttribute.class);
-    private TypeAttribute type = addAttribute(TypeAttribute.class);
-
-    protected PayloadAttrToTypeAttrFilter(TokenStream input) {
-      super(input);
-    }
-
-    @Override
-    public boolean incrementToken() throws IOException {
-      if (input.incrementToken()) {
-        // we move them over so we can assert them more easily in the tests
-        type.setType(payload.getPayload().utf8ToString());
-        return true;
-      }
-      return false;
-    }
-  }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilterFactory.java
index 5b3ebacf5495..1e149f03b1ba 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenateGraphFilterFactory.java
@@ -21,6 +21,7 @@
 import java.io.StringReader;
 
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
@@ -33,15 +34,42 @@ public void test() throws Exception {
       tokenizer.setReader(reader);
       tokenizer.setEnableChecks(consumeAll);
       TokenStream stream = tokenizer;
-      stream = tokenFilterFactory("Concatenate"//nocommit test an arg
-      ).create(stream);
-      assertTokenStreamContents(stream, new String[]{input.replace(' ', ConcatenateGraphFilter.SEP_CHAR)});
+      stream = tokenFilterFactory("ConcatenateGraph").create(stream);
+      assertTokenStreamContents(stream, new String[]{input.replace(' ', (char) ConcatenateGraphFilter.SEP_LABEL)});
     }
   }
 
+  public void testPreserveSep() throws Exception {
+    final String input = "A1 B2 A1 D4 C3";
+    final String output = "A1A1D4C3";
+    Reader reader = new StringReader(input);
+    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+    tokenizer.setReader(reader);
+    TokenStream stream = tokenizer;
+    stream = new StopFilter(stream, StopFilter.makeStopSet("B2"));
+    stream = tokenFilterFactory("ConcatenateGraph",
+        "preserveSep", "false"
+    ).create(stream);
+    assertTokenStreamContents(stream, new String[]{output});
+  }
+
+  public void testPreservePositionIncrements() throws Exception {
+    final String input = "A1 B2 A1 D4 C3";
+    final String output = "A1 A1 D4 C3";
+    Reader reader = new StringReader(input);
+    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+    tokenizer.setReader(reader);
+    TokenStream stream = tokenizer;
+    stream = new StopFilter(stream, StopFilter.makeStopSet("B2"));
+    stream = tokenFilterFactory("ConcatenateGraph",
+        "preservePositionIncrements", "false"
+        ).create(stream);
+    assertTokenStreamContents(stream, new String[]{output.replace(' ', (char) ConcatenateGraphFilter.SEP_LABEL)});
+  }
+
   public void testRequired() throws Exception {
     // no params are required
-    tokenFilterFactory("Concatenate");
+    tokenFilterFactory("ConcatenateGraph");
   }
 
   /**
@@ -49,7 +77,7 @@ public void testRequired() throws Exception {
    */
   public void testBogusArguments() throws Exception {
     IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () ->
-        tokenFilterFactory("Concatenate", "bogusArg", "bogusValue"));
+        tokenFilterFactory("ConcatenateGraph", "bogusArg", "bogusValue"));
     assertTrue(expected.getMessage().contains("Unknown parameters"));
   }
 }
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
index cdb49de2062f..8888382a5caa 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
@@ -130,7 +130,7 @@ protected Analyzer getWrappedAnalyzer(String fieldName) {
 
   @Override
   protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-    ConcatenateGraphFilter tokenStream = new ConcatenateGraphFilter(components.getTokenStream(),
+    CompletionTokenStream tokenStream = new CompletionTokenStream(components.getTokenStream(),
         preserveSep, preservePositionIncrements, maxGraphExpansions);
     return new TokenStreamComponents(components.getTokenizer(), tokenStream);
   }
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
index 1ed13734d88a..6be0c91117f3 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
@@ -27,7 +27,7 @@
 import org.apache.lucene.search.suggest.BitsProducer;
 
 import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.HOLE_CHARACTER;
-import static org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter.SEP_CHAR;
+import static org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter.SEP_LABEL;
 
 /**
  * Abstract {@link Query} that match documents containing terms with a specified prefix
@@ -158,7 +158,7 @@ private void validate(String termText) {
         case HOLE_CHARACTER:
           throw new IllegalArgumentException(
               "Term text cannot contain HOLE character U+001E; this character is reserved");
-        case SEP_CHAR:
+        case SEP_LABEL:
           throw new IllegalArgumentException(
               "Term text cannot contain unit separator character U+001F; this character is reserved");
         default:
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java
new file mode 100644
index 000000000000..0713f5bc17b9
--- /dev/null
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.suggest.document;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automaton;
+
+/**
+ * A {@link ConcatenateGraphFilter} but we can set the payload and provide access to config options.
+ * @lucene.internal
+ */
+public final class CompletionTokenStream extends TokenFilter {
+  // package accessible on purpose
+  final TokenStream inputTokenStream;
+  final boolean preserveSep;
+  final boolean preservePositionIncrements;
+  final int maxGraphExpansions;
+
+  private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+
+  private BytesRef payload; // note doesn't participate in TokenStream lifecycle; it's effectively constant
+
+  CompletionTokenStream(TokenStream inputTokenStream) {
+    this(inputTokenStream,
+        ConcatenateGraphFilter.DEFAULT_PRESERVE_SEP,
+        ConcatenateGraphFilter.DEFAULT_PRESERVE_POSITION_INCREMENTS,
+        ConcatenateGraphFilter.DEFAULT_MAX_GRAPH_EXPANSIONS);
+  }
+
+  CompletionTokenStream(TokenStream inputTokenStream, boolean preserveSep, boolean preservePositionIncrements, int maxGraphExpansions) {
+    super(new ConcatenateGraphFilter(inputTokenStream, preserveSep, preservePositionIncrements, maxGraphExpansions));
+    this.inputTokenStream = inputTokenStream;
+    this.preserveSep = preserveSep;
+    this.preservePositionIncrements = preservePositionIncrements;
+    this.maxGraphExpansions = maxGraphExpansions;
+  }
+
+  public void setPayload(BytesRef payload) {
+    this.payload = payload;
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      payloadAtt.setPayload(payload);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  /** @see ConcatenateGraphFilter#toAutomaton()  */
+  public Automaton toAutomaton() throws IOException {
+    return ((ConcatenateGraphFilter)input).toAutomaton();
+  }
+
+  /** @see ConcatenateGraphFilter#toAutomaton(boolean) */
+  public Automaton toAutomaton(boolean unicodeAware) throws IOException {
+    return ((ConcatenateGraphFilter)input).toAutomaton(unicodeAware);
+  }
+}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java
index 5d56795c4b84..1a2680cb553e 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java
@@ -176,10 +176,10 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
       return new CompletionWeight(this, innerAutomaton);
     }
 
-    // if separators are preserved the fst contains a SEP_CHAR
+    // if separators are preserved the fst contains a SEP_LABEL
     // behind each gap. To have a matching automaton, we need to
-    // include the SEP_CHAR in the query as well
-    Automaton optionalSepLabel = Operations.optional(Automata.makeChar(ConcatenateGraphFilter.SEP_CHAR));
+    // include the SEP_LABEL in the query as well
+    Automaton optionalSepLabel = Operations.optional(Automata.makeChar(ConcatenateGraphFilter.SEP_LABEL));
     Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerAutomaton);
     Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
     contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
@@ -303,9 +303,9 @@ private void setInnerWeight(IntsRef ref, int offset) {
           }
           ref.offset = ++i;
           assert ref.offset < ref.length : "input should not end with the context separator";
-          if (ref.ints[i] == ConcatenateGraphFilter.SEP_CHAR) {
+          if (ref.ints[i] == ConcatenateGraphFilter.SEP_LABEL) {
             ref.offset++;
-            assert ref.offset < ref.length : "input should not end with a context separator followed by SEP_CHAR";
+            assert ref.offset < ref.length : "input should not end with a context separator followed by SEP_LABEL";
           }
           ref.length = ref.length - ref.offset;
           refBuilder.copyInts(ref.ints, ref.offset, ref.length);
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java
index 84a99c07c658..d608339a1f8b 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java
@@ -24,7 +24,6 @@
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
@@ -84,22 +83,22 @@ protected Iterable<CharSequence> contexts() {
   }
 
   @Override
-  protected ConcatenateGraphFilter wrapTokenStream(TokenStream stream) {
+  protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
     final Iterable<CharSequence> contexts = contexts();
     for (CharSequence context : contexts) {
       validate(context);
     }
-    ConcatenateGraphFilter concatStream;
-    if (stream instanceof ConcatenateGraphFilter) {
-      //nocommit this is awkward; is there a better way avoiding re-creating the chain?
-      concatStream = (ConcatenateGraphFilter) stream;
-      PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter(concatStream.getInput(), (char) CONTEXT_SEPARATOR, contexts);
-      concatStream = new ConcatenateGraphFilter(prefixTokenFilter,
+    CompletionTokenStream concatStream;
+    if (stream instanceof CompletionTokenStream) {
+      //TODO this is awkward; is there a better way avoiding re-creating the chain?
+      concatStream = (CompletionTokenStream) stream;
+      PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter(concatStream.inputTokenStream, (char) CONTEXT_SEPARATOR, contexts);
+      concatStream = new CompletionTokenStream(prefixTokenFilter,
           concatStream.preserveSep,
           concatStream.preservePositionIncrements,
           concatStream.maxGraphExpansions);
     } else {
-      concatStream = new ConcatenateGraphFilter(new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts));
+      concatStream = new CompletionTokenStream(new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts));
     }
     return concatStream;
   }
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java
index b5754e8d765c..b243f4ede83d 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java
@@ -23,7 +23,6 @@
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreMode;
@@ -145,7 +144,7 @@ public FuzzyCompletionQuery(Analyzer analyzer, Term term, BitsProducer filter, i
 
   @Override
   public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
-    ConcatenateGraphFilter stream = (ConcatenateGraphFilter) analyzer.tokenStream(getField(), getTerm().text());
+    CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text());
     Set<IntsRef> refs = new HashSet<>();
     Automaton automaton = toLevenshteinAutomata(stream.toAutomaton(unicodeAware), refs);
     if (unicodeAware) {
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java
index 1dd2b1f7ae49..5ca4993396fa 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java
@@ -43,10 +43,7 @@ final class NRTSuggesterBuilder {
    * Label used to separate surface form and docID
    * in the output
    */
-  public static final int PAYLOAD_SEP = '\u001F';
-  static {
-    assert PAYLOAD_SEP == ConcatenateGraphFilter.SEP_CHAR;
-  }
+  public static final int PAYLOAD_SEP = ConcatenateGraphFilter.SEP_LABEL;
 
   /**
    * Marks end of the analyzed input and start of dedup
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java
index 2cffc761f69f..7bb75e9261ca 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java
@@ -19,7 +19,6 @@
 import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreMode;
@@ -69,7 +68,7 @@ public PrefixCompletionQuery(Analyzer analyzer, Term term, BitsProducer filter)
 
   @Override
   public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
-    ConcatenateGraphFilter stream = (ConcatenateGraphFilter) analyzer.tokenStream(getField(), getTerm().text());
+    CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text());
     return new CompletionWeight(this, stream.toAutomaton());
   }
 
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
index 0ee338f4e8c2..ce8c2e94cd13 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
@@ -101,7 +101,7 @@ public SuggestField(String name, String value, int weight) {
 
   @Override
   public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
-    ConcatenateGraphFilter completionStream = wrapTokenStream(super.tokenStream(analyzer, reuse));
+    CompletionTokenStream completionStream = wrapTokenStream(super.tokenStream(analyzer, reuse));
     completionStream.setPayload(buildSuggestPayload());
     return completionStream;
   }
@@ -111,11 +111,11 @@ public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
    *
    * Subclasses can override this method to change the indexing pipeline.
    */
-  protected ConcatenateGraphFilter wrapTokenStream(TokenStream stream) {
-    if (stream instanceof ConcatenateGraphFilter) {
-      return (ConcatenateGraphFilter) stream;
+  protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
+    if (stream instanceof CompletionTokenStream) {
+      return (CompletionTokenStream) stream;
     } else {
-      return new ConcatenateGraphFilter(stream);
+      return new CompletionTokenStream(stream);
     }
   }
 
@@ -141,7 +141,7 @@ private BytesRef buildSuggestPayload() {
 
   private boolean isReserved(char c) {
     switch (c) {
-      case ConcatenateGraphFilter.SEP_CHAR:
+      case ConcatenateGraphFilter.SEP_LABEL:
       case CompletionAnalyzer.HOLE_CHARACTER:
       case NRTSuggesterBuilder.END_BYTE:
         return true;
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
index 23b1267a19a1..8beea1296227 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
@@ -110,13 +110,13 @@ public void testTokenStream() throws Exception {
     CharsRefBuilder builder = new CharsRefBuilder();
     builder.append("context1");
     builder.append(((char) ContextSuggestField.CONTEXT_SEPARATOR));
-    builder.append(ConcatenateGraphFilter.SEP_CHAR);
+    builder.append((char) ConcatenateGraphFilter.SEP_LABEL);
     builder.append("input");
     expectedOutputs[0] = builder.toCharsRef().toString();
     builder.clear();
     builder.append("context2");
     builder.append(((char) ContextSuggestField.CONTEXT_SEPARATOR));
-    builder.append(ConcatenateGraphFilter.SEP_CHAR);
+    builder.append((char) ConcatenateGraphFilter.SEP_LABEL);
     builder.append("input");
     expectedOutputs[1] = builder.toCharsRef().toString();
     TokenStream stream = new TestSuggestField.PayloadAttrToTypeAttrFilter(field.tokenStream(analyzer, null));
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
index a083535917ac..e6d7062c925b 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
@@ -103,7 +103,7 @@ public void testNegativeWeight() throws Exception {
   public void testReservedChars() throws Exception {
     CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
     charsRefBuilder.append("sugg");
-    charsRefBuilder.setCharAt(2, ConcatenateGraphFilter.SEP_CHAR);
+    charsRefBuilder.setCharAt(2, (char) ConcatenateGraphFilter.SEP_LABEL);
     IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
       new SuggestField("name", charsRefBuilder.toString(), 1);
     });

From ac301b4075e494070d5851e358b4626b69e6439f Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Fri, 1 Jun 2018 00:24:23 -0400
Subject: [PATCH 3/3] Made comply more with TokenStream contract * made it
 cranky if close not called.  toAutomaton no longer closes. * added proper end
 offset tracking.

---
 .../miscellaneous/ConcatenateGraphFilter.java | 61 +++++++++----------
 .../document/FuzzyCompletionQuery.java        |  7 ++-
 .../document/PrefixCompletionQuery.java       |  5 +-
 3 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java
index b90ed45260a0..eb16c36866eb 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java
@@ -21,6 +21,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.util.AttributeImpl;
@@ -28,7 +29,6 @@
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.CharsRefBuilder;
-import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.LimitedFiniteStringsIterator;
@@ -66,6 +66,7 @@ public final class ConcatenateGraphFilter extends TokenStream {
 
   private final BytesRefBuilderTermAttribute bytesAtt = addAttribute(BytesRefBuilderTermAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   private final TokenStream inputTokenStream;
   private final boolean preserveSep;
@@ -75,6 +76,7 @@ public final class ConcatenateGraphFilter extends TokenStream {
   private LimitedFiniteStringsIterator finiteStrings;
   private CharTermAttribute charTermAttribute;
   private boolean wasReset = false;
+  private int endOffset;
 
   /**
    * Creates a token stream to convert <code>input</code> to a token stream
@@ -128,8 +130,10 @@ public boolean incrementToken() throws IOException {
         throw new IllegalStateException("reset() missing before incrementToken");
       }
       // lazy init/consume
-      Automaton automaton = toAutomaton(); // calls reset(), incrementToken() repeatedly, end(), and close() on inputTokenStream
+      Automaton automaton = toAutomaton(); // calls reset(), incrementToken() repeatedly, and end() on inputTokenStream
       finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
+      //note: would be nice to know the startOffset but toAutomaton doesn't capture it.  We'll assume 0
+      endOffset = inputTokenStream.getAttribute(OffsetAttribute.class).endOffset();
     }
 
     IntsRef string = finiteStrings.next();
@@ -143,6 +147,8 @@ public boolean incrementToken() throws IOException {
       posIncrAtt.setPositionIncrement(0); // stacked
     }
 
+    offsetAtt.setOffset(0, endOffset);
+
     Util.toBytesRef(string, bytesAtt.builder()); // now we have UTF-8
     if (charTermAttribute != null) {
       charTermAttribute.setLength(0);
@@ -155,58 +161,47 @@ public boolean incrementToken() throws IOException {
   @Override
   public void end() throws IOException {
     super.end();
-    //nocommit I think we don't need to delegate this under any condition
-    if (finiteStrings == null) {
-      //toAutomaton not called yet, so delegate lifecycle
-      inputTokenStream.end();
-    }
+    offsetAtt.setOffset(0, endOffset);
   }
 
   @Override
   public void close() throws IOException {
     super.close();
-    if (finiteStrings == null) {
-      //toAutomaton not called yet, so delegate lifecycle
-      inputTokenStream.close();
-    } else {
-      finiteStrings = null;
-    }
+    //delegate lifecycle.  Note toAutomaton does not close the stream
+    inputTokenStream.close();
     wasReset = false;//reset
+    endOffset = -1;//reset
   }
 
   /**
-   * Converts the tokenStream to an automaton, treating the transition labels as utf-8.  Closes it.
+   * Converts the tokenStream to an automaton, treating the transition labels as utf-8.  Does *not* close it.
    */
   public Automaton toAutomaton() throws IOException {
     return toAutomaton(false);
   }
 
   /**
-   * Converts the tokenStream to an automaton.  Closes it.
+   * Converts the tokenStream to an automaton.  Does *not* close it.
    */
   public Automaton toAutomaton(boolean unicodeAware) throws IOException {
     // TODO refactor this
     // maybe we could hook up a modified automaton from TermAutomatonQuery here?
-    Automaton automaton;
-    try {
-      // Create corresponding automaton: labels are bytes
-      // from each analyzed token, with byte 0 used as
-      // separator between tokens:
-      final TokenStreamToAutomaton tsta;
-      if (preserveSep) {
-        tsta = new EscapingTokenStreamToAutomaton(SEP_LABEL);
-      } else {
-        // When we're not preserving sep, we don't steal 0xff
-        // byte, so we don't need to do any escaping:
-        tsta = new TokenStreamToAutomaton();
-      }
-      tsta.setPreservePositionIncrements(preservePositionIncrements);
-      tsta.setUnicodeArcs(unicodeAware);
 
-      automaton = tsta.toAutomaton(inputTokenStream);
-    } finally {
-      IOUtils.closeWhileHandlingException(inputTokenStream);
+    // Create corresponding automaton: labels are bytes
+    // from each analyzed token, with byte 0 used as
+    // separator between tokens:
+    final TokenStreamToAutomaton tsta;
+    if (preserveSep) {
+      tsta = new EscapingTokenStreamToAutomaton(SEP_LABEL);
+    } else {
+      // When we're not preserving sep, we don't steal 0xff
+      // byte, so we don't need to do any escaping:
+      tsta = new TokenStreamToAutomaton();
     }
+    tsta.setPreservePositionIncrements(preservePositionIncrements);
+    tsta.setUnicodeArcs(unicodeAware);
+
+    Automaton automaton = tsta.toAutomaton(inputTokenStream);
 
     // TODO: we can optimize this somewhat by determinizing
     // while we convert
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java
index b243f4ede83d..14479fecd123 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java
@@ -144,9 +144,12 @@ public FuzzyCompletionQuery(Analyzer analyzer, Term term, BitsProducer filter, i
 
   @Override
   public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
-    CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text());
+    final Automaton originalAutomata;
+    try (CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text()) ) {
+      originalAutomata = stream.toAutomaton(unicodeAware);
+    }
     Set<IntsRef> refs = new HashSet<>();
-    Automaton automaton = toLevenshteinAutomata(stream.toAutomaton(unicodeAware), refs);
+    Automaton automaton = toLevenshteinAutomata(originalAutomata, refs);
     if (unicodeAware) {
       Automaton utf8automaton = new UTF32ToUTF8().convert(automaton);
       utf8automaton = Operations.determinize(utf8automaton, maxDeterminizedStates);
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java
index 7bb75e9261ca..a8da150f5045 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/PrefixCompletionQuery.java
@@ -68,8 +68,9 @@ public PrefixCompletionQuery(Analyzer analyzer, Term term, BitsProducer filter)
 
   @Override
   public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
-    CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text());
-    return new CompletionWeight(this, stream.toAutomaton());
+    try (CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text())) {
+      return new CompletionWeight(this, stream.toAutomaton());
+    }
   }
 
   /**