From 8a12584b246190ee6c8cab94b164835943c04632 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Sun, 24 Jun 2018 23:45:32 -0400
Subject: [PATCH 01/14] LUCENE-8286: UH initial support for Weight.matches

---
 .../uhighlight/FieldOffsetStrategy.java       | 40 ++++++++++++++++++-
 .../lucene/search/uhighlight/OffsetsEnum.java | 35 ++++++++++++++++
 .../search/uhighlight/PhraseHelper.java       | 35 +++++++++++++---
 .../search/uhighlight/UnifiedHighlighter.java | 11 ++++-
 4 files changed, 112 insertions(+), 9 deletions(-)
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
index cf564f4ddba3..3a4d50863798 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
@@ -25,6 +25,10 @@
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Matches;
+import org.apache.lucene.search.MatchesIterator;
+import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@@ -70,9 +74,13 @@ protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc
 
     final List<OffsetsEnum> offsetsEnums = new ArrayList<>(terms.length + automata.length);
 
+    final boolean useWeightMatchesIter = phraseHelper.useWeightMatchesIter();
+
     // Handle position insensitive terms (a subset of this.terms field):
     final BytesRef[] insensitiveTerms;
-    if (phraseHelper.hasPositionSensitivity()) {
+    if (useWeightMatchesIter) {
+      insensitiveTerms = new BytesRef[0]; // don't -- WEIGHT_MATCHES will handle these
+    } else if (phraseHelper.hasPositionSensitivity()) {
       insensitiveTerms = phraseHelper.getAllPositionInsensitiveTerms();
       assert insensitiveTerms.length <= terms.length : "insensitive terms should be smaller set of all terms";
     } else {
@@ -82,16 +90,22 @@ protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc
       createOffsetsEnumsForTerms(insensitiveTerms, termsIndex, doc, offsetsEnums);
     }
 
+    // Handle Weight.matches approach
+    if (useWeightMatchesIter) {
+      createOffsetsEnumsWeightMatcher(leafReader, doc, offsetsEnums);
+    }
+
     // Handle spans
     if (phraseHelper.hasPositionSensitivity()) {
       phraseHelper.createOffsetsEnumsForSpans(leafReader, doc, offsetsEnums);
     }
 
     // Handle automata
-    if (automata.length > 0) {
+    if (automata.length > 0 && useWeightMatchesIter==false) {// WEIGHT_MATCHES handled these already
       createOffsetsEnumsForAutomata(termsIndex, doc, offsetsEnums);
     }
 
+    //TODO if only one OE then return it; don't wrap in Multi.  If none, return NONE
     return new OffsetsEnum.MultiOffsetsEnum(offsetsEnums);
   }
 
@@ -111,6 +125,28 @@ protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsInd
     }
   }
 
+  protected void createOffsetsEnumsWeightMatcher(LeafReader leafReader, int docId, List<OffsetsEnum> results) throws IOException {
+    IndexSearcher indexSearcher = new IndexSearcher(leafReader);
+    indexSearcher.setQueryCache(null);
+    Matches matches = indexSearcher.rewrite(phraseHelper.getQuery())
+        .createWeight(indexSearcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f)
+        .matches(leafReader.getContext(), docId);
+    if (matches == null) {
+      return; // doc doesn't match
+    }
+    for (String field : matches) {
+      if (phraseHelper.getFieldMatcher().test(field)) {
+        MatchesIterator iterator = matches.getMatches(field);
+        if (iterator == null) {
+          continue;
+        }
+        //note: the iterator should be positioned on the first one already
+        results.add(new OffsetsEnum.OfMatchesIterator(iterator));
+      }
+    }
+
+  }
+
   protected void createOffsetsEnumsForAutomata(Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
     List<List<PostingsEnum>> automataPostings = new ArrayList<>(automata.length);
     for (int i = 0; i < automata.length; i++) {
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
index 55f3c37d4934..8e759b970128 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@@ -24,6 +24,7 @@
 import java.util.PriorityQueue;
 
 import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.search.MatchesIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 
@@ -153,6 +154,39 @@ public int freq() throws IOException {
     }
   }
 
+  public static class OfMatchesIterator extends OffsetsEnum {
+    private final MatchesIterator matchesIterator;
+
+    public OfMatchesIterator(MatchesIterator matchesIterator) {
+      this.matchesIterator = matchesIterator;
+    }
+
+    @Override
+    public boolean nextPosition() throws IOException {
+      return matchesIterator.next();
+    }
+
+    @Override
+    public int freq() throws IOException {
+      return 1; // nocommit terrible
+    }
+
+    @Override
+    public BytesRef getTerm() throws IOException {
+      return new BytesRef(); // nocommit terrible
+    }
+
+    @Override
+    public int startOffset() throws IOException {
+      return matchesIterator.startOffset();
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      return matchesIterator.endOffset();
+    }
+  }
+
   /**
    * Empty enumeration
    */
@@ -187,6 +221,7 @@ public int freq() throws IOException {
   /**
    * A view over several OffsetsEnum instances, merging them in-place
    */
+  //If OffsetsEnum and MatchesIterator ever truly merge then this could go away in lieu of DisjunctionMatchesIterator
   public static class MultiOffsetsEnum extends OffsetsEnum {
 
     private final PriorityQueue<OffsetsEnum> queue;
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
index ab439d09a2bc..dd00b3d7d008 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
@@ -38,7 +38,9 @@
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
@@ -65,13 +67,15 @@
 public class PhraseHelper {
 
   public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
-      (s) -> false, spanQuery -> null, query -> null, true);
+      (s) -> false, spanQuery -> null, query -> null, true, false);
 
+  private final Query query;
   private final String fieldName;
   private final Set<BytesRef> positionInsensitiveTerms; // (TermQuery terms)
   private final Set<SpanQuery> spanQueries;
   private final boolean willRewrite;
   private final Predicate<String> fieldMatcher;
+  private final boolean useWeightMatchesIter;
 
   /**
    * Constructor.
@@ -86,13 +90,18 @@ public class PhraseHelper {
    */
   public PhraseHelper(Query query, String field, Predicate<String> fieldMatcher, Function<SpanQuery, Boolean> rewriteQueryPred,
                       Function<Query, Collection<Query>> preExtractRewriteFunction,
-                      boolean ignoreQueriesNeedingRewrite) {
+                      boolean ignoreQueriesNeedingRewrite, boolean useWeightMatchesIter) {
+    this.query = query;
     this.fieldName = field;
     this.fieldMatcher = fieldMatcher;
+    this.useWeightMatchesIter = useWeightMatchesIter;
     // filter terms to those we want
     positionInsensitiveTerms = new HashSet<>();
     spanQueries = new HashSet<>();
 
+    // if useWeightMatchesIter:
+    // TODO once SpanQueries implement Weight.matches, we needn't capture them here.  But note SpanMultiTermQueryWrapper
+
     // TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls
 
     boolean[] mustRewriteHolder = {false}; // boolean wrapped in 1-ary array so it's mutable from inner class
@@ -132,6 +141,8 @@ protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> t
           for (Query newQuery : newQueriesToExtract) {
             extract(newQuery, boost, terms);
           }
+        } else if (useWeightMatchesIter && (query instanceof PhraseQuery || query instanceof MultiPhraseQuery)) {
+          return;// do nothing; these queries have Weight impls with proper getMulti
         } else {
           super.extract(query, boost, terms);
         }
@@ -149,8 +160,10 @@ protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
       @Override
       protected void extractWeightedTerms(Map<String, WeightedSpanTerm> terms, Query query, float boost)
           throws IOException {
-        query.createWeight(UnifiedHighlighter.EMPTY_INDEXSEARCHER, ScoreMode.COMPLETE_NO_SCORES, boost)
-            .extractTerms(extractPosInsensitiveTermsTarget);
+        if (!useWeightMatchesIter) { // when useWeightMatchesIterator, we needn't track these terms
+          query.createWeight(UnifiedHighlighter.EMPTY_INDEXSEARCHER, ScoreMode.COMPLETE_NO_SCORES, boost)
+              .extractTerms(extractPosInsensitiveTermsTarget);
+        }
       }
 
       // called on SpanQueries. Some other position-sensitive queries like PhraseQuery are converted beforehand
@@ -185,6 +198,18 @@ protected boolean mustRewriteQuery(SpanQuery spanQuery) {
     willRewrite = mustRewriteHolder[0];
   }
 
+  public Query getQuery() {
+    return query;
+  }
+
+  public boolean useWeightMatchesIter() {
+    return useWeightMatchesIter;
+  }
+
+  public Predicate<String> getFieldMatcher() {
+    return fieldMatcher;
+  }
+
   public Set<SpanQuery> getSpanQueries() {
     return spanQueries;
   }
@@ -205,7 +230,7 @@ public boolean willRewrite() {
     return willRewrite;
   }
 
-  /** Returns the terms that are position-insensitive (sorted). */
+  /** Returns the terms that are position-insensitive (sorted).  Always empty if {@link #useWeightMatchesIter}. */
   public BytesRef[] getAllPositionInsensitiveTerms() {
     BytesRef[] result = positionInsensitiveTerms.toArray(new BytesRef[positionInsensitiveTerms.size()]);
     Arrays.sort(result);
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index cfc918f878c6..a52f99bbe785 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -769,6 +769,7 @@ protected static BytesRef[] filterExtractedTerms(Predicate<String> fieldMatcher,
 
   protected Set<HighlightFlag> getFlags(String field) {
     Set<HighlightFlag> highlightFlags = EnumSet.noneOf(HighlightFlag.class);
+    highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);//nocommit for experimentation, use as default
     if (shouldHandleMultiTermQuery(field)) {
       highlightFlags.add(HighlightFlag.MULTI_TERM_QUERY);
     }
@@ -784,9 +785,14 @@ protected Set<HighlightFlag> getFlags(String field) {
   protected PhraseHelper getPhraseHelper(String field, Query query, Set<HighlightFlag> highlightFlags) {
     boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
     boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
+    boolean useWeightMatchesIter = highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES);
     return highlightPhrasesStrictly ?
         new PhraseHelper(query, field, getFieldMatcher(field),
-            this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) : PhraseHelper.NONE;
+            this::requiresRewrite,
+            this::preSpanQueryRewrite,
+            !handleMultiTermQuery,
+            useWeightMatchesIter)
+        : PhraseHelper.NONE;
   }
 
   protected CharacterRunAutomaton[] getAutomata(String field, Query query, Set<HighlightFlag> highlightFlags) {
@@ -1090,7 +1096,8 @@ public CacheHelper getReaderCacheHelper() {
   public enum HighlightFlag {
     PHRASES,
     MULTI_TERM_QUERY,
-    PASSAGE_RELEVANCY_OVER_SPEED
+    PASSAGE_RELEVANCY_OVER_SPEED,
+    WEIGHT_MATCHES
     // TODO: ignoreQueryFields
     // TODO: useQueryBoosts
   }

From d01b968ca46434e4282207e50260449713c6fbb2 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Wed, 11 Jul 2018 16:13:22 -0400
Subject: [PATCH 02/14] LUCENE-8286: Refactor out a UHComponents of
 FieldOffsetStrategy constructor (to reduce number of args to all the
 OffsetStrategy impls)

---
 .../uhighlight/AnalysisOffsetStrategy.java    |  6 +-
 .../uhighlight/FieldOffsetStrategy.java       | 13 ++--
 .../uhighlight/MemoryIndexOffsetStrategy.java |  9 ++-
 .../search/uhighlight/NoOpOffsetStrategy.java |  2 +-
 .../uhighlight/PostingsOffsetStrategy.java    |  8 +--
 ...PostingsWithTermVectorsOffsetStrategy.java |  6 +-
 .../uhighlight/TermVectorOffsetStrategy.java  |  6 +-
 .../uhighlight/TokenStreamOffsetStrategy.java | 10 ++-
 .../search/uhighlight/UHComponents.java       | 64 +++++++++++++++++++
 .../search/uhighlight/UnifiedHighlighter.java | 12 ++--
 .../TestUnifiedHighlighterExtensibility.java  |  6 +-
 11 files changed, 103 insertions(+), 39 deletions(-)
 create mode 100644 lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
index 162d27003947..6b0b60804862 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
@@ -23,8 +23,6 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
 /**
  * Provides a base class for analysis based offset strategies to extend from.
@@ -37,8 +35,8 @@ public abstract class AnalysisOffsetStrategy extends FieldOffsetStrategy {
 
   protected final Analyzer analyzer;
 
-  public AnalysisOffsetStrategy(String field, BytesRef[] queryTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Analyzer analyzer) {
-    super(field, queryTerms, phraseHelper, automata);
+  public AnalysisOffsetStrategy(UHComponents components, Analyzer analyzer) {
+    super(components);
     this.analyzer = analyzer;
     if (analyzer.getOffsetGap(field) != 1) { // note: 1 is the default. It is RARELY changed.
       throw new IllegalArgumentException(
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
index 3a4d50863798..4e3be826cff8 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
@@ -41,16 +41,19 @@
  */
 public abstract class FieldOffsetStrategy {
 
+  protected final UHComponents components;
+  //nocommit remove these?:
   protected final String field;
   protected final PhraseHelper phraseHelper; // Query: position-sensitive information
   protected final BytesRef[] terms; // Query: all terms we extracted (some may be position sensitive)
   protected final CharacterRunAutomaton[] automata; // Query: wildcards (i.e. multi-term query), not position sensitive
 
-  public FieldOffsetStrategy(String field, BytesRef[] queryTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata) {
-    this.field = field;
-    this.terms = queryTerms;
-    this.phraseHelper = phraseHelper;
-    this.automata = automata;
+  public FieldOffsetStrategy(UHComponents components) {
+    this.components = components;
+    this.field = components.getField();
+    this.terms = components.getExtractedTerms();
+    this.phraseHelper = components.getPhraseHelper();
+    this.automata = components.getAutomata();
   }
 
   public String getField() {
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
index 6364f3fcd5cd..a3b84746a023 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
@@ -50,15 +50,14 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
   private final LeafReader leafReader;
   private final CharacterRunAutomaton preMemIndexFilterAutomaton;
 
-  public MemoryIndexOffsetStrategy(String field, Predicate<String> fieldMatcher, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
-                                   CharacterRunAutomaton[] automata, Analyzer analyzer,
+  public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer,
                                    Function<Query, Collection<Query>> multiTermQueryRewrite) {
-    super(field, extractedTerms, phraseHelper, automata, analyzer);
-    boolean storePayloads = phraseHelper.hasPositionSensitivity(); // might be needed
+    super(components, analyzer);
+    boolean storePayloads = components.getPhraseHelper().hasPositionSensitivity(); // might be needed
     memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
     leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
     // preFilter for MemoryIndex
-    preMemIndexFilterAutomaton = buildCombinedAutomaton(fieldMatcher, terms, this.automata, phraseHelper, multiTermQueryRewrite);
+    preMemIndexFilterAutomaton = buildCombinedAutomaton(components.getFieldMatcher(), terms, this.automata, components.getPhraseHelper(), multiTermQueryRewrite);
   }
 
   /**
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
index 44a23f707294..8e9e7aa67578 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
@@ -32,7 +32,7 @@ public class NoOpOffsetStrategy extends FieldOffsetStrategy {
   public static final NoOpOffsetStrategy INSTANCE = new NoOpOffsetStrategy();
 
   private NoOpOffsetStrategy() {
-    super("_ignored_", new BytesRef[0], PhraseHelper.NONE, new CharacterRunAutomaton[0]);
+    super(new UHComponents("_ignored_", (s) -> false, new BytesRef[0], PhraseHelper.NONE, new CharacterRunAutomaton[0]));
   }
 
   @Override
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
index b7df77a8f297..e848a3e90526 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
@@ -22,10 +22,6 @@
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
 /**
  * Uses offsets in postings -- {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.  This
@@ -35,8 +31,8 @@
  */
 public class PostingsOffsetStrategy extends FieldOffsetStrategy {
 
-  public PostingsOffsetStrategy(String field, BytesRef[] queryTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata) {
-    super(field, queryTerms, phraseHelper, automata);
+  public PostingsOffsetStrategy(UHComponents components) {
+    super(components);
   }
 
   @Override
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
index 9097e4d68b5a..e0eb3212abf2 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
@@ -24,8 +24,6 @@
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.Terms;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
 /**
  * Like {@link PostingsOffsetStrategy} but also uses term vectors (only terms needed) for multi-term queries.
@@ -34,8 +32,8 @@
  */
 public class PostingsWithTermVectorsOffsetStrategy extends FieldOffsetStrategy {
 
-  public PostingsWithTermVectorsOffsetStrategy(String field, BytesRef[] queryTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata) {
-    super(field, queryTerms, phraseHelper, automata);
+  public PostingsWithTermVectorsOffsetStrategy(UHComponents components) {
+    super(components);
   }
 
   @Override
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
index cd19bb9d0d4b..5d0e0b933f32 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
@@ -22,8 +22,6 @@
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.highlight.TermVectorLeafReader;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
 /**
  * Uses term vectors that contain offsets.
@@ -32,8 +30,8 @@
  */
 public class TermVectorOffsetStrategy extends FieldOffsetStrategy {
 
-  public TermVectorOffsetStrategy(String field, BytesRef[] queryTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata) {
-    super(field, queryTerms, phraseHelper, automata);
+  public TermVectorOffsetStrategy(UHComponents components) {
+    super(components);
   }
 
   @Override
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
index b81e63e282e9..fbe3246e27eb 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
@@ -36,8 +36,14 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
 
   private static final BytesRef[] ZERO_LEN_BYTES_REF_ARRAY = new BytesRef[0];
 
-  public TokenStreamOffsetStrategy(String field, BytesRef[] terms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Analyzer indexAnalyzer) {
-    super(field, ZERO_LEN_BYTES_REF_ARRAY, phraseHelper, convertTermsToAutomata(terms, automata), indexAnalyzer);
+  public TokenStreamOffsetStrategy(UHComponents components, Analyzer indexAnalyzer) {
+    super(new UHComponents(
+          components.getField(),
+          components.getFieldMatcher(),
+          ZERO_LEN_BYTES_REF_ARRAY,
+          components.getPhraseHelper(),
+          convertTermsToAutomata(components.getExtractedTerms(), components.getAutomata())),
+        indexAnalyzer);
     assert phraseHelper.hasPositionSensitivity() == false;
   }
 
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
new file mode 100644
index 000000000000..a8ea1c0273cf
--- /dev/null
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import java.util.function.Predicate;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+
+/**
+ * A parameter object to hold the components a {@link FieldOffsetStrategy} needs.
+ *
+ * @lucene.internal
+ */
+public class UHComponents {
+  private final String field;
+  private final Predicate<String> fieldMatcher;
+  private final BytesRef[] extractedTerms;
+  private final PhraseHelper phraseHelper;
+  private final CharacterRunAutomaton[] automata;
+
+  public UHComponents(String field, Predicate<String> fieldMatcher, BytesRef[] extractedTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata) {
+    this.field = field;
+    this.fieldMatcher = fieldMatcher;
+    this.extractedTerms = extractedTerms;
+    this.phraseHelper = phraseHelper;
+    this.automata = automata;
+  }
+
+  public String getField() {
+    return field;
+  }
+
+  public Predicate<String> getFieldMatcher() {
+    return fieldMatcher;
+  }
+
+  public BytesRef[] getExtractedTerms() {
+    return extractedTerms;
+  }
+
+  public PhraseHelper getPhraseHelper() {
+    return phraseHelper;
+  }
+
+  public CharacterRunAutomaton[] getAutomata() {
+    return automata;
+  }
+}
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index a52f99bbe785..d604df7e3186 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -838,24 +838,24 @@ protected OffsetSource getOptimizedOffsetSource(String field, BytesRef[] terms,
   protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, String field, BytesRef[] terms,
                                                   PhraseHelper phraseHelper, CharacterRunAutomaton[] automata,
                                                   Set<HighlightFlag> highlightFlags) {
+    final UHComponents components = new UHComponents(field, getFieldMatcher(field), terms, phraseHelper, automata);
     switch (offsetSource) {
       case ANALYSIS:
         if (!phraseHelper.hasPositionSensitivity() &&
             !highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED)) {
           //skip using a memory index since it's pure term filtering
-          return new TokenStreamOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
+          return new TokenStreamOffsetStrategy(components, getIndexAnalyzer());
         } else {
-          return new MemoryIndexOffsetStrategy(field, getFieldMatcher(field), terms, phraseHelper, automata, getIndexAnalyzer(),
-              this::preMultiTermQueryRewrite);
+          return new MemoryIndexOffsetStrategy(components, getIndexAnalyzer(), this::preMultiTermQueryRewrite);
         }
       case NONE_NEEDED:
         return NoOpOffsetStrategy.INSTANCE;
       case TERM_VECTORS:
-        return new TermVectorOffsetStrategy(field, terms, phraseHelper, automata);
+        return new TermVectorOffsetStrategy(components);
       case POSTINGS:
-        return new PostingsOffsetStrategy(field, terms, phraseHelper, automata);
+        return new PostingsOffsetStrategy(components);
       case POSTINGS_WITH_TERM_VECTORS:
-        return new PostingsWithTermVectorsOffsetStrategy(field, terms, phraseHelper, automata);
+        return new PostingsWithTermVectorsOffsetStrategy(components);
       default:
         throw new IllegalArgumentException("Unrecognized offset source " + offsetSource);
     }
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
index 4eaa821b3c37..742117be1aeb 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
@@ -40,6 +40,7 @@
 import org.apache.lucene.search.uhighlight.PassageScorer;
 import org.apache.lucene.search.uhighlight.PhraseHelper;
 import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
+import org.apache.lucene.search.uhighlight.UHComponents;
 import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -58,10 +59,11 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
   @Test
   public void testFieldOffsetStrategyExtensibility() {
     final UnifiedHighlighter.OffsetSource offsetSource = UnifiedHighlighter.OffsetSource.NONE_NEEDED;
-    FieldOffsetStrategy strategy = new FieldOffsetStrategy("field",
+    FieldOffsetStrategy strategy = new FieldOffsetStrategy(new UHComponents("field",
+        (s) -> false,
         new BytesRef[0],
         PhraseHelper.NONE,
-        new CharacterRunAutomaton[0]) {
+        new CharacterRunAutomaton[0])) {
       @Override
       public UnifiedHighlighter.OffsetSource getOffsetSource() {
         return offsetSource;

From df8398173ddbff2227ffb002c66020da00bc72d0 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Wed, 11 Jul 2018 17:13:41 -0400
Subject: [PATCH 03/14] LUCENE-8286 OffsetsEnum should order by endOffset
 following startOffset. Was a non-issue when OE == term but more of an issue
 in other cases. Now matches order within the Passage is more sane in such
 cases.

---
 .../java/org/apache/lucene/search/uhighlight/OffsetsEnum.java | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
index 8e759b970128..906117247c2f 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@@ -47,6 +47,10 @@ public int compareTo(OffsetsEnum other) {
       if (cmp != 0) {
         return cmp; // vast majority of the time we return here.
       }
+      cmp = Integer.compare(endOffset(), other.endOffset());
+      if (cmp != 0) {
+        return cmp;
+      }
       final BytesRef thisTerm = this.getTerm();
       final BytesRef otherTerm = other.getTerm();
       if (thisTerm == null || otherTerm == null) {

From 73f8888a812da5aa83dd80bfbb07c9a9dd420c6f Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Wed, 11 Jul 2018 17:16:30 -0400
Subject: [PATCH 04/14] LUCENE-8286 UH Formatter: Overlapping matches within
 passage should be merged. When matches overlap, the formatted passage could
 contain weird end/open pairs. Formatting as if the matches were merged is
 more sane I think.

---
 .../uhighlight/DefaultPassageFormatter.java   | 25 ++++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
index 62d58df70f08..8d2a424d1004 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
@@ -70,17 +70,24 @@ public String format(Passage passages[], String content) {
       pos = passage.getStartOffset();
       for (int i = 0; i < passage.getNumMatches(); i++) {
         int start = passage.getMatchStarts()[i];
+        assert start >= pos && start < passage.getEndOffset();
+        //append content before this start
+        append(sb, content, pos, start);
+
         int end = passage.getMatchEnds()[i];
-        // its possible to have overlapping terms
-        if (start > pos) {
-          append(sb, content, pos, start);
-        }
-        if (end > pos) {
-          sb.append(preTag);
-          append(sb, content, Math.max(pos, start), end);
-          sb.append(postTag);
-          pos = end;
+        assert end > start;
+        // its possible to have overlapping terms.
+        //   Look ahead to expand 'end' past all overlapping:
+        while (i + 1 < passage.getNumMatches() && passage.getMatchStarts()[i+1] < end) {
+          end = passage.getMatchEnds()[++i];
         }
+        end = Math.min(end, passage.getEndOffset()); // in case match straddles past passage
+
+        sb.append(preTag);
+        append(sb, content, start, end);
+        sb.append(postTag);
+
+        pos = end;
       }
       // its possible a "term" from the analyzer could span a sentence boundary.
       append(sb, content, pos, Math.max(pos, passage.getEndOffset()));

From 93c4663a763decdf0d4bff2d9c7520045db81b74 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Wed, 11 Jul 2018 17:24:22 -0400
Subject: [PATCH 05/14] LUCENE-8286 UH: Support for
 requireFieldMatch/fieldMatcher when using Weight Matches.

---
 .../uhighlight/FieldOffsetStrategy.java       | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
index 4e3be826cff8..a1ff51ba4041 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
@@ -128,7 +128,29 @@ protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsInd
     }
   }
 
-  protected void createOffsetsEnumsWeightMatcher(LeafReader leafReader, int docId, List<OffsetsEnum> results) throws IOException {
+  protected void createOffsetsEnumsWeightMatcher(LeafReader _leafReader, int docId, List<OffsetsEnum> results) throws IOException {
+    // remap fieldMatcher/requireFieldMatch fields to the field we are highlighting
+    LeafReader leafReader = new FilterLeafReader(_leafReader) {
+      @Override
+      public Terms terms(String field) throws IOException {
+        if (components.getFieldMatcher().test(field)) {
+          return super.terms(components.getField());
+        } else {
+          return super.terms(field);
+        }
+      }
+
+      //these ought to be a default?  So many subclasses do this!
+      @Override
+      public CacheHelper getCoreCacheHelper() {
+        return null;
+      }
+
+      @Override
+      public CacheHelper getReaderCacheHelper() {
+        return null;
+      }
+    };
     IndexSearcher indexSearcher = new IndexSearcher(leafReader);
     indexSearcher.setQueryCache(null);
     Matches matches = indexSearcher.rewrite(phraseHelper.getQuery())

From a3ef8be7c981f90b50385832283f460773af5304 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Wed, 11 Jul 2018 17:30:01 -0400
Subject: [PATCH 06/14] forgot these imports during selective committing

---
 .../apache/lucene/search/uhighlight/FieldOffsetStrategy.java    | 1 +
 .../apache/lucene/search/uhighlight/PostingsOffsetStrategy.java | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
index a1ff51ba4041..086a3d5752a5 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
@@ -20,6 +20,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.PostingsEnum;
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
index e848a3e90526..469ce440730f 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
@@ -22,6 +22,8 @@
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.ReaderUtil;
 
 /**
  * Uses offsets in postings -- {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.  This

From 42947da3892b325308baf89ad5dbde521226b0ff Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Wed, 11 Jul 2018 17:57:35 -0400
Subject: [PATCH 07/14] LUCENE-8286 Overlay LeafReader with offsets with the
 real LeafReader when different. Weight Matches highlighting needs this for
 some cases; especially  when requireFieldMatch/fieldMatcher is in use.
 Refactored FieldOffsetStrategy.getOffsetsEnum to take a LeafReader not
 IndexReader.

---
 .../search/uhighlight/FieldHighlighter.java   |   4 +-
 .../uhighlight/FieldOffsetStrategy.java       |   3 +-
 .../uhighlight/MemoryIndexOffsetStrategy.java |  21 ++--
 .../search/uhighlight/NoOpOffsetStrategy.java |   4 +-
 .../OverlaySingleDocTermsLeafReader.java      | 113 ++++++++++++++++++
 .../uhighlight/PostingsOffsetStrategy.java    |  18 +--
 ...PostingsWithTermVectorsOffsetStrategy.java |  18 +--
 .../TermVectorFilteredLeafReader.java         |  11 +-
 .../uhighlight/TermVectorOffsetStrategy.java  |  15 ++-
 .../uhighlight/TokenStreamOffsetStrategy.java |   4 +-
 .../search/uhighlight/UnifiedHighlighter.java |  21 +++-
 .../TestUnifiedHighlighterExtensibility.java  |   5 +-
 12 files changed, 174 insertions(+), 63 deletions(-)
 create mode 100644 lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OverlaySingleDocTermsLeafReader.java

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
index da6e5256ac5b..219091ac829f 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
@@ -24,7 +24,7 @@
 import java.util.List;
 import java.util.PriorityQueue;
 
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.util.BytesRef;
 
 /**
@@ -65,7 +65,7 @@ public UnifiedHighlighter.OffsetSource getOffsetSource() {
   /**
    * The primary method -- highlight this doc, assuming a specific field and given this content.
    */
-  public Object highlightFieldForDoc(IndexReader reader, int docId, String content) throws IOException {
+  public Object highlightFieldForDoc(LeafReader reader, int docId, String content) throws IOException {
     // TODO accept LeafReader instead?
     // note: it'd be nice to accept a CharSequence for content, but we need a CharacterIterator impl for it.
     if (content.length() == 0) {
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
index 086a3d5752a5..7898ce148029 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
@@ -21,7 +21,6 @@
 import java.util.List;
 
 import org.apache.lucene.index.FilterLeafReader;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Terms;
@@ -68,7 +67,7 @@ public String getField() {
    *
    * Callers are expected to close the returned OffsetsEnum when it has been finished with
    */
-  public abstract OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException;
+  public abstract OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException;
 
   protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc) throws IOException {
     final Terms termsIndex = leafReader.terms(field);
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
index a3b84746a023..a35591b71ae4 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
@@ -29,7 +29,6 @@
 import org.apache.lucene.analysis.FilteringTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.search.Query;
@@ -47,7 +46,7 @@
 public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
 
   private final MemoryIndex memoryIndex;
-  private final LeafReader leafReader;
+  private final LeafReader memIndexLeafReader;
   private final CharacterRunAutomaton preMemIndexFilterAutomaton;
 
   public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer,
@@ -55,7 +54,7 @@ public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer,
     super(components, analyzer);
     boolean storePayloads = components.getPhraseHelper().hasPositionSensitivity(); // might be needed
     memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
-    leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
+    memIndexLeafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
     // preFilter for MemoryIndex
     preMemIndexFilterAutomaton = buildCombinedAutomaton(components.getFieldMatcher(), terms, this.automata, components.getPhraseHelper(), multiTermQueryRewrite);
   }
@@ -99,7 +98,7 @@ public boolean run(char[] chars, int offset, int length) {
   }
 
   @Override
-  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
     // note: don't need LimitTokenOffsetFilter since content is already truncated to maxLength
     TokenStream tokenStream = tokenStream(content);
 
@@ -107,12 +106,20 @@ public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content)
     tokenStream = newKeepWordFilter(tokenStream, preMemIndexFilterAutomaton);
     memoryIndex.reset();
     memoryIndex.addField(field, tokenStream);//note: calls tokenStream.reset() & close()
-    docId = 0;
 
-    return createOffsetsEnumFromReader(leafReader, docId);
+    if (reader == null) {
+      return createOffsetsEnumFromReader(memIndexLeafReader, 0);
+    } else {
+      return createOffsetsEnumFromReader(
+          new OverlaySingleDocTermsLeafReader(
+              reader,
+              memIndexLeafReader,
+              field,
+              docId),
+          docId);
+    }
   }
 
-
   private static FilteringTokenFilter newKeepWordFilter(final TokenStream tokenStream,
                                                         final CharacterRunAutomaton charRunAutomaton) {
     // it'd be nice to use KeepWordFilter but it demands a CharArraySet. TODO File JIRA? Need a new interface?
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
index 8e9e7aa67578..e42ff2353f1e 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
@@ -18,7 +18,7 @@
 
 import java.io.IOException;
 
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
@@ -41,7 +41,7 @@ public UnifiedHighlighter.OffsetSource getOffsetSource() {
   }
 
   @Override
-  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
     return OffsetsEnum.EMPTY;
   }
 
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OverlaySingleDocTermsLeafReader.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OverlaySingleDocTermsLeafReader.java
new file mode 100644
index 000000000000..7b9412d9739a
--- /dev/null
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OverlaySingleDocTermsLeafReader.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+
+/**
+ * Overlays a 2nd LeafReader for the terms of one field, otherwise the primary reader is
+ * consulted.  The 2nd reader is assumed to have one document of 0 and we remap it to a target doc ID.
+ *
+ * @lucene.experimental
+ */
+public class OverlaySingleDocTermsLeafReader extends FilterLeafReader {
+
+  private final LeafReader in2;
+  private final String in2Field;
+  private final int in2TargetDocId;
+
+  public OverlaySingleDocTermsLeafReader(LeafReader in, LeafReader in2, String in2Field, int in2TargetDocId) {
+    super(in);
+    this.in2 = in2;
+    this.in2Field = in2Field;
+    this.in2TargetDocId = in2TargetDocId;
+    assert in2.maxDoc() == 1;
+  }
+
+  @Override
+  public Terms terms(String field) throws IOException {
+    if (!in2Field.equals(field)) {
+      return in.terms(field);
+    }
+
+    // Shifts leafReader in2 with only doc ID 0 to a target doc ID
+    final Terms terms = in2.terms(field);
+    if (terms == null) {
+      return null;
+    }
+    if (in2TargetDocId == 0) { // no doc ID remapping to do
+      return terms;
+    }
+    return new FilterTerms(terms) {
+      @Override
+      public TermsEnum iterator() throws IOException {
+        return filterTermsEnum(super.iterator());
+      }
+
+      @Override
+      public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
+        return filterTermsEnum(super.intersect(compiled, startTerm));
+      }
+
+      private TermsEnum filterTermsEnum(TermsEnum termsEnum) throws IOException {
+        return new FilterTermsEnum(termsEnum) {
+          @Override
+          public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
+            //TODO 'reuse' will always fail to reuse unless we unwrap it
+            return new FilterPostingsEnum(super.postings(reuse, flags)) {
+              @Override
+              public int nextDoc() throws IOException {
+                final int doc = super.nextDoc();
+                return doc == 0 ? in2TargetDocId : doc;
+              }
+
+              @Override
+              public int advance(int target) throws IOException {
+                return slowAdvance(target);
+              }
+
+              @Override
+              public int docID() {
+                final int doc = super.docID();
+                return doc == 0 ? in2TargetDocId : doc;
+              }
+            };
+          }
+        };
+      }
+    };
+  }
+
+  @Override
+  public CacheHelper getCoreCacheHelper() {
+    return null;
+  }
+
+  @Override
+  public CacheHelper getReaderCacheHelper() {
+    return null;
+  }
+}
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
index 469ce440730f..53c1c95b4b6c 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsOffsetStrategy.java
@@ -17,13 +17,9 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
-import java.util.List;
 
 import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.ReaderUtil;
 
 /**
  * Uses offsets in postings -- {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.  This
@@ -38,18 +34,8 @@ public PostingsOffsetStrategy(UHComponents components) {
   }
 
   @Override
-  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
-    final LeafReader leafReader;
-    if (reader instanceof LeafReader) {
-      leafReader = (LeafReader) reader;
-    } else {
-      List<LeafReaderContext> leaves = reader.leaves();
-      LeafReaderContext leafReaderContext = leaves.get(ReaderUtil.subIndex(docId, leaves));
-      leafReader = leafReaderContext.reader();
-      docId -= leafReaderContext.docBase; // adjust 'doc' to be within this leaf reader
-    }
-
-    return createOffsetsEnumFromReader(leafReader, docId);
+  public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
+    return createOffsetsEnumFromReader(reader, docId);
   }
 
 
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
index e0eb3212abf2..ab391213aba8 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
@@ -17,12 +17,8 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
-import java.util.List;
 
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.Terms;
 
 /**
@@ -37,22 +33,12 @@ public PostingsWithTermVectorsOffsetStrategy(UHComponents components) {
   }
 
   @Override
-  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
-    LeafReader leafReader;
-    if (reader instanceof LeafReader) {
-      leafReader = (LeafReader) reader;
-    } else {
-      List<LeafReaderContext> leaves = reader.leaves();
-      LeafReaderContext LeafReaderContext = leaves.get(ReaderUtil.subIndex(docId, leaves));
-      leafReader = LeafReaderContext.reader();
-      docId -= LeafReaderContext.docBase; // adjust 'doc' to be within this atomic reader
-    }
-
+  public OffsetsEnum getOffsetsEnum(LeafReader leafReader, int docId, String content) throws IOException {
     Terms docTerms = leafReader.getTermVector(docId, field);
     if (docTerms == null) {
       return OffsetsEnum.EMPTY;
     }
-    leafReader = new TermVectorFilteredLeafReader(leafReader, docTerms);
+    leafReader = new TermVectorFilteredLeafReader(leafReader, docTerms, field);
 
     return createOffsetsEnumFromReader(leafReader, docId);
   }
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorFilteredLeafReader.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorFilteredLeafReader.java
index 4165af45a9e8..4b93e16af053 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorFilteredLeafReader.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorFilteredLeafReader.java
@@ -37,21 +37,26 @@ final class TermVectorFilteredLeafReader extends FilterLeafReader {
   // NOTE: super ("in") is baseLeafReader
 
   private final Terms filterTerms;
+  private final String fieldFilter;
 
   /**
    * <p>Construct a FilterLeafReader based on the specified base reader.
    * <p>Note that base reader is closed if this FilterLeafReader is closed.</p>
-   *
    * @param baseLeafReader full/original reader.
    * @param filterTerms set of terms to filter by -- probably from a TermVector or MemoryIndex.
+   * @param fieldFilter the field to do this on
    */
-  TermVectorFilteredLeafReader(LeafReader baseLeafReader, Terms filterTerms) {
+  TermVectorFilteredLeafReader(LeafReader baseLeafReader, Terms filterTerms, String fieldFilter) {
     super(baseLeafReader);
     this.filterTerms = filterTerms;
+    this.fieldFilter = fieldFilter;
   }
 
   @Override
   public Terms terms(String field) throws IOException {
+    if (!field.equals(fieldFilter)) {
+      return super.terms(field); // proceed like normal for fields we're not interested in
+    }
     Terms terms = in.terms(field);
     return terms==null ? null : new TermsFilteredTerms(terms, filterTerms);
   }
@@ -106,7 +111,7 @@ void moveToCurrentTerm() throws IOException {
       boolean termInBothTermsEnum = baseTermsEnum.seekExact(currentTerm);
 
       if (!termInBothTermsEnum) {
-        throw new IllegalStateException("Term vector term " + currentTerm + " does not appear in full index.");
+        throw new IllegalStateException("Term vector term '" + currentTerm.utf8ToString() + "' does not appear in full index.");
       }
     }
 
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
index 5d0e0b933f32..895e95e50919 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
@@ -18,7 +18,6 @@
 
 import java.io.IOException;
 
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.highlight.TermVectorLeafReader;
@@ -40,16 +39,20 @@ public UnifiedHighlighter.OffsetSource getOffsetSource() {
   }
 
   @Override
-  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
     Terms tvTerms = reader.getTermVector(docId, field);
     if (tvTerms == null) {
       return OffsetsEnum.EMPTY;
     }
 
-    LeafReader leafReader = new TermVectorLeafReader(field, tvTerms);
-    docId = 0;
-
-    return createOffsetsEnumFromReader(leafReader, docId);
+    LeafReader singleDocReader = new TermVectorLeafReader(field, tvTerms);
+    return createOffsetsEnumFromReader(
+        new OverlaySingleDocTermsLeafReader(
+            reader,
+            singleDocReader,
+            field,
+            docId),
+        docId);
   }
 
 }
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
index fbe3246e27eb..e851ca660cdd 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
@@ -22,7 +22,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@@ -64,7 +64,7 @@ public String toString() {
   }
 
   @Override
-  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
     return new TokenStreamOffsetsEnum(tokenStream(content), automata);
   }
 
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index d604df7e3186..d55ec3fefefa 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -48,6 +48,7 @@
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -623,11 +624,20 @@ protected Map<String, Object[]> highlightFieldsAsObjects(String[] fieldsIn, Quer
                   && indexReaderWithTermVecCache != null)
                   ? indexReaderWithTermVecCache
                   : searcher.getIndexReader();
+          final LeafReader leafReader;
+          if (indexReader instanceof LeafReader) {
+            leafReader = (LeafReader) indexReader;
+          } else {
+            List<LeafReaderContext> leaves = indexReader.leaves();
+            LeafReaderContext leafReaderContext = leaves.get(ReaderUtil.subIndex(docId, leaves));
+            leafReader = leafReaderContext.reader();
+            docId -= leafReaderContext.docBase; // adjust 'doc' to be within this leaf reader
+          }
           int docInIndex = docInIndexes[docIdx];//original input order
           assert resultByDocIn[docInIndex] == null;
           resultByDocIn[docInIndex] =
               fieldHighlighter
-                  .highlightFieldForDoc(indexReader, docId, content.toString());
+                  .highlightFieldForDoc(leafReader, docId, content.toString());
         }
 
       }
@@ -769,13 +779,15 @@ protected static BytesRef[] filterExtractedTerms(Predicate<String> fieldMatcher,
 
   protected Set<HighlightFlag> getFlags(String field) {
     Set<HighlightFlag> highlightFlags = EnumSet.noneOf(HighlightFlag.class);
-    highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);//nocommit for experimentation, use as default
     if (shouldHandleMultiTermQuery(field)) {
       highlightFlags.add(HighlightFlag.MULTI_TERM_QUERY);
     }
     if (shouldHighlightPhrasesStrictly(field)) {
       highlightFlags.add(HighlightFlag.PHRASES);
     }
+    if (highlightFlags.contains(HighlightFlag.PHRASES) && highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)) {
+      highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);//nocommit for experimentation, use as default
+    }
     if (shouldPreferPassageRelevancyOverSpeed(field)) {
       highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
     }
@@ -791,7 +803,7 @@ protected PhraseHelper getPhraseHelper(String field, Query query, Set<HighlightF
             this::requiresRewrite,
             this::preSpanQueryRewrite,
             !handleMultiTermQuery,
-            useWeightMatchesIter)
+            useWeightMatchesIter && handleMultiTermQuery)
         : PhraseHelper.NONE;
   }
 
@@ -842,7 +854,8 @@ protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, Strin
     switch (offsetSource) {
       case ANALYSIS:
         if (!phraseHelper.hasPositionSensitivity() &&
-            !highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED)) {
+            !highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED) &&
+            !highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES)) {
           //skip using a memory index since it's pure term filtering
           return new TokenStreamOffsetStrategy(components, getIndexAnalyzer());
         } else {
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
index 742117be1aeb..ca21000137dc 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
@@ -25,7 +25,6 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -70,7 +69,7 @@ public UnifiedHighlighter.OffsetSource getOffsetSource() {
       }
 
       @Override
-      public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
+      public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
         return OffsetsEnum.EMPTY;
       }
 
@@ -209,7 +208,7 @@ private static class CustomFieldHighlighter extends FieldHighlighter {
     }
 
     @Override
-    public Object highlightFieldForDoc(IndexReader reader, int docId, String content) throws IOException {
+    public Object highlightFieldForDoc(LeafReader reader, int docId, String content) throws IOException {
       return super.highlightFieldForDoc(reader, docId, content);
     }
 

From 4d1b633006f3ef6c30a6c73985bd3fcef39e624b Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Wed, 11 Jul 2018 17:59:47 -0400
Subject: [PATCH 08/14] LUCENE-8286 UH: Update some tests to work with Weight
 Matcher mode

---
 .../uhighlight/TestUnifiedHighlighter.java    | 82 ++++++++++++++++---
 .../TestUnifiedHighlighterStrictPhrases.java  | 42 ++++++----
 .../TestUnifiedHighlighterTermVec.java        | 34 ++++++--
 3 files changed, 123 insertions(+), 35 deletions(-)

diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index fa6f2867dbc3..d89499cd5e83 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -95,8 +95,12 @@ static UnifiedHighlighter randomUnifiedHighlighter(IndexSearcher searcher, Analy
       return new UnifiedHighlighter(searcher, indexAnalyzer);
     } else {
       final UnifiedHighlighter uh = new UnifiedHighlighter(searcher, indexAnalyzer) {
+        Set<HighlightFlag> flags; // consistently random set of flags for this test run
         @Override
         protected Set<HighlightFlag> getFlags(String field) {
+          if (flags != null) {
+            return flags;
+          }
           final EnumSet<HighlightFlag> result = EnumSet.copyOf(mandatoryFlags);
           int r = random().nextInt();
           for (HighlightFlag highlightFlag : HighlightFlag.values()) {
@@ -104,7 +108,12 @@ protected Set<HighlightFlag> getFlags(String field) {
               result.add(highlightFlag);
             }
           }
-          return result;
+          if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
+            // these two are required for WEIGHT_MATCHES
+            result.add(HighlightFlag.MULTI_TERM_QUERY);
+            result.add(HighlightFlag.PHRASES);
+          }
+          return flags = result;
         }
       };
       uh.setCacheFieldValCharsThreshold(random().nextInt(100));
@@ -420,7 +429,11 @@ public void testBuddhism() throws Exception {
     highlighter.setHighlightPhrasesStrictly(false);
     String snippets[] = highlighter.highlight("body", query, topDocs, 2);
     assertEquals(1, snippets.length);
-    assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
+    if (highlighter.getFlags("body").containsAll(EnumSet.of(HighlightFlag.WEIGHT_MATCHES, HighlightFlag.PHRASES))) {
+      assertTrue(snippets[0], snippets[0].contains("<b>Buddhist origins</b>"));
+    } else {
+      assertTrue(snippets[0], snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
+    }
     ir.close();
   }
 
@@ -1187,6 +1200,19 @@ protected Predicate<String> getFieldMatcher(String field) {
     ir.close();
   }
 
+  public void testMatchesSlopBug() throws IOException {
+    IndexReader ir = indexSomeFields();
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+    Query query = new PhraseQuery(2, "title", "this", "is", "the", "field");
+    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String[] snippets = highlighter.highlight("title", query, topDocs, 10);
+    assertEquals(1, snippets.length);
+    assertEquals("<b>This is the title field</b>.", snippets[0]);
+    ir.close();
+  }
+
   public void testFieldMatcherPhraseQuery() throws Exception {
     IndexReader ir = indexSomeFields();
     IndexSearcher searcher = newSearcher(ir);
@@ -1197,7 +1223,7 @@ protected Predicate<String> getFieldMatcher(String field) {
         return (qf) -> true;
       }
     };
-    UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.PHRASES));
+    UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY));
     highlighterFieldMatch.setFieldMatcher(null);//default
     BooleanQuery.Builder queryBuilder =
         new BooleanQuery.Builder()
@@ -1214,16 +1240,28 @@ protected Predicate<String> getFieldMatcher(String field) {
       assertEquals(1, topDocs.totalHits.value);
       String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
       assertEquals(1, snippets.length);
-      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);
+      if (highlighterNoFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
+        assertEquals("<b>This is the title field</b>.", snippets[0]);
+      } else {
+        assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);
+      }
 
       snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
       assertEquals(1, snippets.length);
-      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
+      if (highlighterFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
+        assertEquals("<b>This is the title</b> field.", snippets[0]);
+      } else {
+        assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
+      }
 
       highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
       snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
       assertEquals(1, snippets.length);
-      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
+      if (highlighterFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
+        assertEquals("<b>This is</b> the title field.", snippets[0]);
+      } else {
+        assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
+      }
       highlighterFieldMatch.setFieldMatcher(null);
     }
 
@@ -1233,11 +1271,20 @@ protected Predicate<String> getFieldMatcher(String field) {
       assertEquals(1, topDocs.totalHits.value);
       String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
       assertEquals(1, snippets.length);
-      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
+      if (highlighterNoFieldMatch.getFlags("text").contains(HighlightFlag.WEIGHT_MATCHES)) {
+        assertEquals("<b>This is the text field</b>. <b>You can put some text</b> if you want.", snippets[0]);
+      } else {
+        assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
+      }
 
       snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
       assertEquals(1, snippets.length);
-      assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
+      if (highlighterFieldMatch.getFlags("text").contains(HighlightFlag.WEIGHT_MATCHES)) {
+        assertEquals("<b>This is</b> the text field. <b>You can put some text</b> if you want.", snippets[0]);
+      } else {
+        //nocommit why does this highlight the first "text"?
+        assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
+      }
 
       highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
       snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
@@ -1252,17 +1299,28 @@ protected Predicate<String> getFieldMatcher(String field) {
       assertEquals(1, topDocs.totalHits.value);
       String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
       assertEquals(1, snippets.length);
-      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
+      if (highlighterNoFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) {
+        assertEquals("<b>This is the category field</b>.", snippets[0]);
+      } else {
+        assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
+      }
 
       snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
       assertEquals(1, snippets.length);
-      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
-
+      if (highlighterFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) {
+        assertEquals("<b>This is the category field</b>.", snippets[0]);
+      } else {
+        assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
+      }
 
       highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
       snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
       assertEquals(1, snippets.length);
-      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
+      if (highlighterFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) {
+        assertEquals("<b>This is</b> the category field.", snippets[0]);
+      } else {
+        assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
+      }
       highlighterFieldMatch.setFieldMatcher(null);
     }
     ir.close();
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
index 2fefaba26f6a..ee9b9d224a63 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
@@ -20,6 +20,7 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Set;
 
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -52,6 +53,7 @@
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
@@ -138,7 +140,7 @@ public void testBasics() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"yin alone, <b>Yin</b> <b>yang</b>, yin gap yang"}, snippets);
+    assertArrayEquals(new String[]{"yin alone, <b>Yin yang</b>, yin gap yang"}, snippets);
   }
 
   public void testWithSameTermQuery() throws IOException {
@@ -155,7 +157,7 @@ public void testWithSameTermQuery() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>Yin</b> <b>yang</b>, <b>yin</b> gap yang"}, snippets);
+    assertArrayEquals(new String[]{"<b>Yin yang</b>, <b>yin</b> gap yang"}, snippets);
 
     // test the Passage only has 3 matches.  We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.
     highlighter.setFormatter(new PassageFormatter() {
@@ -199,7 +201,7 @@ public void testSubPhrases() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"}, snippets);
+    assertArrayEquals(new String[]{"<b>alpha bravo charlie</b> - charlie bravo alpha"}, snippets);
 
     // test the Passage only has 3 matches.  We don't want duplicates from both PhraseQuery
     highlighter.setFormatter(new PassageFormatter() {
@@ -224,7 +226,7 @@ public void testSynonyms() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>mother</b> <b>father</b> w <b>mom</b> <b>father</b> w dad"}, snippets);
+    assertArrayEquals(new String[]{"<b>mother father</b> w <b>mom father</b> w dad"}, snippets);
   }
 
   /**
@@ -252,7 +254,7 @@ public void testRewriteAndMtq() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
+    assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
         snippets);
 
     // do again, this time with MTQ disabled.  We should only find "alpha bravo".
@@ -288,7 +290,7 @@ public void testRewrite() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"},
+    assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo alpha"},
         snippets);
 
     // do again, this time with MTQ disabled.  We should only find "alpha bravo".
@@ -325,7 +327,7 @@ public void testMtq() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
+    assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
         snippets);
 
     // do again, this time with MTQ disabled.
@@ -334,6 +336,7 @@ public void testMtq() throws IOException {
     topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     snippets = highlighter.highlight("body", query, topDocs);
 
+    //note: without MTQ, the WEIGHT_MATCHES is disabled which affects the snippet boundaries
     assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"},
         snippets);
   }
@@ -351,7 +354,7 @@ public void testMultiValued() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs, 2);
 
-    assertArrayEquals(new String[]{"<b>one</b> <b>bravo</b> three... <b>four</b> <b>bravo</b> six"},
+    assertArrayEquals(new String[]{"<b>one bravo</b> three... <b>four bravo</b> six"},
         snippets);
 
 
@@ -381,7 +384,11 @@ public void testMultiValued() throws IOException {
     topDocs = searcher.search(query, 10);
     assertEquals(1, topDocs.totalHits.value);
     snippets = highlighter.highlight("body", query, topDocs, 2);
-    assertEquals("one <b>bravo</b> <b>three</b>... <b>four</b> <b>bravo</b> six", snippets[0]);
+    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertEquals("one <b>bravo</b> <b>three</b>... four <b>bravo</b> six", snippets[0]);
+    } else {
+      assertEquals("one <b>bravo</b> <b>three</b>... <b>four</b> <b>bravo</b> six", snippets[0]);
+    }
   }
 
   public void testMaxLen() throws IOException {
@@ -390,18 +397,18 @@ public void testMaxLen() throws IOException {
     highlighter.setMaxLength(21);
 
     BooleanQuery query = new BooleanQuery.Builder()
-        .add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST)
-        .add(newPhraseQuery("body", "gap alpha"), BooleanClause.Occur.MUST)
+        .add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.SHOULD)
+        .add(newPhraseQuery("body", "gap alpha"), BooleanClause.Occur.SHOULD)
         .add(newPhraseQuery("body", "charlie gap"), BooleanClause.Occur.SHOULD)
         .build();
 
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    if (fieldType == UHTestHelper.reanalysisType) {
-      assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> charlie -"}, snippets);
+    if (fieldType == UHTestHelper.reanalysisType || highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertArrayEquals(new String[]{"<b>alpha bravo</b> charlie -"}, snippets);
     } else {
-      assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> -"}, snippets);
+      assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> -"}, snippets);
     }
   }
 
@@ -436,6 +443,13 @@ public void testPreSpanQueryRewrite() throws IOException {
     initReaderSearcherHighlighter();
 
     highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Set<HighlightFlag> getFlags(String field) {
+        final Set<HighlightFlag> flags = super.getFlags(field);
+        flags.remove(HighlightFlag.WEIGHT_MATCHES);//unsupported
+        return flags;
+      }
+
       @Override
       protected Collection<Query> preSpanQueryRewrite(Query query) {
         if (query instanceof MyQuery) {
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java
index 2d9fb0e18df1..6a92546d8f00 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermVec.java
@@ -16,13 +16,31 @@
  */
 package org.apache.lucene.search.uhighlight;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.CheckIndex;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.ParallelLeafReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
@@ -35,13 +53,6 @@
 import org.junit.Before;
 import org.junit.Test;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.List;
-import java.util.Map;
-
 /**
  * Tests highlighting for matters *expressly* relating to term vectors.
  * <p>
@@ -182,7 +193,12 @@ public void testUserFailedToIndexOffsets() throws IOException {
     iw.close();
 
     IndexSearcher searcher = newSearcher(ir);
-    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Set<HighlightFlag> getFlags(String field) {
+        return Collections.emptySet();//no WEIGHT_MATCHES
+      }
+    };
     TermQuery query = new TermQuery(new Term("body", "vectors"));
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     try {

From 6098a36a008b464e946323184af8452df2395186 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Mon, 13 Aug 2018 14:29:09 -0400
Subject: [PATCH 09/14] LUCENE-8286 UH: Use MI.getSubMatches().  Removed
 PhraseHelper changes; not necessary anymore. Updated based on MI improvements
 in master. With subMatches, we have better fidelity on span queries. And
 since MI can handle span queries now, no need to touch PhraseHelper. * added
 to UHComponents: query, and highlightFlags * updated tests to handle
 with/without WEIGHT_MATCHES * TestUnifiedHighlighterStrictPhrases uses more
 randomization.   Removed brittle score calculation dependence. * Test Passage
 matches data is in order TODO: OE freq & term()

---
 .../uhighlight/FieldOffsetStrategy.java       |  94 ++++++-------
 .../search/uhighlight/NoOpOffsetStrategy.java |   4 +-
 .../lucene/search/uhighlight/OffsetsEnum.java | 132 +++++++++++++++++-
 .../search/uhighlight/PhraseHelper.java       |  35 +----
 .../uhighlight/TokenStreamOffsetStrategy.java |  12 +-
 .../search/uhighlight/UHComponents.java       |  26 +++-
 .../search/uhighlight/UnifiedHighlighter.java |  37 ++---
 .../uhighlight/TestUnifiedHighlighter.java    |  67 ++++-----
 .../uhighlight/TestUnifiedHighlighterMTQ.java |   2 +-
 .../TestUnifiedHighlighterStrictPhrases.java  | 128 +++++++++++------
 .../TestUnifiedHighlighterExtensibility.java  |  17 ++-
 11 files changed, 367 insertions(+), 187 deletions(-)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
index 7898ce148029..daf7ef4f4e40 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
@@ -51,7 +51,7 @@ public abstract class FieldOffsetStrategy {
   public FieldOffsetStrategy(UHComponents components) {
     this.components = components;
     this.field = components.getField();
-    this.terms = components.getExtractedTerms();
+    this.terms = components.getTerms();
     this.phraseHelper = components.getPhraseHelper();
     this.automata = components.getAutomata();
   }
@@ -75,57 +75,40 @@ protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc
       return OffsetsEnum.EMPTY;
     }
 
-    final List<OffsetsEnum> offsetsEnums = new ArrayList<>(terms.length + automata.length);
-
-    final boolean useWeightMatchesIter = phraseHelper.useWeightMatchesIter();
-
-    // Handle position insensitive terms (a subset of this.terms field):
-    final BytesRef[] insensitiveTerms;
-    if (useWeightMatchesIter) {
-      insensitiveTerms = new BytesRef[0]; // don't -- WEIGHT_MATCHES will handle these
-    } else if (phraseHelper.hasPositionSensitivity()) {
-      insensitiveTerms = phraseHelper.getAllPositionInsensitiveTerms();
-      assert insensitiveTerms.length <= terms.length : "insensitive terms should be smaller set of all terms";
-    } else {
-      insensitiveTerms = terms;
-    }
-    if (insensitiveTerms.length > 0) {
-      createOffsetsEnumsForTerms(insensitiveTerms, termsIndex, doc, offsetsEnums);
-    }
+    final List<OffsetsEnum> offsetsEnums = new ArrayList<>();
 
     // Handle Weight.matches approach
-    if (useWeightMatchesIter) {
+    if (components.getHighlightFlags().contains(UnifiedHighlighter.HighlightFlag.WEIGHT_MATCHES)) {
+
       createOffsetsEnumsWeightMatcher(leafReader, doc, offsetsEnums);
-    }
 
-    // Handle spans
-    if (phraseHelper.hasPositionSensitivity()) {
-      phraseHelper.createOffsetsEnumsForSpans(leafReader, doc, offsetsEnums);
-    }
+    } else { // classic approach
 
-    // Handle automata
-    if (automata.length > 0 && useWeightMatchesIter==false) {// WEIGHT_MATCHES handled these already
-      createOffsetsEnumsForAutomata(termsIndex, doc, offsetsEnums);
-    }
+      // Handle position insensitive terms (a subset of this.terms field):
+      final BytesRef[] insensitiveTerms;
+      if (phraseHelper.hasPositionSensitivity()) {
+        insensitiveTerms = phraseHelper.getAllPositionInsensitiveTerms();
+        assert insensitiveTerms.length <= terms.length : "insensitive terms should be smaller set of all terms";
+      } else {
+        insensitiveTerms = terms;
+      }
+      if (insensitiveTerms.length > 0) {
+        createOffsetsEnumsForTerms(insensitiveTerms, termsIndex, doc, offsetsEnums);
+      }
 
-    //TODO if only one OE then return it; don't wrap in Multi.  If none, return NONE
-    return new OffsetsEnum.MultiOffsetsEnum(offsetsEnums);
-  }
+      // Handle spans
+      if (phraseHelper.hasPositionSensitivity()) {
+        phraseHelper.createOffsetsEnumsForSpans(leafReader, doc, offsetsEnums);
+      }
 
-  protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
-    TermsEnum termsEnum = termsIndex.iterator();//does not return null
-    for (BytesRef term : sourceTerms) {
-      if (termsEnum.seekExact(term)) {
-        PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
-        if (postingsEnum == null) {
-          // no offsets or positions available
-          throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
-        }
-        if (doc == postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted
-          results.add(new OffsetsEnum.OfPostings(term, postingsEnum));
-        }
+      // Handle automata
+      if (automata.length > 0) {
+        createOffsetsEnumsForAutomata(termsIndex, doc, offsetsEnums);
       }
     }
+
+    //TODO if only one OE then return it; don't wrap in Multi.  If none, return NONE
+    return new OffsetsEnum.MultiOffsetsEnum(offsetsEnums);
   }
 
   protected void createOffsetsEnumsWeightMatcher(LeafReader _leafReader, int docId, List<OffsetsEnum> results) throws IOException {
@@ -140,7 +123,8 @@ public Terms terms(String field) throws IOException {
         }
       }
 
-      //these ought to be a default?  So many subclasses do this!
+      //  So many subclasses do this!
+      //these ought to be a default or added via some intermediary like "FilterTransientLeafReader" (exception on close).
       @Override
       public CacheHelper getCoreCacheHelper() {
         return null;
@@ -153,25 +137,41 @@ public CacheHelper getReaderCacheHelper() {
     };
     IndexSearcher indexSearcher = new IndexSearcher(leafReader);
     indexSearcher.setQueryCache(null);
-    Matches matches = indexSearcher.rewrite(phraseHelper.getQuery())
+    Matches matches = indexSearcher.rewrite(components.getQuery())
         .createWeight(indexSearcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f)
         .matches(leafReader.getContext(), docId);
     if (matches == null) {
       return; // doc doesn't match
     }
     for (String field : matches) {
-      if (phraseHelper.getFieldMatcher().test(field)) {
+      if (components.getFieldMatcher().test(field)) {
         MatchesIterator iterator = matches.getMatches(field);
         if (iterator == null) {
           continue;
         }
         //note: the iterator should be positioned on the first one already
-        results.add(new OffsetsEnum.OfMatchesIterator(iterator));
+        results.add(new OffsetsEnum.OfMatchesIteratorWithSubs(iterator));
       }
     }
 
   }
 
+  protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
+    TermsEnum termsEnum = termsIndex.iterator();//does not return null
+    for (BytesRef term : sourceTerms) {
+      if (termsEnum.seekExact(term)) {
+        PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
+        if (postingsEnum == null) {
+          // no offsets or positions available
+          throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
+        }
+        if (doc == postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted
+          results.add(new OffsetsEnum.OfPostings(term, postingsEnum));
+        }
+      }
+    }
+  }
+
   protected void createOffsetsEnumsForAutomata(Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
     List<List<PostingsEnum>> automataPostings = new ArrayList<>(automata.length);
     for (int i = 0; i < automata.length; i++) {
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
index e42ff2353f1e..80528ce56123 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
@@ -17,8 +17,10 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
+import java.util.Collections;
 
 import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
@@ -32,7 +34,7 @@ public class NoOpOffsetStrategy extends FieldOffsetStrategy {
   public static final NoOpOffsetStrategy INSTANCE = new NoOpOffsetStrategy();
 
   private NoOpOffsetStrategy() {
-    super(new UHComponents("_ignored_", (s) -> false, new BytesRef[0], PhraseHelper.NONE, new CharacterRunAutomaton[0]));
+    super(new UHComponents("_ignored_", (s) -> false, new MatchNoDocsQuery(), new BytesRef[0], PhraseHelper.NONE, new CharacterRunAutomaton[0], Collections.emptySet()));
   }
 
   @Override
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
index 906117247c2f..14f8875269bc 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@@ -94,8 +94,14 @@ public void close() throws IOException {
   @Override
   public String toString() {
     final String name = getClass().getSimpleName();
+    String offset = "";
     try {
-      return name + "(term:" + getTerm().utf8ToString() +")";
+      offset = ",[" + startOffset() + "-" + endOffset() + "]";
+    } catch (Exception e) {
+      //ignore; for debugging only
+    }
+    try {
+      return name + "(term:" + getTerm().utf8ToString() + offset + ")";
     } catch (Exception e) {
       return name;
     }
@@ -158,6 +164,130 @@ public int freq() throws IOException {
     }
   }
 
+  /** Based on a {@link MatchesIterator} with submatches. */
+  public static class OfMatchesIteratorWithSubs extends OffsetsEnum {
+    //Either CachedOE impls (which are the submatches) or OfMatchesIterator impls
+    private final PriorityQueue<OffsetsEnum> pendingQueue = new PriorityQueue<>();
+
+    public OfMatchesIteratorWithSubs(MatchesIterator matchesIterator) {
+      pendingQueue.add(new OfMatchesIterator(matchesIterator));
+    }
+
+    @Override
+    public boolean nextPosition() throws IOException {
+      OffsetsEnum formerHeadOE = pendingQueue.poll(); // removes the head
+      if (formerHeadOE instanceof CachedOE) {
+        // we're done with the former head.  CachedOE's are one use only.
+        // Look at the new head...
+        OffsetsEnum newHeadOE = pendingQueue.peek();
+        if (newHeadOE instanceof OfMatchesIterator) {
+          // We found the matchesIterator.  Requires processing.
+          nextWhenMatchesIterator((OfMatchesIterator) newHeadOE);  // May or may not remove or re-queue itself
+        } // else new head is a CacheOE or no more.  Nothing to do with it.
+
+      } else { // formerHeadOE is OfMatchesIterator; advance it
+        OfMatchesIterator miOE = (OfMatchesIterator) formerHeadOE;
+        if (miOE.nextPosition()) {
+          nextWhenMatchesIterator(miOE); // requires processing.  May or may not re-enqueue itself
+        }
+      }
+      return pendingQueue.isEmpty() == false;
+    }
+
+    private void nextWhenMatchesIterator(OfMatchesIterator miOE) throws IOException {
+      boolean isHead = miOE == pendingQueue.peek();
+      MatchesIterator subMatches = miOE.matchesIterator.getSubMatches();
+      if (subMatches != null) {
+        // remove this miOE from the queue, add it's submatches, next() it, then re-enqueue it
+        if (isHead) {
+          pendingQueue.poll(); // remove
+        }
+
+        enqueueCachedMatches(subMatches);
+
+        if (miOE.nextPosition()) {
+          pendingQueue.add(miOE);
+          assert pendingQueue.peek() != miOE; // miOE should follow cached entries
+        }
+
+      } else { // else has no subMatches.  It will stay enqueued.
+        if (!isHead) {
+          pendingQueue.add(miOE);
+        } // else it's *already* in pendingQueue
+      }
+    }
+
+    private boolean enqueueCachedMatches(MatchesIterator thisMI) throws IOException {
+      if (thisMI == null) {
+        return false;
+      } else {
+        while (thisMI.next()) {
+          if (false == enqueueCachedMatches(thisMI.getSubMatches())) { // recursion
+            // if no sub-matches then add ourselves
+            pendingQueue.add(new CachedOE(thisMI.startOffset(), thisMI.endOffset()));
+          }
+        }
+        return true;
+      }
+    }
+
+    @Override
+    public int freq() throws IOException {
+      return pendingQueue.peek().freq();
+    }
+
+    @Override
+    public BytesRef getTerm() throws IOException {
+      return pendingQueue.peek().getTerm();
+    }
+
+    @Override
+    public int startOffset() throws IOException {
+      return pendingQueue.peek().startOffset();
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      return pendingQueue.peek().endOffset();
+    }
+
+    private static class CachedOE extends OffsetsEnum {
+      final int startOffset;
+      final int endOffset;
+
+      private CachedOE(int startOffset, int endOffset) {
+        this.startOffset = startOffset;
+        this.endOffset = endOffset;
+      }
+
+      @Override
+      public boolean nextPosition() throws IOException {
+        return false;
+      }
+
+      @Override
+      public int freq() throws IOException {
+        return 1; // nocommit terrible
+      }
+
+      @Override
+      public BytesRef getTerm() throws IOException {
+        return new BytesRef(); // nocommit terrible
+      }
+
+      @Override
+      public int startOffset() throws IOException {
+        return startOffset;
+      }
+
+      @Override
+      public int endOffset() throws IOException {
+        return endOffset;
+      }
+    }
+  }
+
+  /** Based on a {@link MatchesIterator}; does not look at submatches. */
   public static class OfMatchesIterator extends OffsetsEnum {
     private final MatchesIterator matchesIterator;
 
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
index dd00b3d7d008..ab439d09a2bc 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
@@ -38,9 +38,7 @@
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
@@ -67,15 +65,13 @@
 public class PhraseHelper {
 
   public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
-      (s) -> false, spanQuery -> null, query -> null, true, false);
+      (s) -> false, spanQuery -> null, query -> null, true);
 
-  private final Query query;
   private final String fieldName;
   private final Set<BytesRef> positionInsensitiveTerms; // (TermQuery terms)
   private final Set<SpanQuery> spanQueries;
   private final boolean willRewrite;
   private final Predicate<String> fieldMatcher;
-  private final boolean useWeightMatchesIter;
 
   /**
    * Constructor.
@@ -90,18 +86,13 @@ public class PhraseHelper {
    */
   public PhraseHelper(Query query, String field, Predicate<String> fieldMatcher, Function<SpanQuery, Boolean> rewriteQueryPred,
                       Function<Query, Collection<Query>> preExtractRewriteFunction,
-                      boolean ignoreQueriesNeedingRewrite, boolean useWeightMatchesIter) {
-    this.query = query;
+                      boolean ignoreQueriesNeedingRewrite) {
     this.fieldName = field;
     this.fieldMatcher = fieldMatcher;
-    this.useWeightMatchesIter = useWeightMatchesIter;
     // filter terms to those we want
     positionInsensitiveTerms = new HashSet<>();
     spanQueries = new HashSet<>();
 
-    // if useWeightMatchesIter:
-    // TODO once SpanQueries implement Weight.matches, we needn't capture them here.  But note SpanMultiTermQueryWrapper
-
     // TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls
 
     boolean[] mustRewriteHolder = {false}; // boolean wrapped in 1-ary array so it's mutable from inner class
@@ -141,8 +132,6 @@ protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> t
           for (Query newQuery : newQueriesToExtract) {
             extract(newQuery, boost, terms);
           }
-        } else if (useWeightMatchesIter && (query instanceof PhraseQuery || query instanceof MultiPhraseQuery)) {
-          return;// do nothing; these queries have Weight impls with proper getMulti
         } else {
           super.extract(query, boost, terms);
         }
@@ -160,10 +149,8 @@ protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
       @Override
       protected void extractWeightedTerms(Map<String, WeightedSpanTerm> terms, Query query, float boost)
           throws IOException {
-        if (!useWeightMatchesIter) { // when useWeightMatchesIterator, we needn't track these terms
-          query.createWeight(UnifiedHighlighter.EMPTY_INDEXSEARCHER, ScoreMode.COMPLETE_NO_SCORES, boost)
-              .extractTerms(extractPosInsensitiveTermsTarget);
-        }
+        query.createWeight(UnifiedHighlighter.EMPTY_INDEXSEARCHER, ScoreMode.COMPLETE_NO_SCORES, boost)
+            .extractTerms(extractPosInsensitiveTermsTarget);
       }
 
       // called on SpanQueries. Some other position-sensitive queries like PhraseQuery are converted beforehand
@@ -198,18 +185,6 @@ protected boolean mustRewriteQuery(SpanQuery spanQuery) {
     willRewrite = mustRewriteHolder[0];
   }
 
-  public Query getQuery() {
-    return query;
-  }
-
-  public boolean useWeightMatchesIter() {
-    return useWeightMatchesIter;
-  }
-
-  public Predicate<String> getFieldMatcher() {
-    return fieldMatcher;
-  }
-
   public Set<SpanQuery> getSpanQueries() {
     return spanQueries;
   }
@@ -230,7 +205,7 @@ public boolean willRewrite() {
     return willRewrite;
   }
 
-  /** Returns the terms that are position-insensitive (sorted).  Always empty if {@link #useWeightMatchesIter}. */
+  /** Returns the terms that are position-insensitive (sorted). */
   public BytesRef[] getAllPositionInsensitiveTerms() {
     BytesRef[] result = positionInsensitiveTerms.toArray(new BytesRef[positionInsensitiveTerms.size()]);
     Arrays.sort(result);
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
index e851ca660cdd..e829769909ef 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
@@ -38,11 +38,13 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
 
   public TokenStreamOffsetStrategy(UHComponents components, Analyzer indexAnalyzer) {
     super(new UHComponents(
-          components.getField(),
-          components.getFieldMatcher(),
-          ZERO_LEN_BYTES_REF_ARRAY,
-          components.getPhraseHelper(),
-          convertTermsToAutomata(components.getExtractedTerms(), components.getAutomata())),
+            components.getField(),
+            components.getFieldMatcher(),
+            components.getQuery(),
+            ZERO_LEN_BYTES_REF_ARRAY,
+            components.getPhraseHelper(),
+            convertTermsToAutomata(components.getTerms(), components.getAutomata()),
+            components.getHighlightFlags()),
         indexAnalyzer);
     assert phraseHelper.hasPositionSensitivity() == false;
   }
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
index a8ea1c0273cf..9719fab27485 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
@@ -17,8 +17,10 @@
 
 package org.apache.lucene.search.uhighlight;
 
+import java.util.Set;
 import java.util.function.Predicate;
 
+import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
@@ -30,16 +32,22 @@
 public class UHComponents {
   private final String field;
   private final Predicate<String> fieldMatcher;
-  private final BytesRef[] extractedTerms;
+  private final Query query;
+  private final BytesRef[] terms;
   private final PhraseHelper phraseHelper;
   private final CharacterRunAutomaton[] automata;
+  private final Set<UnifiedHighlighter.HighlightFlag> highlightFlags;
 
-  public UHComponents(String field, Predicate<String> fieldMatcher, BytesRef[] extractedTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata) {
+  public UHComponents(String field, Predicate<String> fieldMatcher, Query query,
+                      BytesRef[] terms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata,
+                      Set<UnifiedHighlighter.HighlightFlag> highlightFlags) {
     this.field = field;
     this.fieldMatcher = fieldMatcher;
-    this.extractedTerms = extractedTerms;
+    this.query = query;
+    this.terms = terms;
     this.phraseHelper = phraseHelper;
     this.automata = automata;
+    this.highlightFlags = highlightFlags;
   }
 
   public String getField() {
@@ -50,8 +58,12 @@ public Predicate<String> getFieldMatcher() {
     return fieldMatcher;
   }
 
-  public BytesRef[] getExtractedTerms() {
-    return extractedTerms;
+  public Query getQuery() {
+    return query;
+  }
+
+  public BytesRef[] getTerms() {
+    return terms;
   }
 
   public PhraseHelper getPhraseHelper() {
@@ -61,4 +73,8 @@ public PhraseHelper getPhraseHelper() {
   public CharacterRunAutomaton[] getAutomata() {
     return automata;
   }
+
+  public Set<UnifiedHighlighter.HighlightFlag> getHighlightFlags() {
+    return highlightFlags;
+  }
 }
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index d55ec3fefefa..4e4d81528733 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -752,13 +752,15 @@ public Object highlightWithoutSearcher(String field, Query query, String content
   }
 
   protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
-    BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
+    Predicate<String> fieldMatcher = getFieldMatcher(field);
+    BytesRef[] terms = filterExtractedTerms(fieldMatcher, allTerms);
     Set<HighlightFlag> highlightFlags = getFlags(field);
     PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
     CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
     OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
+    UHComponents components = new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, highlightFlags);
     return new FieldHighlighter(field,
-        getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags),
+        getOffsetStrategy(offsetSource, components),
         new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR),
         getScorer(field),
         maxPassages,
@@ -785,9 +787,6 @@ protected Set<HighlightFlag> getFlags(String field) {
     if (shouldHighlightPhrasesStrictly(field)) {
       highlightFlags.add(HighlightFlag.PHRASES);
     }
-    if (highlightFlags.contains(HighlightFlag.PHRASES) && highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)) {
-      highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);//nocommit for experimentation, use as default
-    }
     if (shouldPreferPassageRelevancyOverSpeed(field)) {
       highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
     }
@@ -795,21 +794,30 @@ protected Set<HighlightFlag> getFlags(String field) {
   }
 
   protected PhraseHelper getPhraseHelper(String field, Query query, Set<HighlightFlag> highlightFlags) {
+    boolean useWeightMatchesIter = highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES);
+    if (useWeightMatchesIter) {
+      return PhraseHelper.NONE; // will be handled by Weight.matches which always considers phrases
+    }
     boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
     boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
-    boolean useWeightMatchesIter = highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES);
     return highlightPhrasesStrictly ?
         new PhraseHelper(query, field, getFieldMatcher(field),
             this::requiresRewrite,
             this::preSpanQueryRewrite,
-            !handleMultiTermQuery,
-            useWeightMatchesIter && handleMultiTermQuery)
+            !handleMultiTermQuery
+        )
         : PhraseHelper.NONE;
   }
 
   protected CharacterRunAutomaton[] getAutomata(String field, Query query, Set<HighlightFlag> highlightFlags) {
+    // do we "eagerly" look in span queries for automata here, or do we not and let PhraseHelper handle those?
+    // if don't highlight phrases strictly,
+    final boolean lookInSpan =
+        !highlightFlags.contains(HighlightFlag.PHRASES) // no PhraseHelper
+        || highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES); // Weight.Matches will find all
+
     return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
-        ? MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), !highlightFlags.contains(HighlightFlag.PHRASES), this::preMultiTermQueryRewrite)
+        ? MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), lookInSpan, this::preMultiTermQueryRewrite)
         : ZERO_LEN_AUTOMATA_ARRAY;
   }
 
@@ -847,15 +855,12 @@ protected OffsetSource getOptimizedOffsetSource(String field, BytesRef[] terms,
     return offsetSource;
   }
 
-  protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, String field, BytesRef[] terms,
-                                                  PhraseHelper phraseHelper, CharacterRunAutomaton[] automata,
-                                                  Set<HighlightFlag> highlightFlags) {
-    final UHComponents components = new UHComponents(field, getFieldMatcher(field), terms, phraseHelper, automata);
+  protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, UHComponents components) {
     switch (offsetSource) {
       case ANALYSIS:
-        if (!phraseHelper.hasPositionSensitivity() &&
-            !highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED) &&
-            !highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES)) {
+        if (!components.getPhraseHelper().hasPositionSensitivity() &&
+            !components.getHighlightFlags().contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED) &&
+            !components.getHighlightFlags().contains(HighlightFlag.WEIGHT_MATCHES)) {
           //skip using a memory index since it's pure term filtering
           return new TokenStreamOffsetStrategy(components, getIndexAnalyzer());
         } else {
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index d89499cd5e83..c9435a04471d 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -86,42 +86,39 @@ public void doAfter() throws IOException {
   }
 
   static UnifiedHighlighter randomUnifiedHighlighter(IndexSearcher searcher, Analyzer indexAnalyzer) {
-    return randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.noneOf(HighlightFlag.class));
+    return randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.noneOf(HighlightFlag.class), null);
   }
 
   static UnifiedHighlighter randomUnifiedHighlighter(IndexSearcher searcher, Analyzer indexAnalyzer,
-                                                             EnumSet<HighlightFlag> mandatoryFlags) {
-    if (random().nextBoolean()) {
-      return new UnifiedHighlighter(searcher, indexAnalyzer);
-    } else {
-      final UnifiedHighlighter uh = new UnifiedHighlighter(searcher, indexAnalyzer) {
-        Set<HighlightFlag> flags; // consistently random set of flags for this test run
-        @Override
-        protected Set<HighlightFlag> getFlags(String field) {
-          if (flags != null) {
-            return flags;
-          }
-          final EnumSet<HighlightFlag> result = EnumSet.copyOf(mandatoryFlags);
-          int r = random().nextInt();
-          for (HighlightFlag highlightFlag : HighlightFlag.values()) {
-            if (((1 << highlightFlag.ordinal()) & r) == 0) {
-              result.add(highlightFlag);
-            }
-          }
-          if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
-            // these two are required for WEIGHT_MATCHES
-            result.add(HighlightFlag.MULTI_TERM_QUERY);
-            result.add(HighlightFlag.PHRASES);
+                                                     EnumSet<HighlightFlag> mandatoryFlags, Boolean requireFieldMatch) {
+    final UnifiedHighlighter uh = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      Set<HighlightFlag> flags; // consistently random set of flags for this test run
+      @Override
+      protected Set<HighlightFlag> getFlags(String field) {
+        if (flags != null) {
+          return flags;
+        }
+        final EnumSet<HighlightFlag> result = EnumSet.copyOf(mandatoryFlags);
+        int r = random().nextInt();
+        for (HighlightFlag highlightFlag : HighlightFlag.values()) {
+          if (((1 << highlightFlag.ordinal()) & r) == 0) {
+            result.add(highlightFlag);
           }
-          return flags = result;
         }
-      };
-      uh.setCacheFieldValCharsThreshold(random().nextInt(100));
-      if (random().nextBoolean()) {
-        uh.setFieldMatcher(f -> true); // requireFieldMatch==false
+        if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
+          // these two are required for WEIGHT_MATCHES
+          result.add(HighlightFlag.MULTI_TERM_QUERY);
+          result.add(HighlightFlag.PHRASES);
+        }
+        return flags = result;
       }
-      return uh;
+    };
+    uh.setCacheFieldValCharsThreshold(random().nextInt(100));
+    if (requireFieldMatch == Boolean.FALSE || (requireFieldMatch == null && random().nextBoolean())) {
+      uh.setFieldMatcher(f -> true); // requireFieldMatch==false
     }
+    return uh;
+
   }
 
   //
@@ -1126,7 +1123,7 @@ protected Predicate<String> getFieldMatcher(String field) {
         return (qf) -> true;
       }
     };
-    UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY));
+    UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null);
     highlighterFieldMatch.setFieldMatcher(null);//default
     BooleanQuery.Builder queryBuilder =
         new BooleanQuery.Builder()
@@ -1206,10 +1203,14 @@ public void testMatchesSlopBug() throws IOException {
     UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
     Query query = new PhraseQuery(2, "title", "this", "is", "the", "field");
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
-    assertEquals(1, topDocs.totalHits);
+    assertEquals(1, topDocs.totalHits.value);
     String[] snippets = highlighter.highlight("title", query, topDocs, 10);
     assertEquals(1, snippets.length);
-    assertEquals("<b>This is the title field</b>.", snippets[0]);
+    if (highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertEquals("<b>This is the title field</b>.", snippets[0]);
+    } else {
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.", snippets[0]);
+    }
     ir.close();
   }
 
@@ -1223,7 +1224,7 @@ protected Predicate<String> getFieldMatcher(String field) {
         return (qf) -> true;
       }
     };
-    UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY));
+    UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer, EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY), null);
     highlighterFieldMatch.setFieldMatcher(null);//default
     BooleanQuery.Builder queryBuilder =
         new BooleanQuery.Builder()
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
index 2f97ad4eeb05..44d6f7b0d686 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -154,7 +154,7 @@ public void testWildcards() throws Exception {
 
   private UnifiedHighlighter randomUnifiedHighlighter(IndexSearcher searcher, Analyzer indexAnalyzer) {
     return TestUnifiedHighlighter.randomUnifiedHighlighter(searcher, indexAnalyzer,
-        EnumSet.of(HighlightFlag.MULTI_TERM_QUERY));
+        EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null);
   }
 
   public void testOnePrefix() throws Exception {
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
index ee9b9d224a63..a6d5d01566ed 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
@@ -17,10 +17,12 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Locale;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -73,6 +75,9 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
   UnifiedHighlighter highlighter;
   IndexReader indexReader;
 
+  // Is it okay if a match (identified by offset pair) appears multiple times in the passage?
+  AtomicBoolean dupMatchAllowed = new AtomicBoolean(true);
+
   @ParametersFactory
   public static Iterable<Object[]> parameters() {
     return UHTestHelper.parametersFactoryList();
@@ -106,8 +111,32 @@ private Document newDoc(String... bodyVals) {
   private void initReaderSearcherHighlighter() throws IOException {
     indexReader = indexWriter.getReader();
     searcher = newSearcher(indexReader);
-    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
-    highlighter.setHighlightPhrasesStrictly(true);
+    highlighter = TestUnifiedHighlighter.randomUnifiedHighlighter(searcher, indexAnalyzer,
+        EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY), true);
+    // intercept the formatter in order to check constraints on the passage.
+    final PassageFormatter defaultFormatter = highlighter.getFormatter(null);
+    highlighter.setFormatter(new PassageFormatter() {
+      @Override
+      public Object format(Passage[] passages, String content) {
+        boolean thisDupMatchAllowed = dupMatchAllowed.getAndSet(true);
+        for (Passage passage : passages) {
+          String prevPair = "";
+          for (int i = 0; i < passage.getNumMatches(); i++) {
+            // pad each to make comparable
+            String pair = String.format(Locale.ROOT, "%03d-%03d", passage.getMatchStarts()[i], passage.getMatchEnds()[i]);
+            int cmp = prevPair.compareTo(pair);
+            if (cmp == 0) {
+              assertTrue("dup match in passage at offset " + pair, thisDupMatchAllowed);
+            } else if (cmp > 0) {
+              fail("bad match order in passage at offset " + pair);
+            }
+            prevPair = pair;
+          }
+
+        }
+        return defaultFormatter.format(passages, content);
+      }
+    });
   }
 
   private PhraseQuery newPhraseQuery(String field, String phrase) {
@@ -139,8 +168,11 @@ public void testBasics() throws IOException {
 
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
-
-    assertArrayEquals(new String[]{"yin alone, <b>Yin yang</b>, yin gap yang"}, snippets);
+    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertArrayEquals(new String[]{"yin alone, <b>Yin yang</b>, yin gap yang"}, snippets);
+    } else {
+      assertArrayEquals(new String[]{"yin alone, <b>Yin</b> <b>yang</b>, yin gap yang"}, snippets);
+    }
   }
 
   public void testWithSameTermQuery() throws IOException {
@@ -155,19 +187,13 @@ public void testWithSameTermQuery() throws IOException {
         .build();
 
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+    dupMatchAllowed.set(false); // We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.
     String[] snippets = highlighter.highlight("body", query, topDocs);
-
-    assertArrayEquals(new String[]{"<b>Yin yang</b>, <b>yin</b> gap yang"}, snippets);
-
-    // test the Passage only has 3 matches.  We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.
-    highlighter.setFormatter(new PassageFormatter() {
-      @Override
-      public Object format(Passage[] passages, String content) {
-        return Arrays.toString(passages);
-      }
-    });
-    assertArrayEquals(new String[]{"[Passage[0-22]{yin[0-3],yang[4-8],yin[10-13]}score=2.4964213]"},
-        highlighter.highlight("body", query, topDocs));
+    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertArrayEquals(new String[]{"<b>Yin yang</b>, <b>yin</b> gap yang"}, snippets);
+    } else {
+      assertArrayEquals(new String[]{"<b>Yin</b> <b>yang</b>, <b>yin</b> gap yang"}, snippets);
+    }
   }
 
   public void testPhraseNotInDoc() throws IOException {
@@ -199,19 +225,14 @@ public void testSubPhrases() throws IOException {
         .build();
 
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+    dupMatchAllowed.set(false); // We don't want duplicates from both PhraseQuery
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>alpha bravo charlie</b> - charlie bravo alpha"}, snippets);
-
-    // test the Passage only has 3 matches.  We don't want duplicates from both PhraseQuery
-    highlighter.setFormatter(new PassageFormatter() {
-      @Override
-      public Object format(Passage[] passages, String content) {
-        return Arrays.toString(passages);
-      }
-    });
-    assertArrayEquals(new String[]{"[Passage[0-41]{alpha[0-5],bravo[6-11],charlie[12-19]}score=3.931102]"},
-        highlighter.highlight("body", query, topDocs));
+    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertArrayEquals(new String[]{"<b>alpha bravo charlie</b> - charlie bravo alpha"}, snippets);
+    } else {
+      assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"}, snippets);
+    }
   }
 
   public void testSynonyms() throws IOException {
@@ -225,8 +246,11 @@ public void testSynonyms() throws IOException {
 
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
-
-    assertArrayEquals(new String[]{"<b>mother father</b> w <b>mom father</b> w dad"}, snippets);
+    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertArrayEquals(new String[]{"<b>mother father</b> w <b>mom father</b> w dad"}, snippets);
+    } else {
+      assertArrayEquals(new String[]{"<b>mother</b> <b>father</b> w <b>mom</b> <b>father</b> w dad"}, snippets);
+    }
   }
 
   /**
@@ -254,10 +278,14 @@ public void testRewriteAndMtq() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
-        snippets);
+    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"}, snippets);
+    } else {
+      assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"}, snippets);
+    }
 
     // do again, this time with MTQ disabled.  We should only find "alpha bravo".
+    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
     highlighter.setHandleMultiTermQuery(false);//disable but leave phrase processing enabled
 
     topDocs = searcher.search(query, 10, Sort.INDEXORDER);
@@ -290,10 +318,14 @@ public void testRewrite() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo alpha"},
-        snippets);
+    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo alpha"}, snippets);
+    } else {
+      assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"}, snippets);
+    }
 
     // do again, this time with MTQ disabled.  We should only find "alpha bravo".
+    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
     highlighter.setHandleMultiTermQuery(false);//disable but leave phrase processing enabled
 
     topDocs = searcher.search(query, 10, Sort.INDEXORDER);
@@ -327,10 +359,14 @@ public void testMtq() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"},
-        snippets);
+    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"}, snippets);
+    } else {
+      assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"}, snippets);
+    }
 
     // do again, this time with MTQ disabled.
+    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
     highlighter.setHandleMultiTermQuery(false);//disable but leave phrase processing enabled
 
     topDocs = searcher.search(query, 10, Sort.INDEXORDER);
@@ -354,9 +390,11 @@ public void testMultiValued() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs, 2);
 
-    assertArrayEquals(new String[]{"<b>one bravo</b> three... <b>four bravo</b> six"},
-        snippets);
-
+    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
+      assertArrayEquals(new String[]{"<b>one bravo</b> three... <b>four bravo</b> six"}, snippets);
+    } else {
+      assertArrayEquals(new String[]{"<b>one</b> <b>bravo</b> three... <b>four</b> <b>bravo</b> six"}, snippets);
+    }
 
     // now test phraseQuery won't span across values
     assert indexAnalyzer.getPositionIncrementGap("body") > 0;
@@ -405,10 +443,15 @@ public void testMaxLen() throws IOException {
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
 
-    if (fieldType == UHTestHelper.reanalysisType || highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
-      assertArrayEquals(new String[]{"<b>alpha bravo</b> charlie -"}, snippets);
+    final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
+    if (fieldType == UHTestHelper.reanalysisType || weightMatches) {
+      if (weightMatches) {
+        assertArrayEquals(new String[]{"<b>alpha bravo</b> charlie -"}, snippets);
+      } else {
+        assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> charlie -"}, snippets);
+      }
     } else {
-      assertArrayEquals(new String[]{"<b>alpha bravo</b> <b>charlie</b> -"}, snippets);
+      assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> -"}, snippets);
     }
   }
 
@@ -427,6 +470,7 @@ public void testFilteredOutSpan() throws IOException {
 
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     String[] snippets = highlighter.highlight("body", query, topDocs);
+    // spans' MatchesIterator exposes each underlying term; thus doesn't enclose intermediate "of"
     assertArrayEquals(new String[]{"freezing cold stuff like stuff <b>freedom</b> of <b>speech</b>"}, snippets);
   }
 
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
index ca21000137dc..52a790cbed5a 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
@@ -19,9 +19,11 @@
 
 import java.io.IOException;
 import java.text.BreakIterator;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Predicate;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -29,6 +31,7 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.uhighlight.FieldHighlighter;
@@ -60,9 +63,9 @@ public void testFieldOffsetStrategyExtensibility() {
     final UnifiedHighlighter.OffsetSource offsetSource = UnifiedHighlighter.OffsetSource.NONE_NEEDED;
     FieldOffsetStrategy strategy = new FieldOffsetStrategy(new UHComponents("field",
         (s) -> false,
-        new BytesRef[0],
+        new MatchAllDocsQuery(), new BytesRef[0],
         PhraseHelper.NONE,
-        new CharacterRunAutomaton[0])) {
+        new CharacterRunAutomaton[0], Collections.emptySet())) {
       @Override
       public UnifiedHighlighter.OffsetSource getOffsetSource() {
         return offsetSource;
@@ -149,13 +152,15 @@ protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator
       @Override
       protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
         // THIS IS A COPY of the superclass impl; but use CustomFieldHighlighter
-        BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
+        Predicate<String> fieldMatcher = getFieldMatcher(field);
+        BytesRef[] terms = filterExtractedTerms(fieldMatcher, allTerms);
         Set<HighlightFlag> highlightFlags = getFlags(field);
         PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
         CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
         OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
+        UHComponents components = new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, highlightFlags);
         return new CustomFieldHighlighter(field,
-            getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags),
+            getOffsetStrategy(offsetSource, components),
             new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR),
             getScorer(field),
             maxPassages,
@@ -164,8 +169,8 @@ protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Te
       }
 
       @Override
-      protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, String field, BytesRef[] terms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Set<HighlightFlag> highlightFlags) {
-        return super.getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags);
+      protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, UHComponents components) {
+        return super.getOffsetStrategy(offsetSource, components);
       }
 
       @Override

From 39a5c6243b09db36078cfd0772f44b875bf885aa Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Sat, 18 Aug 2018 23:18:05 -0400
Subject: [PATCH 10/14] LUCENE-8286 UH: Add highlight test showing we fix
 LUCENE-7903

---
 .../uhighlight/TestUnifiedHighlighter.java    | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index c9435a04471d..301788b7700f 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -1326,4 +1326,34 @@ protected Predicate<String> getFieldMatcher(String field) {
     }
     ir.close();
   }
+
+  // LUCENE-7909
+  public void testNestedBooleanQueryAccuracy() throws IOException {
+    IndexReader ir = indexSomeFields();
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer,
+        EnumSet.of(HighlightFlag.WEIGHT_MATCHES), true);
+
+    // This query contains an inner Boolean of two MUST clauses (== "AND").  Only one of them is
+    //  actually in the data, the other is not.  We should highlight neither.  We can highlight the outer
+    //  SHOULD clauses though.
+    Query query = new BooleanQuery.Builder()
+        .add(new PhraseQuery("title", "title", "field"), BooleanClause.Occur.SHOULD)
+        .add(new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("category", "category")), BooleanClause.Occur.MUST)
+            .add(new TermQuery(new Term("category", "nonexistent")), BooleanClause.Occur.MUST)
+            .build(), BooleanClause.Occur.SHOULD)
+        .build();
+
+    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+
+    String[] snippets = highlighter.highlight("title", query, topDocs);
+    assertArrayEquals(new String[]{"This is the <b>title field</b>."}, snippets);
+
+    // no highlights, not of "category" since "nonexistent" wasn't there
+    snippets = highlighter.highlight("category", query, topDocs);
+    assertArrayEquals(new String[]{"This is the category field."}, snippets);
+
+    ir.close();
+  }
 }

From bbeeb44716ed3ec46683c6d5f038ea5003ac6dee Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Sun, 26 Aug 2018 10:00:05 -0400
Subject: [PATCH 11/14] LUCENE-8286: UH Implement OffsetsEnum.getTerm for
 MatchesIterator impls

---
 .../lucene/search/uhighlight/OffsetsEnum.java | 65 ++++++++++++++++---
 .../lucene/search/uhighlight/Passage.java     | 11 +++-
 .../TestUnifiedHighlighterRanking.java        | 20 ++++++
 3 files changed, 86 insertions(+), 10 deletions(-)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
index 14f8875269bc..b73087e47a73 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@@ -19,13 +19,20 @@
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Objects;
 import java.util.PriorityQueue;
+import java.util.TreeSet;
+import java.util.function.Supplier;
 
 import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.MatchesIterator;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
 
 /**
@@ -78,8 +85,10 @@ public int compareTo(OffsetsEnum other) {
   public abstract int freq() throws IOException;
 
   /**
-   * The term at this position; usually always the same.
+   * The term at this position.
    * This BytesRef is safe to continue to refer to, even after we move to the next position.
+   *
+   * @see {@link Passage#getMatchTerms()}.
    */
   public abstract BytesRef getTerm() throws IOException;
 
@@ -168,9 +177,10 @@ public int freq() throws IOException {
   public static class OfMatchesIteratorWithSubs extends OffsetsEnum {
     //Either CachedOE impls (which are the submatches) or OfMatchesIterator impls
     private final PriorityQueue<OffsetsEnum> pendingQueue = new PriorityQueue<>();
+    private final HashMap<Query,BytesRef> queryToTermMap = new HashMap<>();
 
     public OfMatchesIteratorWithSubs(MatchesIterator matchesIterator) {
-      pendingQueue.add(new OfMatchesIterator(matchesIterator));
+      pendingQueue.add(new OfMatchesIterator(matchesIterator, () -> queryToTerm(matchesIterator.getQuery())));
     }
 
     @Override
@@ -224,13 +234,46 @@ private boolean enqueueCachedMatches(MatchesIterator thisMI) throws IOException
         while (thisMI.next()) {
           if (false == enqueueCachedMatches(thisMI.getSubMatches())) { // recursion
             // if no sub-matches then add ourselves
-            pendingQueue.add(new CachedOE(thisMI.startOffset(), thisMI.endOffset()));
+            pendingQueue.add(new CachedOE(queryToTerm(thisMI.getQuery()), thisMI.startOffset(), thisMI.endOffset()));
           }
         }
         return true;
       }
     }
 
+    /** Maps a Query from {@link MatchesIterator#getQuery()} to {@link OffsetsEnum#getTerm()}.
+     * See {@link Passage#getMatchTerms()}. */
+    private BytesRef queryToTerm(Query query) {
+      // compute an approximate BytesRef term of a Query.  We cache this since we're likely to see the same query again.
+      // Our approach is to call extractTerms and visit each term in order, concatenating them with an adjoining space.
+      //  If we don't have any (perhaps due to an MTQ like a wildcard) then we fall back on the toString() of the query.
+      return queryToTermMap.computeIfAbsent(query, (Query q) -> {
+        try {
+          BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
+          UnifiedHighlighter.EMPTY_INDEXSEARCHER
+              .createWeight(UnifiedHighlighter.EMPTY_INDEXSEARCHER.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1f)
+              .extractTerms(new TreeSet<Term>() {
+            @Override
+            public boolean add(Term term) {
+              if (bytesRefBuilder.length() > 0) {
+                bytesRefBuilder.append((byte) ' ');
+              }
+              bytesRefBuilder.append(term.bytes());
+              return true;
+            }
+          });
+          if (bytesRefBuilder.length() > 0) {
+            return bytesRefBuilder.get();
+          }
+        } catch (IOException e) {//ignore
+          // go to fallback...
+        }
+
+        // fallback:  (likely a MultiTermQuery)
+        return new BytesRef(q.toString());
+      });
+    }
+
     @Override
     public int freq() throws IOException {
       return pendingQueue.peek().freq();
@@ -252,10 +295,12 @@ public int endOffset() throws IOException {
     }
 
     private static class CachedOE extends OffsetsEnum {
+      final BytesRef term;
       final int startOffset;
       final int endOffset;
 
-      private CachedOE(int startOffset, int endOffset) {
+      private CachedOE(BytesRef term, int startOffset, int endOffset) {
+        this.term = term;
         this.startOffset = startOffset;
         this.endOffset = endOffset;
       }
@@ -267,12 +312,12 @@ public boolean nextPosition() throws IOException {
 
       @Override
       public int freq() throws IOException {
-        return 1; // nocommit terrible
+        return 1; // documented short-coming of MatchesIterator based UnifiedHighlighter
       }
 
       @Override
       public BytesRef getTerm() throws IOException {
-        return new BytesRef(); // nocommit terrible
+        return term;
       }
 
       @Override
@@ -290,9 +335,11 @@ public int endOffset() throws IOException {
   /** Based on a {@link MatchesIterator}; does not look at submatches. */
   public static class OfMatchesIterator extends OffsetsEnum {
     private final MatchesIterator matchesIterator;
+    private final Supplier<BytesRef> termSupplier;
 
-    public OfMatchesIterator(MatchesIterator matchesIterator) {
+    public OfMatchesIterator(MatchesIterator matchesIterator, Supplier<BytesRef> termSupplier) {
       this.matchesIterator = matchesIterator;
+      this.termSupplier = termSupplier;
     }
 
     @Override
@@ -302,12 +349,12 @@ public boolean nextPosition() throws IOException {
 
     @Override
     public int freq() throws IOException {
-      return 1; // nocommit terrible
+      return 1; // documented short-coming of MatchesIterator based UnifiedHighlighter
     }
 
     @Override
     public BytesRef getTerm() throws IOException {
-      return new BytesRef(); // nocommit terrible
+      return termSupplier.get();
     }
 
     @Override
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
index 570fcdf666f7..cbcde9e0bde1 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
@@ -157,7 +157,16 @@ public int[] getMatchEnds() {
   }
 
   /**
-   * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
+   * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.  The primary purpose of this
+   * method is to calculate the number of unique terms per passage for use in passage scoring.
+   * The actual term byte content is not well defined by this highlighter, and thus use of it is more subject to
+   * change.
+   * <p>
+   * The term might be simply the analyzed term at this position.
+   * Depending on the highlighter's configuration, the match term may be a phrase (instead of a word), and in such
+   * a case might be a series of space-separated analyzed terms.
+   * If the match is from a {@link org.apache.lucene.search.MultiTermQuery} then the match term may be the toString() of
+   * that query.
    * <p>
    * Only {@link #getNumMatches()} are valid.
    */
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java
index ac7e9bc90f1a..2ddd2eb6dd30 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterRanking.java
@@ -17,8 +17,10 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
+import java.util.EnumSet;
 import java.util.HashSet;
 import java.util.Random;
+import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -274,6 +276,15 @@ public void testCustomB() throws Exception {
 
     IndexSearcher searcher = newSearcher(ir);
     UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Set<HighlightFlag> getFlags(String field) {
+        if (random().nextBoolean()) {
+          return EnumSet.of(HighlightFlag.MULTI_TERM_QUERY, HighlightFlag.PHRASES, HighlightFlag.WEIGHT_MATCHES);
+        } else {
+          return super.getFlags(field);
+        }
+      }
+
       @Override
       protected PassageScorer getScorer(String field) {
         return new PassageScorer(1.2f, 0, 87);
@@ -314,6 +325,15 @@ public void testCustomK1() throws Exception {
 
     IndexSearcher searcher = newSearcher(ir);
     UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Set<HighlightFlag> getFlags(String field) {
+        if (random().nextBoolean()) {
+          return EnumSet.of(HighlightFlag.MULTI_TERM_QUERY, HighlightFlag.PHRASES, HighlightFlag.WEIGHT_MATCHES);
+        } else {
+          return super.getFlags(field);
+        }
+      }
+
       @Override
       protected PassageScorer getScorer(String field) {
         return new PassageScorer(0, 0.75f, 87);

From 376ac79c9129fca8169e1b9e9f649221735b01a7 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Mon, 27 Aug 2018 00:59:02 -0400
Subject: [PATCH 12/14] LUCENE-8286: UH use one UHComponents field in
 FieldOffsetStrategy instead of its parts. Resolved nocommits.

---
 .../uhighlight/AnalysisOffsetStrategy.java    | 10 ++++----
 .../uhighlight/FieldOffsetStrategy.java       | 20 ++++++----------
 .../uhighlight/MemoryIndexOffsetStrategy.java | 23 ++++++++-----------
 .../lucene/search/uhighlight/OffsetsEnum.java |  2 +-
 ...PostingsWithTermVectorsOffsetStrategy.java |  4 ++--
 .../uhighlight/TermVectorOffsetStrategy.java  |  6 ++---
 .../uhighlight/TokenStreamOffsetStrategy.java |  4 ++--
 .../search/uhighlight/UHComponents.java       |  6 ++---
 .../uhighlight/TestUnifiedHighlighter.java    |  3 ++-
 9 files changed, 34 insertions(+), 44 deletions(-)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
index 6b0b60804862..63785e35f8a0 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
@@ -38,9 +38,9 @@ public abstract class AnalysisOffsetStrategy extends FieldOffsetStrategy {
   public AnalysisOffsetStrategy(UHComponents components, Analyzer analyzer) {
     super(components);
     this.analyzer = analyzer;
-    if (analyzer.getOffsetGap(field) != 1) { // note: 1 is the default. It is RARELY changed.
+    if (analyzer.getOffsetGap(getField()) != 1) { // note: 1 is the default. It is RARELY changed.
       throw new IllegalArgumentException(
-          "offset gap of the provided analyzer should be 1 (field " + field + ")");
+          "offset gap of the provided analyzer should be 1 (field " + getField() + ")");
     }
   }
 
@@ -53,12 +53,12 @@ protected TokenStream tokenStream(String content) throws IOException {
     // If there is no splitChar in content then we needn't wrap:
     int splitCharIdx = content.indexOf(UnifiedHighlighter.MULTIVAL_SEP_CHAR);
     if (splitCharIdx == -1) {
-      return analyzer.tokenStream(field, content);
+      return analyzer.tokenStream(getField(), content);
     }
 
-    TokenStream subTokenStream = analyzer.tokenStream(field, content.substring(0, splitCharIdx));
+    TokenStream subTokenStream = analyzer.tokenStream(getField(), content.substring(0, splitCharIdx));
 
-    return new MultiValueTokenStream(subTokenStream, field, analyzer, content, UnifiedHighlighter.MULTIVAL_SEP_CHAR, splitCharIdx);
+    return new MultiValueTokenStream(subTokenStream, getField(), analyzer, content, UnifiedHighlighter.MULTIVAL_SEP_CHAR, splitCharIdx);
   }
 
   /**
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
index daf7ef4f4e40..27d9318bae36 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
@@ -42,22 +42,13 @@
 public abstract class FieldOffsetStrategy {
 
   protected final UHComponents components;
-  //nocommit remove these?:
-  protected final String field;
-  protected final PhraseHelper phraseHelper; // Query: position-sensitive information
-  protected final BytesRef[] terms; // Query: all terms we extracted (some may be position sensitive)
-  protected final CharacterRunAutomaton[] automata; // Query: wildcards (i.e. multi-term query), not position sensitive
 
   public FieldOffsetStrategy(UHComponents components) {
     this.components = components;
-    this.field = components.getField();
-    this.terms = components.getTerms();
-    this.phraseHelper = components.getPhraseHelper();
-    this.automata = components.getAutomata();
   }
 
   public String getField() {
-    return field;
+    return components.getField();
   }
 
   public abstract UnifiedHighlighter.OffsetSource getOffsetSource();
@@ -70,7 +61,7 @@ public String getField() {
   public abstract OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException;
 
   protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc) throws IOException {
-    final Terms termsIndex = leafReader.terms(field);
+    final Terms termsIndex = leafReader.terms(getField());
     if (termsIndex == null) {
       return OffsetsEnum.EMPTY;
     }
@@ -86,6 +77,8 @@ protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc
 
       // Handle position insensitive terms (a subset of this.terms field):
       final BytesRef[] insensitiveTerms;
+      final PhraseHelper phraseHelper = components.getPhraseHelper();
+      final BytesRef[] terms = components.getTerms();
       if (phraseHelper.hasPositionSensitivity()) {
         insensitiveTerms = phraseHelper.getAllPositionInsensitiveTerms();
         assert insensitiveTerms.length <= terms.length : "insensitive terms should be smaller set of all terms";
@@ -102,7 +95,7 @@ protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc
       }
 
       // Handle automata
-      if (automata.length > 0) {
+      if (components.getAutomata().length > 0) {
         createOffsetsEnumsForAutomata(termsIndex, doc, offsetsEnums);
       }
     }
@@ -163,7 +156,7 @@ protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsInd
         PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
         if (postingsEnum == null) {
           // no offsets or positions available
-          throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
+          throw new IllegalArgumentException("field '" + getField() + "' was indexed without offsets, cannot highlight");
         }
         if (doc == postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted
           results.add(new OffsetsEnum.OfPostings(term, postingsEnum));
@@ -173,6 +166,7 @@ protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsInd
   }
 
   protected void createOffsetsEnumsForAutomata(Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
+    final CharacterRunAutomaton[] automata = components.getAutomata();
     List<List<PostingsEnum>> automataPostings = new ArrayList<>(automata.length);
     for (int i = 0; i < automata.length; i++) {
       automataPostings.add(new ArrayList<>());
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
index a35591b71ae4..30dbdd4da4b5 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
@@ -23,7 +23,6 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.function.Function;
-import java.util.function.Predicate;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.FilteringTokenFilter;
@@ -33,7 +32,6 @@
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
@@ -56,25 +54,22 @@ public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer,
     memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
     memIndexLeafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
     // preFilter for MemoryIndex
-    preMemIndexFilterAutomaton = buildCombinedAutomaton(components.getFieldMatcher(), terms, this.automata, components.getPhraseHelper(), multiTermQueryRewrite);
+    preMemIndexFilterAutomaton = buildCombinedAutomaton(components, multiTermQueryRewrite);
   }
 
   /**
    * Build one {@link CharacterRunAutomaton} matching any term the query might match.
    */
-  private static CharacterRunAutomaton buildCombinedAutomaton(Predicate<String> fieldMatcher,
-                                                              BytesRef[] terms,
-                                                              CharacterRunAutomaton[] automata,
-                                                              PhraseHelper strictPhrases,
+  private static CharacterRunAutomaton buildCombinedAutomaton(UHComponents components,
                                                               Function<Query, Collection<Query>> multiTermQueryRewrite) {
     List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
-    if (terms.length > 0) {
-      allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
+    if (components.getTerms().length > 0) {
+      allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(components.getTerms()))));
     }
-    Collections.addAll(allAutomata, automata);
-    for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
+    Collections.addAll(allAutomata, components.getAutomata());
+    for (SpanQuery spanQuery : components.getPhraseHelper().getSpanQueries()) {
       Collections.addAll(allAutomata,
-          MultiTermHighlighting.extractAutomata(spanQuery, fieldMatcher, true, multiTermQueryRewrite));//true==lookInSpan
+          MultiTermHighlighting.extractAutomata(spanQuery, components.getFieldMatcher(), true, multiTermQueryRewrite));//true==lookInSpan
     }
 
     if (allAutomata.size() == 1) {
@@ -105,7 +100,7 @@ public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content)
     // Filter the tokenStream to applicable terms
     tokenStream = newKeepWordFilter(tokenStream, preMemIndexFilterAutomaton);
     memoryIndex.reset();
-    memoryIndex.addField(field, tokenStream);//note: calls tokenStream.reset() & close()
+    memoryIndex.addField(getField(), tokenStream);//note: calls tokenStream.reset() & close()
 
     if (reader == null) {
       return createOffsetsEnumFromReader(memIndexLeafReader, 0);
@@ -114,7 +109,7 @@ public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content)
           new OverlaySingleDocTermsLeafReader(
               reader,
               memIndexLeafReader,
-              field,
+              getField(),
               docId),
           docId);
     }
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
index b73087e47a73..0e6a2216751e 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@@ -88,7 +88,7 @@ public int compareTo(OffsetsEnum other) {
    * The term at this position.
    * This BytesRef is safe to continue to refer to, even after we move to the next position.
    *
-   * @see {@link Passage#getMatchTerms()}.
+   * @see Passage#getMatchTerms()
    */
   public abstract BytesRef getTerm() throws IOException;
 
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
index ab391213aba8..b0249fa56a3f 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PostingsWithTermVectorsOffsetStrategy.java
@@ -34,11 +34,11 @@ public PostingsWithTermVectorsOffsetStrategy(UHComponents components) {
 
   @Override
   public OffsetsEnum getOffsetsEnum(LeafReader leafReader, int docId, String content) throws IOException {
-    Terms docTerms = leafReader.getTermVector(docId, field);
+    Terms docTerms = leafReader.getTermVector(docId, getField());
     if (docTerms == null) {
       return OffsetsEnum.EMPTY;
     }
-    leafReader = new TermVectorFilteredLeafReader(leafReader, docTerms, field);
+    leafReader = new TermVectorFilteredLeafReader(leafReader, docTerms, getField());
 
     return createOffsetsEnumFromReader(leafReader, docId);
   }
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
index 895e95e50919..94528e2c29b4 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorOffsetStrategy.java
@@ -40,17 +40,17 @@ public UnifiedHighlighter.OffsetSource getOffsetSource() {
 
   @Override
   public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
-    Terms tvTerms = reader.getTermVector(docId, field);
+    Terms tvTerms = reader.getTermVector(docId, getField());
     if (tvTerms == null) {
       return OffsetsEnum.EMPTY;
     }
 
-    LeafReader singleDocReader = new TermVectorLeafReader(field, tvTerms);
+    LeafReader singleDocReader = new TermVectorLeafReader(getField(), tvTerms);
     return createOffsetsEnumFromReader(
         new OverlaySingleDocTermsLeafReader(
             reader,
             singleDocReader,
-            field,
+            getField(),
             docId),
         docId);
   }
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
index e829769909ef..677ee4b7b021 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
@@ -46,7 +46,7 @@ public TokenStreamOffsetStrategy(UHComponents components, Analyzer indexAnalyzer
             convertTermsToAutomata(components.getTerms(), components.getAutomata()),
             components.getHighlightFlags()),
         indexAnalyzer);
-    assert phraseHelper.hasPositionSensitivity() == false;
+    assert components.getPhraseHelper().hasPositionSensitivity() == false;
   }
 
   private static CharacterRunAutomaton[] convertTermsToAutomata(BytesRef[] terms, CharacterRunAutomaton[] automata) {
@@ -67,7 +67,7 @@ public String toString() {
 
   @Override
   public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
-    return new TokenStreamOffsetsEnum(tokenStream(content), automata);
+    return new TokenStreamOffsetsEnum(tokenStream(content), components.getAutomata());
   }
 
   private static class TokenStreamOffsetsEnum extends OffsetsEnum {
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
index 9719fab27485..eed1e9c7de19 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
@@ -33,9 +33,9 @@ public class UHComponents {
   private final String field;
   private final Predicate<String> fieldMatcher;
   private final Query query;
-  private final BytesRef[] terms;
-  private final PhraseHelper phraseHelper;
-  private final CharacterRunAutomaton[] automata;
+  private final BytesRef[] terms; // Query: all terms we extracted (some may be position sensitive)
+  private final PhraseHelper phraseHelper; // Query: position-sensitive information
+  private final CharacterRunAutomaton[] automata; // Query: wildcards (i.e. multi-term query), not position sensitive
   private final Set<UnifiedHighlighter.HighlightFlag> highlightFlags;
 
   public UHComponents(String field, Predicate<String> fieldMatcher, Query query,
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index 301788b7700f..3e2cc2e5f9a4 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -1283,7 +1283,8 @@ protected Predicate<String> getFieldMatcher(String field) {
       if (highlighterFieldMatch.getFlags("text").contains(HighlightFlag.WEIGHT_MATCHES)) {
         assertEquals("<b>This is</b> the text field. <b>You can put some text</b> if you want.", snippets[0]);
       } else {
-        //nocommit why does this highlight the first "text"?
+        // note: odd that the first "text" is highlighted.  Apparently WSTE converts PhraseQuery to a SpanNearQuery with
+        //   with inorder=false when non-0 slop.  Probably a bug.
         assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
       }
 

From a3d87e2297f53afba3ed0b41868b063713103080 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Mon, 27 Aug 2018 01:02:40 -0400
Subject: [PATCH 13/14] LUCENE-8286: UH don't use MultiOffsetsEnum for 0 or 1
 OEs.

---
 .../lucene/search/uhighlight/FieldOffsetStrategy.java      | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
index 27d9318bae36..7ba80d08afe4 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
@@ -100,8 +100,11 @@ protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc
       }
     }
 
-    //TODO if only one OE then return it; don't wrap in Multi.  If none, return NONE
-    return new OffsetsEnum.MultiOffsetsEnum(offsetsEnums);
+    switch (offsetsEnums.size()) {
+      case 0: return OffsetsEnum.EMPTY;
+      case 1: return offsetsEnums.get(0);
+      default: return new OffsetsEnum.MultiOffsetsEnum(offsetsEnums);
+    }
   }
 
   protected void createOffsetsEnumsWeightMatcher(LeafReader _leafReader, int docId, List<OffsetsEnum> results) throws IOException {

From f054ffcf580a499a0f180f2b6d2c7cd4839617d0 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Mon, 27 Aug 2018 09:31:50 -0400
Subject: [PATCH 14/14] LUCENE-8286: UH document HighlightFlags

---
 .../search/uhighlight/UnifiedHighlighter.java      | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index 4e4d81528733..e1ece502637f 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -1112,11 +1112,23 @@ public CacheHelper getReaderCacheHelper() {
    * Flags for controlling highlighting behavior.
    */
   public enum HighlightFlag {
+    /** @see UnifiedHighlighter#setHighlightPhrasesStrictly(boolean) */
     PHRASES,
+
+    /** @see UnifiedHighlighter#setHandleMultiTermQuery(boolean) */
     MULTI_TERM_QUERY,
+
+    /** Passage relevancy is more important than speed.  True by default. */
     PASSAGE_RELEVANCY_OVER_SPEED,
+
+    /**
+     * Internally use the {@link Weight#matches(LeafReaderContext, int)} API for highlighting.
+     * It's more accurate to the query, though might not calculate passage relevancy as well.
+     * Use of this flag requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES}.
+     * {@link #PASSAGE_RELEVANCY_OVER_SPEED} will be ignored.  False by default.
+     */
     WEIGHT_MATCHES
-    // TODO: ignoreQueryFields
+
     // TODO: useQueryBoosts
   }
 }