From 871c6d24bca90e32a3c5dc3de54dd48d6229ffc7 Mon Sep 17 00:00:00 2001
From: Michael Braun <mbraun38@bloomberg.net>
Date: Mon, 7 Nov 2016 15:36:41 -0500
Subject: [PATCH 1/2] LUCENE-7544 - add UnifiedHighlighter extension points for
 custom queries

---
 .../uhighlight/AnalysisOffsetStrategy.java    |  15 +-
 .../uhighlight/MultiTermHighlighting.java     | 236 ++++++++++--------
 .../search/uhighlight/PhraseHelper.java       |  19 +-
 .../search/uhighlight/UnifiedHighlighter.java |  35 ++-
 .../uhighlight/TestUnifiedHighlighter.java    |  82 ++++++
 .../uhighlight/TestUnifiedHighlighterMTQ.java |  90 +++++++
 6 files changed, 357 insertions(+), 120 deletions(-)
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
index 553a636ed6af..6b4cc74a48e2 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
@@ -19,8 +19,10 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
+import java.util.function.Function;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.FilteringTokenFilter;
@@ -30,6 +32,7 @@
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.Automata;
@@ -50,7 +53,9 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
   private final LeafReader leafReader;
   private final CharacterRunAutomaton preMemIndexFilterAutomaton;
 
-  public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Analyzer analyzer) {
+  public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
+                                CharacterRunAutomaton[] automata, Analyzer analyzer,
+                                Function<Query, Collection<Query>> multiTermQueryRewrite) {
     super(field, extractedTerms, phraseHelper, automata);
     this.analyzer = analyzer;
     // Automata (Wildcards / MultiTermQuery):
@@ -68,7 +73,8 @@ public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHel
       memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
       leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
       // preFilter for MemoryIndex
-      preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases);
+      preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases,
+          multiTermQueryRewrite);
     } else {
       memoryIndex = null;
       leafReader = null;
@@ -155,7 +161,8 @@ protected boolean accept() throws IOException {
    */
   private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
                                                               CharacterRunAutomaton[] automata,
-                                                              PhraseHelper strictPhrases) {
+                                                              PhraseHelper strictPhrases,
+                                                              Function<Query, Collection<Query>> multiTermQueryRewrite) {
     List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
     if (terms.length > 0) {
       allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
@@ -163,7 +170,7 @@ private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesR
     Collections.addAll(allAutomata, automata);
     for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
       Collections.addAll(allAutomata,
-          MultiTermHighlighting.extractAutomata(spanQuery, field, true));//true==lookInSpan
+          MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
     }
 
     if (allAutomata.size() == 1) {
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
index 9498af584a85..8bf55c47ad43 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
@@ -20,8 +20,10 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Comparator;
 import java.util.List;
+import java.util.function.Function;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -69,130 +71,142 @@ private MultiTermHighlighting() {
    * Extracts all MultiTermQueries for {@code field}, and returns equivalent
    * automata that will match terms.
    */
-  public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan) {
+  public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
+                                                        Function<Query, Collection<Query>> customAutomataExtraction) {
     List<CharacterRunAutomaton> list = new ArrayList<>();
-    if (query instanceof BooleanQuery) {
-      for (BooleanClause clause : (BooleanQuery) query) {
-        if (!clause.isProhibited()) {
-          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan)));
-        }
-      }
-    } else if (query instanceof ConstantScoreQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan)));
-    } else if (query instanceof DisjunctionMaxQuery) {
-      for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
-      }
-    } else if (lookInSpan && query instanceof SpanOrQuery) {
-      for (Query sub : ((SpanOrQuery) query).getClauses()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
-      }
-    } else if (lookInSpan && query instanceof SpanNearQuery) {
-      for (Query sub : ((SpanNearQuery) query).getClauses()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
+    Collection<Query> customSubQueries = customAutomataExtraction.apply(query);
+    if (customSubQueries != null) {
+      for (Query sub : customSubQueries) {
+        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, customAutomataExtraction)));
       }
-    } else if (lookInSpan && query instanceof SpanNotQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan)));
-    } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan)));
-    } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field, lookInSpan)));
-    } else if (query instanceof AutomatonQuery) {
-      final AutomatonQuery aq = (AutomatonQuery) query;
-      if (aq.getField().equals(field)) {
-        list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
-          @Override
-          public String toString() {
-            return aq.toString();
-          }
-        });
-      }
-    } else if (query instanceof PrefixQuery) {
-      final PrefixQuery pq = (PrefixQuery) query;
-      Term prefix = pq.getPrefix();
-      if (prefix.field().equals(field)) {
-        list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
-            Automata.makeAnyString())) {
-          @Override
-          public String toString() {
-            return pq.toString();
+    } else {
+      if (query instanceof BooleanQuery) {
+        for (BooleanClause clause : (BooleanQuery) query) {
+          if (!clause.isProhibited()) {
+            list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, customAutomataExtraction)));
           }
-        });
-      }
-    } else if (query instanceof FuzzyQuery) {
-      final FuzzyQuery fq = (FuzzyQuery) query;
-      if (fq.getField().equals(field)) {
-        String utf16 = fq.getTerm().text();
-        int termText[] = new int[utf16.codePointCount(0, utf16.length())];
-        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
-          termText[j++] = cp = utf16.codePointAt(i);
         }
-        int termLength = termText.length;
-        int prefixLength = Math.min(fq.getPrefixLength(), termLength);
-        String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
-        LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
-        String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
-        Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
-        list.add(new CharacterRunAutomaton(automaton) {
-          @Override
-          public String toString() {
-            return fq.toString();
-          }
-        });
-      }
-    } else if (query instanceof TermRangeQuery) {
-      final TermRangeQuery tq = (TermRangeQuery) query;
-      if (tq.getField().equals(field)) {
-        final CharsRef lowerBound;
-        if (tq.getLowerTerm() == null) {
-          lowerBound = null;
-        } else {
-          lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
+      } else if (query instanceof ConstantScoreQuery) {
+        list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
+            customAutomataExtraction)));
+      } else if (query instanceof DisjunctionMaxQuery) {
+        for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
+          list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, customAutomataExtraction)));
         }
-
-        final CharsRef upperBound;
-        if (tq.getUpperTerm() == null) {
-          upperBound = null;
-        } else {
-          upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
+      } else if (lookInSpan && query instanceof SpanOrQuery) {
+        for (Query sub : ((SpanOrQuery) query).getClauses()) {
+          list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, customAutomataExtraction)));
         }
+      } else if (lookInSpan && query instanceof SpanNearQuery) {
+        for (Query sub : ((SpanNearQuery) query).getClauses()) {
+          list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, customAutomataExtraction)));
+        }
+      } else if (lookInSpan && query instanceof SpanNotQuery) {
+        list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
+            customAutomataExtraction)));
+      } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
+        list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
+            customAutomataExtraction)));
+      } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
+        list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
+            lookInSpan, customAutomataExtraction)));
+      } else if (query instanceof AutomatonQuery) {
+        final AutomatonQuery aq = (AutomatonQuery) query;
+        if (aq.getField().equals(field)) {
+          list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
+            @Override
+            public String toString() {
+              return aq.toString();
+            }
+          });
+        }
+      } else if (query instanceof PrefixQuery) {
+        final PrefixQuery pq = (PrefixQuery) query;
+        Term prefix = pq.getPrefix();
+        if (prefix.field().equals(field)) {
+          list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
+              Automata.makeAnyString())) {
+            @Override
+            public String toString() {
+              return pq.toString();
+            }
+          });
+        }
+      } else if (query instanceof FuzzyQuery) {
+        final FuzzyQuery fq = (FuzzyQuery) query;
+        if (fq.getField().equals(field)) {
+          String utf16 = fq.getTerm().text();
+          int termText[] = new int[utf16.codePointCount(0, utf16.length())];
+          for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
+            termText[j++] = cp = utf16.codePointAt(i);
+          }
+          int termLength = termText.length;
+          int prefixLength = Math.min(fq.getPrefixLength(), termLength);
+          String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
+          LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
+          String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
+          Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
+          list.add(new CharacterRunAutomaton(automaton) {
+            @Override
+            public String toString() {
+              return fq.toString();
+            }
+          });
+        }
+      } else if (query instanceof TermRangeQuery) {
+        final TermRangeQuery tq = (TermRangeQuery) query;
+        if (tq.getField().equals(field)) {
+          final CharsRef lowerBound;
+          if (tq.getLowerTerm() == null) {
+            lowerBound = null;
+          } else {
+            lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
+          }
 
-        final boolean includeLower = tq.includesLower();
-        final boolean includeUpper = tq.includesUpper();
-        final CharsRef scratch = new CharsRef();
-
-        @SuppressWarnings("deprecation")
-        final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
+          final CharsRef upperBound;
+          if (tq.getUpperTerm() == null) {
+            upperBound = null;
+          } else {
+            upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
+          }
 
-        // this is *not* an automaton, but its very simple
-        list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
-          @Override
-          public boolean run(char[] s, int offset, int length) {
-            scratch.chars = s;
-            scratch.offset = offset;
-            scratch.length = length;
-
-            if (lowerBound != null) {
-              int cmp = comparator.compare(scratch, lowerBound);
-              if (cmp < 0 || (!includeLower && cmp == 0)) {
-                return false;
+          final boolean includeLower = tq.includesLower();
+          final boolean includeUpper = tq.includesUpper();
+          final CharsRef scratch = new CharsRef();
+
+          @SuppressWarnings("deprecation")
+          final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
+
+          // this is *not* an automaton, but it's very simple
+          list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
+            @Override
+            public boolean run(char[] s, int offset, int length) {
+              scratch.chars = s;
+              scratch.offset = offset;
+              scratch.length = length;
+
+              if (lowerBound != null) {
+                int cmp = comparator.compare(scratch, lowerBound);
+                if (cmp < 0 || (!includeLower && cmp == 0)) {
+                  return false;
+                }
               }
-            }
 
-            if (upperBound != null) {
-              int cmp = comparator.compare(scratch, upperBound);
-              if (cmp > 0 || (!includeUpper && cmp == 0)) {
-                return false;
+              if (upperBound != null) {
+                int cmp = comparator.compare(scratch, upperBound);
+                if (cmp > 0 || (!includeUpper && cmp == 0)) {
+                  return false;
+                }
               }
+              return true;
             }
-            return true;
-          }
 
-          @Override
-          public String toString() {
-            return tq.toString();
-          }
-        });
+            @Override
+            public String toString() {
+              return tq.toString();
+            }
+          });
+        }
       }
     }
     return list.toArray(new CharacterRunAutomaton[list.size()]);
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
index 5225041f9bec..95d51c917da4 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
@@ -40,7 +40,7 @@
 public class PhraseHelper {
 
   public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
-      spanQuery -> null, true);
+      spanQuery -> null, query -> null, true);
 
   //TODO it seems this ought to be a general thing on Spans?
   private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
@@ -69,11 +69,14 @@ public class PhraseHelper {
    * {@code rewriteQueryPred} is an extension hook to override the default choice of
    * {@link WeightedSpanTermExtractor#mustRewriteQuery(SpanQuery)}. By default unknown query types are rewritten,
    * so use this to return {@link Boolean#FALSE} if you know the query doesn't need to be rewritten.
+   * Similarly, {@code preExtractRewriteFunction} is an extension hook consulted before extraction: a non-null result
+   * is extracted in place of the query by the {@link WeightedSpanTermExtractor}; null keeps the default handling.
    * {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
    * usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
    */
   public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
-               boolean ignoreQueriesNeedingRewrite) {
+                      Function<Query, Collection<Query>> preExtractRewriteFunction,
+                      boolean ignoreQueriesNeedingRewrite) {
     this.fieldName = field; // if null then don't require field match
     // filter terms to those we want
     positionInsensitiveTerms = field != null ? new FieldFilteringTermHashSet(field) : new HashSet<>();
@@ -98,6 +101,18 @@ public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewr
         }
       }
 
+      @Override
+      protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
+        Collection<Query> newQueriesToExtract = preExtractRewriteFunction.apply(query);
+        if (newQueriesToExtract != null) {
+          for (Query newQuery : newQueriesToExtract) {
+            extract(newQuery, boost, terms);
+          }
+        } else {
+          super.extract(query, boost, terms);
+        }
+      }
+
       @Override
       protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
         if (clazz.isAssignableFrom(MultiTermQuery.class)) {
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index 72be180c177a..cb5605c564d7 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -21,6 +21,7 @@
 import java.text.BreakIterator;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
@@ -732,7 +733,8 @@ protected FieldOffsetStrategy getOffsetStrategy(String field, Query query, Sorte
     OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
     switch (offsetSource) {
       case ANALYSIS:
-        return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
+        return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
+            this::preMultiTermQueryRewrite);
       case NONE_NEEDED:
         return NoOpOffsetStrategy.INSTANCE;
       case TERM_VECTORS:
@@ -776,13 +778,14 @@ protected PhraseHelper getPhraseHelper(String field, Query query, EnumSet<Highli
     boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
     boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
     return highlightPhrasesStrictly ?
-        new PhraseHelper(query, field, this::requiresRewrite, !handleMultiTermQuery) :
+        new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
         PhraseHelper.NONE;
   }
 
   protected CharacterRunAutomaton[] getAutomata(String field, Query query, EnumSet<HighlightFlag> highlightFlags) {
     return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
-        ? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES))
+        ? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
+          this::preMultiTermQueryRewrite)
         : ZERO_LEN_AUTOMATA_ARRAY;
   }
 
@@ -830,6 +833,32 @@ protected Boolean requiresRewrite(SpanQuery spanQuery) {
     return null;
   }
 
+  /**
+   * When highlighting phrases accurately, we may need to handle custom queries that aren't supported in the
+   * {@link org.apache.lucene.search.highlight.WeightedSpanTermExtractor} as called by the {@link PhraseHelper}.
+   * Should custom query types be needed, this method should be overridden to return a collection of queries if appropriate,
+   * or null if nothing to do. If the query is not custom, simply returning null will allow the default rules to apply.
+   *
+   * @param query Query to be highlighted
+   * @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null.
+   */
+  protected Collection<Query> preSpanQueryRewrite(Query query) {
+    return null;
+  }
+
+  /**
+   * When dealing with multi term queries / span queries, we may need to handle custom queries that aren't supported
+   * by the default automata extraction in {@link MultiTermHighlighting}. This can be overridden to return a collection
+   * of queries if appropriate, or null if nothing to do. If query is not custom, simply returning null will allow the
+   * default rules to apply.
+   *
+   * @param query Query to be highlighted
+   * @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null.
+   */
+  protected Collection<Query> preMultiTermQueryRewrite(Query query) {
+    return null;
+  }
+
   private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) {
     return new DocIdSetIterator() {
       int idx = -1;
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index 0fd7d3d0a233..9eee6348b16c 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -37,6 +37,7 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
@@ -45,6 +46,7 @@
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -959,4 +961,84 @@ public String[] format(Passage passages[], String content) {
     ir.close();
   }
 
+  public void testBooleanWithSpanAndOverlappingTerms() throws IOException {
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+    Field body = new Field("body", "There is no accord and satisfaction with this - Consideration of the accord is arbitrary.", fieldType);
+    Document doc = new Document();
+    doc.add(body);
+    iw.addDocument(doc);
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+
+   UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected List<Query> preSpanQueryRewrite(Query query) {
+        if (query instanceof MyQuery) {
+          return Collections.singletonList(((MyQuery)query).wrapped);
+        }
+        return null;
+      }
+    };
+
+    BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+    Query phraseQuery = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);
+    Query oredTerms = new BooleanQuery.Builder()
+        .setMinimumNumberShouldMatch(2)
+        .add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD)
+        .add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD)
+        .add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD)
+        .build();
+    Query proximityBoostingQuery = new MyQuery(oredTerms);
+    Query totalQuery = bqBuilder
+        .add(phraseQuery, BooleanClause.Occur.SHOULD)
+        .add(proximityBoostingQuery, BooleanClause.Occur.SHOULD)
+        .build();
+    TopDocs topDocs = searcher.search(totalQuery, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String[] snippets = highlighter.highlight("body", totalQuery, topDocs);
+    assertArrayEquals(new String[]{"There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary."}, snippets);
+
+    ir.close();
+  }
+
+  private static class MyQuery extends Query {
+
+    private final Query wrapped;
+
+    MyQuery(Query wrapped) {
+      this.wrapped = wrapped;
+    }
+
+    @Override
+    public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+      return wrapped.createWeight(searcher, needsScores, boost);
+    }
+
+    @Override
+    public Query rewrite(IndexReader reader) throws IOException {
+      Query newWrapped = wrapped.rewrite(reader);
+      if (newWrapped != wrapped) {
+        return new MyQuery(newWrapped);
+      }
+      return this;
+    }
+
+    @Override
+    public String toString(String field) {
+      return "[[["+wrapped.toString(field)+"]]]";
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      return obj != null && obj.getClass() == getClass() && wrapped.equals(((MyQuery)wrapped).wrapped);
+    }
+
+    @Override
+    public int hashCode() {
+      return wrapped.hashCode();
+    }
+  }
+
 }
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
index 63f0bb1ca5b6..af6487c06a6d 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -20,6 +20,8 @@
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
 
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.apache.lucene.analysis.Analyzer;
@@ -56,6 +58,7 @@
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.SpanWeight;
 import org.apache.lucene.store.BaseDirectoryWrapper;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@@ -933,4 +936,91 @@ public void testPositionSensitiveWithWildcardDoesNotHighlight() throws Exception
     ir.close();
   }
 
+
+  public void testCustomSpanQueryHighlighting() throws Exception {
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+    Document doc = new Document();
+    doc.add(new Field("body", "alpha bravo charlie delta echo foxtrot golf hotel india juliet", fieldType));
+    doc.add(newTextField("id", "id", Field.Store.YES));
+
+    iw.addDocument(doc);
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected List<Query> preMultiTermQueryRewrite(Query query) {
+        if (query instanceof MyWrapperSpanQuery) {
+          return Collections.singletonList(((MyWrapperSpanQuery) query).originalQuery);
+        }
+        return null;
+      }
+    };
+
+    int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
+
+    WildcardQuery wildcardQuery = new WildcardQuery(new Term("body", "foxtr*"));
+    SpanMultiTermQueryWrapper wildcardQueryWrapper = new SpanMultiTermQueryWrapper<>(wildcardQuery);
+
+
+    SpanQuery wrappedQuery = new MyWrapperSpanQuery(wildcardQueryWrapper);
+
+    BooleanQuery query = new BooleanQuery.Builder()
+        .add(wrappedQuery, BooleanClause.Occur.SHOULD)
+        .build();
+
+    int[] docIds = new int[]{docId};
+
+    String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
+    assertEquals(1, snippets.length);
+    assertEquals("alpha bravo charlie delta echo <b>foxtrot</b> golf hotel india juliet", snippets[0]);
+    ir.close();
+  }
+
+  private static class MyWrapperSpanQuery extends SpanQuery {
+
+    private final SpanQuery originalQuery;
+
+    private MyWrapperSpanQuery(SpanQuery originalQuery) {
+      this.originalQuery = Objects.requireNonNull(originalQuery);
+    }
+
+    @Override
+    public String getField() {
+      return originalQuery.getField();
+    }
+
+    @Override
+    public String toString(String field) {
+      return "(Wrapper[" + originalQuery.toString(field)+"])";
+    }
+
+    @Override
+    public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+      return originalQuery.createWeight(searcher, needsScores, boost);
+    }
+
+    @Override
+    public Query rewrite(IndexReader reader) throws IOException {
+      Query newOriginalQuery = originalQuery.rewrite(reader);
+      if (newOriginalQuery != originalQuery) {
+        return new MyWrapperSpanQuery((SpanQuery)newOriginalQuery);
+      }
+      return this;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) return true;
+      if (o == null || getClass() != o.getClass()) return false;
+      return originalQuery.equals(((MyWrapperSpanQuery)o).originalQuery);
+    }
+
+    @Override
+    public int hashCode() {
+      return originalQuery.hashCode();
+    }
+  }
+
 }

From b71a0990da760cd6ac40be7e4aeee16d2906bac9 Mon Sep 17 00:00:00 2001
From: Michael Braun <n3ca88@gmail.com>
Date: Mon, 7 Nov 2016 20:51:14 -0500
Subject: [PATCH 2/2] Move test into different class more relevant to strict
 phrase handling

---
 .../uhighlight/TestUnifiedHighlighter.java    | 80 -------------------
 .../TestUnifiedHighlighterStrictPhrases.java  | 78 ++++++++++++++++++
 2 files changed, 78 insertions(+), 80 deletions(-)

diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index 9eee6348b16c..fae937df4fc9 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -961,84 +961,4 @@ public String[] format(Passage passages[], String content) {
     ir.close();
   }
 
-  public void testBooleanWithSpanAndOverlappingTerms() throws IOException {
-    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
-    Field body = new Field("body", "There is no accord and satisfaction with this - Consideration of the accord is arbitrary.", fieldType);
-    Document doc = new Document();
-    doc.add(body);
-    iw.addDocument(doc);
-    IndexReader ir = iw.getReader();
-    iw.close();
-
-    IndexSearcher searcher = newSearcher(ir);
-
-   UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
-      @Override
-      protected List<Query> preSpanQueryRewrite(Query query) {
-        if (query instanceof MyQuery) {
-          return Collections.singletonList(((MyQuery)query).wrapped);
-        }
-        return null;
-      }
-    };
-
-    BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
-    Query phraseQuery = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);
-    Query oredTerms = new BooleanQuery.Builder()
-        .setMinimumNumberShouldMatch(2)
-        .add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD)
-        .add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD)
-        .add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD)
-        .build();
-    Query proximityBoostingQuery = new MyQuery(oredTerms);
-    Query totalQuery = bqBuilder
-        .add(phraseQuery, BooleanClause.Occur.SHOULD)
-        .add(proximityBoostingQuery, BooleanClause.Occur.SHOULD)
-        .build();
-    TopDocs topDocs = searcher.search(totalQuery, 10, Sort.INDEXORDER);
-    assertEquals(1, topDocs.totalHits);
-    String[] snippets = highlighter.highlight("body", totalQuery, topDocs);
-    assertArrayEquals(new String[]{"There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary."}, snippets);
-
-    ir.close();
-  }
-
-  private static class MyQuery extends Query {
-
-    private final Query wrapped;
-
-    MyQuery(Query wrapped) {
-      this.wrapped = wrapped;
-    }
-
-    @Override
-    public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
-      return wrapped.createWeight(searcher, needsScores, boost);
-    }
-
-    @Override
-    public Query rewrite(IndexReader reader) throws IOException {
-      Query newWrapped = wrapped.rewrite(reader);
-      if (newWrapped != wrapped) {
-        return new MyQuery(newWrapped);
-      }
-      return this;
-    }
-
-    @Override
-    public String toString(String field) {
-      return "[[["+wrapped.toString(field)+"]]]";
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      return obj != null && obj.getClass() == getClass() && wrapped.equals(((MyQuery)wrapped).wrapped);
-    }
-
-    @Override
-    public int hashCode() {
-      return wrapped.hashCode();
-    }
-  }
-
 }
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
index 5fecdc6d5bc9..a60dfde1cf5c 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
@@ -17,6 +17,9 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
 
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -29,14 +32,17 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
 import org.apache.lucene.search.spans.SpanNearQuery;
@@ -401,4 +407,76 @@ public void testMatchNoDocsQuery() throws IOException {
     Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
     assertEquals(content, o);
   }
+
+  public void testPreSpanQueryRewrite() throws IOException { // a custom Query is highlighted once preSpanQueryRewrite exposes the query it wraps
+    indexWriter.addDocument(newDoc("There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
+    initReaderSearcherHighlighter();
+
+    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Collection<Query> preSpanQueryRewrite(Query query) {
+        if (query instanceof MyQuery) {
+          return Collections.singletonList(((MyQuery)query).wrapped); // hand the highlighter the query hidden inside the custom wrapper
+        }
+        return null; // no replacement for other query types; presumably means "use default handling" - confirm against UnifiedHighlighter
+      }
+    };
+    highlighter.setHighlightPhrasesStrictly(true);
+
+    BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+    Query phraseQuery = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);
+    Query oredTerms = new BooleanQuery.Builder()
+        .setMinimumNumberShouldMatch(2) // at least two of the three optional term clauses must match
+        .add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD)
+        .add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD)
+        .add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD)
+        .build();
+    Query proximityBoostingQuery = new MyQuery(oredTerms); // custom Query subclass wrapping the term disjunction
+    Query totalQuery = bqBuilder
+        .add(phraseQuery, BooleanClause.Occur.SHOULD)
+        .add(proximityBoostingQuery, BooleanClause.Occur.SHOULD)
+        .build();
+    TopDocs topDocs = searcher.search(totalQuery, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String[] snippets = highlighter.highlight("body", totalQuery, topDocs);
+    assertArrayEquals(new String[]{"There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary."}, snippets); // phrase words plus the terms reached through MyQuery are all marked
+  }
+
+  private static class MyQuery extends Query { // test-only Query that delegates weight creation and rewriting to the query it wraps
+
+    private final Query wrapped; // read directly by the test's preSpanQueryRewrite override
+
+    MyQuery(Query wrapped) {
+      this.wrapped = wrapped;
+    }
+
+    @Override
+    public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+      return wrapped.createWeight(searcher, needsScores, boost); // pure delegation to the wrapped query
+    }
+
+    @Override
+    public Query rewrite(IndexReader reader) throws IOException {
+      Query newWrapped = wrapped.rewrite(reader);
+      if (newWrapped != wrapped) { // identity check: a different instance means the wrapped query was rewritten
+        return new MyQuery(newWrapped); // keep the wrapper around the rewritten query
+      }
+      return this;
+    }
+
+    @Override
+    public String toString(String field) {
+      return "[[["+wrapped.toString(field)+"]]]";
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      return obj != null && obj.getClass() == getClass() && wrapped.equals(((MyQuery)obj).wrapped); // cast obj (not wrapped): the class check above makes this safe
+    }
+
+    @Override
+    public int hashCode() {
+      return wrapped.hashCode(); // consistent with equals(), which compares only the wrapped query
+    }
+  }
 }