elastic · romseygeek · Dec 2, 2019 · Dec 3, 2019 · Dec 3, 2019 · Dec 3, 2019
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml
@@ -21,6 +21,10 @@ setup:
             - '{"text" : "Baby its cold there outside"}'
             - '{"index": {"_index": "test", "_id": "4"}}'
             - '{"text" : "Outside it is cold and wet"}'
+            - '{"index": {"_index": "test", "_id": "5"}}'
+            - '{"text" : "To be or not to be, that is the question"}'
+            - '{"index": {"_index": "test", "_id": "6"}}'
+            - '{"text" : "Id like to be under the sea or not"}'
 
 ---
 "Test ordered matching":
@@ -49,6 +53,22 @@ setup:
                     query: "cold outside"
   - match: { hits.total.value: 3 }
 
+---
+"Test unordered matching with duplicates":
+  - skip:
+      version: "- 8.0.0"
+      reason:  Added in v7.6
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            intervals:
+              text:
+                match:
+                  query: "to be or not to be"
+  - match: { hits.total.value: 1 }
+
 ---
 "Test explicit unordered matching":
   - do:
@@ -185,6 +205,27 @@ setup:
                   ordered: false
   - match: { hits.total.value: 2 }
 
+---
+"Test unordered combination with duplicates":
+  - skip:
+      version: "- 8.0.0"
+      reason:  Added in v7.6
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            intervals:
+              text:
+                all_of:
+                  intervals:
+                    - match:
+                        query: "to be"
+                    - match:
+                        query: "to be"
+                  ordered: false
+  - match: { hits.total.value: 1 }
+
 ---
 "Test block combination":
   - do:

diff --git a/server/src/main/java/org/apache/lucene/queries/XIntervals.java b/server/src/main/java/org/apache/lucene/queries/XIntervals.java
@@ -24,6 +24,7 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.queries.intervals.FilteredIntervalsSource;
 import org.apache.lucene.queries.intervals.IntervalIterator;
 import org.apache.lucene.queries.intervals.IntervalQuery;
 import org.apache.lucene.queries.intervals.Intervals;
@@ -48,6 +49,8 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Objects;
+import java.util.function.ToIntFunction;
+import java.util.stream.Collectors;
 
 /**
  * Replacement for {@link Intervals#wildcard(BytesRef)} and {@link Intervals#prefix(BytesRef)}
@@ -67,6 +70,28 @@ public static IntervalsSource prefix(BytesRef prefix) {
         return new MultiTermIntervalsSource(ca, 128, prefix.utf8ToString());
     }
 
+    public static IntervalsSource maxWidth(IntervalsSource in, ToIntFunction<IntervalsSource> widthFunction) {
+        return Intervals.or(in.pullUpDisjunctions().stream()
+            .map(s -> new MaxWidth(s, widthFunction.applyAsInt(s)))
+            .collect(Collectors.toList()));
+    }
+
+    private static class MaxWidth extends FilteredIntervalsSource {
+
+        private final int maxWidth;
+
+        MaxWidth(IntervalsSource in, int maxWidth) {
+            super("MAXWIDTH/" + maxWidth, in);
+            this.maxWidth = maxWidth;
+        }
+
+        @Override
+        protected boolean accept(IntervalIterator it) {
+            return (it.end() - it.start()) + 1 <= maxWidth;
+        }
+
+    }
+
     static class MultiTermIntervalsSource extends IntervalsSource {
 
         private final CompiledAutomaton automaton;

diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java
@@ -26,12 +26,13 @@
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.MatchesIterator;
-import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.queries.XIntervals;
 import org.apache.lucene.queries.intervals.IntervalIterator;
 import org.apache.lucene.queries.intervals.Intervals;
 import org.apache.lucene.queries.intervals.IntervalsSource;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MatchesIterator;
+import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
 
@@ -40,7 +41,9 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * Constructs an IntervalsSource based on analyzed text
@@ -102,15 +105,15 @@ protected IntervalsSource analyzeText(CachingTokenFilter stream, int maxGaps, bo
             return analyzeTerm(stream);
         } else if (isGraph) {
             // graph
-            return combineSources(analyzeGraph(stream), maxGaps, ordered);
+            return combineSources(analyzeGraph(stream), maxGaps, ordered, true);
         } else {
             // phrase
             if (hasSynonyms) {
                 // phrase with single-term synonyms
                 return analyzeSynonyms(stream, maxGaps, ordered);
             } else {
                 // simple phrase
-                return combineSources(analyzeTerms(stream), maxGaps, ordered);
+                return combineSources(analyzeTerms(stream), maxGaps, ordered, true);
             }
         }
 
@@ -123,13 +126,16 @@ protected IntervalsSource analyzeTerm(TokenStream ts) throws IOException {
         return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
     }
 
-    protected static IntervalsSource combineSources(List<IntervalsSource> sources, int maxGaps, boolean ordered) {
+    protected static IntervalsSource combineSources(List<IntervalsSource> sources, int maxGaps, boolean ordered, boolean fixedWidth) {
         if (sources.size() == 0) {
             return NO_INTERVALS;
         }
         if (sources.size() == 1) {
             return sources.get(0);
         }
+        if (ordered == false) {
+            sources = deduplicate(sources);
+        }
         IntervalsSource[] sourcesArray = sources.toArray(new IntervalsSource[0]);
         if (maxGaps == 0 && ordered) {
             return Intervals.phrase(sourcesArray);
@@ -138,9 +144,36 @@ protected static IntervalsSource combineSources(List<IntervalsSource> sources, i
         if (maxGaps == -1) {
             return inner;
         }
+        if (fixedWidth) {
+            return XIntervals.maxWidth(inner, s -> maxGaps + s.minExtent());
+        }
         return Intervals.maxgaps(maxGaps, inner);
     }
 
+    protected static List<IntervalsSource> deduplicate(List<IntervalsSource> sources) {
+        Map<IntervalsSource, Integer> counts = new LinkedHashMap<>();   // preserve order for testing
+        for (IntervalsSource source : sources) {
+            counts.compute(source, (k, v) -> v == null ? 1 : v + 1);
+        }
+        if (counts.size() == sources.size()) {
+            return sources;
+        }
+        sources = new ArrayList<>();
+        for (IntervalsSource source : counts.keySet()) {
+            int count = counts.get(source);
+            if (count == 1) {
+                sources.add(source);
+            } else {
+                IntervalsSource[] multiples = new IntervalsSource[count];
+                for (int i = 0; i < count; i++) {
+                    multiples[i] = source;
+                }
+                sources.add(Intervals.ordered(multiples));
+            }
+        }
+        return sources;
+    }
+
     protected List<IntervalsSource> analyzeTerms(TokenStream ts) throws IOException {
         List<IntervalsSource> terms = new ArrayList<>();
         TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
@@ -189,7 +222,7 @@ else if (synonyms.size() > 1) {
         else {
             terms.add(extend(Intervals.or(synonyms.toArray(new IntervalsSource[0])), spaces));
         }
-        return combineSources(terms, maxGaps, ordered);
+        return combineSources(terms, maxGaps, ordered, true);
     }
 
     protected List<IntervalsSource> analyzeGraph(TokenStream source) throws IOException {
@@ -212,7 +245,7 @@ protected List<IntervalsSource> analyzeGraph(TokenStream source) throws IOExcept
                 Iterator<TokenStream> it = graph.getFiniteStrings(start, end);
                 while (it.hasNext()) {
                     TokenStream ts = it.next();
-                    IntervalsSource phrase = combineSources(analyzeTerms(ts), 0, true);
+                    IntervalsSource phrase = combineSources(analyzeTerms(ts), 0, true, true);
                     if (paths.size() >= maxClauseCount) {
                         throw new BooleanQuery.TooManyClauses();
                     }

diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java
@@ -366,7 +366,7 @@ public IntervalsSource getSource(QueryShardContext ctx, MappedFieldType fieldTyp
             for (IntervalsSourceProvider provider : subSources) {
                 ss.add(provider.getSource(ctx, fieldType));
             }
-            IntervalsSource source = IntervalBuilder.combineSources(ss, maxGaps, ordered);
+            IntervalsSource source = IntervalBuilder.combineSources(ss, maxGaps, ordered, false);
             if (filter != null) {
                 return filter.filter(source, ctx, fieldType);
             }

diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java
@@ -201,6 +201,21 @@ public void testGraphSynonymsWithGaps() throws IOException {
 
         assertEquals(expected, source);
 
+        source = BUILDER.analyzeText(new CachingTokenFilter(ts), 2, false);
+        expected = Intervals.or(
+            Intervals.maxwidth(6, Intervals.unordered(
+                Intervals.term("term1"), Intervals.extend(Intervals.term("term2"), 1, 0), Intervals.term("term5")
+            )),
+            Intervals.maxwidth(9, Intervals.unordered(
+                Intervals.term("term1"),
+                Intervals.phrase(
+                    Intervals.extend(Intervals.term("term3"), 1, 0),
+                    Intervals.extend(Intervals.term("term4"), 2, 0)),
+                Intervals.term("term5")
+            ))
+        );
+        assertEquals(expected, source);
+
     }
 
     public void testGraphTerminatesOnGap() throws IOException {
@@ -222,4 +237,26 @@ public void testGraphTerminatesOnGap() throws IOException {
         assertEquals(expected, source);
     }
 
+    public void testUnorderedWithRepeats() throws IOException {
+        IntervalsSource source = BUILDER.analyzeText("to be or not to be", -1, false);
+        IntervalsSource expected = Intervals.unordered(
+            Intervals.ordered(Intervals.term("to"), Intervals.term("to")),
+            Intervals.ordered(Intervals.term("be"), Intervals.term("be")),
+            Intervals.term("or"),
+            Intervals.term("not")
+        );
+        assertEquals(expected, source);
+    }
+
+    public void testUnorderedWithRepeatsAndMaxGaps() throws IOException {
+        IntervalsSource source = BUILDER.analyzeText("to be or not to be", 2, false);
+        IntervalsSource expected = Intervals.maxwidth(8, Intervals.unordered(
+            Intervals.ordered(Intervals.term("to"), Intervals.term("to")),
+            Intervals.ordered(Intervals.term("be"), Intervals.term("be")),
+            Intervals.term("or"),
+            Intervals.term("not")
+        ));
+        assertEquals(expected, source);
+    }
+
 }
diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java
@@ -192,7 +192,7 @@ public void testMatchInterval() throws IOException {
 
         builder = (IntervalQueryBuilder) parseQuery(json);
         expected = new IntervalQuery(STRING_FIELD_NAME,
-            Intervals.maxgaps(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world"))));
+            XIntervals.maxWidth(Intervals.unordered(Intervals.term("hello"), Intervals.term("world")), s -> 42));
         assertEquals(expected, builder.toQuery(createShardContext()));
 
         json = "{ \"intervals\" : " +
@@ -217,7 +217,7 @@ public void testMatchInterval() throws IOException {
 
         builder = (IntervalQueryBuilder) parseQuery(json);
         expected = new IntervalQuery(STRING_FIELD_NAME,
-            Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))));
+            XIntervals.maxWidth(Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")), s -> 12));
         assertEquals(expected, builder.toQuery(createShardContext()));
 
         json = "{ \"intervals\" : " +
@@ -232,7 +232,7 @@ public void testMatchInterval() throws IOException {
         builder = (IntervalQueryBuilder) parseQuery(json);
         expected = new IntervalQuery(STRING_FIELD_NAME,
             Intervals.fixField(MASKED_FIELD,
-                                Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))));
+                                XIntervals.maxWidth(Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")), s -> 12)));
         assertEquals(expected, builder.toQuery(createShardContext()));
 
         json = "{ \"intervals\" : " +
@@ -248,7 +248,7 @@ public void testMatchInterval() throws IOException {
 
         builder = (IntervalQueryBuilder) parseQuery(json);
         expected = new IntervalQuery(STRING_FIELD_NAME,
-            Intervals.containing(Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))),
+            Intervals.containing(XIntervals.maxWidth(Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")), s -> 12),
                                  Intervals.term("blah")));
         assertEquals(expected, builder.toQuery(createShardContext()));
     }