Merge pull request #10531 from jpountz/upgrade/lucene-5.1-queries
Search: Replace deprecated filters with equivalent queries.
jpountz committed Apr 21, 2015
2 parents b31e590 + d7abb12 commit daa25d1
Showing 129 changed files with 925 additions and 4,103 deletions.
5 changes: 5 additions & 0 deletions dev-tools/forbidden/all-signatures.txt
@@ -33,6 +33,11 @@ java.nio.file.Path#toFile()
@defaultMessage Don't use deprecated lucene apis
org.apache.lucene.index.DocsEnum
org.apache.lucene.index.DocsAndPositionsEnum
org.apache.lucene.queries.TermFilter
org.apache.lucene.queries.TermsFilter
org.apache.lucene.search.TermRangeFilter
org.apache.lucene.search.NumericRangeFilter
org.apache.lucene.search.PrefixFilter

java.nio.file.Paths @ Use PathUtils.get instead.
java.nio.file.FileSystems#getDefault() @ use PathUtils.getDefault instead.
9 changes: 9 additions & 0 deletions docs/reference/migration/migrate_2_0.asciidoc
@@ -374,9 +374,18 @@ http.cors.allow-origin: /https?:\/\/localhost(:[0-9]+)?/
The cluster state api doesn't return the `routing_nodes` section anymore when
`routing_table` is requested. The newly introduced `routing_nodes` flag can
be used separately to control whether `routing_nodes` should be returned.

=== Query DSL

The `fuzzy_like_this` and `fuzzy_like_this_field` queries have been removed.

The `limit` filter is deprecated and becomes a no-op. You can achieve similar
behaviour using the <<search-request-body,terminate_after>> parameter.
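
For example, a search request body that stops collecting hits on each shard
after ten matching documents might look as follows (a sketch; the value and
query are illustrative):

[source,js]
--------------------------------------------------
{
    "terminate_after" : 10,
    "query" : {
        "match_all" : {}
    }
}
--------------------------------------------------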

The `or` and `and` filters on the one hand and the `bool` filter on the other
hand used to have different performance characteristics depending on the
wrapped filters. This has been fixed, and as a consequence the `or` and `and`
filters are now deprecated in favour of `bool`.
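
For instance, an `and` filter wrapping two filters can be rewritten as a
`bool` filter with `must` clauses (the field names below are illustrative):

[source,js]
--------------------------------------------------
{
    "bool" : {
        "must" : [
            { "term" : { "user" : "kimchy" } },
            { "term" : { "tag" : "tech" } }
        ]
    }
}
--------------------------------------------------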

The `execution` option of the `terms` filter is now deprecated and ignored if
provided.
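
A plain `terms` filter with no `execution` option is now all that is needed
(a sketch with illustrative values):

[source,js]
--------------------------------------------------
{
    "terms" : {
        "user" : ["kimchy", "elasticsearch"]
    }
}
--------------------------------------------------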
2 changes: 2 additions & 0 deletions docs/reference/query-dsl/filters/and-filter.asciidoc
@@ -1,6 +1,8 @@
[[query-dsl-and-filter]]
=== And Filter

deprecated[2.0.0, Use the `bool` filter instead]

A filter that matches documents using the `AND` boolean operator on other
filters. Can be placed within queries that accept a filter.

2 changes: 2 additions & 0 deletions docs/reference/query-dsl/filters/or-filter.asciidoc
@@ -1,6 +1,8 @@
[[query-dsl-or-filter]]
=== Or Filter

deprecated[2.0.0, Use the `bool` filter instead]

A filter that matches documents using the `OR` boolean operator on other
filters. Can be placed within queries that accept a filter.

65 changes: 0 additions & 65 deletions docs/reference/query-dsl/filters/terms-filter.asciidoc
@@ -18,71 +18,6 @@ Filters documents that have fields that match any of the provided terms
The `terms` filter is also aliased with `in` as the filter name for
simpler usage.

[float]
==== Execution Mode

The terms filter executes by iterating over the terms provided, finding
matching docs (loading them into a bitset) and caching it. Sometimes we want
a different execution model, one that could also be achieved by building more
complex queries in the DSL, but that can be supported in the more compact
model that the terms filter provides.

The `execution` option has the following values:

[horizontal]
`plain`::
The default. Iterates over all the terms, building a bit set of the
matches, and filtering with it. The total filter is cached.

`fielddata`::
Generates a terms filter that uses the fielddata cache to
compare terms. This execution mode is great to use when filtering
on a field that is already loaded into the fielddata cache from
aggregating, sorting, or index warmers. When filtering on
a large number of terms, this execution can be considerably faster
than the other modes. The total filter is not cached unless
explicitly configured to do so.

`bool`::
Generates a term filter (which is cached) for each term, and
wraps those in a bool filter. The bool filter itself is not cached as it
can operate very quickly on the cached term filters.

`and`::
Generates a term filter (which is cached) for each term, and
wraps those in an and filter. The and filter itself is not cached.

`or`::
Generates a term filter (which is cached) for each term, and
wraps those in an or filter. The or filter itself is not cached.
Generally, the `bool` execution mode should be preferred.

If you don't want the generated individual term queries to be cached,
you can use: `bool_nocache`, `and_nocache` or `or_nocache` instead, but
be aware that this will affect performance.

The "total" terms filter caching can still be explicitly controlled
using the `_cache` option. Note the default value for it depends on the
execution value.

For example:

[source,js]
--------------------------------------------------
{
    "constant_score" : {
        "filter" : {
            "terms" : {
                "user" : ["kimchy", "elasticsearch"],
                "execution" : "bool",
                "_cache": true
            }
        }
    }
}
--------------------------------------------------

[float]
==== Caching

2 changes: 1 addition & 1 deletion rest-api-spec/test/indices.validate_query/10_basic.yaml
@@ -32,5 +32,5 @@
- is_true: valid
- match: {_shards.failed: 0}
- match: {explanations.0.index: 'testing'}
- match: {explanations.0.explanation: 'ConstantScore(*:*)'}
- match: {explanations.0.explanation: '*:*'}

@@ -22,45 +22,30 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;

import java.io.IOException;
import java.lang.reflect.Field;
import java.util.Collection;
import java.util.List;

/**
*
*/
// LUCENE MONITOR
// TODO: remove me!
public class CustomFieldQuery extends FieldQuery {

private static Field multiTermQueryWrapperFilterQueryField;

static {
try {
multiTermQueryWrapperFilterQueryField = MultiTermQueryWrapperFilter.class.getDeclaredField("query");
multiTermQueryWrapperFilterQueryField.setAccessible(true);
} catch (NoSuchFieldException e) {
// ignore
}
}

public static final ThreadLocal<Boolean> highlightFilters = new ThreadLocal<>();

public CustomFieldQuery(Query query, IndexReader reader, FastVectorHighlighter highlighter) throws IOException {
@@ -140,25 +125,8 @@ void flatten(Filter sourceFilter, IndexReader reader, Collection<Query> flatQueries
if (highlight == null || highlight.equals(Boolean.FALSE)) {
return;
}
if (sourceFilter instanceof TermFilter) {
// TermFilter is just a deprecated wrapper over QWF
TermQuery actualQuery = (TermQuery) ((TermFilter) sourceFilter).getQuery();
flatten(new TermQuery(actualQuery.getTerm()), reader, flatQueries);
} else if (sourceFilter instanceof MultiTermQueryWrapperFilter) {
if (multiTermQueryWrapperFilterQueryField != null) {
try {
flatten((Query) multiTermQueryWrapperFilterQueryField.get(sourceFilter), reader, flatQueries);
} catch (IllegalAccessException e) {
// ignore
}
}
} else if (sourceFilter instanceof XBooleanFilter) {
XBooleanFilter booleanFilter = (XBooleanFilter) sourceFilter;
for (FilterClause clause : booleanFilter.clauses()) {
if (clause.getOccur() == BooleanClause.Occur.MUST || clause.getOccur() == BooleanClause.Occur.SHOULD) {
flatten(clause.getFilter(), reader, flatQueries);
}
}
if (sourceFilter instanceof QueryWrapperFilter) {
flatten(((QueryWrapperFilter) sourceFilter).getQuery(), reader, flatQueries);
}
}
}
@@ -37,7 +37,6 @@
import org.elasticsearch.cluster.routing.GroupShardsIterator;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.MatchNoDocsFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.IndexService;
@@ -219,7 +218,7 @@ protected ShardValidateQueryResponse shardOperation(ShardValidateQueryRequest re

private String getRewrittenQuery(IndexSearcher searcher, Query query) throws IOException {
Query queryRewrite = searcher.rewrite(query);
if (queryRewrite instanceof MatchNoDocsQuery || queryRewrite instanceof MatchNoDocsFilter) {
if (queryRewrite instanceof MatchNoDocsQuery) {
return query.toString();
} else {
return queryRewrite.toString();
@@ -19,21 +19,18 @@

package org.elasticsearch.common.lucene.docset;

import com.google.common.collect.Iterables;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.lucene.search.XDocIdSetIterator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

/**
@@ -93,7 +90,7 @@ public DocIdSetIterator iterator() throws IOException {
return DocIdSetIterator.empty();
}
Bits bit = set.bits();
if (bit != null && DocIdSets.isBroken(it)) {
if (bit != null && bit instanceof BitSet == false) {
bits.add(bit);
} else {
iterators.add(it);
@@ -138,7 +135,7 @@ public int length() {
}
}

static class IteratorBasedIterator extends XDocIdSetIterator {
static class IteratorBasedIterator extends DocIdSetIterator {
private int doc = -1;
private final DocIdSetIterator lead;
private final DocIdSetIterator[] otherIterators;
@@ -174,16 +171,6 @@ protected void swap(int i, int j) {
this.otherIterators = Arrays.copyOfRange(sortedIterators, 1, sortedIterators.length);
}

@Override
public boolean isBroken() {
for (DocIdSetIterator it : Iterables.concat(Collections.singleton(lead), Arrays.asList(otherIterators))) {
if (DocIdSets.isBroken(it)) {
return true;
}
}
return false;
}

@Override
public final int docID() {
return doc;
@@ -22,8 +22,6 @@
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesDocIdSet;
import org.apache.lucene.search.FilteredDocIdSetIterator;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
@@ -33,7 +31,6 @@
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.search.XDocIdSetIterator;

import java.io.IOException;

@@ -55,31 +52,6 @@ public static boolean isEmpty(@Nullable DocIdSet set) {
return set == null || set == DocIdSet.EMPTY;
}

/**
* Check if the given iterator can nextDoc() or advance() in sub-linear time
* of the number of documents. For instance, an iterator that would need to
* iterate one document at a time to check for its value would be considered
* broken.
*/
public static boolean isBroken(DocIdSetIterator iterator) {
while (iterator instanceof FilteredDocIdSetIterator) {
// this iterator is filtered (likely by some bits)
// unwrap in order to check if the underlying iterator is fast
iterator = ((FilteredDocIdSetIterator) iterator).getDelegate();
}
if (iterator instanceof XDocIdSetIterator) {
return ((XDocIdSetIterator) iterator).isBroken();
}
if (iterator instanceof MatchDocIdSetIterator) {
return true;
}
// DocValuesDocIdSet produces anonymous slow iterators
if (iterator != null && DocValuesDocIdSet.class.equals(iterator.getClass().getEnclosingClass())) {
return true;
}
return false;
}

/**
* Converts to a cacheable {@link DocIdSet}
* <p/>
