Disable optimization if we aren't sure its faster (backport of #74260) (

#74563) This disables the filter-by-filter aggregation optimization used by `terms`, `range`, `date_histogram`, and `date_range` aggregations unless we're *sure* that its faster than the "native" implementation. Mostly this is when the top level query is empty or we can merge it into the filter generated by the agg rewrite process. Now that we have hard and fast rules we can drop the cost estimation framework without too much fear. So we remove it in this change. It stomps a bunch of complexity. Sadly, without the cost estimation stuff we have to add a separate mechanism for blocking the optimization against runtime fields for which it'd be kind of garbage. For that I added another rule preventing the filter-by-filter aggregation from running against the queries made by runtime fields. Its not fool-proof, but we have control over what queries we pass as a filter so its not wide open. I spent a lot of time working on an alternative to this that preserved that fancy filter-by-filter collection mechanism and was much more kind to the query cache. It detected cases where going full filter-by-filter was bad and grouped those filters together to collect in one pass with a funny ORing collector. It *worked*. And, if we were super concerned with the performance of the `filters` aggregation it'd be the way to go. But it was very complex and it was actually slower than using the native aggregation for things like `terms` and `date_histogram`. It was glorious. But it was wrong for us. Too complex and optimized the wrong things. So here we are. Hopefully this is a fairly simple solution to a sneaky problem.
elastic · Jun 24, 2021 · 2d5982b · 2d5982b
1 parent 3dc0911
commit 2d5982b
Show file tree

Hide file tree

Showing 22 changed files with 1,062 additions and 784 deletions.
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/20_terms.yml
@@ -822,8 +822,8 @@ setup:
 ---
 "string profiler via global ordinals filters implementation":
   - skip:
-      version: " - 7.12.99"
-      reason: filters implementation first supported with sub-aggregators in 7.13.0
+      version: " - 7.13.99"
+      reason: profile info changed in 7.14.0
   - do:
       indices.create:
           index: test_3
@@ -879,7 +879,7 @@ setup:
   - match: { profile.shards.0.aggregations.0.type: StringTermsAggregatorFromFilters }
   - match: { profile.shards.0.aggregations.0.description: str_terms }
   - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 }
-  - match: { profile.shards.0.aggregations.0.debug.delegate: FiltersAggregator.FilterByFilter }
+  - match: { profile.shards.0.aggregations.0.debug.delegate: FilterByFilterAggregator }
   - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.0.query: "str:cow" }
   - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.1.query: "str:pig" }
   - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.2.query: "str:sheep" }

diff --git a/.../src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml b/.../src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml
@@ -150,9 +150,10 @@ setup:
 ---
 "Test filters agg with doc_count":
   - skip:
-      version: " - 7.12.99"
+      version: " - 7.13.99"
+      reason: profile info changed in 7.14.0
       features: default_shards
-      reason: "name changed in 7.13.0"
+
   - do:
       search:
         body:
@@ -177,7 +178,7 @@ setup:
   - match: { aggregations.f.buckets.abc.doc_count: 11 }
   - match: { aggregations.f.buckets.foo.doc_count: 8 }
   - match: { aggregations.f.buckets.xyz.doc_count: 5 }
-  - match: { profile.shards.0.aggregations.0.type: FiltersAggregator.FilterByFilter }
+  - match: { profile.shards.0.aggregations.0.type: FilterByFilterAggregator }
   # We can't assert that segments_with_doc_count_field is > 0 because we might
   # end up with two shards and all of the documents with the _doc_count field
   # may be on one field. We have a test for this in AggregationProfilerIT

diff --git a/...lClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java b/...lClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java
@@ -47,7 +47,6 @@
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
-import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.SuiteScopeTestCase
@@ -646,21 +645,17 @@ public void testFilterByFilter() throws InterruptedException, IOException {
                         "delegate_debug",
                         matchesMap().entry("average_docs_per_range", equalTo(RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2))
                             .entry("ranges", 1)
-                            .entry("delegate", "FiltersAggregator.FilterByFilter")
+                            .entry("delegate", "FilterByFilterAggregator")
                             .entry(
                                 "delegate_debug",
                                 matchesMap().entry("segments_with_deleted_docs", 0)
                                     .entry("segments_with_doc_count_field", 0)
-                                    .entry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2)
-                                    .entry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2)
-                                    .entry("estimate_cost_time", greaterThanOrEqualTo(0L)) // ~1,276,734 nanos is normal
                                     .entry("segments_counted", 0)
                                     .entry("segments_collected", greaterThan(0))
                                     .entry(
                                         "filters",
                                         matchesList().item(
-                                            matchesMap().entry("scorers_prepared_while_estimating_cost", greaterThan(0))
-                                                .entry("query", "DocValuesFieldExistsQuery [field=date]")
+                                            matchesMap().entry("query", "DocValuesFieldExistsQuery [field=date]")
                                                 .entry("specialized_for", "docvalues_field_exists")
                                                 .entry("results_from_metadata", 0)
                                         )

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java
@@ -53,7 +53,7 @@ public KeywordScriptFieldType(String name) {
         this(name, StringFieldScript.PARSE_FROM_SOURCE, null, Collections.emptyMap(), (builder, params) -> builder);
     }
 
-    KeywordScriptFieldType(
+    public KeywordScriptFieldType(
         String name,
         StringFieldScript.Factory scriptFactory,
         Script script,

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java
@@ -46,7 +46,7 @@ public LongScriptFieldType(String name) {
         this(name, LongFieldScript.PARSE_FROM_SOURCE, null, Collections.emptyMap(), (builder, params) -> builder);
     }
 
-    LongScriptFieldType(
+    public LongScriptFieldType(
         String name,
         LongFieldScript.Factory scriptFactory,
         Script script,

diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java
@@ -30,7 +30,7 @@ public abstract class AdaptingAggregator extends Aggregator {
     public AdaptingAggregator(
         Aggregator parent,
         AggregatorFactories subAggregators,
-        CheckedFunction<AggregatorFactories, Aggregator, IOException> delegate
+        CheckedFunction<AggregatorFactories, ? extends Aggregator, IOException> delegate
     ) throws IOException {
         // Its important we set parent first or else when we build the sub-aggregators they can fail because they'll call this.parent.
         this.parent = parent;

diff --git a/...java/org/elasticsearch/search/aggregations/bucket/filter/DocValuesFieldExistsAdapter.java b/...java/org/elasticsearch/search/aggregations/bucket/filter/DocValuesFieldExistsAdapter.java
@@ -14,7 +14,6 @@
 import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.util.Bits;
-import org.elasticsearch.common.CheckedSupplier;
 
 import java.io.IOException;
 import java.util.function.BiConsumer;
@@ -43,14 +42,6 @@ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live)
         return super.count(ctx, counter, live);
     }
 
-    @Override
-    long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
-        if (canUseMetadata.get() && canCountFromMetadata(ctx)) {
-            return 0;
-        }
-        return super.estimateCountCost(ctx, canUseMetadata);
-    }
-
     private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException {
         FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(query().getField());
         if (info == null) {