Request-level circuit breaker support on coordinating nodes (#62884)
This commit allows the coordinating node to account for the memory used to perform partial and final reduces of
aggregations in the request circuit breaker. The search coordinator adds the memory that it uses to save
and reduce the results of shard aggregations to the request circuit breaker. Before any partial or final
reduce, the memory needed to reduce the aggregations is estimated and a CircuitBreakingException is thrown
if it exceeds the maximum memory allowed by this breaker.
This size is estimated as roughly 1.5 times the size of the serialized aggregations that need to be reduced.
The estimate can be completely off for some aggregations, but it is corrected with the real size after
the reduce completes.
If the reduce is successful, we update the circuit breaker to remove the size of the source aggregations
and replace the estimation with the serialized size of the newly reduced result.
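
In outline, the accounting works like the sketch below. The CircuitBreaker methods (addEstimateBytesAndMaybeBreak, addWithoutBreaking) are the real breaker API; the surrounding class, method names, and the elided reduce step are hypothetical scaffolding for illustration:

import org.elasticsearch.common.breaker.CircuitBreaker;

final class ReduceAccountingSketch {
    // Reducing is estimated to need ~1.5x the serialized size of the
    // aggregations, i.e. an extra 0.5x on top of the bytes already
    // accounted for the buffered shard results.
    static long estimateRamBytesUsedForReduce(long serializedSize) {
        return Math.round(1.5d * serializedSize - serializedSize);
    }

    // breaker: the request circuit breaker on the coordinating node.
    // serializedSize: combined serialized size of the aggregations to reduce,
    // already accounted in the breaker when the shard results were buffered.
    // Returns the serialized size of the reduced result.
    static long reduceWithAccounting(CircuitBreaker breaker, long serializedSize) {
        long estimate = estimateRamBytesUsedForReduce(serializedSize);
        // Throws CircuitBreakingException if the breaker's limit would be exceeded.
        breaker.addEstimateBytesAndMaybeBreak(estimate, "<reduce_aggs>");
        long reducedSize = 0;
        try {
            // ... perform the partial or final reduce and serialize the
            // result; reducedSize becomes the serialized result size (elided) ...
            return reducedSize;
        } finally {
            // Drop the estimate and the source aggregations' bytes, and
            // account the serialized size of the newly reduced result instead.
            breaker.addWithoutBreaking(reducedSize - estimate - serializedSize);
        }
    }
}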

As a follow-up, we could trigger partial reduces based on the memory accounted in the circuit breaker instead
of relying on a static number of shard responses. A simpler follow-up that could be done in the meantime is
to [reduce the default batch reduce size](#51857) of blocking
search requests to a saner number; a sketch of the per-request setting follows.
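
For context, the batch reduce size is already settable per request via SearchRequest#setBatchedReduceSize (the benchmark below uses the same setter); a minimal sketch with an illustrative index name and value:

import org.elasticsearch.action.search.SearchRequest;

class BatchedReduceSizeExample {
    // Hypothetical example: lower the batched reduce size so the coordinator
    // reduces shard aggregation results in smaller batches (default is 512).
    static SearchRequest smallBatchRequest() {
        SearchRequest request = new SearchRequest("my-index"); // illustrative index name
        request.setBatchedReduceSize(64);
        return request;
    }
}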

Closes #37182
jimczi committed Sep 24, 2020
1 parent cd584d4 commit 78a93dc
Showing 27 changed files with 1,208 additions and 473 deletions.
@@ -0,0 +1,230 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.benchmark.search.aggregations;

import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.OriginalIndices;
import org.elasticsearch.action.search.QueryPhaseResultConsumer;
import org.elasticsearch.action.search.SearchPhaseController;
import org.elasticsearch.action.search.SearchProgressListener;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.MultiBucketConsumerService;
import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.query.QuerySearchResult;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import static java.util.Collections.emptyList;

@Warmup(iterations = 5)
@Measurement(iterations = 7)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
@Fork(value = 1)
public class TermsReduceBenchmark {
    private final SearchModule searchModule = new SearchModule(Settings.EMPTY, false, emptyList());
    private final NamedWriteableRegistry namedWriteableRegistry = new NamedWriteableRegistry(searchModule.getNamedWriteables());
    private final SearchPhaseController controller = new SearchPhaseController(
        namedWriteableRegistry,
        req -> new InternalAggregation.ReduceContextBuilder() {
            @Override
            public InternalAggregation.ReduceContext forPartialReduction() {
                return InternalAggregation.ReduceContext.forPartialReduction(null, null, () -> PipelineAggregator.PipelineTree.EMPTY);
            }

            @Override
            public InternalAggregation.ReduceContext forFinalReduction() {
                final MultiBucketConsumerService.MultiBucketConsumer bucketConsumer = new MultiBucketConsumerService.MultiBucketConsumer(
                    Integer.MAX_VALUE,
                    new NoneCircuitBreakerService().getBreaker(CircuitBreaker.REQUEST)
                );
                return InternalAggregation.ReduceContext.forFinalReduction(
                    null,
                    null,
                    bucketConsumer,
                    PipelineAggregator.PipelineTree.EMPTY
                );
            }
        }
    );

    @State(Scope.Benchmark)
    public static class TermsList extends AbstractList<InternalAggregations> {
        @Param({ "1600172297" })
        long seed;

        @Param({ "64", "128", "512" })
        int numShards;

        @Param({ "100" })
        int topNSize;

        @Param({ "1", "10", "100" })
        int cardinalityFactor;

        List<InternalAggregations> aggsList;

        @Setup
        public void setup() {
            this.aggsList = new ArrayList<>();
            Random rand = new Random(seed);
            int cardinality = cardinalityFactor * topNSize;
            BytesRef[] dict = new BytesRef[cardinality];
            for (int i = 0; i < dict.length; i++) {
                dict[i] = new BytesRef(Long.toString(rand.nextLong()));
            }
            for (int i = 0; i < numShards; i++) {
                aggsList.add(InternalAggregations.from(Collections.singletonList(newTerms(rand, dict, true))));
            }
        }

        private StringTerms newTerms(Random rand, BytesRef[] dict, boolean withNested) {
            Set<BytesRef> randomTerms = new HashSet<>();
            for (int i = 0; i < topNSize; i++) {
                randomTerms.add(dict[rand.nextInt(dict.length)]);
            }
            List<StringTerms.Bucket> buckets = new ArrayList<>();
            for (BytesRef term : randomTerms) {
                InternalAggregations subAggs;
                if (withNested) {
                    subAggs = InternalAggregations.from(Collections.singletonList(newTerms(rand, dict, false)));
                } else {
                    subAggs = InternalAggregations.EMPTY;
                }
                buckets.add(new StringTerms.Bucket(term, rand.nextInt(10000), subAggs, true, 0L, DocValueFormat.RAW));
            }

            Collections.sort(buckets, (a, b) -> a.compareKey(b));
            return new StringTerms(
                "terms",
                BucketOrder.key(true),
                BucketOrder.count(false),
                topNSize,
                1,
                Collections.emptyMap(),
                DocValueFormat.RAW,
                numShards,
                true,
                0,
                buckets,
                0
            );
        }

        @Override
        public InternalAggregations get(int index) {
            return aggsList.get(index);
        }

        @Override
        public int size() {
            return aggsList.size();
        }
    }

@Param({ "32", "512" })
private int bufferSize;

@Benchmark
public SearchPhaseController.ReducedQueryPhase reduceAggs(TermsList candidateList) throws Exception {
List<QuerySearchResult> shards = new ArrayList<>();
for (int i = 0; i < candidateList.size(); i++) {
QuerySearchResult result = new QuerySearchResult();
result.setShardIndex(i);
result.from(0);
result.size(0);
result.topDocs(
new TopDocsAndMaxScore(
new TopDocs(new TotalHits(1000, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), new ScoreDoc[0]),
Float.NaN
),
new DocValueFormat[] { DocValueFormat.RAW }
);
result.aggregations(candidateList.get(i));
result.setSearchShardTarget(
new SearchShardTarget("node", new ShardId(new Index("index", "index"), i), null, OriginalIndices.NONE)
);
shards.add(result);
}
SearchRequest request = new SearchRequest();
request.source(new SearchSourceBuilder().size(0).aggregation(AggregationBuilders.terms("test")));
request.setBatchedReduceSize(bufferSize);
ExecutorService executor = Executors.newFixedThreadPool(1);
QueryPhaseResultConsumer consumer = new QueryPhaseResultConsumer(
request,
executor,
new NoopCircuitBreaker(CircuitBreaker.REQUEST),
controller,
SearchProgressListener.NOOP,
namedWriteableRegistry,
shards.size(),
exc -> {}
);
CountDownLatch latch = new CountDownLatch(shards.size());
for (int i = 0; i < shards.size(); i++) {
consumer.consumeResult(shards.get(i), () -> latch.countDown());
}
latch.await();
SearchPhaseController.ReducedQueryPhase phase = consumer.reduce();
executor.shutdownNow();
return phase;
}
}
@@ -23,7 +23,6 @@
 import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsResponse;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.client.node.NodeClient;
-import org.elasticsearch.common.io.stream.DelayableWriteable;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.search.SearchShardTarget;
 import org.elasticsearch.search.aggregations.AggregationBuilders;
@@ -174,8 +173,7 @@ public void onFetchFailure(int shardIndex, SearchShardTarget shardTarget, Except
             }
 
             @Override
-            public void onPartialReduce(List<SearchShard> shards, TotalHits totalHits,
-                                        DelayableWriteable.Serialized<InternalAggregations> aggs, int reducePhase) {
+            public void onPartialReduce(List<SearchShard> shards, TotalHits totalHits, InternalAggregations aggs, int reducePhase) {
                 numReduces.incrementAndGet();
             }