Skip to content

Commit

Permalink
Correctly pre-size HashSets (#87700) (#87983)
Browse files Browse the repository at this point in the history
The HashSet(int) constructor accepts the capacity of the set rather than its size. Unfortunately, HashSet requires a capacity of N/0.75 to hold N elements. So, if we call new HashSet(N) to hold N elements, the set will be too small and will need to be resized/rehashed midway.

We add new methods, Sets.newHashSetWithExpectedSize and Sets.newLinkedHashSetWithExpectedSize, that calculate the correct capacity for the specified expected size.

See

* https://bugs.openjdk.org/browse/JDK-8284975
* https://bugs.openjdk.org/browse/JDK-8287419
* openjdk/jdk#8302

(cherry picked from commit 26c1d33)
  • Loading branch information
arteam committed Jun 23, 2022
1 parent 1921a26 commit 44038a5
Show file tree
Hide file tree
Showing 82 changed files with 203 additions and 140 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
Expand All @@ -25,7 +26,6 @@
import org.elasticsearch.test.ESIntegTestCase;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
Expand Down Expand Up @@ -125,7 +125,7 @@ private TestContext create(SearchType... searchTypes) throws Exception {
boolean unevenRouting = randomBoolean();

int numMissingDocs = scaledRandomIntBetween(0, numDocs / 100);
Set<Integer> missingDocs = new HashSet<>(numMissingDocs);
Set<Integer> missingDocs = Sets.newHashSetWithExpectedSize(numMissingDocs);
for (int i = 0; i < numMissingDocs; i++) {
while (missingDocs.add(randomInt(numDocs)) == false) {
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.discovery.DiscoveryModule;
import org.elasticsearch.index.stats.IndexingPressureStats;
Expand Down Expand Up @@ -64,7 +65,7 @@ public class ClusterStatsNodes implements ToXContentFragment {
this.fs = new FsInfo.Path();
this.plugins = new HashSet<>();

Set<InetAddress> seenAddresses = new HashSet<>(nodeResponses.size());
Set<InetAddress> seenAddresses = Sets.newHashSetWithExpectedSize(nodeResponses.size());
List<NodeInfo> nodeInfos = new ArrayList<>(nodeResponses.size());
List<NodeStats> nodeStats = new ArrayList<>(nodeResponses.size());
for (ClusterStatsNodeResponse nodeResponse : nodeResponses) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ protected void masterOperation(

DeleteIndexClusterStateUpdateRequest deleteRequest = new DeleteIndexClusterStateUpdateRequest().ackTimeout(request.timeout())
.masterNodeTimeout(request.masterNodeTimeout())
.indices(concreteIndices.toArray(new Index[concreteIndices.size()]));
.indices(concreteIndices.toArray(new Index[0]));

deleteIndexService.deleteIndices(deleteRequest, listener.delegateResponse((l, e) -> {
logger.debug(() -> "failed to delete indices [" + concreteIndices + "]", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.transport.RawIndexingDataTransportRequest;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

public class BulkShardRequest extends ReplicatedWriteRequest<BulkShardRequest> implements Accountable, RawIndexingDataTransportRequest {
Expand Down Expand Up @@ -71,7 +71,7 @@ public String[] indices() {
// These alias names have to be exposed by this method because authorization works with
// aliases too, specifically, the item's target alias can be authorized but the concrete
// index might not be.
Set<String> indices = new HashSet<>(1);
Set<String> indices = Sets.newHashSetWithExpectedSize(1);
for (BulkItemRequest item : items) {
if (item != null) {
indices.add(item.index());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.store.StoreStats;
import org.elasticsearch.xcontent.ToXContentFragment;
Expand All @@ -25,6 +26,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;

/**
* ClusterInfo is an object representing a map of nodes to {@link DiskUsage}
Expand Down Expand Up @@ -293,7 +295,7 @@ public static class ReservedSpace implements Writeable {
public static final ReservedSpace EMPTY = new ReservedSpace(0, new HashSet<>());

private final long total;
private final HashSet<ShardId> shardIds;
private final Set<ShardId> shardIds;

private ReservedSpace(long total, HashSet<ShardId> shardIds) {
this.total = total;
Expand All @@ -303,7 +305,7 @@ private ReservedSpace(long total, HashSet<ShardId> shardIds) {
ReservedSpace(StreamInput in) throws IOException {
total = in.readVLong();
final int shardIdCount = in.readVInt();
shardIds = new HashSet<>(shardIdCount);
shardIds = Sets.newHashSetWithExpectedSize(shardIdCount);
for (int i = 0; i < shardIdCount; i++) {
shardIds.add(new ShardId(in));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ParseField;
Expand Down Expand Up @@ -110,7 +111,7 @@ public boolean hasSameHistoryId(DesiredNodes other) {
}

private static void checkForDuplicatedExternalIDs(List<DesiredNode> nodes) {
Set<String> nodeIDs = new HashSet<>(nodes.size());
Set<String> nodeIDs = Sets.newHashSetWithExpectedSize(nodes.size());
Set<String> duplicatedIDs = new HashSet<>();
for (DesiredNode node : nodes) {
String externalID = node.externalId();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.core.Nullable;
Expand Down Expand Up @@ -2279,7 +2280,7 @@ public static Set<ShardId> selectShrinkShards(int shardId, IndexMetadata sourceI
);
}
int routingFactor = getRoutingFactor(sourceIndexMetadata.getNumberOfShards(), numTargetShards);
Set<ShardId> shards = new HashSet<>(routingFactor);
Set<ShardId> shards = Sets.newHashSetWithExpectedSize(routingFactor);
for (int i = shardId * routingFactor; i < routingFactor * shardId + routingFactor; i++) {
shards.add(new ShardId(sourceIndexMetadata.getIndex(), i));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -364,7 +363,7 @@ Index[] concreteIndices(Context context, String... indexExpressions) {
}

boolean excludedDataStreams = false;
final Set<Index> concreteIndices = new LinkedHashSet<>(expressions.size());
final Set<Index> concreteIndices = Sets.newLinkedHashSetWithExpectedSize(expressions.size());
final SortedMap<String, IndexAbstraction> indicesLookup = context.state.metadata().getIndicesLookup();
for (String expression : expressions) {
IndexAbstraction indexAbstraction = indicesLookup.get(expression);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.core.Nullable;
Expand Down Expand Up @@ -2272,7 +2273,7 @@ private void dedupeMapping(IndexMetadata.Builder indexMetadataBuilder) {
}

private void purgeUnusedEntries(ImmutableOpenMap<String, IndexMetadata> indices) {
final Set<String> sha256HashesInUse = new HashSet<>(mappingsByHash.size());
final Set<String> sha256HashesInUse = Sets.newHashSetWithExpectedSize(mappingsByHash.size());
for (var im : indices.values()) {
if (im.mapping() != null) {
sha256HashesInUse.add(im.mapping().getSha256());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -340,7 +339,7 @@ public String[] resolveNodes(String... nodes) {
if (nodes == null || nodes.length == 0) {
return stream().map(DiscoveryNode::getId).toArray(String[]::new);
} else {
Set<String> resolvedNodesIds = new HashSet<>(nodes.length);
Set<String> resolvedNodesIds = Sets.newHashSetWithExpectedSize(nodes.length);
for (String nodeId : nodes) {
if (nodeId == null) {
// don't silence the underlying issue, it is a bug, so lets fail if assertions are enabled
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.shard.ShardId;
Expand Down Expand Up @@ -93,7 +94,7 @@ public GroupShardsIterator<ShardIterator> searchShards(
@Nullable Map<String, Long> nodeCounts
) {
final Set<IndexShardRoutingTable> shards = computeTargetedShards(clusterState, concreteIndices, routing);
final Set<ShardIterator> set = new HashSet<>(shards.size());
final Set<ShardIterator> set = Sets.newHashSetWithExpectedSize(shards.size());
for (IndexShardRoutingTable shard : shards) {
ShardIterator iterator = preferenceActiveShardIterator(
shard,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.logging.ESLogMessage;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.gateway.GatewayAllocator;
import org.elasticsearch.gateway.PriorityComparator;
import org.elasticsearch.snapshots.SnapshotsInfoService;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -231,7 +231,7 @@ public ClusterState applyFailedShards(
int failedAllocations = failedShard.unassignedInfo() != null ? failedShard.unassignedInfo().getNumFailedAllocations() : 0;
final Set<String> failedNodeIds;
if (failedShard.unassignedInfo() != null) {
failedNodeIds = new HashSet<>(failedShard.unassignedInfo().getFailedNodeIds().size() + 1);
failedNodeIds = Sets.newHashSetWithExpectedSize(failedShard.unassignedInfo().getFailedNodeIds().size() + 1);
failedNodeIds.addAll(failedShard.unassignedInfo().getFailedNodeIds());
failedNodeIds.add(failedShard.currentNodeId());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.gateway.PriorityComparator;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -1167,7 +1167,8 @@ public boolean containsShard(ShardRouting shard) {
}

static final class ModelIndex implements Iterable<ShardRouting> {
private final Set<ShardRouting> shards = new HashSet<>(4); // expect few shards of same index to be allocated on same node
private final Set<ShardRouting> shards = Sets.newHashSetWithExpectedSize(4); // expect few shards of same index to be allocated on
// same node
private int highestPrimary = -1;

ModelIndex() {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.CharArrays;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
Expand Down Expand Up @@ -59,8 +60,6 @@
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand Down Expand Up @@ -757,8 +756,8 @@ public Object readGenericValue() throws IOException {
case 21 -> readBytesRef();
case 22 -> readGeoPoint();
case 23 -> readZonedDateTime();
case 24 -> readCollection(StreamInput::readGenericValue, LinkedHashSet::new, Collections.emptySet());
case 25 -> readCollection(StreamInput::readGenericValue, HashSet::new, Collections.emptySet());
case 24 -> readCollection(StreamInput::readGenericValue, Sets::newLinkedHashSetWithExpectedSize, Collections.emptySet());
case 25 -> readCollection(StreamInput::readGenericValue, Sets::newHashSetWithExpectedSize, Collections.emptySet());
case 26 -> readBigInteger();
case 27 -> readOffsetTime();
default -> throw new IOException("Can't read unknown type [" + type + "]");
Expand Down Expand Up @@ -1207,7 +1206,7 @@ public List<String> readOptionalStringList() throws IOException {
* Reads a set of objects. If the returned set contains any entries it will be mutable. If it is empty it might be immutable.
*/
public <T> Set<T> readSet(Writeable.Reader<T> reader) throws IOException {
return readCollection(reader, HashSet::new, Collections.emptySet());
return readCollection(reader, Sets::newHashSetWithExpectedSize, Collections.emptySet());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Tuple;

import java.util.ArrayList;
Expand Down Expand Up @@ -349,7 +350,7 @@ public Map<String, Settings> getValue(Settings current, Settings previous) {
}
Map<String, Settings> namespaceToSettings = Maps.newMapWithExpectedSize(namespaces.size());
for (String namespace : namespaces) {
Set<String> concreteSettings = new HashSet<>(settings.size());
Set<String> concreteSettings = Sets.newHashSetWithExpectedSize(settings.size());
for (Setting.AffixSetting<?> setting : settings) {
concreteSettings.add(setting.getConcreteSettingForNamespace(namespace).getKey());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.util.set.Sets;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
Expand Down Expand Up @@ -96,7 +96,7 @@ public SetBackedScalingCuckooFilter(int threshold, Random rng, double fpp) {
if (fpp < 0) {
throw new IllegalArgumentException("[fpp] must be a positive double");
}
this.hashes = new HashSet<>(threshold);
this.hashes = Sets.newHashSetWithExpectedSize(threshold);
this.threshold = threshold;
this.rng = rng;
this.capacity = FILTER_CAPACITY;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.http.HttpTransportSettings;

Expand Down Expand Up @@ -306,7 +307,7 @@ public static Tuple<Map<String, String>, Map<String, Set<String>>> readHeadersFr
return Collections.singleton(input.readString());
} else {
// use a linked hash set to preserve order
final LinkedHashSet<String> values = new LinkedHashSet<>(size);
final LinkedHashSet<String> values = Sets.newLinkedHashSetWithExpectedSize(size);
for (int i = 0; i < size; i++) {
final String value = input.readString();
final boolean added = values.add(value);
Expand Down
14 changes: 14 additions & 0 deletions server/src/main/java/org/elasticsearch/common/util/set/Sets.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
Expand Down Expand Up @@ -50,6 +51,19 @@ public static <T> HashSet<T> newHashSet(T... elements) {
return new HashSet<>(Arrays.asList(elements));
}

/**
 * Creates a new, empty {@link HashSet} pre-sized so that it can hold {@code expectedSize}
 * elements without being resized/rehashed along the way (the {@code HashSet(int)} constructor
 * takes a capacity, not an expected size).
 *
 * @param expectedSize the number of elements the caller expects to add
 * @return an empty, mutable {@link HashSet} sized for {@code expectedSize} elements
 */
public static <E> Set<E> newHashSetWithExpectedSize(int expectedSize) {
    final int initialCapacity = capacity(expectedSize);
    return new HashSet<>(initialCapacity);
}

/**
 * Creates a new, empty {@link LinkedHashSet} pre-sized so that it can hold {@code expectedSize}
 * elements without being resized/rehashed along the way (the {@code LinkedHashSet(int)}
 * constructor takes a capacity, not an expected size).
 *
 * @param expectedSize the number of elements the caller expects to add
 * @return an empty, mutable {@link LinkedHashSet} sized for {@code expectedSize} elements
 */
public static <E> LinkedHashSet<E> newLinkedHashSetWithExpectedSize(int expectedSize) {
    final int initialCapacity = capacity(expectedSize);
    return new LinkedHashSet<>(initialCapacity);
}

/**
 * Computes the initial capacity a {@code HashSet}/{@code LinkedHashSet} needs in order to hold
 * {@code expectedSize} elements without triggering a resize, given the default load factor of
 * 0.75. For very small sizes (0 or 1) the extra headroom of {@code expectedSize + 1} suffices.
 *
 * @param expectedSize the expected number of elements; must be non-negative
 * @return a capacity large enough to hold {@code expectedSize} elements without rehashing
 */
static int capacity(int expectedSize) {
    assert expectedSize >= 0;
    if (expectedSize < 2) {
        return expectedSize + 1;
    }
    return (int) (expectedSize / 0.75 + 1.0);
}

/**
 * Creates a new, empty, concurrent and mutable {@link Set} backed by a
 * {@link ConcurrentHashMap}, via {@link Collections#newSetFromMap}.
 *
 * @return an empty concurrent {@link Set}
 */
public static <T> Set<T> newConcurrentHashSet() {
    final ConcurrentHashMap<T, Boolean> backingMap = new ConcurrentHashMap<>();
    return Collections.newSetFromMap(backingMap);
}
Expand Down

0 comments on commit 44038a5

Please sign in to comment.