Skip to content

Commit

Permalink
Correctly pre-size HashSets (#87700) (#87983)
Browse files Browse the repository at this point in the history
The HashSet(int) constructor accepts the capacity of the set rather than its size. Unfortunately, HashSet requires a capacity of N/0.75 to hold N elements. So, if we call new HashSet(N) to hold N elements, the set will be too small and will need to be resized/rehashed midway.

We add new methods, Sets.newHashSetWithExpectedSize and Sets.newLinkedHashSetWithExpectedSize, that calculate the correct capacity for the specified expected size.

See

* https://bugs.openjdk.org/browse/JDK-8284975
* https://bugs.openjdk.org/browse/JDK-8287419
* openjdk/jdk#8302

(cherry picked from commit 26c1d33)
  • Loading branch information
arteam committed Jun 23, 2022
1 parent 1921a26 commit 44038a5
Show file tree
Hide file tree
Showing 82 changed files with 203 additions and 140 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
Expand All @@ -25,7 +26,6 @@
import org.elasticsearch.test.ESIntegTestCase;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
Expand Down Expand Up @@ -125,7 +125,7 @@ private TestContext create(SearchType... searchTypes) throws Exception {
boolean unevenRouting = randomBoolean();

int numMissingDocs = scaledRandomIntBetween(0, numDocs / 100);
Set<Integer> missingDocs = new HashSet<>(numMissingDocs);
Set<Integer> missingDocs = Sets.newHashSetWithExpectedSize(numMissingDocs);
for (int i = 0; i < numMissingDocs; i++) {
while (missingDocs.add(randomInt(numDocs)) == false) {
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.discovery.DiscoveryModule;
import org.elasticsearch.index.stats.IndexingPressureStats;
Expand Down Expand Up @@ -64,7 +65,7 @@ public class ClusterStatsNodes implements ToXContentFragment {
this.fs = new FsInfo.Path();
this.plugins = new HashSet<>();

Set<InetAddress> seenAddresses = new HashSet<>(nodeResponses.size());
Set<InetAddress> seenAddresses = Sets.newHashSetWithExpectedSize(nodeResponses.size());
List<NodeInfo> nodeInfos = new ArrayList<>(nodeResponses.size());
List<NodeStats> nodeStats = new ArrayList<>(nodeResponses.size());
for (ClusterStatsNodeResponse nodeResponse : nodeResponses) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ protected void masterOperation(

DeleteIndexClusterStateUpdateRequest deleteRequest = new DeleteIndexClusterStateUpdateRequest().ackTimeout(request.timeout())
.masterNodeTimeout(request.masterNodeTimeout())
.indices(concreteIndices.toArray(new Index[concreteIndices.size()]));
.indices(concreteIndices.toArray(new Index[0]));

deleteIndexService.deleteIndices(deleteRequest, listener.delegateResponse((l, e) -> {
logger.debug(() -> "failed to delete indices [" + concreteIndices + "]", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.transport.RawIndexingDataTransportRequest;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

public class BulkShardRequest extends ReplicatedWriteRequest<BulkShardRequest> implements Accountable, RawIndexingDataTransportRequest {
Expand Down Expand Up @@ -71,7 +71,7 @@ public String[] indices() {
// These alias names have to be exposed by this method because authorization works with
// aliases too, specifically, the item's target alias can be authorized but the concrete
// index might not be.
Set<String> indices = new HashSet<>(1);
Set<String> indices = Sets.newHashSetWithExpectedSize(1);
for (BulkItemRequest item : items) {
if (item != null) {
indices.add(item.index());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.store.StoreStats;
import org.elasticsearch.xcontent.ToXContentFragment;
Expand All @@ -25,6 +26,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;

/**
* ClusterInfo is an object representing a map of nodes to {@link DiskUsage}
Expand Down Expand Up @@ -293,7 +295,7 @@ public static class ReservedSpace implements Writeable {
public static final ReservedSpace EMPTY = new ReservedSpace(0, new HashSet<>());

private final long total;
private final HashSet<ShardId> shardIds;
private final Set<ShardId> shardIds;

private ReservedSpace(long total, HashSet<ShardId> shardIds) {
this.total = total;
Expand All @@ -303,7 +305,7 @@ private ReservedSpace(long total, HashSet<ShardId> shardIds) {
ReservedSpace(StreamInput in) throws IOException {
total = in.readVLong();
final int shardIdCount = in.readVInt();
shardIds = new HashSet<>(shardIdCount);
shardIds = Sets.newHashSetWithExpectedSize(shardIdCount);
for (int i = 0; i < shardIdCount; i++) {
shardIds.add(new ShardId(in));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ParseField;
Expand Down Expand Up @@ -110,7 +111,7 @@ public boolean hasSameHistoryId(DesiredNodes other) {
}

private static void checkForDuplicatedExternalIDs(List<DesiredNode> nodes) {
Set<String> nodeIDs = new HashSet<>(nodes.size());
Set<String> nodeIDs = Sets.newHashSetWithExpectedSize(nodes.size());
Set<String> duplicatedIDs = new HashSet<>();
for (DesiredNode node : nodes) {
String externalID = node.externalId();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.core.Nullable;
Expand Down Expand Up @@ -2279,7 +2280,7 @@ public static Set<ShardId> selectShrinkShards(int shardId, IndexMetadata sourceI
);
}
int routingFactor = getRoutingFactor(sourceIndexMetadata.getNumberOfShards(), numTargetShards);
Set<ShardId> shards = new HashSet<>(routingFactor);
Set<ShardId> shards = Sets.newHashSetWithExpectedSize(routingFactor);
for (int i = shardId * routingFactor; i < routingFactor * shardId + routingFactor; i++) {
shards.add(new ShardId(sourceIndexMetadata.getIndex(), i));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -364,7 +363,7 @@ Index[] concreteIndices(Context context, String... indexExpressions) {
}

boolean excludedDataStreams = false;
final Set<Index> concreteIndices = new LinkedHashSet<>(expressions.size());
final Set<Index> concreteIndices = Sets.newLinkedHashSetWithExpectedSize(expressions.size());
final SortedMap<String, IndexAbstraction> indicesLookup = context.state.metadata().getIndicesLookup();
for (String expression : expressions) {
IndexAbstraction indexAbstraction = indicesLookup.get(expression);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.core.Nullable;
Expand Down Expand Up @@ -2272,7 +2273,7 @@ private void dedupeMapping(IndexMetadata.Builder indexMetadataBuilder) {
}

private void purgeUnusedEntries(ImmutableOpenMap<String, IndexMetadata> indices) {
final Set<String> sha256HashesInUse = new HashSet<>(mappingsByHash.size());
final Set<String> sha256HashesInUse = Sets.newHashSetWithExpectedSize(mappingsByHash.size());
for (var im : indices.values()) {
if (im.mapping() != null) {
sha256HashesInUse.add(im.mapping().getSha256());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -340,7 +339,7 @@ public String[] resolveNodes(String... nodes) {
if (nodes == null || nodes.length == 0) {
return stream().map(DiscoveryNode::getId).toArray(String[]::new);
} else {
Set<String> resolvedNodesIds = new HashSet<>(nodes.length);
Set<String> resolvedNodesIds = Sets.newHashSetWithExpectedSize(nodes.length);
for (String nodeId : nodes) {
if (nodeId == null) {
// don't silence the underlying issue, it is a bug, so lets fail if assertions are enabled
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.shard.ShardId;
Expand Down Expand Up @@ -93,7 +94,7 @@ public GroupShardsIterator<ShardIterator> searchShards(
@Nullable Map<String, Long> nodeCounts
) {
final Set<IndexShardRoutingTable> shards = computeTargetedShards(clusterState, concreteIndices, routing);
final Set<ShardIterator> set = new HashSet<>(shards.size());
final Set<ShardIterator> set = Sets.newHashSetWithExpectedSize(shards.size());
for (IndexShardRoutingTable shard : shards) {
ShardIterator iterator = preferenceActiveShardIterator(
shard,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.logging.ESLogMessage;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.gateway.GatewayAllocator;
import org.elasticsearch.gateway.PriorityComparator;
import org.elasticsearch.snapshots.SnapshotsInfoService;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -231,7 +231,7 @@ public ClusterState applyFailedShards(
int failedAllocations = failedShard.unassignedInfo() != null ? failedShard.unassignedInfo().getNumFailedAllocations() : 0;
final Set<String> failedNodeIds;
if (failedShard.unassignedInfo() != null) {
failedNodeIds = new HashSet<>(failedShard.unassignedInfo().getFailedNodeIds().size() + 1);
failedNodeIds = Sets.newHashSetWithExpectedSize(failedShard.unassignedInfo().getFailedNodeIds().size() + 1);
failedNodeIds.addAll(failedShard.unassignedInfo().getFailedNodeIds());
failedNodeIds.add(failedShard.currentNodeId());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.gateway.PriorityComparator;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -1167,7 +1167,8 @@ public boolean containsShard(ShardRouting shard) {
}

static final class ModelIndex implements Iterable<ShardRouting> {
private final Set<ShardRouting> shards = new HashSet<>(4); // expect few shards of same index to be allocated on same node
private final Set<ShardRouting> shards = Sets.newHashSetWithExpectedSize(4); // expect few shards of same index to be allocated on
// same node
private int highestPrimary = -1;

ModelIndex() {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.CharArrays;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
Expand Down Expand Up @@ -59,8 +60,6 @@
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand Down Expand Up @@ -757,8 +756,8 @@ public Object readGenericValue() throws IOException {
case 21 -> readBytesRef();
case 22 -> readGeoPoint();
case 23 -> readZonedDateTime();
case 24 -> readCollection(StreamInput::readGenericValue, LinkedHashSet::new, Collections.emptySet());
case 25 -> readCollection(StreamInput::readGenericValue, HashSet::new, Collections.emptySet());
case 24 -> readCollection(StreamInput::readGenericValue, Sets::newLinkedHashSetWithExpectedSize, Collections.emptySet());
case 25 -> readCollection(StreamInput::readGenericValue, Sets::newHashSetWithExpectedSize, Collections.emptySet());
case 26 -> readBigInteger();
case 27 -> readOffsetTime();
default -> throw new IOException("Can't read unknown type [" + type + "]");
Expand Down Expand Up @@ -1207,7 +1206,7 @@ public List<String> readOptionalStringList() throws IOException {
* Reads a set of objects. If the returned set contains any entries it will be mutable. If it is empty it might be immutable.
*/
public <T> Set<T> readSet(Writeable.Reader<T> reader) throws IOException {
return readCollection(reader, HashSet::new, Collections.emptySet());
return readCollection(reader, Sets::newHashSetWithExpectedSize, Collections.emptySet());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Tuple;

import java.util.ArrayList;
Expand Down Expand Up @@ -349,7 +350,7 @@ public Map<String, Settings> getValue(Settings current, Settings previous) {
}
Map<String, Settings> namespaceToSettings = Maps.newMapWithExpectedSize(namespaces.size());
for (String namespace : namespaces) {
Set<String> concreteSettings = new HashSet<>(settings.size());
Set<String> concreteSettings = Sets.newHashSetWithExpectedSize(settings.size());
for (Setting.AffixSetting<?> setting : settings) {
concreteSettings.add(setting.getConcreteSettingForNamespace(namespace).getKey());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.util.set.Sets;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
Expand Down Expand Up @@ -96,7 +96,7 @@ public SetBackedScalingCuckooFilter(int threshold, Random rng, double fpp) {
if (fpp < 0) {
throw new IllegalArgumentException("[fpp] must be a positive double");
}
this.hashes = new HashSet<>(threshold);
this.hashes = Sets.newHashSetWithExpectedSize(threshold);
this.threshold = threshold;
this.rng = rng;
this.capacity = FILTER_CAPACITY;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.http.HttpTransportSettings;

Expand Down Expand Up @@ -306,7 +307,7 @@ public static Tuple<Map<String, String>, Map<String, Set<String>>> readHeadersFr
return Collections.singleton(input.readString());
} else {
// use a linked hash set to preserve order
final LinkedHashSet<String> values = new LinkedHashSet<>(size);
final LinkedHashSet<String> values = Sets.newLinkedHashSetWithExpectedSize(size);
for (int i = 0; i < size; i++) {
final String value = input.readString();
final boolean added = values.add(value);
Expand Down
14 changes: 14 additions & 0 deletions server/src/main/java/org/elasticsearch/common/util/set/Sets.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
Expand Down Expand Up @@ -50,6 +51,19 @@ public static <T> HashSet<T> newHashSet(T... elements) {
return new HashSet<>(Arrays.asList(elements));
}

/**
 * Creates a new, empty {@link HashSet} pre-sized so that it can hold {@code expectedSize}
 * elements without being resized/rehashed along the way (the {@code HashSet(int)} constructor
 * takes a capacity, not an expected size).
 *
 * @param expectedSize the number of elements the caller expects to add
 * @return an empty, mutable {@link HashSet} sized for {@code expectedSize} elements
 */
public static <E> Set<E> newHashSetWithExpectedSize(int expectedSize) {
    final int initialCapacity = capacity(expectedSize);
    return new HashSet<>(initialCapacity);
}

/**
 * Creates a new, empty {@link LinkedHashSet} pre-sized so that it can hold {@code expectedSize}
 * elements without being resized/rehashed along the way (the {@code LinkedHashSet(int)}
 * constructor takes a capacity, not an expected size).
 *
 * @param expectedSize the number of elements the caller expects to add
 * @return an empty, mutable {@link LinkedHashSet} sized for {@code expectedSize} elements
 */
public static <E> LinkedHashSet<E> newLinkedHashSetWithExpectedSize(int expectedSize) {
    final int initialCapacity = capacity(expectedSize);
    return new LinkedHashSet<>(initialCapacity);
}

/**
 * Computes the initial capacity a {@code HashSet}/{@code LinkedHashSet} needs in order to hold
 * {@code expectedSize} elements without triggering a resize, given the default load factor of
 * 0.75. For very small sizes (0 or 1) the extra headroom of {@code expectedSize + 1} suffices.
 *
 * @param expectedSize the expected number of elements; must be non-negative
 * @return a capacity large enough to hold {@code expectedSize} elements without rehashing
 */
static int capacity(int expectedSize) {
    assert expectedSize >= 0;
    if (expectedSize < 2) {
        return expectedSize + 1;
    }
    return (int) (expectedSize / 0.75 + 1.0);
}

/**
 * Creates a new, empty, concurrent and mutable {@link Set} backed by a
 * {@link ConcurrentHashMap}, via {@link Collections#newSetFromMap}.
 *
 * @return an empty concurrent {@link Set}
 */
public static <T> Set<T> newConcurrentHashSet() {
    final ConcurrentHashMap<T, Boolean> backingMap = new ConcurrentHashMap<>();
    return Collections.newSetFromMap(backingMap);
}
Expand Down

0 comments on commit 44038a5

Please sign in to comment.