Skip to content

Commit

Permalink
[8.2] Adding a deprecation info API check for too many shards (#85967) (
Browse files Browse the repository at this point in the history
#86518)

* Adding a deprecation info API check for too many shards (#85967)

This commit makes sure that there is enough room in a cluster to add a small number of shards during an upgrade. The information is exposed in the deprecation info API as a cluster configuration check.
Closes #85702

* fixing unit test
  • Loading branch information
masseyke committed May 6, 2022
1 parent bcdf75d commit a20b65f
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,29 @@ private Optional<String> checkShardLimit(int newShards, int newFrozenShards, Clu
);
}

/**
 * Determines whether the cluster theoretically has room to add the given number of primary shards plus their replicas
 * without exceeding the "cluster.max_shards_per_node.frozen" setting (when targeting frozen nodes) or the
 * "cluster.max_shards_per_node" setting (when targeting normal nodes). A {@code true} result does not guarantee that the
 * shards can actually be allocated — only that the configured per-node limits leave enough headroom for them.
 * @param numberOfNewShards The number of primary shards that we want to be able to add to the cluster
 * @param replicas The number of replicas of the primary shards that we want to be able to add to the cluster
 * @param state The cluster state, used to get cluster settings and to get the number of open shards already in the cluster
 * @param frozenNodes If true, check whether there is room to put these shards onto frozen nodes. If false, check whether there is room
 *                    to put these shards onto normal nodes.
 * @return True if there is room to add the requested number of shards to the cluster, and false if there is not
 */
public static boolean canAddShardsToCluster(int numberOfNewShards, int replicas, ClusterState state, boolean frozenNodes) {
    Settings clusterSettings = state.getMetadata().settings();
    final int perNodeShardLimit;
    final int matchingNodeCount;
    final String nodeGroup;
    if (frozenNodes) {
        perNodeShardLimit = SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.get(clusterSettings);
        matchingNodeCount = nodeCount(state, ShardLimitValidator::hasFrozen);
        nodeGroup = FROZEN_GROUP;
    } else {
        perNodeShardLimit = SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterSettings);
        matchingNodeCount = nodeCount(state, ShardLimitValidator::hasNonFrozen);
        nodeGroup = "normal";
    }
    // each primary brings `replicas` copies with it, so the total shard count is primaries * (1 + replicas)
    int totalNewShards = numberOfNewShards * (1 + replicas);
    Optional<String> errorMessage = checkShardLimit(totalNewShards, state, perNodeShardLimit, matchingNodeCount, nodeGroup);
    return errorMessage.isPresent() == false;
}

// package-private for testing
static Optional<String> checkShardLimit(int newShards, ClusterState state, int maxShardsPerNode, int nodeCount, String group) {
// Only enforce the shard limit if we have at least one data node, so that we don't block
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public void testOverShardLimit() {
nodesInCluster,
counts.getFirstIndexShards(),
counts.getFirstIndexReplicas(),
counts.getShardsPerNode(),
group
);

Expand Down Expand Up @@ -75,6 +76,14 @@ public void testOverShardLimit() {
+ " shards open",
errorMessage.get()
);
assertFalse(
ShardLimitValidator.canAddShardsToCluster(
counts.getFailingIndexShards(),
counts.getFailingIndexReplicas(),
state,
ShardLimitValidator.FROZEN_GROUP.equals(group)
)
);
}

public void testUnderShardLimit() {
Expand All @@ -87,6 +96,7 @@ public void testUnderShardLimit() {
nodesInCluster,
counts.getFirstIndexShards(),
counts.getFirstIndexReplicas(),
counts.getShardsPerNode(),
group
);

Expand All @@ -101,6 +111,7 @@ public void testUnderShardLimit() {
);

assertFalse(errorMessage.isPresent());
assertTrue(ShardLimitValidator.canAddShardsToCluster(shardsToAdd, 0, state, ShardLimitValidator.FROZEN_GROUP.equals(group)));
}

public void testValidateShardLimitOpenIndices() {
Expand Down Expand Up @@ -182,7 +193,13 @@ private ClusterState createClusterStateForReplicaUpdate(int nodesInCluster, int
return state;
}

public static ClusterState createClusterForShardLimitTest(int nodesInCluster, int shardsInIndex, int replicas, String group) {
public static ClusterState createClusterForShardLimitTest(
int nodesInCluster,
int shardsInIndex,
int replicas,
int maxShardsPerNode,
String group
) {
DiscoveryNodes nodes = createDiscoveryNodes(nodesInCluster, group);

Settings.Builder settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT);
Expand All @@ -195,10 +212,15 @@ public static ClusterState createClusterForShardLimitTest(int nodesInCluster, in
.numberOfShards(shardsInIndex)
.numberOfReplicas(replicas);
Metadata.Builder metadata = Metadata.builder().put(indexMetadata);
Settings.Builder clusterSettings = Settings.builder()
.put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey(), maxShardsPerNode)
.put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode);
if (randomBoolean()) {
metadata.transientSettings(Settings.EMPTY);
metadata.persistentSettings(clusterSettings.build());
} else {
metadata.persistentSettings(Settings.EMPTY);
metadata.transientSettings(clusterSettings.build());
}

return ClusterState.builder(ClusterName.DEFAULT).metadata(metadata).nodes(nodes).build();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.deprecation;

import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.indices.ShardLimitValidator;
import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;

import java.util.Locale;

public class ClusterDeprecationChecks {

    private ClusterDeprecationChecks() {
        // Utility class holding static check methods only; no instances.
    }

    /**
     * Upgrading can require the addition of one or more small indices. This method checks that based on configuration we have the room
     * to add a small number of additional shards to the cluster. The goal is to prevent a failure during upgrade.
     * @param clusterState The cluster state, used to get settings and information about nodes
     * @return A deprecation issue if there is not enough room in this cluster to add a few more shards, or null otherwise
     */
    static DeprecationIssue checkShards(ClusterState clusterState) {
        // Make sure we have room to add a small non-frozen index if needed
        final int shardsInFutureNewSmallIndex = 5;
        final int replicasForFutureIndex = 1;
        if (ShardLimitValidator.canAddShardsToCluster(shardsInFutureNewSmallIndex, replicasForFutureIndex, clusterState, false)) {
            return null;
        } else {
            // primaries plus their replicas, mirroring the arithmetic inside canAddShardsToCluster
            final int totalShardsToAdd = shardsInFutureNewSmallIndex * (1 + replicasForFutureIndex);
            return new DeprecationIssue(
                DeprecationIssue.Level.WARNING,
                "The cluster has too many shards to be able to upgrade",
                "https://ela.st/es-deprecation-8-shard-limit",
                String.format(
                    Locale.ROOT,
                    "Upgrading requires adding a small number of new shards. There is not enough room for %d more "
                        + "shards. Increase the cluster.max_shards_per_node setting, or remove indices "
                        + "to clear up resources.",
                    totalShardsToAdd
                ),
                false,
                null
            );
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.elasticsearch.license.XPackLicenseState;
import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
Expand All @@ -36,7 +37,9 @@ public class DeprecationChecks {

private DeprecationChecks() {}

static List<Function<ClusterState, DeprecationIssue>> CLUSTER_SETTINGS_CHECKS = Collections.emptyList();
static List<Function<ClusterState, DeprecationIssue>> CLUSTER_SETTINGS_CHECKS = Collections.unmodifiableList(
Arrays.asList(ClusterDeprecationChecks::checkShards)
);

static final List<
NodeDeprecationCheck<Settings, PluginsAndModules, ClusterState, XPackLicenseState, DeprecationIssue>> NODE_SETTINGS_CHECKS = List
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.deprecation;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.allocation.DataTier;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.ShardLimitValidator;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.deprecation.DeprecationIssue;

import java.util.List;
import java.util.UUID;

import static java.util.Collections.singletonList;
import static org.elasticsearch.xpack.deprecation.DeprecationChecks.CLUSTER_SETTINGS_CHECKS;
import static org.hamcrest.Matchers.equalTo;

public class ClusterDeprecationChecksTests extends ESTestCase {
    public void testCheckShards() {
        /*
         * This test first sets the number of allowed shards per node to 5 and creates 2 nodes, so there is room for 10
         * shards — exactly the number that checkShards() verifies can be added. With no indices present, the check
         * passes. The second cluster state lowers the limit to 4 shards per node (8 total) and adds an index with one
         * shard and one replica, leaving room for only 6 of the 10 shards needed. So the check fails.
         */
        final ClusterState state = ClusterState.builder(new ClusterName(randomAlphaOfLength(5)))
            .metadata(
                Metadata.builder()
                    .persistentSettings(Settings.builder().put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 5).build())
                    .build()
            )
            .nodes(
                DiscoveryNodes.builder()
                    .add(new DiscoveryNode(UUID.randomUUID().toString(), buildNewFakeTransportAddress(), Version.CURRENT))
                    .add(new DiscoveryNode(UUID.randomUUID().toString(), buildNewFakeTransportAddress(), Version.CURRENT))
            )
            .build();
        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(CLUSTER_SETTINGS_CHECKS, c -> c.apply(state));
        // Hamcrest contract is assertThat(actual, matcher) — actual value goes first
        assertThat(issues.size(), equalTo(0));

        final ClusterState stateWithProblems = ClusterState.builder(new ClusterName(randomAlphaOfLength(5)))
            .metadata(
                Metadata.builder()
                    .persistentSettings(Settings.builder().put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 4).build())
                    .put(
                        IndexMetadata.builder(randomAlphaOfLength(10))
                            .settings(settings(Version.CURRENT).put(DataTier.TIER_PREFERENCE_SETTING.getKey(), " "))
                            .numberOfShards(1)
                            .numberOfReplicas(1)
                            .build(),
                        false
                    )
                    .build()
            )
            .nodes(
                DiscoveryNodes.builder()
                    .add(new DiscoveryNode(UUID.randomUUID().toString(), buildNewFakeTransportAddress(), Version.CURRENT))
                    .add(new DiscoveryNode(UUID.randomUUID().toString(), buildNewFakeTransportAddress(), Version.CURRENT))
            )
            .build();

        issues = DeprecationChecks.filterChecks(CLUSTER_SETTINGS_CHECKS, c -> c.apply(stateWithProblems));

        DeprecationIssue expected = new DeprecationIssue(
            DeprecationIssue.Level.WARNING,
            "The cluster has too many shards to be able to upgrade",
            "https://ela.st/es-deprecation-8-shard-limit",
            "Upgrading requires adding a small number of new shards. There is not enough room for 10 more shards. Increase the cluster"
                + ".max_shards_per_node setting, or remove indices to clear up resources.",
            false,
            null
        );
        assertEquals(singletonList(expected), issues);
    }
}

0 comments on commit a20b65f

Please sign in to comment.