Skip to content

Commit

Permalink
Fallback to the actual shard size when forecast is not available (#93461
Browse files Browse the repository at this point in the history
)
  • Loading branch information
idegtiarenko committed Feb 6, 2023
1 parent 1fb3a1b commit acece61
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 80 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/93461.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 93461
summary: Fallback to the actual shard size when forecast is not available
area: Allocation
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.OptionalLong;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.stream.StreamSupport;
Expand Down Expand Up @@ -321,26 +320,26 @@ float minWeightDelta(Balancer balancer, String index) {
* A {@link Balancer}
*/
public static class Balancer {
private final Map<String, ModelNode> nodes;
private final WriteLoadForecaster writeLoadForecaster;
private final RoutingAllocation allocation;
private final RoutingNodes routingNodes;
private final Metadata metadata;
private final WeightFunction weight;

private final float threshold;
private final Metadata metadata;
private final float avgShardsPerNode;
private final double avgWriteLoadPerNode;
private final double avgDiskUsageInBytesPerNode;
private final Map<String, ModelNode> nodes;
private final NodeSorter sorter;

public Balancer(WriteLoadForecaster writeLoadForecaster, RoutingAllocation allocation, WeightFunction weight, float threshold) {
this.writeLoadForecaster = writeLoadForecaster;
this.allocation = allocation;
this.weight = weight;
this.threshold = threshold;
this.routingNodes = allocation.routingNodes();
this.metadata = allocation.metadata();
this.weight = weight;
this.threshold = threshold;
avgShardsPerNode = ((float) metadata.getTotalNumberOfShards()) / routingNodes.size();
avgWriteLoadPerNode = getTotalWriteLoad(writeLoadForecaster, metadata) / routingNodes.size();
avgDiskUsageInBytesPerNode = ((double) getTotalDiskUsageInBytes(allocation.clusterInfo(), metadata) / routingNodes.size());
Expand Down Expand Up @@ -371,15 +370,10 @@ private static long getTotalDiskUsageInBytes(ClusterInfo clusterInfo, Metadata m

// Visible for testing
static long getIndexDiskUsageInBytes(ClusterInfo clusterInfo, IndexMetadata indexMetadata) {
OptionalLong forecastedShardSizeInBytes = indexMetadata.getForecastedShardSizeInBytes();
final long indexDiskUsageInBytes;
if (forecastedShardSizeInBytes.isPresent()) {
int i = numberOfCopies(indexMetadata);
indexDiskUsageInBytes = forecastedShardSizeInBytes.getAsLong() * i;
} else {
indexDiskUsageInBytes = getIndexDiskUsageInBytesFromClusterInfo(clusterInfo, indexMetadata);
}
return indexDiskUsageInBytes;
var forecastedShardSizeInBytes = indexMetadata.getForecastedShardSizeInBytes();
return forecastedShardSizeInBytes.isPresent()
? forecastedShardSizeInBytes.getAsLong() * numberOfCopies(indexMetadata)
: getIndexDiskUsageInBytesFromClusterInfo(clusterInfo, indexMetadata);
}

private static long getIndexDiskUsageInBytesFromClusterInfo(ClusterInfo clusterInfo, IndexMetadata indexMetadata) {
Expand Down Expand Up @@ -408,6 +402,10 @@ private static long getIndexDiskUsageInBytesFromClusterInfo(ClusterInfo clusterI
return shardCount == 0 ? 0 : (totalSizeInBytes / shardCount) * numberOfCopies(indexMetadata);
}

private static long getShardDiskUsageInBytes(ShardRouting shardRouting, IndexMetadata indexMetadata, ClusterInfo clusterInfo) {
return indexMetadata.getForecastedShardSizeInBytes().orElseGet(() -> clusterInfo.getShardSize(shardRouting, 0L));
}

private static int numberOfCopies(IndexMetadata indexMetadata) {
return indexMetadata.getNumberOfShards() * (1 + indexMetadata.getNumberOfReplicas());
}
Expand All @@ -416,6 +414,14 @@ private double getShardWriteLoad(String index) {
return writeLoadForecaster.getForecastedWriteLoad(metadata.index(index)).orElse(0.0);
}

private double diskUsageInBytesPerShard(String index) {
var indexMetadata = metadata.index(index);
var forecastedShardSizeInBytes = indexMetadata.getForecastedShardSizeInBytes();
return forecastedShardSizeInBytes.isPresent()
? forecastedShardSizeInBytes.getAsLong()
: (double) getIndexDiskUsageInBytesFromClusterInfo(allocation.clusterInfo(), indexMetadata) / numberOfCopies(indexMetadata);
}

/**
* Returns an array view on the nodes in the balancer. Nodes should not be removed from this list.
*/
Expand Down Expand Up @@ -445,10 +451,6 @@ public double avgDiskUsageInBytesPerNode() {
return avgDiskUsageInBytesPerNode;
}

public double diskUsageInBytesPerShard(String index) {
return metadata.index(index).getForecastedShardSizeInBytes().orElse(0);
}

/**
* Returns a new {@link NodeSorter} that sorts the nodes based on their
* current weight with respect to the index passed to the sorter. The
Expand Down Expand Up @@ -962,7 +964,7 @@ private Decision decideCanForceAllocateForVacate(ShardRouting shardRouting, Rout
private Map<String, ModelNode> buildModelFromAssigned() {
Map<String, ModelNode> nodes = Maps.newMapWithExpectedSize(routingNodes.size());
for (RoutingNode rn : routingNodes) {
ModelNode node = new ModelNode(writeLoadForecaster, metadata, rn);
ModelNode node = new ModelNode(writeLoadForecaster, metadata, allocation.clusterInfo(), rn);
nodes.put(rn.nodeId(), node);
for (ShardRouting shard : rn) {
assert rn.nodeId().equals(shard.currentNodeId());
Expand Down Expand Up @@ -1254,12 +1256,14 @@ static class ModelNode implements Iterable<ModelIndex> {
private double diskUsageInBytes = 0.0;
private final WriteLoadForecaster writeLoadForecaster;
private final Metadata metadata;
private final ClusterInfo clusterInfo;
private final RoutingNode routingNode;
private final Map<String, ModelIndex> indices;

ModelNode(WriteLoadForecaster writeLoadForecaster, Metadata metadata, RoutingNode routingNode) {
ModelNode(WriteLoadForecaster writeLoadForecaster, Metadata metadata, ClusterInfo clusterInfo, RoutingNode routingNode) {
this.writeLoadForecaster = writeLoadForecaster;
this.metadata = metadata;
this.clusterInfo = clusterInfo;
this.routingNode = routingNode;
this.indices = Maps.newMapWithExpectedSize(routingNode.size() + 10);// some extra to account for shard movements
}
Expand Down Expand Up @@ -1305,7 +1309,7 @@ public void addShard(ShardRouting shard) {
indices.computeIfAbsent(shard.getIndexName(), t -> new ModelIndex()).addShard(shard);
IndexMetadata indexMetadata = metadata.index(shard.index());
writeLoad += writeLoadForecaster.getForecastedWriteLoad(indexMetadata).orElse(0.0);
diskUsageInBytes += indexMetadata.getForecastedShardSizeInBytes().orElse(0);
diskUsageInBytes += Balancer.getShardDiskUsageInBytes(shard, indexMetadata, clusterInfo);
numShards++;
}

Expand All @@ -1319,7 +1323,7 @@ public void removeShard(ShardRouting shard) {
}
IndexMetadata indexMetadata = metadata.index(shard.index());
writeLoad -= writeLoadForecaster.getForecastedWriteLoad(indexMetadata).orElse(0.0);
diskUsageInBytes -= indexMetadata.getForecastedShardSizeInBytes().orElse(0);
diskUsageInBytes -= Balancer.getShardDiskUsageInBytes(shard, indexMetadata, clusterInfo);
numShards--;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,21 @@
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ESAllocationTestCase;
import org.elasticsearch.cluster.TestShardRoutingRoleStrategies;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands;
import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.shard.ShardId;

import java.util.Map;

import static org.elasticsearch.cluster.routing.RoutingNodesHelper.shardsWithState;
import static org.hamcrest.Matchers.equalTo;
Expand All @@ -35,37 +38,18 @@ public class ExpectedShardSizeAllocationTests extends ESAllocationTestCase {

public void testInitializingHasExpectedSize() {
final long byteSize = randomIntBetween(0, Integer.MAX_VALUE);
AllocationService strategy = createAllocationService(Settings.EMPTY, () -> new ClusterInfo() {
@Override
public Long getShardSize(ShardRouting shardRouting) {
if (shardRouting.getIndexName().equals("test") && shardRouting.shardId().getId() == 0) {
return byteSize;
}
return null;
}
});
final ClusterInfo clusterInfo = createClusterInfoWith(new ShardId("test", "_na_", 0), byteSize);
AllocationService strategy = createAllocationService(Settings.EMPTY, () -> clusterInfo);

logger.info("Building initial routing table");
var indexMetadata = IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1).build();

Metadata metadata = Metadata.builder()
.put(
IndexMetadata.builder("test")
.settings(
settings(Version.CURRENT).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
)
)
.build();

RoutingTable routingTable = RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY)
.addAsNew(metadata.index("test"))
ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
.metadata(Metadata.builder().put(indexMetadata, false))
.routingTable(RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY).addAsNew(indexMetadata))
.nodes(DiscoveryNodes.builder().add(newNode("node1")))
.build();

ClusterState clusterState = ClusterState.builder(
org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)
).metadata(metadata).routingTable(routingTable).build();
logger.info("Adding one node and performing rerouting");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1"))).build();
clusterState = strategy.reroute(clusterState, "reroute", ActionListener.noop());

assertEquals(1, clusterState.getRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.INITIALIZING));
Expand All @@ -92,43 +76,26 @@ public Long getShardSize(ShardRouting shardRouting) {

public void testExpectedSizeOnMove() {
final long byteSize = randomIntBetween(0, Integer.MAX_VALUE);
final AllocationService allocation = createAllocationService(Settings.EMPTY, () -> new ClusterInfo() {
@Override
public Long getShardSize(ShardRouting shardRouting) {
if (shardRouting.getIndexName().equals("test") && shardRouting.shardId().getId() == 0) {
return byteSize;
}
return null;
}
});
final ClusterInfo clusterInfo = createClusterInfoWith(new ShardId("test", "_na_", 0), byteSize);
final AllocationService allocation = createAllocationService(Settings.EMPTY, () -> clusterInfo);
logger.info("creating an index with 1 shard, no replica");
Metadata metadata = Metadata.builder()
.put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0))
.build();
RoutingTable routingTable = RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY)
.addAsNew(metadata.index("test"))
var indexMetadata = IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0).build();
ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
.metadata(Metadata.builder().put(indexMetadata, false))
.routingTable(RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY).addAsNew(indexMetadata))
.nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
.build();
ClusterState clusterState = ClusterState.builder(
org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)
).metadata(metadata).routingTable(routingTable).build();

logger.info("adding two nodes and performing rerouting");
clusterState = ClusterState.builder(clusterState)
.nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
.build();
clusterState = allocation.reroute(clusterState, "reroute", ActionListener.noop());

logger.info("start primary shard");
clusterState = startInitializingShardsAndReroute(allocation, clusterState);

logger.info("move the shard");
String existingNodeId = clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId();
String toNodeId;
if ("node1".equals(existingNodeId)) {
toNodeId = "node2";
} else {
toNodeId = "node1";
}
String toNodeId = "node1".equals(existingNodeId) ? "node2" : "node1";

AllocationService.CommandsResult commandsResult = allocation.reroute(
clusterState,
new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)),
Expand All @@ -152,4 +119,18 @@ public Long getShardSize(ShardRouting shardRouting) {
assertThat(clusterState.getRoutingNodes().node(toNodeId).iterator().next().state(), equalTo(ShardRoutingState.STARTED));
assertEquals(clusterState.getRoutingNodes().node(toNodeId).iterator().next().getExpectedShardSize(), -1);
}

private static ClusterInfo createClusterInfoWith(ShardId shardId, long size) {
return new ClusterInfo(
Map.of(),
Map.of(),
Map.ofEntries(
Map.entry(ClusterInfo.shardIdentifierFromRouting(shardId, true), size),
Map.entry(ClusterInfo.shardIdentifierFromRouting(shardId, false), size)
),
Map.of(),
Map.of(),
Map.of()
);
}
}

0 comments on commit acece61

Please sign in to comment.