From 0822b6d1677406f4b37ea17e13c0407567656921 Mon Sep 17 00:00:00 2001 From: Sean Busbey Date: Wed, 8 Apr 2020 14:03:33 -0500 Subject: [PATCH] HBASE-24138 log more details about balancer decisions for StochasticLoadBalancer (#1455) - at DEBUG log messages about RegionCountSkewCostFunction region/server totals - at DEBUG log messages about the decision to balance or not with total costs - at TRACE log messages about region count on each server RegionCountSkewCostFunction sees - at TRACE log message with the individual cost functions used in the decision to balance or not Signed-off-by: Viraj Jasani (cherry picked from commit 2d78a286b682b34e8a718c521e265d58536f1e5e) --- .../hbase/master/balancer/ServerAndLoad.java | 5 +++ .../balancer/StochasticLoadBalancer.java | 36 ++++++++++++------- .../master/balancer/BalancerTestBase.java | 13 ++++--- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ServerAndLoad.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ServerAndLoad.java index 3675bd3d07f2..0698d128eb30 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ServerAndLoad.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ServerAndLoad.java @@ -65,4 +65,9 @@ public boolean equals(Object o) { } return false; } + + @Override + public String toString() { + return "server=" + sn + " , load=" + load; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index a2f393f493a7..66e68d582612 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -330,28 +330,29 @@ protected boolean needsBalance(TableName tableName, Cluster cluster) { for (CostFunction c : costFunctions) { float multiplier = c.getMultiplier(); if (multiplier <= 0) { + LOG.trace("{} not needed because multiplier is <= 0", c.getClass().getSimpleName()); continue; } if (!c.isNeeded()) { - LOG.debug("{} not needed", c.getClass().getSimpleName()); + LOG.trace("{} not needed", c.getClass().getSimpleName()); continue; } sumMultiplier += multiplier; total += c.cost() * multiplier; } - if (total <= 0 || sumMultiplier <= 0 - || (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance)) { + boolean balanced = total <= 0 || sumMultiplier <= 0 || + (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance); + if (LOG.isDebugEnabled()) { + LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}", + balanced ? "Skipping load balancing because balanced" : "We need to load balance", + isByTable ? String.format("table (%s)", tableName) : "cluster", + total, sumMultiplier, minCostNeedBalance); if (LOG.isTraceEnabled()) { - final String loadBalanceTarget = - isByTable ? String.format("table (%s)", tableName) : "cluster"; - LOG.trace("Skipping load balancing because the {} is balanced. Total cost: {}, " - + "Sum multiplier: {}, Minimum cost needed for balance: {}", loadBalanceTarget, total, - sumMultiplier, minCostNeedBalance); + LOG.trace("Balance decision detailed function costs={}", functionCost()); } - return false; } - return true; + return !balanced; } @VisibleForTesting @@ -1188,16 +1189,27 @@ static class RegionCountSkewCostFunction extends CostFunction { this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST)); } + @Override + void init(Cluster cluster) { + super.init(cluster); + LOG.debug("{} sees a total of {} servers and {} regions.", getClass().getSimpleName(), + cluster.numServers, cluster.numRegions); + if (LOG.isTraceEnabled()) { + for (int i =0; i < cluster.numServers; i++) { + LOG.trace("{} sees server '{}' has {} regions", getClass().getSimpleName(), + cluster.servers[i], cluster.regionsPerServer[i].length); + } + } + } + @Override protected double cost() { if (stats == null || stats.length != cluster.numServers) { stats = new double[cluster.numServers]; } - for (int i =0; i < cluster.numServers; i++) { stats[i] = cluster.regionsPerServer[i].length; } - return costFromArray(stats); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java index 623105968c7c..685088fed2c9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java @@ -201,9 +201,11 @@ public void assertClusterAsBalanced(List servers) { int max = numRegions % numServers == 0 ? min : min + 1; for (ServerAndLoad server : servers) { - assertTrue(server.getLoad() >= 0); - assertTrue(server.getLoad() <= max); - assertTrue(server.getLoad() >= min); + assertTrue("All servers should have a positive load. " + server, server.getLoad() >= 0); + assertTrue("All servers should have load no more than " + max + ". " + server, + server.getLoad() <= max); + assertTrue("All servers should have load no less than " + min + ". " + server, + server.getLoad() >= min); } } @@ -561,7 +563,7 @@ protected void testWithCluster(Map> serverMap, Map>> LoadOfAllTable = (Map) mockClusterServersWithTables(serverMap); List plans = loadBalancer.balanceCluster(LoadOfAllTable); - assertNotNull(plans); + assertNotNull("Initial cluster balance should produce plans.", plans); // Check to see that this actually got to a stable place. if (assertFullyBalanced || assertFullyBalancedForReplicas) { @@ -575,7 +577,8 @@ protected void testWithCluster(Map> serverMap, assertClusterAsBalanced(balancedCluster); LoadOfAllTable = (Map) mockClusterServersWithTables(serverMap); List secondPlans = loadBalancer.balanceCluster(LoadOfAllTable); - assertNull(secondPlans); + assertNull("Given a requirement to be fully balanced, second attempt at plans should " + + "produce none.", secondPlans); } if (assertFullyBalancedForReplicas) {