Skip to content

Commit

Permalink
HBASE-17707 New More Accurate Table Skew cost function/generator
Browse files Browse the repository at this point in the history
This reverts commit 3b914df.

Signed-off-by: tedyu <yuzhihong@gmail.com>
  • Loading branch information
kahliloppenheimer authored and tedyu committed Mar 7, 2017
1 parent dfc6cf3 commit 93b0cde
Show file tree
Hide file tree
Showing 4 changed files with 547 additions and 4 deletions.
Expand Up @@ -53,6 +53,7 @@
import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
import org.apache.hadoop.hbase.security.access.AccessControlLists; import org.apache.hadoop.hbase.security.access.AccessControlLists;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;


import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
Expand Down Expand Up @@ -140,6 +141,7 @@ protected static class Cluster {
int[] initialRegionIndexToServerIndex; //regionIndex -> serverIndex (initial cluster state) int[] initialRegionIndexToServerIndex; //regionIndex -> serverIndex (initial cluster state)
int[] regionIndexToTableIndex; //regionIndex -> tableIndex int[] regionIndexToTableIndex; //regionIndex -> tableIndex
int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
int[] numRegionsPerTable; // tableIndex -> number of regions that table has
int[] numMaxRegionsPerTable; //tableIndex -> max number of regions in a single RS int[] numMaxRegionsPerTable; //tableIndex -> max number of regions in a single RS
int[] regionIndexToPrimaryIndex; //regionIndex -> regionIndex of the primary int[] regionIndexToPrimaryIndex; //regionIndex -> regionIndex of the primary
boolean hasRegionReplicas = false; //whether there is regions with replicas boolean hasRegionReplicas = false; //whether there is regions with replicas
Expand Down Expand Up @@ -330,6 +332,7 @@ protected Cluster(


numTables = tables.size(); numTables = tables.size();
numRegionsPerServerPerTable = new int[numServers][numTables]; numRegionsPerServerPerTable = new int[numServers][numTables];
numRegionsPerTable = new int[numTables];


for (int i = 0; i < numServers; i++) { for (int i = 0; i < numServers; i++) {
for (int j = 0; j < numTables; j++) { for (int j = 0; j < numTables; j++) {
Expand All @@ -339,6 +342,7 @@ protected Cluster(


for (int i=0; i < regionIndexToServerIndex.length; i++) { for (int i=0; i < regionIndexToServerIndex.length; i++) {
if (regionIndexToServerIndex[i] >= 0) { if (regionIndexToServerIndex[i] >= 0) {
numRegionsPerTable[regionIndexToTableIndex[i]]++;
numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++; numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
} }
} }
Expand Down Expand Up @@ -470,6 +474,76 @@ private void registerRegion(HRegionInfo region, int regionIndex,
} }
} }


/**
 * Computes the smallest number of regions of the given table that any single server would
 * hold if the table's regions were dealt out round-robin over every server in the cluster.
 *
 * @param table index of the table in {@code numRegionsPerTable}
 * @return the table's region count divided by {@code numServers}, rounded down (integer
 *         division, so {@code numServers} must be non-zero)
 */
public int minRegionsIfEvenlyDistributed(int table) {
  final int regionsInTable = numRegionsPerTable[table];
  return regionsInTable / numServers;
}

/**
 * Computes the largest number of regions of the given table that any single server would
 * hold if the table's regions were dealt out round-robin over every server in the cluster.
 *
 * @param table index of the table in {@code numRegionsPerTable}
 * @return the even-distribution minimum when the table's region count is an exact multiple of
 *         {@code numServers}; otherwise that minimum plus one
 */
public int maxRegionsIfEvenlyDistributed(int table) {
  final int floorShare = minRegionsIfEvenlyDistributed(table);
  final boolean dividesEvenly = numRegionsPerTable[table] % numServers == 0;
  if (dividesEvenly) {
    // No remainder: every server holds the same count, so max equals min.
    return floorShare;
  }
  return floorShare + 1;
}

/**
 * Computes how many servers would hold the per-server maximum number of regions of the given
 * table under a perfectly even distribution.
 *
 * @param table index of the table in {@code numRegionsPerTable}
 * @return the remainder of the table's region count divided by {@code numServers}; when that
 *         remainder is zero the even-distribution min and max coincide, so every server holds
 *         the max and {@code numServers} is returned instead
 */
public int numServersWithMaxRegionsIfEvenlyDistributed(int table) {
  final int leftover = numRegionsPerTable[table] % numServers;
  return leftover != 0 ? leftover : numServers;
}

/**
 * Checks whether any server's region count falls outside the range it would occupy if all
 * regions were spread evenly over the cluster. The lower bound is {@code numRegions /
 * numServers}; the upper bound is the same value, plus one when the division leaves a
 * remainder.
 *
 * @return {@code true} as soon as one server holds more than the upper bound or fewer than the
 *         lower bound; {@code false} if every server is within bounds
 */
public boolean hasUnevenRegionDistribution() {
  final int floorLoad = numRegions / numServers;
  final int ceilLoad = (numRegions % numServers == 0) ? floorLoad : floorLoad + 1;
  for (int server = 0; server < numServers; server++) {
    final int load = getNumRegions(server);
    final boolean withinBounds = floorLoad <= load && load <= ceilLoad;
    if (!withinBounds) {
      return true;
    }
  }
  return false;
}

/**
 * Scans every server once and picks out the one holding the fewest regions and the one holding
 * the most. Comparisons are strict, so among servers with equal counts the lowest index wins.
 *
 * @return a pair whose first element is the index of the least loaded server and whose second
 *         element is the index of the most loaded server
 */
public Pair<Integer, Integer> findLeastAndMostLoadedServers() {
  int leastLoaded = 0;
  int mostLoaded = 0;
  int fewest = getNumRegions(leastLoaded);
  int most = fewest;
  for (int candidate = 1; candidate < numServers; candidate++) {
    final int load = getNumRegions(candidate);
    if (load < fewest) {
      leastLoaded = candidate;
      fewest = load;
    } else if (load > most) {
      // fewest <= most always holds, so a load below fewest can never also exceed most;
      // chaining the checks therefore gives the same result as testing them independently.
      mostLoaded = candidate;
      most = load;
    }
  }
  return Pair.newPair(leastLoaded, mostLoaded);
}

/** An action to move or swap a region */ /** An action to move or swap a region */
public static class Action { public static class Action {
public static enum Type { public static enum Type {
Expand Down

0 comments on commit 93b0cde

Please sign in to comment.