Skip to content

Commit

Permalink
HBASE-27104 Add a tool command list_unknownservers (#4714)
Browse files Browse the repository at this point in the history
Signed-off-by: Duo Zhang <zhangduo@apache.org>
(cherry picked from commit 1bd0b58)
  • Loading branch information
2005hithlj authored and Apache9 committed Aug 22, 2022
1 parent 3984891 commit 78f587f
Show file tree
Hide file tree
Showing 13 changed files with 245 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ public interface ClusterMetrics {
/** Returns the names of region servers on the dead list */
List<ServerName> getDeadServerNames();

/** Returns the names of region servers on the unknown list */
List<ServerName> getUnknownServerNames();

/** Returns the metrics of the region servers on the live list, keyed by server name */
Map<ServerName, ServerMetrics> getLiveServerMetrics();

Expand Down Expand Up @@ -176,6 +179,10 @@ enum Option {
* metrics about dead region servers
*/
DEAD_SERVERS,
/**
* metrics about unknown region servers
*/
UNKNOWN_SERVERS,
/**
* metrics about master name
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public static ClusterStatusProtos.ClusterStatus toClusterStatus(ClusterMetrics m
.collect(Collectors.toList()))
.addAllDeadServers(metrics.getDeadServerNames().stream().map(ProtobufUtil::toServerName)
.collect(Collectors.toList()))
.addAllUnknownServers(metrics.getUnknownServerNames().stream()
.map(ProtobufUtil::toServerName).collect(Collectors.toList()))
.addAllLiveServers(metrics.getLiveServerMetrics().entrySet().stream()
.map(s -> ClusterStatusProtos.LiveServerInfo.newBuilder()
.setServer(ProtobufUtil.toServerName(s.getKey()))
Expand Down Expand Up @@ -98,6 +100,8 @@ public static ClusterMetrics toClusterMetrics(ClusterStatusProtos.ClusterStatus
ServerMetricsBuilder::toServerMetrics)))
.setDeadServerNames(proto.getDeadServersList().stream().map(ProtobufUtil::toServerName)
.collect(Collectors.toList()))
.setUnknownServerNames(proto.getUnknownServersList().stream().map(ProtobufUtil::toServerName)
.collect(Collectors.toList()))
.setBackerMasterNames(proto.getBackupMastersList().stream().map(ProtobufUtil::toServerName)
.collect(Collectors.toList()))
.setRegionsInTransition(proto.getRegionsInTransitionList().stream()
Expand Down Expand Up @@ -147,6 +151,8 @@ public static ClusterMetrics.Option toOption(ClusterStatusProtos.Option option)
return ClusterMetrics.Option.LIVE_SERVERS;
case DEAD_SERVERS:
return ClusterMetrics.Option.DEAD_SERVERS;
case UNKNOWN_SERVERS:
return ClusterMetrics.Option.UNKNOWN_SERVERS;
case REGIONS_IN_TRANSITION:
return ClusterMetrics.Option.REGIONS_IN_TRANSITION;
case CLUSTER_ID:
Expand Down Expand Up @@ -186,6 +192,8 @@ public static ClusterStatusProtos.Option toOption(ClusterMetrics.Option option)
return ClusterStatusProtos.Option.LIVE_SERVERS;
case DEAD_SERVERS:
return ClusterStatusProtos.Option.DEAD_SERVERS;
case UNKNOWN_SERVERS:
return ClusterStatusProtos.Option.UNKNOWN_SERVERS;
case REGIONS_IN_TRANSITION:
return ClusterStatusProtos.Option.REGIONS_IN_TRANSITION;
case CLUSTER_ID:
Expand Down Expand Up @@ -238,6 +246,7 @@ public static ClusterMetricsBuilder newBuilder() {
@Nullable
private String hbaseVersion;
private List<ServerName> deadServerNames = Collections.emptyList();
private List<ServerName> unknownServerNames = Collections.emptyList();
private Map<ServerName, ServerMetrics> liveServerMetrics = new TreeMap<>();
@Nullable
private ServerName masterName;
Expand Down Expand Up @@ -267,6 +276,11 @@ public ClusterMetricsBuilder setDeadServerNames(List<ServerName> value) {
return this;
}

/**
 * Sets the list of unknown region servers carried by the metrics this builder produces. The
 * list is stored by reference; no copy is made here. A {@code null} value makes
 * {@link #build()} fail, because the implementation null-checks this list.
 * @param value names of the unknown region servers
 * @return this builder, to allow call chaining
 */
public ClusterMetricsBuilder setUnknownServerNames(List<ServerName> value) {
this.unknownServerNames = value;
return this;
}

public ClusterMetricsBuilder setLiveServerMetrics(Map<ServerName, ServerMetrics> value) {
liveServerMetrics.putAll(value);
return this;
Expand Down Expand Up @@ -324,16 +338,18 @@ public ClusterMetricsBuilder setMasterTasks(List<ServerTask> masterTasks) {
}

public ClusterMetrics build() {
return new ClusterMetricsImpl(hbaseVersion, deadServerNames, liveServerMetrics, masterName,
backupMasterNames, regionsInTransition, clusterId, masterCoprocessorNames, balancerOn,
masterInfoPort, serversName, tableRegionStatesCount, masterTasks);
return new ClusterMetricsImpl(hbaseVersion, deadServerNames, unknownServerNames,
liveServerMetrics, masterName, backupMasterNames, regionsInTransition, clusterId,
masterCoprocessorNames, balancerOn, masterInfoPort, serversName, tableRegionStatesCount,
masterTasks);
}

private static class ClusterMetricsImpl implements ClusterMetrics {
@Nullable
private final String hbaseVersion;
private final List<ServerName> deadServerNames;
private final Map<ServerName, ServerMetrics> liveServerMetrics;
private final List<ServerName> unknownServerNames;
@Nullable
private final ServerName masterName;
private final List<ServerName> backupMasterNames;
Expand All @@ -349,13 +365,14 @@ private static class ClusterMetricsImpl implements ClusterMetrics {
private final List<ServerTask> masterTasks;

ClusterMetricsImpl(String hbaseVersion, List<ServerName> deadServerNames,
Map<ServerName, ServerMetrics> liveServerMetrics, ServerName masterName,
List<ServerName> backupMasterNames, List<RegionState> regionsInTransition, String clusterId,
List<String> masterCoprocessorNames, Boolean balancerOn, int masterInfoPort,
List<ServerName> serversName, Map<TableName, RegionStatesCount> tableRegionStatesCount,
List<ServerTask> masterTasks) {
List<ServerName> unknownServerNames, Map<ServerName, ServerMetrics> liveServerMetrics,
ServerName masterName, List<ServerName> backupMasterNames,
List<RegionState> regionsInTransition, String clusterId, List<String> masterCoprocessorNames,
Boolean balancerOn, int masterInfoPort, List<ServerName> serversName,
Map<TableName, RegionStatesCount> tableRegionStatesCount, List<ServerTask> masterTasks) {
this.hbaseVersion = hbaseVersion;
this.deadServerNames = Preconditions.checkNotNull(deadServerNames);
this.unknownServerNames = Preconditions.checkNotNull(unknownServerNames);
this.liveServerMetrics = Preconditions.checkNotNull(liveServerMetrics);
this.masterName = masterName;
this.backupMasterNames = Preconditions.checkNotNull(backupMasterNames);
Expand All @@ -379,6 +396,11 @@ public List<ServerName> getDeadServerNames() {
return Collections.unmodifiableList(deadServerNames);
}

/**
 * Returns a read-only view of the unknown server names held by this instance; attempts to
 * modify the returned list are rejected by the unmodifiable wrapper.
 */
@Override
public List<ServerName> getUnknownServerNames() {
return Collections.unmodifiableList(unknownServerNames);
}

@Override
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
return Collections.unmodifiableMap(liveServerMetrics);
Expand Down Expand Up @@ -469,6 +491,14 @@ public String toString() {
}
}

int unknownServerSize = getUnknownServerNames().size();
sb.append("\nNumber of unknown region servers: " + unknownServerSize);
if (unknownServerSize > 0) {
for (ServerName serverName : getUnknownServerNames()) {
sb.append("\n " + serverName);
}
}

sb.append("\nAverage load: " + getAverageLoad());
sb.append("\nNumber of requests: " + getRequestCount());
sb.append("\nNumber of regions: " + getRegionCount());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ public List<ServerName> getDeadServerNames() {
return metrics.getDeadServerNames();
}

/** Returns the unknown server names by delegating to the wrapped {@code metrics} instance. */
@Override
public List<ServerName> getUnknownServerNames() {
return metrics.getUnknownServerNames();
}

@Override
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
return metrics.getLiveServerMetrics();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3024,6 +3024,14 @@ default List<ServerName> listDeadServers() throws IOException {
return getClusterMetrics(EnumSet.of(Option.DEAD_SERVERS)).getDeadServerNames();
}

/**
 * List unknown region servers, as reported in the cluster metrics. Only the
 * {@link Option#UNKNOWN_SERVERS} section of the metrics is requested.
 * @return List of unknown region servers.
 * @throws IOException if retrieving the cluster metrics fails
 */
default List<ServerName> listUnknownServers() throws IOException {
  EnumSet<Option> unknownServersOnly = EnumSet.of(Option.UNKNOWN_SERVERS);
  return getClusterMetrics(unknownServersOnly).getUnknownServerNames();
}

/**
* Clear dead region servers from master.
* @param servers list of dead region servers.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1426,6 +1426,14 @@ default CompletableFuture<List<ServerName>> listDeadServers() {
.thenApply(ClusterMetrics::getDeadServerNames);
}

/**
 * List all the unknown region servers. Only the {@link Option#UNKNOWN_SERVERS} section of the
 * cluster metrics is requested.
 * @return a {@link CompletableFuture} completed with the unknown server names taken from the
 *         cluster metrics
 */
default CompletableFuture<List<ServerName>> listUnknownServers() {
  EnumSet<Option> unknownServersOnly = EnumSet.of(Option.UNKNOWN_SERVERS);
  return this.getClusterMetrics(unknownServersOnly)
    .thenApply(metrics -> metrics.getUnknownServerNames());
}

/**
* Clear dead region servers from master.
* @param servers list of dead region servers.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,11 @@ public CompletableFuture<List<ServerName>> listDeadServers() {
return wrap(rawAdmin.listDeadServers());
}

/**
 * Lists the unknown region servers by delegating to {@code rawAdmin} and passing the resulting
 * future through {@code wrap}.
 */
@Override
public CompletableFuture<List<ServerName>> listUnknownServers() {
return wrap(rawAdmin.listUnknownServers());
}

@Override
public CompletableFuture<List<ServerName>> clearDeadServers(List<ServerName> servers) {
return wrap(rawAdmin.clearDeadServers(servers));
Expand Down
2 changes: 2 additions & 0 deletions hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ message ClusterStatus {
repeated ServerName servers_name = 11;
repeated TableRegionStatesCount table_region_states_count = 12;
repeated ServerTask master_tasks = 13;
repeated ServerName unknown_servers = 14;
}

enum Option {
Expand All @@ -355,4 +356,5 @@ enum Option {
SERVERS_NAME = 10;
TABLE_TO_REGIONS_COUNT = 11;
TASKS = 12;
UNKNOWN_SERVERS = 13;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2846,6 +2846,12 @@ public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> option
}
break;
}
case UNKNOWN_SERVERS: {
if (serverManager != null) {
builder.setUnknownServerNames(getUnknownServers());
}
break;
}
case MASTER_COPROCESSORS: {
if (cpHost != null) {
builder.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors()));
Expand Down Expand Up @@ -2905,6 +2911,17 @@ public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> option
return builder.build();
}

/**
 * Collects the servers referenced by the current region states that the server manager reports
 * as unknown.
 * @return the distinct unknown server names, or an empty list when no server manager is
 *         available; never {@code null}
 */
private List<ServerName> getUnknownServers() {
  if (serverManager == null) {
    // Consumers of this list null-check it and stream over it, so "nothing to report" must be
    // an empty list rather than null.
    return java.util.Collections.emptyList();
  }
  return getAssignmentManager().getRegionStates().getRegionStates().stream()
    .map(RegionState::getServerName)
    // getServerName() may be null (the original code guarded against it); skip those entries.
    .filter(sn -> sn != null)
    // Many regions share a server; de-duplicate before asking the server manager.
    .distinct()
    .filter(sn -> serverManager.isServerUnknown(sn))
    .collect(Collectors.toList());
}

private Map<ServerName, ServerMetrics> getOnlineServers() {
if (serverManager != null) {
final Map<ServerName, ServerMetrics> map = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,11 @@ public List<ServerName> getDeadServerNames() {
return null;
}

/** Stub implementation: always returns {@code null}. */
@Override
public List<ServerName> getUnknownServerNames() {
return null;
}

@Override
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
Map<ServerName, ServerMetrics> liveServerMetrics = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Verifies {@link Admin#listUnknownServers()} against a mini cluster whose master uses a
 * {@link ServerManager} that answers {@code isServerUnknown} from a test-controlled flag.
 */
@Category({ MasterTests.class, MediumTests.class })
public class TestUnknownServers {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestUnknownServers.class);

  private static HBaseTestingUtility UTIL;
  private static Admin ADMIN;
  private static final int SLAVES = 2;
  // Written by the test method and read inside ServerManagerForTest, presumably on a master
  // thread; volatile guarantees the flipped value is visible there.
  private static volatile boolean IS_UNKNOWN_SERVER = true;

  /** Starts a mini cluster with {@link #SLAVES} region servers and the test master. */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    UTIL = new HBaseTestingUtility();
    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL,
      TestUnknownServers.HMasterForTest.class, HMaster.class);
    UTIL.startMiniCluster(SLAVES);
    ADMIN = UTIL.getAdmin();
  }

  /**
   * While the flag is set every server is reported as unknown, so the list has one entry per
   * region server; once the flag is cleared the list must be empty.
   */
  @Test
  public void testListUnknownServers() throws Exception {
    // JUnit's contract is assertEquals(expected, actual).
    Assert.assertEquals(SLAVES, ADMIN.listUnknownServers().size());
    IS_UNKNOWN_SERVER = false;
    Assert.assertEquals(0, ADMIN.listUnknownServers().size());
  }

  /** Closes the admin handle and shuts the mini cluster down, if they were created. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    if (ADMIN != null) {
      ADMIN.close();
    }
    if (UTIL != null) {
      UTIL.shutdownMiniCluster();
    }
  }

  /** Master that installs {@link ServerManagerForTest} as its server manager. */
  public static final class HMasterForTest extends HMaster {

    public HMasterForTest(Configuration conf) throws IOException {
      super(conf);
    }

    @Override
    protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
      throws IOException {
      setupClusterConnection();
      return new TestUnknownServers.ServerManagerForTest(master, storage);
    }
  }

  /** Server manager whose "unknown" verdict is driven solely by the test's flag. */
  private static final class ServerManagerForTest extends ServerManager {

    public ServerManagerForTest(MasterServices master, RegionServerList storage) {
      super(master, storage);
    }

    @Override
    public boolean isServerUnknown(ServerName serverName) {
      return IS_UNKNOWN_SERVER;
    }
  }
}
8 changes: 7 additions & 1 deletion hbase-shell/src/main/ruby/hbase/admin.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1537,7 +1537,7 @@ def clear_compaction_queues(server_name, queue_name = nil)
end

#----------------------------------------------------------------------------------------------
# clear dead region servers
# list dead region servers
def list_deadservers
@admin.listDeadServers.to_a
end
Expand All @@ -1558,6 +1558,12 @@ def clear_deadservers(dead_servers)
@admin.clearDeadServers(servers).to_a
end

#----------------------------------------------------------------------------------------------
# list unknown region servers
# Calls listUnknownServers on the wrapped admin object and converts the result to a Ruby array.
def list_unknownservers
@admin.listUnknownServers.to_a
end

#----------------------------------------------------------------------------------------------
# List live region servers
def list_liveservers
Expand Down
1 change: 1 addition & 0 deletions hbase-shell/src/main/ruby/shell.rb
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ def self.exception_handler(hide_traceback)
clear_compaction_queues
list_deadservers
list_liveservers
list_unknownservers
clear_deadservers
clear_block_cache
stop_master
Expand Down
Loading

0 comments on commit 78f587f

Please sign in to comment.