Skip to content

Commit

Permalink
HBASE-23735 [Flakey Tests] TestClusterRestartFailover & TestClusterRe…
Browse files Browse the repository at this point in the history
…startFailoverSplitWithoutZk
  • Loading branch information
saintstack committed Jan 25, 2020
1 parent 753cc99 commit 1690414
Showing 1 changed file with 21 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -40,8 +40,10 @@
import org.apache.hadoop.hbase.master.assignment.ServerStateNode;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.zookeeper.KeeperException;
import org.junit.ClassRule;
import org.junit.Test;
Expand All @@ -58,7 +60,7 @@ public class TestClusterRestartFailover extends AbstractTestRestartCluster {

private static final Logger LOG = LoggerFactory.getLogger(TestClusterRestartFailover.class);

private static CountDownLatch SCP_LATCH;
private volatile static CountDownLatch SCP_LATCH;
private static ServerName SERVER_FOR_TEST;

@Override
Expand All @@ -79,7 +81,16 @@ public void test() throws Exception {
setupCluster();
setupTable();

SERVER_FOR_TEST = UTIL.getHBaseCluster().getRegionServer(0).getServerName();
// Find a server that does not have hbase:namespace on it. This test holds up SCPs. If it
// holds up the SCP for the server w/ hbase:namespace, the Master initialization will be
// held up because that table is not online, and the test fails.
for (JVMClusterUtil.RegionServerThread rst:
UTIL.getHBaseCluster().getLiveRegionServerThreads()) {
HRegionServer rs = rst.getRegionServer();
if (rs.getRegions(TableName.NAMESPACE_TABLE_NAME).isEmpty()) {
SERVER_FOR_TEST = rs.getServerName();
}
}
UTIL.waitFor(60000, () -> getServerStateNode(SERVER_FOR_TEST) != null);
ServerStateNode serverNode = getServerStateNode(SERVER_FOR_TEST);
assertNotNull(serverNode);
Expand All @@ -98,8 +109,9 @@ public void test() throws Exception {
LOG.info("Restarting cluster");
UTIL.restartHBaseCluster(StartMiniClusterOption.builder().masterClass(HMasterForTest.class)
.numMasters(1).numRegionServers(3).rsPorts(ports).build());
LOG.info("Started cluster");
UTIL.waitFor(60000, () -> UTIL.getHBaseCluster().getMaster().isInitialized());

LOG.info("Started cluster master, waiting for {}", SERVER_FOR_TEST);
UTIL.waitFor(60000, () -> getServerStateNode(SERVER_FOR_TEST) != null);
serverNode = getServerStateNode(SERVER_FOR_TEST);
assertFalse("serverNode should not be ONLINE during SCP processing",
Expand All @@ -113,6 +125,7 @@ public void test() throws Exception {
Procedure.NO_PROC_ID);

// Wait for the SCP to finish
LOG.info("Waiting on latch");
SCP_LATCH.countDown();
UTIL.waitFor(60000, () -> procedure.get().isFinished());

Expand All @@ -126,13 +139,17 @@ public void test() throws Exception {
}

/**
 * Brings up the mini cluster used by this test and blocks until it is quiescent.
 *
 * <p>Starts a cluster with one master of class {@code HMasterForTest} and three region
 * servers, waits for the master to report itself initialized, and then waits until the
 * master's procedure list contains no {@code ServerCrashProcedure}. Progress is logged
 * after each stage.
 *
 * @throws Exception if starting the cluster or either wait fails
 */
private void setupCluster() throws Exception {
  // Same bound for both waits below (presumably milliseconds -- confirm against waitFor).
  final long waitTimeout = 60000;

  LOG.info("Setup cluster");
  StartMiniClusterOption clusterOption = StartMiniClusterOption.builder()
    .masterClass(HMasterForTest.class)
    .numMasters(1)
    .numRegionServers(3)
    .build();
  UTIL.startMiniCluster(clusterOption);
  LOG.info("Cluster is up");

  UTIL.waitFor(waitTimeout,
    () -> UTIL.getMiniHBaseCluster().getMaster().isInitialized());
  LOG.info("Master is up");

  // Do not return until every ServerCrashProcedure has left the master's procedure list.
  UTIL.waitFor(waitTimeout,
    () -> UTIL.getHBaseCluster().getMaster().getProcedures().stream()
      .noneMatch(ServerCrashProcedure.class::isInstance));
  LOG.info("No SCPs");
}

private void setupTable() throws Exception {
Expand Down

0 comments on commit 1690414

Please sign in to comment.