HBASE-23808 [Flakey Test] TestMasterShutdown#testMasterShutdownBefore… (#1141)

Signed-off-by: Bharath Vissapragada <bharathv@apache.org>
Signed-off-by: stack <stack@apache.org>
ndimiduk committed Feb 14, 2020
1 parent 5d77ff6 commit 0011d96
Showing 2 changed files with 173 additions and 93 deletions.

HMaster.java
@@ -495,8 +495,8 @@ public void doGet(HttpServletRequest request,
* </ol>
* <p>
* Remaining steps of initialization occur in
* #finishActiveMasterInitialization(MonitoredTask) after
* the master becomes the active one.
* {@link #finishActiveMasterInitialization(MonitoredTask)} after the master becomes the
* active one.
*/
public HMaster(final Configuration conf)
throws IOException, KeeperException {
@@ -2770,6 +2770,7 @@ public void shutdown() throws IOException {
if (cpHost != null) {
cpHost.preShutdown();
}

// Tell the servermanager cluster shutdown has been called. This makes it so when Master is
// last running server, it'll stop itself. Next, we broadcast the cluster shutdown by setting
// the cluster status as down. RegionServers will notice this change in state and will start
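(For orientation only, not part of this commit's diff: the shutdown() method patched above is what services a client-side Admin.shutdown() call. A minimal, hedged sketch of that client path, using the standard HBase client API with a hypothetical configuration, might look like the following.)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class ClusterShutdownClientSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical client configuration; the tests below obtain theirs from
    // HBaseTestingUtility.getConfiguration().
    final Configuration conf = HBaseConfiguration.create();
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Admin admin = connection.getAdmin()) {
      // RPC to the active master, handled by HMaster.shutdown() above: the
      // preShutdown coprocessor hook runs, then cluster shutdown is broadcast
      // so region servers (and finally the master itself) stop.
      admin.shutdown();
    }
  }
}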

TestMasterShutdown.java
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -18,10 +18,14 @@
package org.apache.hadoop.hbase.master;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.time.Duration;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -30,26 +34,40 @@
import org.apache.hadoop.hbase.LocalHBaseCluster;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.StartMiniClusterOption;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.AsyncConnection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.zookeeper.ReadOnlyZKClient;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;

@Category({MasterTests.class, LargeTests.class})
public class TestMasterShutdown {
private static final Logger LOG = LoggerFactory.getLogger(TestMasterShutdown.class);

@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestMasterShutdown.class);

private static final Logger LOG = LoggerFactory.getLogger(TestMasterShutdown.class);
private HBaseTestingUtility htu;

@Before
public void shutdownCluster() throws IOException {
if (htu != null) {
// an extra check in case the test cluster was not terminated after HBaseClassTestRule's
// Timeout interrupted the test thread.
LOG.warn("found non-null TestingUtility -- previous test did not terminate cleanly.");
htu.shutdownMiniCluster();
}
}

/**
* Simple test of shutdown.
@@ -59,103 +77,164 @@ public class TestMasterShutdown {
*/
@Test
public void testMasterShutdown() throws Exception {
final int NUM_MASTERS = 3;
final int NUM_RS = 3;

// Create config to use for this cluster
Configuration conf = HBaseConfiguration.create();

// Start the cluster
HBaseTestingUtility htu = new HBaseTestingUtility(conf);
StartMiniClusterOption option = StartMiniClusterOption.builder()
.numMasters(NUM_MASTERS).numRegionServers(NUM_RS).numDataNodes(NUM_RS).build();
htu.startMiniCluster(option);
MiniHBaseCluster cluster = htu.getHBaseCluster();

// get all the master threads
List<MasterThread> masterThreads = cluster.getMasterThreads();

// wait for each to come online
for (MasterThread mt : masterThreads) {
assertTrue(mt.isAlive());
}

// find the active master
HMaster active = null;
for (int i = 0; i < masterThreads.size(); i++) {
if (masterThreads.get(i).getMaster().isActiveMaster()) {
active = masterThreads.get(i).getMaster();
break;
try {
htu = new HBaseTestingUtility(conf);
StartMiniClusterOption option = StartMiniClusterOption.builder()
.numMasters(3)
.numRegionServers(3)
.numDataNodes(3)
.build();
final MiniHBaseCluster cluster = htu.startMiniCluster(option);

// wait for all master thread to spawn and start their run loop.
final long thirtySeconds = TimeUnit.SECONDS.toMillis(30);
final long oneSecond = TimeUnit.SECONDS.toMillis(1);
assertNotEquals(-1, htu.waitFor(thirtySeconds, oneSecond, () -> {
final List<MasterThread> masterThreads = cluster.getMasterThreads();
return masterThreads != null
&& masterThreads.size() >= 3
&& masterThreads.stream().allMatch(Thread::isAlive);
}));

// find the active master
final HMaster active = cluster.getMaster();
assertNotNull(active);

// make sure the other two are backup masters
ClusterMetrics status = active.getClusterMetrics();
assertEquals(2, status.getBackupMasterNames().size());

// tell the active master to shutdown the cluster
active.shutdown();
assertNotEquals(-1, htu.waitFor(thirtySeconds, oneSecond,
() -> CollectionUtils.isEmpty(cluster.getLiveMasterThreads())));
assertNotEquals(-1, htu.waitFor(thirtySeconds, oneSecond,
() -> CollectionUtils.isEmpty(cluster.getLiveRegionServerThreads())));
} finally {
if (htu != null) {
htu.shutdownMiniCluster();
htu = null;
}
}
assertNotNull(active);
// make sure the other two are backup masters
ClusterMetrics status = active.getClusterMetrics();
assertEquals(2, status.getBackupMasterNames().size());

// tell the active master to shutdown the cluster
active.shutdown();

for (int i = NUM_MASTERS - 1; i >= 0 ;--i) {
cluster.waitOnMaster(i);
}
// make sure all the masters properly shutdown
assertEquals(0, masterThreads.size());

htu.shutdownMiniCluster();
}

/**
* This test appears to be an intentional race between a thread that issues a shutdown RPC to the
* master, while the master is concurrently realizing it cannot initialize because there are no
* region servers available to it. The expected behavior is that master initialization is
 * interruptible via said shutdown RPC.
*/
@Test
public void testMasterShutdownBeforeStartingAnyRegionServer() throws Exception {
final int NUM_MASTERS = 1;
final int NUM_RS = 0;

// Create config to use for this cluster
Configuration conf = HBaseConfiguration.create();
conf.setInt("hbase.ipc.client.failed.servers.expiry", 200);
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);

// Start the cluster
final HBaseTestingUtility util = new HBaseTestingUtility(conf);
util.startMiniDFSCluster(3);
util.startMiniZKCluster();
util.createRootDir();
final LocalHBaseCluster cluster =
new LocalHBaseCluster(conf, NUM_MASTERS, NUM_RS, HMaster.class,
MiniHBaseCluster.MiniHBaseClusterRegionServer.class);
final int MASTER_INDEX = 0;
final MasterThread master = cluster.getMasters().get(MASTER_INDEX);
master.start();
LOG.info("Called master start on " + master.getName());
Thread shutdownThread = new Thread("Shutdown-Thread") {
@Override
public void run() {
LOG.info("Before call to shutdown master");
try {
try (
Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
try (Admin admin = connection.getAdmin()) {
admin.shutdown();
LocalHBaseCluster hbaseCluster = null;
try {
htu = new HBaseTestingUtility(
createMasterShutdownBeforeStartingAnyRegionServerConfiguration());

// configure a cluster with
final StartMiniClusterOption options = StartMiniClusterOption.builder()
.numDataNodes(1)
.numMasters(1)
.numRegionServers(0)
.masterClass(HMaster.class)
.rsClass(MiniHBaseCluster.MiniHBaseClusterRegionServer.class)
.createRootDir(true)
.build();

// Can't simply `htu.startMiniCluster(options)` because that method waits for the master to
// start completely. However, this test's premise is that a partially started master should
// still respond to a shutdown RPC. So instead, we manage each component lifecycle
// independently.
// I think it's not worth refactoring HTU's helper methods just for this class.
htu.startMiniDFSCluster(options.getNumDataNodes());
htu.startMiniZKCluster(options.getNumZkServers());
htu.createRootDir();
hbaseCluster = new LocalHBaseCluster(htu.getConfiguration(), options.getNumMasters(),
options.getNumRegionServers(), options.getMasterClass(), options.getRsClass());
final MasterThread masterThread = hbaseCluster.getMasters().get(0);

final CompletableFuture<Void> shutdownFuture = CompletableFuture.runAsync(() -> {
// Switching to master registry exacerbated a race in the master bootstrap that can result
// in a lost shutdown command (HBASE-8422, HBASE-23836). The race is essentially because
// the server manager in HMaster is not initialized by the time shutdown() RPC (below) is
// made to the master. The suspected reason as to why it was uncommon before HBASE-18095
// is because the connection creation with ZK registry is so slow that by then the server
// manager is usually init'ed in time for the RPC to be made. For now, adding an explicit
// wait() in the test, waiting for the server manager to become available.
final long timeout = TimeUnit.MINUTES.toMillis(10);
assertNotEquals("timeout waiting for server manager to become available.",
-1, Waiter.waitFor(htu.getConfiguration(), timeout,
() -> masterThread.getMaster().getServerManager() != null));

// Master has come up far enough that we can terminate it without creating a zombie.
final long result = Waiter.waitFor(htu.getConfiguration(), timeout, 500, () -> {
final Configuration conf = createResponsiveZkConfig(htu.getConfiguration());
LOG.debug("Attempting to establish connection.");
final CompletableFuture<AsyncConnection> connFuture =
ConnectionFactory.createAsyncConnection(conf);
try (final AsyncConnection conn = connFuture.join()) {
LOG.debug("Sending shutdown RPC.");
try {
conn.getAdmin().shutdown().join();
LOG.debug("Shutdown RPC sent.");
return true;
} catch (CompletionException e) {
LOG.debug("Failure sending shutdown RPC.");
}
} catch (IOException|CompletionException e) {
LOG.debug("Failed to establish connection.");
} catch (Throwable e) {
LOG.info("Something unexpected happened.", e);
}
} catch (Exception e) {
LOG.info("Error while calling Admin.shutdown, which is expected: " + e.getMessage());
}
LOG.info("After call to shutdown master");
cluster.waitOnMaster(MASTER_INDEX);
return false;
});
assertNotEquals("Failed to issue shutdown RPC after " + Duration.ofMillis(timeout),
-1, result);
});

masterThread.start();
shutdownFuture.join();
masterThread.join();
} finally {
if (hbaseCluster != null) {
hbaseCluster.shutdown();
}
if (htu != null) {
htu.shutdownMiniCluster();
htu = null;
}
};
shutdownThread.start();
LOG.info("Called master join on " + master.getName());
master.join();
shutdownThread.join();

List<MasterThread> masterThreads = cluster.getMasters();
// make sure all the masters properly shutdown
assertEquals(0, masterThreads.size());

util.shutdownMiniZKCluster();
util.shutdownMiniDFSCluster();
util.cleanupTestDir();
}
}

/**
* Create a cluster configuration suitable for
* {@link #testMasterShutdownBeforeStartingAnyRegionServer()}.
*/
private static Configuration createMasterShutdownBeforeStartingAnyRegionServerConfiguration() {
final Configuration conf = HBaseConfiguration.create();
// make sure the master will wait forever in the absence of a RS.
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
// don't need a long write pipeline for this test.
conf.setInt("dfs.replication", 1);
return conf;
}

/**
* Create a new {@link Configuration} based on {@code baseConf} that has ZooKeeper connection
* settings tuned very aggressively. The resulting client is used within a retry loop, so there's
* no value in having the client itself do the retries. We want to iterate on the base
 * configuration because we're waiting for the mini-cluster to start and set its ZK client port.
*
* @return a new, configured {@link Configuration} instance.
*/
private static Configuration createResponsiveZkConfig(final Configuration baseConf) {
final Configuration conf = HBaseConfiguration.create(baseConf);
conf.setInt(ReadOnlyZKClient.RECOVERY_RETRY, 3);
conf.setInt(ReadOnlyZKClient.RECOVERY_RETRY_INTERVAL_MILLIS, 100);
return conf;
}
}
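(Aside, not part of the diff: the new test leans on Waiter.waitFor, which re-evaluates a predicate on an interval until it returns true or the timeout elapses; the assertions above treat a return value of -1 as a failed wait. A minimal sketch of that pattern, with a hypothetical predicate and assuming the HBase test utilities are on the classpath:)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.Waiter;

public class WaitForSketch {
  public static void main(String[] args) throws Exception {
    final Configuration conf = HBaseConfiguration.create();
    final long timeoutMillis = 30_000L;  // give up after 30 seconds
    final long intervalMillis = 500L;    // re-check the condition every 500 ms
    // Hypothetical condition; in the test above the predicates check that the
    // master's ServerManager is initialized, or that the shutdown RPC succeeded.
    final long waited = Waiter.waitFor(conf, timeoutMillis, intervalMillis,
        () -> System.currentTimeMillis() > 0);
    // Mirror the checks in the test above, which treat -1 as a failed wait.
    if (waited == -1) {
      System.out.println("wait did not succeed");
    } else {
      System.out.println("condition held after roughly " + waited + " ms");
    }
  }
}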
