Skip to content
Permalink
Browse files
HBASE-25293 Followup jira to address the client handling issue when c…
…haning from meta replica to non-meta-replica at the server side. (#2768)

Signed-off-by: stack <stack@apache.org>
  • Loading branch information
huaxiangsun committed Dec 16, 2020
1 parent 1bb9b78 commit 1c217da2ff8b9d6163e9dceb896ebdbc552925d5
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 11 deletions.
@@ -211,7 +211,7 @@ private boolean tryComplete(LocateRequest req, CompletableFuture<RegionLocations

this.metaReplicaSelector = CatalogReplicaLoadBalanceSelectorFactory.createSelector(
replicaSelectorClass, META_TABLE_NAME, conn, () -> {
int numOfReplicas = 1;
int numOfReplicas = CatalogReplicaLoadBalanceSelector.UNINITIALIZED_NUM_OF_REPLICAS;
try {
RegionLocations metaLocations = conn.registry.getMetaRegionLocations().get(
conn.connConf.getReadRpcTimeoutNs(), TimeUnit.NANOSECONDS);
@@ -28,6 +28,8 @@
@InterfaceAudience.Private
interface CatalogReplicaLoadBalanceSelector {

int UNINITIALIZED_NUM_OF_REPLICAS = -1;

/**
* This method is called when input location is stale, i.e, when clients run into
* org.apache.hadoop.hbase.NotServingRegionException.
@@ -108,7 +108,6 @@ public String toString() {
private final TableName tableName;
private final IntSupplier getNumOfReplicas;
private volatile boolean isStopped = false;
private final static int UNINITIALIZED_NUM_OF_REPLICAS = -1;

CatalogReplicaLoadBalanceSimpleSelector(TableName tableName, AsyncConnectionImpl conn,
IntSupplier getNumOfReplicas) {
@@ -117,7 +116,7 @@ public String toString() {
this.getNumOfReplicas = getNumOfReplicas;

// This numOfReplicas is going to be lazy initialized.
this.numOfReplicas = UNINITIALIZED_NUM_OF_REPLICAS;
this.numOfReplicas = CatalogReplicaLoadBalanceSelector.UNINITIALIZED_NUM_OF_REPLICAS;
// Start chores
this.conn.getChoreService().scheduleChore(getCacheCleanupChore(this));
this.conn.getChoreService().scheduleChore(getRefreshReplicaCountChore(this));
@@ -146,7 +145,7 @@ public void onError(HRegionLocation loc) {
*/
private int getRandomReplicaId() {
int cachedNumOfReplicas = this.numOfReplicas;
if (cachedNumOfReplicas == UNINITIALIZED_NUM_OF_REPLICAS) {
if (cachedNumOfReplicas == CatalogReplicaLoadBalanceSelector.UNINITIALIZED_NUM_OF_REPLICAS) {
cachedNumOfReplicas = refreshCatalogReplicaCount();
this.numOfReplicas = cachedNumOfReplicas;
}
@@ -262,16 +261,16 @@ private void cleanupReplicaReplicaStaleCache() {
private int refreshCatalogReplicaCount() {
int newNumOfReplicas = this.getNumOfReplicas.getAsInt();
LOG.debug("Refreshed replica count {}", newNumOfReplicas);
if (newNumOfReplicas == 1) {
LOG.warn("Table {}'s region replica count is 1, maybe a misconfiguration or failure to "
+ "fetch the replica count", tableName);
// If the returned number of replicas is -1, it is caused by failure to fetch the
// replica count. Do not update the numOfReplicas in this case.
if (newNumOfReplicas == CatalogReplicaLoadBalanceSelector.UNINITIALIZED_NUM_OF_REPLICAS) {
LOG.error("Failed to fetch Table {}'s region replica count", tableName);
return this.numOfReplicas;
}
int cachedNumOfReplicas = this.numOfReplicas;

// If the returned number of replicas is 1, it is mostly caused by failure to fetch the
// replica count. Do not update the numOfReplicas in this case.
int cachedNumOfReplicas = this.numOfReplicas;
if ((cachedNumOfReplicas == UNINITIALIZED_NUM_OF_REPLICAS) ||
((cachedNumOfReplicas != newNumOfReplicas) && (newNumOfReplicas != 1))) {
(cachedNumOfReplicas != newNumOfReplicas)) {
this.numOfReplicas = newNumOfReplicas;
}
return newNumOfReplicas;
@@ -0,0 +1,132 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;

import static org.apache.hadoop.hbase.HConstants.EMPTY_START_ROW;
import static org.apache.hadoop.hbase.TableName.META_TABLE_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Category({ MediumTests.class, ClientTests.class })
public class TestCatalogReplicaLoadBalanceSimpleSelector {

@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestCatalogReplicaLoadBalanceSimpleSelector.class);

private static final Logger LOG = LoggerFactory.getLogger(
TestCatalogReplicaLoadBalanceSimpleSelector.class);

private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

private static final int NB_SERVERS = 4;
private static int numOfMetaReplica = NB_SERVERS - 1;

private static AsyncConnectionImpl CONN;

private static ConnectionRegistry registry;
private static Admin admin;

@BeforeClass
public static void setUp() throws Exception {
Configuration conf = TEST_UTIL.getConfiguration();

TEST_UTIL.startMiniCluster(NB_SERVERS);
admin = TEST_UTIL.getAdmin();
admin.balancerSwitch(false, true);

// Enable hbase:meta replication.
HBaseTestingUtility.setReplicas(admin, TableName.META_TABLE_NAME, numOfMetaReplica);
TEST_UTIL.waitFor(30000, () -> TEST_UTIL.getMiniHBaseCluster().getRegions(
TableName.META_TABLE_NAME).size() >= numOfMetaReplica);

registry = ConnectionRegistryFactory.getRegistry(TEST_UTIL.getConfiguration());
CONN = new AsyncConnectionImpl(conf, registry,
registry.getClusterId().get(), null, User.getCurrent());
}

@AfterClass
public static void tearDown() throws Exception {
IOUtils.closeQuietly(CONN);
TEST_UTIL.shutdownMiniCluster();
}

@Test
public void testMetaChangeFromReplicaNoReplica() throws IOException, InterruptedException {
String replicaSelectorClass = CONN.getConfiguration().
get(RegionLocator.LOCATOR_META_REPLICAS_MODE_LOADBALANCE_SELECTOR,
CatalogReplicaLoadBalanceSimpleSelector.class.getName());

CatalogReplicaLoadBalanceSelector metaSelector = CatalogReplicaLoadBalanceSelectorFactory
.createSelector(replicaSelectorClass, META_TABLE_NAME, CONN, () -> {
int numOfReplicas = CatalogReplicaLoadBalanceSelector.UNINITIALIZED_NUM_OF_REPLICAS;
try {
RegionLocations metaLocations = CONN.registry.getMetaRegionLocations().get
(CONN.connConf.getReadRpcTimeoutNs(), TimeUnit.NANOSECONDS);
numOfReplicas = metaLocations.size();
} catch (Exception e) {
LOG.error("Failed to get table {}'s region replication, ", META_TABLE_NAME, e);
}
return numOfReplicas;
});

assertNotEquals(
metaSelector.select(TableName.valueOf("test"), EMPTY_START_ROW, RegionLocateType.CURRENT),
RegionReplicaUtil.DEFAULT_REPLICA_ID);

// Change to No meta replica
HBaseTestingUtility.setReplicas(admin, TableName.META_TABLE_NAME, 1);
TEST_UTIL.waitFor(30000, () -> TEST_UTIL.getMiniHBaseCluster().getRegions(
TableName.META_TABLE_NAME).size() == 1);

CatalogReplicaLoadBalanceSelector metaSelectorWithNoReplica =
CatalogReplicaLoadBalanceSelectorFactory.createSelector(
replicaSelectorClass, META_TABLE_NAME, CONN, () -> {
int numOfReplicas = CatalogReplicaLoadBalanceSelector.UNINITIALIZED_NUM_OF_REPLICAS;
try {
RegionLocations metaLocations = CONN.registry.getMetaRegionLocations().get(
CONN.connConf.getReadRpcTimeoutNs(), TimeUnit.NANOSECONDS);
numOfReplicas = metaLocations.size();
} catch (Exception e) {
LOG.error("Failed to get table {}'s region replication, ", META_TABLE_NAME, e);
}
return numOfReplicas;
});
assertEquals(
metaSelectorWithNoReplica.select(TableName.valueOf("test"), EMPTY_START_ROW,
RegionLocateType.CURRENT), RegionReplicaUtil.DEFAULT_REPLICA_ID);
}
}

0 comments on commit 1c217da

Please sign in to comment.