Skip to content

Commit

Permalink
HBASE-24998 Introduce a ReplicationSourceOverallController interface …
Browse files Browse the repository at this point in the history
…and decouple ReplicationSourceManager and ReplicationSource
  • Loading branch information
infraio committed Sep 9, 2020
1 parent e053a00 commit ccbd2e8
Show file tree
Hide file tree
Showing 11 changed files with 193 additions and 133 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,8 @@ public enum OperationStatusCode {
/*
* cluster replication constants.
*/
public static final String REPLICATION_OFFLOAD_ENABLE_KEY = "hbase.replication.offload.enabled";
public static final boolean REPLICATION_OFFLOAD_ENABLE_DEFAULT = false;
public static final String
REPLICATION_SOURCE_SERVICE_CLASSNAME = "hbase.replication.source.service";
public static final String
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ public interface ReplicationListener {
* A region server has been removed from the local cluster
* @param regionServer the removed region server
*/
public void regionServerRemoved(String regionServer);
void regionServerRemoved(String regionServer);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.replication;

import org.apache.hadoop.hbase.replication.regionserver.MetricsReplicationGlobalSourceSource;
import org.apache.hadoop.hbase.replication.regionserver.RecoveredReplicationSource;
import org.apache.yetus.audience.InterfaceAudience;

import java.util.concurrent.atomic.AtomicLong;

/**
* Used to control all replication sources inside one RegionServer or ReplicationServer.
* Used by {@link org.apache.hadoop.hbase.replication.regionserver.ReplicationSource} or
* {@link RecoveredReplicationSource}.
* <p>
* It exposes the state that individual sources share: the total buffer limit, the total buffer
* usage counter, the global metrics, and a callback for recovered sources that have finished.
*/
@InterfaceAudience.Private
public interface ReplicationSourceController {

/**
* Returns the maximum size in bytes of edits held in memory which are pending replication
* across all sources inside this RegionServer or ReplicationServer.
* @return the total buffer limit, in bytes
*/
long getTotalBufferLimit();

/**
* Returns the counter that tracks the buffer usage shared by all sources under this controller.
* The returned {@link AtomicLong} is mutated in place by its callers: a replication source
* subtracts the batch size from it after shipping a batch of edits.
* NOTE(review): the unit is presumably bytes, to match {@link #getTotalBufferLimit()} - confirm
* against the implementations.
* @return the shared, mutable buffer usage counter
*/
AtomicLong getTotalBufferUsed();

/**
* Returns the global replication source metrics. A replication source records the updated
* buffer usage here after it has shipped a batch of edits.
* @return the global metrics of the replication sources
*/
MetricsReplicationGlobalSourceSource getGlobalMetrics();

/**
* Call this when the recovered replication source replicated all WALs.
* @param src the recovered replication source which has finished
*/
void finishRecoveredSource(RecoveredReplicationSource src);
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.replication.ReplicationPeer;
import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
import org.apache.hadoop.hbase.replication.ReplicationSourceController;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.yetus.audience.InterfaceAudience;
Expand All @@ -45,18 +46,15 @@ public class RecoveredReplicationSource extends ReplicationSource {

private static final Logger LOG = LoggerFactory.getLogger(RecoveredReplicationSource.class);

private Path walDir;

private String actualPeerId;

@Override
public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
String peerClusterZnode, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
MetricsSource metrics) throws IOException {
super.init(conf, fs, walDir, manager, queueStorage, replicationPeer, server, peerClusterZnode,
clusterId, walFileLengthProvider, metrics);
this.walDir = walDir;
public void init(Configuration conf, FileSystem fs, Path walDir,
ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
ReplicationPeer replicationPeer, Server server, String peerClusterZnode, UUID clusterId,
WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
super.init(conf, fs, walDir, overallController, queueStorage, replicationPeer, server,
peerClusterZnode, clusterId, walFileLengthProvider, metrics);
this.actualPeerId = this.replicationQueueInfo.getPeerId();
}

Expand Down Expand Up @@ -149,7 +147,7 @@ private Path getReplSyncUpPath(Path path) throws IOException {
void tryFinish() {
if (workerThreads.isEmpty()) {
this.getSourceMetrics().clear();
manager.finishRecoveredSource(this);
controller.finishRecoveredSource(this);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import org.apache.hadoop.hbase.replication.ReplicationPeer;
import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
import org.apache.hadoop.hbase.replication.ReplicationSourceController;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
import org.apache.hadoop.hbase.replication.WALEntryFilter;
Expand Down Expand Up @@ -99,8 +100,9 @@ public class ReplicationSource implements ReplicationSourceInterface {
protected Configuration conf;
protected ReplicationQueueInfo replicationQueueInfo;

// The manager of all sources to which we ping back our progress
ReplicationSourceManager manager;
protected Path walDir;

protected ReplicationSourceController controller;
// Should we stop everything?
protected Server server;
// How long should we sleep for each retry
Expand Down Expand Up @@ -177,23 +179,14 @@ public class ReplicationSource implements ReplicationSourceInterface {
this.baseFilterOutWALEntries = Collections.unmodifiableList(baseFilterOutWALEntries);
}

/**
* Instantiation method used by region servers
* @param conf configuration to use
* @param fs file system to use
* @param manager replication manager to ping to
* @param server the server for this region server
* @param queueId the id of our replication queue
* @param clusterId unique UUID for the cluster
* @param metrics metrics for replication source
*/
@Override
public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
String queueId, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
MetricsSource metrics) throws IOException {
public void init(Configuration conf, FileSystem fs, Path walDir,
ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
ReplicationPeer replicationPeer, Server server, String queueId, UUID clusterId,
WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
this.server = server;
this.conf = HBaseConfiguration.create(conf);
this.walDir = walDir;
this.waitOnEndpointSeconds =
this.conf.getInt(WAIT_ON_ENDPOINT_SECONDS, DEFAULT_WAIT_ON_ENDPOINT_SECONDS);
decorateConf();
Expand All @@ -204,7 +197,7 @@ public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSour
this.queueSizePerGroup = this.conf.getInt("hbase.regionserver.maxlogs", 32);
this.queueStorage = queueStorage;
this.replicationPeer = replicationPeer;
this.manager = manager;
this.controller = overallController;
this.fs = fs;
this.metrics = metrics;
this.clusterId = clusterId;
Expand All @@ -217,6 +210,7 @@ public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSour
currentBandwidth = getCurrentBandwidth();
this.throttler = new ReplicationThrottler((double) currentBandwidth / 10.0);
this.walFileLengthProvider = walFileLengthProvider;

LOG.info("queueId={}, ReplicationSource: {}, currentBandwidth={}", queueId,
replicationPeer.getId(), this.currentBandwidth);
}
Expand Down Expand Up @@ -734,9 +728,9 @@ public void postShipEdits(List<Entry> entries, int batchSize) {
throttler.addPushSize(batchSize);
}
totalReplicatedEdits.addAndGet(entries.size());
long newBufferUsed = manager.getTotalBufferUsed().addAndGet(-batchSize);
long newBufferUsed = controller.getTotalBufferUsed().addAndGet(-batchSize);
// Record the new buffer usage
this.manager.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
controller.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
import org.apache.hadoop.hbase.replication.ReplicationPeer;
import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
import org.apache.hadoop.hbase.replication.ReplicationSourceController;
import org.apache.hadoop.hbase.wal.WAL.Entry;
import org.apache.yetus.audience.InterfaceAudience;

Expand All @@ -44,14 +45,22 @@ public interface ReplicationSourceInterface {
/**
* Initializer for the source
*
* @param conf the configuration to use
* @param fs the file system to use
* @param server the server for this region server
*/
void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
String queueId, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
MetricsSource metrics) throws IOException;
* @param conf configuration to use
* @param fs file system to use
* @param walDir the directory where the WAL is located
* @param overallController the overall controller of all replication sources
* @param queueStorage the replication queue storage
* @param replicationPeer the replication peer
* @param server the server which starts and runs this replication source
* @param queueId the id of our replication queue
* @param clusterId unique UUID for the cluster
* @param walFileLengthProvider used to get the WAL length
* @param metrics metrics for this replication source
*/
void init(Configuration conf, FileSystem fs, Path walDir,
ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
ReplicationPeer replicationPeer, Server server, String queueId, UUID clusterId,
WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException;

/**
* Add a log to the list of logs to replicate
Expand Down

0 comments on commit ccbd2e8

Please sign in to comment.