tables = table.describeBackupSet(name);
-
- if (tables == null) {
- return null;
- }
-
- return StringUtils.join(tables, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND);
- }
- }
-
- @Override
- protected void addOptions() {
- // define supported options
- addOptNoArg(OPTION_OVERWRITE, OPTION_OVERWRITE_DESC);
- addOptNoArg(OPTION_CHECK, OPTION_CHECK_DESC);
- addOptNoArg(OPTION_DEBUG, OPTION_DEBUG_DESC);
- addOptWithArg(OPTION_SET, OPTION_SET_RESTORE_DESC);
- addOptWithArg(OPTION_TABLE, OPTION_TABLE_LIST_DESC);
- addOptWithArg(OPTION_TABLE_MAPPING, OPTION_TABLE_MAPPING_DESC);
- addOptWithArg(OPTION_YARN_QUEUE_NAME, OPTION_YARN_QUEUE_NAME_RESTORE_DESC);
- }
-
- @Override
- protected void processOptions(CommandLine cmd) {
- this.cmd = cmd;
- }
-
- @Override
- protected int doWork() throws Exception {
- return parseAndRun();
- }
-
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
Path hbasedir = CommonFSUtils.getRootDir(conf);
@@ -226,45 +76,7 @@ public static void main(String[] args) throws Exception {
}
@Override
- public int run(String[] args) {
- Objects.requireNonNull(conf, "Tool configuration is not initialized");
-
- CommandLine cmd;
- try {
- // parse the command line arguments
- cmd = parseArgs(args);
- cmdLineArgs = args;
- } catch (Exception e) {
- System.out.println("Error when parsing command-line arguments: " + e.getMessage());
- printToolUsage();
- return EXIT_FAILURE;
- }
-
- if (cmd.hasOption(SHORT_HELP_OPTION) || cmd.hasOption(LONG_HELP_OPTION)) {
- printToolUsage();
- return EXIT_FAILURE;
- }
-
- processOptions(cmd);
-
- int ret = EXIT_FAILURE;
- try {
- ret = doWork();
- } catch (Exception e) {
- LOG.error("Error running command-line tool", e);
- return EXIT_FAILURE;
- }
- return ret;
- }
-
- protected void printToolUsage() {
- System.out.println(USAGE_STRING);
- HelpFormatter helpFormatter = new HelpFormatter();
- helpFormatter.setLeftPadding(2);
- helpFormatter.setDescPadding(8);
- helpFormatter.setWidth(100);
- helpFormatter.setSyntaxPrefix("Options:");
- helpFormatter.printHelp(" ", null, options, USAGE_FOOTER);
- System.out.println(BackupRestoreConstants.VERIFY_BACKUP);
+ protected String getUsageString() {
+ return USAGE_STRING;
}
}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/AbstractPitrRestoreHandler.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/AbstractPitrRestoreHandler.java
new file mode 100644
index 000000000000..3f31255d60f6
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/AbstractPitrRestoreHandler.java
@@ -0,0 +1,421 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.impl;
+
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS;
+import static org.apache.hadoop.hbase.mapreduce.WALPlayer.IGNORE_EMPTY_FILES;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
+import org.apache.hadoop.hbase.backup.PointInTimeRestoreRequest;
+import org.apache.hadoop.hbase.backup.RestoreJob;
+import org.apache.hadoop.hbase.backup.RestoreRequest;
+import org.apache.hadoop.hbase.backup.util.BackupUtils;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
+import org.apache.hadoop.hbase.mapreduce.WALPlayer;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.util.Tool;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Abstract base class for handling Point-In-Time Restore (PITR).
+ *
+ * Defines the common PITR algorithm using the Template Method Pattern. Subclasses provide the
+ * metadata source (e.g., backup system table or a custom backup location).
+ *
+ * The PITR flow includes:
+ *
+ * - Validating recovery time within the PITR window
+ * - Checking for continuous backup and valid backup availability
+ * - Restoring the backup
+ * - Replaying WALs to bring tables to the target state
+ *
+ *
+ * Subclasses must implement {@link #getBackupMetadata(PointInTimeRestoreRequest)} to supply the
+ * list of completed backups.
+ */
+@InterfaceAudience.Private
+public abstract class AbstractPitrRestoreHandler {
+ private static final Logger LOG = LoggerFactory.getLogger(AbstractPitrRestoreHandler.class);
+
+ protected final Connection conn;
+ protected final BackupAdminImpl backupAdmin;
+ protected final PointInTimeRestoreRequest request;
+
+ AbstractPitrRestoreHandler(Connection conn, PointInTimeRestoreRequest request) {
+ this.conn = conn;
+ this.backupAdmin = new BackupAdminImpl(conn);
+ this.request = request;
+ }
+
+ /**
+ * Validates the PITR request and performs the restore if valid. This is the main entry point for
+ * the PITR process and should be called by clients.
+ */
+ public final void validateAndRestore() throws IOException {
+ long endTime = request.getToDateTime();
+ validateRequestToTime(endTime);
+
+ TableName[] sourceTableArray = request.getFromTables();
+ TableName[] targetTableArray = resolveTargetTables(sourceTableArray, request.getToTables());
+
+ // Validate PITR requirements
+ validatePitr(endTime, sourceTableArray, targetTableArray);
+
+ // If only validation is required, log and return
+ if (request.isCheck()) {
+ LOG.info("PITR can be successfully executed");
+ return;
+ }
+
+ // Execute PITR process
+ try (BackupSystemTable table = new BackupSystemTable(conn)) {
+ Map<TableName, Long> continuousBackupTables = table.getContinuousBackupTableSet();
+ List<PitrBackupMetadata> backupMetadataList = getBackupMetadata(request);
+
+ for (int i = 0; i < sourceTableArray.length; i++) {
+ restoreTableWithWalReplay(sourceTableArray[i], targetTableArray[i], endTime,
+ continuousBackupTables, backupMetadataList, request);
+ }
+ }
+ }
+
+ /**
+ * Validates whether the requested end time falls within the allowed PITR recovery window.
+ * @param endTime The target recovery time.
+ * @throws IOException If the requested recovery time is outside the allowed window.
+ */
+ private void validateRequestToTime(long endTime) throws IOException {
+ long pitrWindowDays = conn.getConfiguration().getLong(CONF_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS,
+ DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS);
+ long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
+ long pitrMaxStartTime = currentTime - TimeUnit.DAYS.toMillis(pitrWindowDays);
+
+ if (endTime < pitrMaxStartTime) {
+ String errorMsg = String.format(
+ "Requested recovery time (%d) is out of the allowed PITR window (last %d days).", endTime,
+ pitrWindowDays);
+ LOG.error(errorMsg);
+ throw new IOException(errorMsg);
+ }
+
+ if (endTime > currentTime) {
+ String errorMsg = String.format(
+ "Requested recovery time (%d) is in the future. Current time: %d.", endTime, currentTime);
+ LOG.error(errorMsg);
+ throw new IOException(errorMsg);
+ }
+ }
+
+ /**
+ * Resolves the target table array. If null or empty, defaults to the source table array.
+ */
+ private TableName[] resolveTargetTables(TableName[] sourceTables, TableName[] targetTables) {
+ return (targetTables == null || targetTables.length == 0) ? sourceTables : targetTables;
+ }
+
+ /**
+ * Validates whether Point-In-Time Recovery (PITR) is possible for the given tables at the
+ * specified time.
+ *
+ * PITR requires:
+ *
+ * - Continuous backup to be enabled for the source tables.
+ * - A valid backup image and corresponding WALs to be available.
+ *
+ * @param endTime The target recovery time.
+ * @param sTableArray The source tables to restore.
+ * @param tTableArray The target tables where the restore will be performed.
+ * @throws IOException If PITR is not possible due to missing continuous backup or backup images.
+ */
+ private void validatePitr(long endTime, TableName[] sTableArray, TableName[] tTableArray)
+ throws IOException {
+ try (BackupSystemTable table = new BackupSystemTable(conn)) {
+ // Retrieve the set of tables with continuous backup enabled
+ Map<TableName, Long> continuousBackupTables = table.getContinuousBackupTableSet();
+
+ // Ensure all source tables have continuous backup enabled
+ validateContinuousBackup(sTableArray, continuousBackupTables);
+
+ // Fetch completed backup information
+ List<PitrBackupMetadata> backupMetadataList = getBackupMetadata(request);
+
+ // Ensure a valid backup and WALs exist for PITR
+ validateBackupAvailability(sTableArray, tTableArray, endTime, continuousBackupTables,
+ backupMetadataList);
+ }
+ }
+
+ /**
+ * Ensures that all source tables have continuous backup enabled.
+ */
+ private void validateContinuousBackup(TableName[] tables,
+ Map<TableName, Long> continuousBackupTables) throws IOException {
+ List<TableName> missingTables =
+ Arrays.stream(tables).filter(table -> !continuousBackupTables.containsKey(table)).toList();
+
+ if (!missingTables.isEmpty()) {
+ String errorMsg = "Continuous Backup is not enabled for the following tables: "
+ + missingTables.stream().map(TableName::getNameAsString).collect(Collectors.joining(", "));
+ LOG.error(errorMsg);
+ throw new IOException(errorMsg);
+ }
+ }
+
+ /**
+ * Ensures that a valid backup and corresponding WALs exist for PITR for each source table. PITR
+ * requires: 1. A valid backup available before the end time. 2. Write-Ahead Logs (WALs) covering
+ * the remaining duration up to the end time.
+ */
+ private void validateBackupAvailability(TableName[] sTableArray, TableName[] tTableArray,
+ long endTime, Map<TableName, Long> continuousBackupTables, List<PitrBackupMetadata> backups)
+ throws IOException {
+ for (int i = 0; i < sTableArray.length; i++) {
+ if (
+ !canPerformPitr(sTableArray[i], tTableArray[i], endTime, continuousBackupTables, backups)
+ ) {
+ String errorMsg = String.format(
+ "PITR failed: No valid backup/WALs found for source table %s (target: %s) before time %d",
+ sTableArray[i].getNameAsString(), tTableArray[i].getNameAsString(), endTime);
+ LOG.error(errorMsg);
+ throw new IOException(errorMsg);
+ }
+ }
+ }
+
+ /**
+ * Checks whether PITR can be performed for a given source-target table pair.
+ */
+ private boolean canPerformPitr(TableName stableName, TableName tTableName, long endTime,
+ Map<TableName, Long> continuousBackupTables, List<PitrBackupMetadata> backups) {
+ return getValidBackup(stableName, tTableName, endTime, continuousBackupTables, backups) != null;
+ }
+
+ /**
+ * Finds and returns the first valid backup metadata entry that can be used to restore the given
+ * source table up to the specified end time. A backup is considered valid if:
+ *
+ * - It contains the source table
+ * - It was completed before the requested end time
+ * - Its start time is after the table's continuous backup start time
+ * - It passes the restore request validation
+ *
+ */
+ private PitrBackupMetadata getValidBackup(TableName sTableName, TableName tTablename,
+ long endTime, Map<TableName, Long> continuousBackupTables, List<PitrBackupMetadata> backups) {
+ for (PitrBackupMetadata backup : backups) {
+ if (isValidBackupForPitr(backup, sTableName, endTime, continuousBackupTables)) {
+
+ RestoreRequest restoreRequest =
+ BackupUtils.createRestoreRequest(backup.getRootDir(), backup.getBackupId(), true,
+ new TableName[] { sTableName }, new TableName[] { tTablename }, false);
+
+ try {
+ if (backupAdmin.validateRequest(restoreRequest)) {
+ return backup;
+ }
+ } catch (IOException e) {
+ LOG.warn("Exception occurred while testing the backup : {} for restore ",
+ backup.getBackupId(), e);
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Determines if the given backup is valid for PITR.
+ *
+ * A backup is valid if:
+ *
+ * - It contains the source table.
+ * - It was completed before the end time.
+ * - The start timestamp of the backup is after the continuous backup start time for the
+ * table.
+ *
+ * @param backupMetadata Backup information object.
+ * @param tableName Table to check.
+ * @param endTime The target recovery time.
+ * @param continuousBackupTables Map of tables with continuous backup enabled.
+ * @return true if the backup is valid for PITR, false otherwise.
+ */
+ private boolean isValidBackupForPitr(PitrBackupMetadata backupMetadata, TableName tableName,
+ long endTime, Map<TableName, Long> continuousBackupTables) {
+ return backupMetadata.getTableNames().contains(tableName)
+ && backupMetadata.getCompleteTs() <= endTime
+ && continuousBackupTables.getOrDefault(tableName, 0L) <= backupMetadata.getStartTs();
+ }
+
+ /**
+ * Restores the table using the selected backup and replays WALs from the backup start time to the
+ * requested end time.
+ * @throws IOException if no valid backup is found or WAL replay fails
+ */
+ private void restoreTableWithWalReplay(TableName sourceTable, TableName targetTable, long endTime,
+ Map<TableName, Long> continuousBackupTables, List<PitrBackupMetadata> backupMetadataList,
+ PointInTimeRestoreRequest request) throws IOException {
+ PitrBackupMetadata backupMetadata =
+ getValidBackup(sourceTable, targetTable, endTime, continuousBackupTables, backupMetadataList);
+ if (backupMetadata == null) {
+ String errorMsg = "Could not find a valid backup and WALs for PITR for table: "
+ + sourceTable.getNameAsString();
+ LOG.error(errorMsg);
+ throw new IOException(errorMsg);
+ }
+
+ RestoreRequest restoreRequest = BackupUtils.createRestoreRequest(backupMetadata.getRootDir(),
+ backupMetadata.getBackupId(), false, new TableName[] { sourceTable },
+ new TableName[] { targetTable }, request.isOverwrite());
+
+ backupAdmin.restore(restoreRequest);
+ replayWal(sourceTable, targetTable, backupMetadata.getStartTs(), endTime);
+
+ reBulkloadFiles(sourceTable, targetTable, backupMetadata.getStartTs(), endTime,
+ request.isKeepOriginalSplits(), request.getRestoreRootDir());
+ }
+
+ /**
+ * Re-applies/re-bulkloads store files discovered from WALs into the target table.
+ *
+ * Note: this method re-uses the same {@link RestoreJob} MapReduce job that we originally
+ * implemented for performing full and incremental backup restores. The MR job (obtained via
+ * {@link BackupRestoreFactory#getRestoreJob(Configuration)}) is used here to perform an HFile
+ * bulk-load of the discovered store files into {@code targetTable}.
+ * @param sourceTable source table name (used for locating bulk files and logging)
+ * @param targetTable destination table to bulk-load the HFiles into
+ * @param startTime start of WAL range (ms)
+ * @param endTime end of WAL range (ms)
+ * @param keepOriginalSplits pass-through flag to control whether original region splits are
+ * preserved
+ * @param restoreRootDir local/DFS path under which temporary and output dirs are created
+ * @throws IOException on IO or job failure
+ */
+ private void reBulkloadFiles(TableName sourceTable, TableName targetTable, long startTime,
+ long endTime, boolean keepOriginalSplits, String restoreRootDir) throws IOException {
+
+ Configuration conf = HBaseConfiguration.create(conn.getConfiguration());
+ conf.setBoolean(RestoreJob.KEEP_ORIGINAL_SPLITS_KEY, keepOriginalSplits);
+
+ String walBackupDir = conn.getConfiguration().get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
+ Path walDirPath = new Path(walBackupDir);
+ conf.set(RestoreJob.BACKUP_ROOT_PATH_KEY, walDirPath.toString());
+
+ RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
+
+ List<Path> bulkloadFiles = BackupUtils.collectBulkFiles(conn, sourceTable, targetTable,
+ startTime, endTime, new Path(restoreRootDir), new ArrayList<>());
+
+ if (bulkloadFiles.isEmpty()) {
+ LOG.info("No bulk-load files found for {} in time range {}-{}. Skipping bulkload restore.",
+ sourceTable, startTime, endTime);
+ return;
+ }
+
+ Path[] pathsArray = bulkloadFiles.toArray(new Path[0]);
+
+ try {
+ // Use the existing RestoreJob MR job (the same MapReduce job used for full/incremental
+ // restores)
+ // to perform the HFile bulk-load of the discovered store files into `targetTable`.
+ restoreService.run(pathsArray, new TableName[] { sourceTable }, new Path(restoreRootDir),
+ new TableName[] { targetTable }, false);
+ LOG.info("Re-bulkload completed for {}", targetTable);
+ } catch (Exception e) {
+ String errorMessage =
+ String.format("Re-bulkload failed for %s: %s", targetTable, e.getMessage());
+ LOG.error(errorMessage, e);
+ throw new IOException(errorMessage, e);
+ }
+ }
+
+ /**
+ * Replays WALs to bring the table to the desired state.
+ */
+ private void replayWal(TableName sourceTable, TableName targetTable, long startTime, long endTime)
+ throws IOException {
+ String walBackupDir = conn.getConfiguration().get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
+ Path walDirPath = new Path(walBackupDir);
+ LOG.info(
+ "Starting WAL replay for source: {}, target: {}, time range: {} - {}, WAL backup dir: {}",
+ sourceTable, targetTable, startTime, endTime, walDirPath);
+
+ List<String> validDirs =
+ BackupUtils.getValidWalDirs(conn.getConfiguration(), walDirPath, startTime, endTime);
+ if (validDirs.isEmpty()) {
+ LOG.warn("No valid WAL directories found for range {} - {}. Skipping WAL replay.", startTime,
+ endTime);
+ return;
+ }
+
+ executeWalReplay(validDirs, sourceTable, targetTable, startTime, endTime);
+ }
+
+ /**
+ * Executes WAL replay using WALPlayer.
+ */
+ private void executeWalReplay(List<String> walDirs, TableName sourceTable, TableName targetTable,
+ long startTime, long endTime) throws IOException {
+ Tool walPlayer = initializeWalPlayer(startTime, endTime);
+ String[] args =
+ { String.join(",", walDirs), sourceTable.getNameAsString(), targetTable.getNameAsString() };
+
+ try {
+ LOG.info("Executing WALPlayer with args: {}", Arrays.toString(args));
+ int exitCode = walPlayer.run(args);
+ if (exitCode == 0) {
+ LOG.info("WAL replay completed successfully for {}", targetTable);
+ } else {
+ throw new IOException("WAL replay failed with exit code: " + exitCode);
+ }
+ } catch (Exception e) {
+ LOG.error("Error during WAL replay for {}: {}", targetTable, e.getMessage(), e);
+ throw new IOException("Exception during WAL replay", e);
+ }
+ }
+
+ /**
+ * Initializes and configures WALPlayer.
+ */
+ private Tool initializeWalPlayer(long startTime, long endTime) {
+ Configuration conf = HBaseConfiguration.create(conn.getConfiguration());
+ conf.setLong(WALInputFormat.START_TIME_KEY, startTime);
+ conf.setLong(WALInputFormat.END_TIME_KEY, endTime);
+ conf.setBoolean(IGNORE_EMPTY_FILES, true);
+ Tool walPlayer = new WALPlayer();
+ walPlayer.setConf(conf);
+ return walPlayer;
+ }
+
+ protected abstract List<PitrBackupMetadata> getBackupMetadata(PointInTimeRestoreRequest request)
+ throws IOException;
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
index c36b398e5e86..2122ef9378fb 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.backup.impl;
+import com.google.errorprone.annotations.RestrictedApi;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -40,6 +41,7 @@
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
+import org.apache.hadoop.hbase.backup.PointInTimeRestoreRequest;
import org.apache.hadoop.hbase.backup.RestoreRequest;
import org.apache.hadoop.hbase.backup.util.BackupSet;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
@@ -174,8 +176,11 @@ public int deleteBackups(String[] backupIds) throws IOException {
* @param table backup system table
* @throws IOException if a table operation fails
*/
- private void finalizeDelete(List<String> backupRoots, BackupSystemTable table)
- throws IOException {
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java)")
+ void finalizeDelete(List<String> backupRoots, BackupSystemTable table) throws IOException {
for (String backupRoot : backupRoots) {
Set<TableName> incrTableSet = table.getIncrementalBackupTableSet(backupRoot);
Map<TableName, ArrayList<BackupInfo>> tableMap =
@@ -211,7 +216,11 @@ private void finalizeDelete(List backupRoots, BackupSystemTable table)
* @return total number of deleted backup images
* @throws IOException if deleting the backup fails
*/
- private int deleteBackup(String backupId, BackupSystemTable sysTable) throws IOException {
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java)")
+ int deleteBackup(String backupId, BackupSystemTable sysTable) throws IOException {
BackupInfo backupInfo = sysTable.readBackupInfo(backupId);
int totalDeleted = 0;
@@ -273,7 +282,11 @@ private int deleteBackup(String backupId, BackupSystemTable sysTable) throws IOE
return totalDeleted;
}
- private void removeTableFromBackupImage(BackupInfo info, TableName tn, BackupSystemTable sysTable)
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java)")
+ void removeTableFromBackupImage(BackupInfo info, TableName tn, BackupSystemTable sysTable)
throws IOException {
List<TableName> tables = info.getTableNames();
LOG.debug(
@@ -296,7 +309,11 @@ private void removeTableFromBackupImage(BackupInfo info, TableName tn, BackupSys
}
}
- private List<BackupInfo> getAffectedBackupSessions(BackupInfo backupInfo, TableName tn,
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java)")
+ List<BackupInfo> getAffectedBackupSessions(BackupInfo backupInfo, TableName tn,
BackupSystemTable table) throws IOException {
LOG.debug("GetAffectedBackupInfos for: " + backupInfo.getBackupId() + " table=" + tn);
long ts = backupInfo.getStartTs();
@@ -328,7 +345,11 @@ private List getAffectedBackupSessions(BackupInfo backupInfo, TableN
* Clean up the data at target directory
* @throws IOException if cleaning up the backup directory fails
*/
- private void cleanupBackupDir(BackupInfo backupInfo, TableName table, Configuration conf)
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java)")
+ void cleanupBackupDir(BackupInfo backupInfo, TableName table, Configuration conf)
throws IOException {
try {
// clean up the data at target directory
@@ -338,7 +359,7 @@ private void cleanupBackupDir(BackupInfo backupInfo, TableName table, Configurat
return;
}
- FileSystem outputFs = FileSystem.get(new Path(backupInfo.getBackupRootDir()).toUri(), conf);
+ FileSystem outputFs = getFileSystem(new Path(backupInfo.getBackupRootDir()), conf);
Path targetDirPath = new Path(BackupUtils.getTableBackupDir(backupInfo.getBackupRootDir(),
backupInfo.getBackupId(), table));
@@ -354,7 +375,19 @@ private void cleanupBackupDir(BackupInfo backupInfo, TableName table, Configurat
}
}
- private boolean isLastBackupSession(BackupSystemTable table, TableName tn, long startTime)
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java)")
+ FileSystem getFileSystem(Path path, Configuration conf) throws IOException {
+ return FileSystem.get(path.toUri(), conf);
+ }
+
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java)")
+ boolean isLastBackupSession(BackupSystemTable table, TableName tn, long startTime)
throws IOException {
List<BackupInfo> history = table.getBackupHistory();
for (BackupInfo info : history) {
@@ -490,15 +523,8 @@ private String[] toStringArray(TableName[] list) {
@Override
public void restore(RestoreRequest request) throws IOException {
if (request.isCheck()) {
- // check and load backup image manifest for the tables
- Path rootPath = new Path(request.getBackupRootDir());
- String backupId = request.getBackupId();
- TableName[] sTableArray = request.getFromTables();
- BackupManifest manifest =
- HBackupFileSystem.getManifest(conn.getConfiguration(), rootPath, backupId);
-
- // Check and validate the backup image and its dependencies
- if (BackupUtils.validate(Arrays.asList(sTableArray), manifest, conn.getConfiguration())) {
+ boolean isValid = validateRequest(request);
+ if (isValid) {
LOG.info(CHECK_OK);
} else {
LOG.error(CHECK_FAILED);
@@ -509,6 +535,42 @@ public void restore(RestoreRequest request) throws IOException {
new RestoreTablesClient(conn, request).execute();
}
+ public boolean validateRequest(RestoreRequest request) throws IOException {
+ // check and load backup image manifest for the tables
+ Path rootPath = new Path(request.getBackupRootDir());
+ String backupId = request.getBackupId();
+ TableName[] sTableArray = request.getFromTables();
+ BackupManifest manifest =
+ HBackupFileSystem.getManifest(conn.getConfiguration(), rootPath, backupId);
+
+ // Validate the backup image and its dependencies
+ return BackupUtils.validate(Arrays.asList(sTableArray), manifest, conn.getConfiguration());
+ }
+
+ /**
+ * Initiates Point-In-Time Restore (PITR) for the given request.
+ *
+ * If {@code backupRootDir} is specified in the request, performs PITR using metadata from the
+ * provided custom backup location. Otherwise, defaults to using metadata from the backup system
+ * table.
+ * @param request PointInTimeRestoreRequest containing PITR parameters.
+ * @throws IOException if validation fails or restore cannot be completed.
+ */
+ @Override
+ public void pointInTimeRestore(PointInTimeRestoreRequest request) throws IOException {
+ AbstractPitrRestoreHandler handler;
+
+ // Choose the appropriate handler based on whether a custom backup location is provided
+ if (request.getBackupRootDir() == null) {
+ handler = new DefaultPitrRestoreHandler(conn, request);
+ } else {
+ handler = new CustomBackupLocationPitrRestoreHandler(conn, request);
+ }
+ handler.validateAndRestore();
+
+ LOG.info("Successfully completed Point In Time Restore for all tables.");
+ }
+
@Override
public String backupTables(BackupRequest request) throws IOException {
BackupType type = request.getBackupType();
@@ -517,28 +579,47 @@ public String backupTables(BackupRequest request) throws IOException {
String backupId = BackupRestoreConstants.BACKUPID_PREFIX + EnvironmentEdgeManager.currentTime();
if (type == BackupType.INCREMENTAL) {
- Set<TableName> incrTableSet;
- try (BackupSystemTable table = new BackupSystemTable(conn)) {
- incrTableSet = table.getIncrementalBackupTableSet(targetRootDir);
- }
+ if (request.isContinuousBackupEnabled()) {
+ Set<TableName> continuousBackupTableSet;
+ try (BackupSystemTable table = new BackupSystemTable(conn)) {
+ continuousBackupTableSet = table.getContinuousBackupTableSet().keySet();
+ }
+ if (continuousBackupTableSet.isEmpty()) {
+ String msg = "Continuous backup table set contains no tables. "
+ + "You need to run Continuous backup first "
+ + (tableList != null ? "on " + StringUtils.join(tableList, ",") : "");
+ throw new IOException(msg);
+ }
+ if (!continuousBackupTableSet.containsAll(tableList)) {
+ String extraTables = StringUtils.join(tableList, ",");
+ String msg = "Some tables (" + extraTables + ") haven't gone through Continuous backup. "
+ + "Perform Continuous backup on " + extraTables + " first, then retry the command";
+ throw new IOException(msg);
+ }
+ } else {
+ Set<TableName> incrTableSet;
+ try (BackupSystemTable table = new BackupSystemTable(conn)) {
+ incrTableSet = table.getIncrementalBackupTableSet(targetRootDir);
+ }
- if (incrTableSet.isEmpty()) {
- String msg =
- "Incremental backup table set contains no tables. " + "You need to run full backup first "
+ if (incrTableSet.isEmpty()) {
+ String msg = "Incremental backup table set contains no tables. "
+ + "You need to run full backup first "
+ (tableList != null ? "on " + StringUtils.join(tableList, ",") : "");
- throw new IOException(msg);
- }
- if (tableList != null) {
- tableList.removeAll(incrTableSet);
- if (!tableList.isEmpty()) {
- String extraTables = StringUtils.join(tableList, ",");
- String msg = "Some tables (" + extraTables + ") haven't gone through full backup. "
- + "Perform full backup on " + extraTables + " first, " + "then retry the command";
throw new IOException(msg);
}
+ if (tableList != null) {
+ tableList.removeAll(incrTableSet);
+ if (!tableList.isEmpty()) {
+ String extraTables = StringUtils.join(tableList, ",");
+ String msg = "Some tables (" + extraTables + ") haven't gone through full backup. "
+ + "Perform full backup on " + extraTables + " first, then retry the command";
+ throw new IOException(msg);
+ }
+ }
+ tableList = Lists.newArrayList(incrTableSet);
}
- tableList = Lists.newArrayList(incrTableSet);
}
if (tableList != null && !tableList.isEmpty()) {
for (TableName table : tableList) {
@@ -565,7 +646,12 @@ public String backupTables(BackupRequest request) throws IOException {
}
}
if (nonExistingTableList != null) {
- if (type == BackupType.INCREMENTAL) {
+ // Non-continuous incremental backup is controlled by 'incremental backup table set'
+ // and not by user provided backup table list. This is an optimization to avoid copying
+ // the same set of WALs for incremental backups of different tables at different times
+ // HBASE-14038. Since continuous incremental backup and full backup backs-up user provided
+ // table list, we should inform the user about the non-existence of input table(s)
+ if (type == BackupType.INCREMENTAL && !request.isContinuousBackupEnabled()) {
// Update incremental backup set
tableList = excludeNonExistingTables(tableList, nonExistingTableList);
} else {
@@ -581,7 +667,8 @@ public String backupTables(BackupRequest request) throws IOException {
request = builder.withBackupType(request.getBackupType()).withTableList(tableList)
.withTargetRootDir(request.getTargetRootDir()).withBackupSetName(request.getBackupSetName())
.withTotalTasks(request.getTotalTasks()).withBandwidthPerTasks((int) request.getBandwidth())
- .withNoChecksumVerify(request.getNoChecksumVerify()).build();
+ .withNoChecksumVerify(request.getNoChecksumVerify())
+ .withContinuousBackupEnabled(request.isContinuousBackupEnabled()).build();
TableBackupClient client;
try {
@@ -627,8 +714,11 @@ public void mergeBackups(String[] backupIds) throws IOException {
* @param table backup system table
* @throws IOException if the backup image is not valid for merge
*/
- private void checkIfValidForMerge(String[] backupIds, BackupSystemTable table)
- throws IOException {
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java)")
+ void checkIfValidForMerge(String[] backupIds, BackupSystemTable table) throws IOException {
String backupRoot = null;
final Set allTables = new HashSet<>();
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
index 66694f4384f4..f70bf627d176 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
@@ -17,11 +17,19 @@
*/
package org.apache.hadoop.hbase.backup.impl;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BACKUP_LIST_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG_DESC;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_ENABLE_CONTINUOUS_BACKUP;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_ENABLE_CONTINUOUS_BACKUP_DESC;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_FORCE_DELETE;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_FORCE_DELETE_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP;
@@ -41,13 +49,26 @@
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_WORKERS_DESC;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_YARN_QUEUE_NAME;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_YARN_QUEUE_NAME_DESC;
+import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
+import static org.apache.hadoop.hbase.backup.util.BackupUtils.DATE_FORMAT;
import java.io.IOException;
import java.net.URI;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
@@ -60,14 +81,19 @@
import org.apache.hadoop.hbase.backup.BackupRestoreConstants.BackupCommand;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
+import org.apache.hadoop.hbase.backup.util.BackupFileSystemManager;
import org.apache.hadoop.hbase.backup.util.BackupSet;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
+import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hbase.thirdparty.com.google.common.base.Splitter;
+import org.apache.hbase.thirdparty.com.google.common.base.Strings;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
@@ -339,14 +365,64 @@ public void execute() throws IOException {
boolean ignoreChecksum = cmdline.hasOption(OPTION_IGNORECHECKSUM);
+ BackupType backupType = BackupType.valueOf(args[1].toUpperCase());
+ List tableNameList = null;
+ if (tables != null) {
+ tableNameList = Lists.newArrayList(BackupUtils.parseTableNames(tables));
+ }
+ boolean continuousBackup = cmdline.hasOption(OPTION_ENABLE_CONTINUOUS_BACKUP);
+ if (continuousBackup && !BackupType.FULL.equals(backupType)) {
+ System.out.println("ERROR: Continuous backup can Only be specified for Full Backup");
+ printUsage();
+ throw new IOException(INCORRECT_USAGE);
+ }
+
+ /*
+ * The `continuousBackup` flag is specified only during the first full backup to initiate
+ * continuous WAL replication. After that, it is redundant because the tables are already set
+ * up for continuous backup. If the `continuousBackup` flag is not explicitly enabled, we need
+ * to determine the backup mode based on the current state of the specified tables: - If all
+ * the specified tables are already part of continuous backup, we treat the request as a
+ * continuous backup request and proceed accordingly (since these tables are already
+ * continuously backed up, no additional setup is needed). - If none of the specified tables
+ * are part of continuous backup, we treat the request as a normal full backup without
+ * continuous backup. - If the request includes a mix of tables—some with continuous backup
+ * enabled and others without—we cannot determine a clear backup strategy. In this case, we
+ * throw an error. If all tables are already in continuous backup mode, we explicitly set the
+ * `continuousBackup` flag to `true` so that the request is processed using the continuous
+ * backup approach rather than the normal full backup flow.
+ */
+ if (!continuousBackup && tableNameList != null && !tableNameList.isEmpty()) {
+ try (BackupSystemTable backupSystemTable = new BackupSystemTable(conn)) {
+ Set continuousBackupTableSet =
+ backupSystemTable.getContinuousBackupTableSet().keySet();
+
+ boolean allTablesInContinuousBackup = continuousBackupTableSet.containsAll(tableNameList);
+ boolean noTablesInContinuousBackup =
+ tableNameList.stream().noneMatch(continuousBackupTableSet::contains);
+
+ // Ensure that all tables are either fully in continuous backup or not at all
+ if (!allTablesInContinuousBackup && !noTablesInContinuousBackup) {
+ System.err
+ .println("ERROR: Some tables are already in continuous backup, while others are not. "
+ + "Cannot mix both in a single request.");
+ printUsage();
+ throw new IOException(INCORRECT_USAGE);
+ }
+
+ // If all tables are already in continuous backup, enable the flag
+ if (allTablesInContinuousBackup) {
+ continuousBackup = true;
+ }
+ }
+ }
+
try (BackupAdminImpl admin = new BackupAdminImpl(conn)) {
BackupRequest.Builder builder = new BackupRequest.Builder();
- BackupRequest request = builder.withBackupType(BackupType.valueOf(args[1].toUpperCase()))
- .withTableList(
- tables != null ? Lists.newArrayList(BackupUtils.parseTableNames(tables)) : null)
+ BackupRequest request = builder.withBackupType(backupType).withTableList(tableNameList)
.withTargetRootDir(targetBackupDir).withTotalTasks(workers)
.withBandwidthPerTasks(bandwidth).withNoChecksumVerify(ignoreChecksum)
- .withBackupSetName(setName).build();
+ .withBackupSetName(setName).withContinuousBackupEnabled(continuousBackup).build();
String backupId = admin.backupTables(request);
System.out.println("Backup session " + backupId + " finished. Status: SUCCESS");
} catch (IOException e) {
@@ -400,6 +476,8 @@ protected void printUsage() {
options.addOption(OPTION_YARN_QUEUE_NAME, true, OPTION_YARN_QUEUE_NAME_DESC);
options.addOption(OPTION_DEBUG, false, OPTION_DEBUG_DESC);
options.addOption(OPTION_IGNORECHECKSUM, false, OPTION_IGNORECHECKSUM_DESC);
+ options.addOption(OPTION_ENABLE_CONTINUOUS_BACKUP, false,
+ OPTION_ENABLE_CONTINUOUS_BACKUP_DESC);
HelpFormatter helpFormatter = new HelpFormatter();
helpFormatter.setLeftPadding(2);
@@ -577,15 +655,20 @@ public void execute() throws IOException {
printUsage();
throw new IOException(INCORRECT_USAGE);
}
+
+ boolean isForceDelete = cmdline.hasOption(OPTION_FORCE_DELETE);
super.execute();
if (cmdline.hasOption(OPTION_KEEP)) {
- executeDeleteOlderThan(cmdline);
+ executeDeleteOlderThan(cmdline, isForceDelete);
} else if (cmdline.hasOption(OPTION_LIST)) {
- executeDeleteListOfBackups(cmdline);
+ executeDeleteListOfBackups(cmdline, isForceDelete);
}
+
+ cleanUpUnusedBackupWALs();
}
- private void executeDeleteOlderThan(CommandLine cmdline) throws IOException {
+ private void executeDeleteOlderThan(CommandLine cmdline, boolean isForceDelete)
+ throws IOException {
String value = cmdline.getOptionValue(OPTION_KEEP);
int days = 0;
try {
@@ -607,6 +690,7 @@ public boolean apply(BackupInfo info) {
BackupAdminImpl admin = new BackupAdminImpl(conn)) {
history = sysTable.getBackupHistory(-1, dateFilter);
String[] backupIds = convertToBackupIds(history);
+ validatePITRBackupDeletion(backupIds, isForceDelete);
int deleted = admin.deleteBackups(backupIds);
System.out.println("Deleted " + deleted + " backups. Total older than " + days + " days: "
+ backupIds.length);
@@ -625,10 +709,11 @@ private String[] convertToBackupIds(List history) {
return ids;
}
- private void executeDeleteListOfBackups(CommandLine cmdline) throws IOException {
+ private void executeDeleteListOfBackups(CommandLine cmdline, boolean isForceDelete)
+ throws IOException {
String value = cmdline.getOptionValue(OPTION_LIST);
String[] backupIds = value.split(",");
-
+ validatePITRBackupDeletion(backupIds, isForceDelete);
try (BackupAdminImpl admin = new BackupAdminImpl(conn)) {
int deleted = admin.deleteBackups(backupIds);
System.out.println("Deleted " + deleted + " backups. Total requested: " + backupIds.length);
@@ -640,12 +725,378 @@ private void executeDeleteListOfBackups(CommandLine cmdline) throws IOException
}
+ /**
+ * Validates whether the specified backups can be deleted while preserving Point-In-Time
+ * Recovery (PITR) capabilities. If a backup is the only remaining full backup enabling PITR for
+ * certain tables, deletion is prevented unless forced.
+ * @param backupIds Array of backup IDs to validate.
+ * @param isForceDelete Flag indicating whether deletion should proceed regardless of PITR
+ * constraints.
+ * @throws IOException If a backup is essential for PITR and force deletion is not enabled.
+ */
+ private void validatePITRBackupDeletion(String[] backupIds, boolean isForceDelete)
+ throws IOException {
+ if (!isForceDelete) {
+ for (String backupId : backupIds) {
+ List affectedTables = getTablesDependentOnBackupForPITR(backupId);
+ if (!affectedTables.isEmpty()) {
+ String errMsg = String.format(
+ "Backup %s is the only FULL backup remaining that enables PITR for tables: %s. "
+ + "Use the force option to delete it anyway.",
+ backupId, affectedTables);
+ System.err.println(errMsg);
+ throw new IOException(errMsg);
+ }
+ }
+ }
+ }
+
+ /**
+ * Identifies tables that rely on the specified backup for PITR (Point-In-Time Recovery). A
+ * table is considered dependent on the backup if it does not have any other valid full backups
+ * that can cover the PITR window enabled by the specified backup.
+ * @param backupId The ID of the backup being evaluated for PITR coverage.
+ * @return A list of tables that are dependent on the specified backup for PITR recovery.
+ * @throws IOException If there is an error retrieving the backup metadata or backup system
+ * table.
+ */
+ private List getTablesDependentOnBackupForPITR(String backupId) throws IOException {
+ List dependentTables = new ArrayList<>();
+
+ try (final BackupSystemTable backupSystemTable = new BackupSystemTable(conn)) {
+ // Fetch the target backup's info using the backup ID
+ BackupInfo targetBackup = backupSystemTable.readBackupInfo(backupId);
+ if (targetBackup == null) {
+ throw new IOException("Backup info not found for backupId: " + backupId);
+ }
+
+ // Only full backups are mandatory for PITR
+ if (!BackupType.FULL.equals(targetBackup.getType())) {
+ return List.of();
+ }
+
+ // Retrieve the tables with continuous backup enabled along with their start times
+ Map continuousBackupStartTimes =
+ backupSystemTable.getContinuousBackupTableSet();
+
+ // Calculate the PITR window by fetching configuration and current time
+ long pitrWindowDays = getConf().getLong(CONF_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS,
+ DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS);
+ long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
+ final long maxAllowedPITRTime = currentTime - TimeUnit.DAYS.toMillis(pitrWindowDays);
+
+ // Check each table associated with the target backup
+ for (TableName table : targetBackup.getTableNames()) {
+ // Skip tables without continuous backup enabled
+ if (!continuousBackupStartTimes.containsKey(table)) {
+ continue;
+ }
+
+ // Calculate the PITR window this backup covers for the table
+ Optional> coveredPitrWindow = getCoveredPitrWindowForTable(targetBackup,
+ continuousBackupStartTimes.get(table), maxAllowedPITRTime, currentTime);
+
+ // If this backup does not cover a valid PITR window for the table, skip
+ if (coveredPitrWindow.isEmpty()) {
+ continue;
+ }
+
+ // Check if there is any other valid backup that can cover the PITR window
+ List allBackups = backupSystemTable.getBackupInfos(BackupState.COMPLETE);
+ boolean hasAnotherValidBackup =
+ canAnyOtherBackupCover(allBackups, targetBackup, table, coveredPitrWindow.get(),
+ continuousBackupStartTimes.get(table), maxAllowedPITRTime, currentTime);
+
+ // If no other valid backup exists, add the table to the dependent list
+ if (!hasAnotherValidBackup) {
+ dependentTables.add(table);
+ }
+ }
+ }
+
+ return dependentTables;
+ }
+
+ /**
+ * Calculates the PITR (Point-In-Time Recovery) window that the given backup enables for a
+ * table.
+ * @param backupInfo Metadata of the backup being evaluated.
+ * @param continuousBackupStartTime When continuous backups started for the table.
+ * @param maxAllowedPITRTime The earliest timestamp from which PITR is supported in the
+ * cluster.
+ * @param currentTime Current time.
+ * @return Optional PITR window as a pair (start, end), or empty if backup is not useful for
+ * PITR.
+ */
+ private Optional> getCoveredPitrWindowForTable(BackupInfo backupInfo,
+ long continuousBackupStartTime, long maxAllowedPITRTime, long currentTime) {
+
+ long backupStartTs = backupInfo.getStartTs();
+ long backupEndTs = backupInfo.getCompleteTs();
+ long effectiveStart = Math.max(continuousBackupStartTime, maxAllowedPITRTime);
+
+ if (backupStartTs < continuousBackupStartTime) {
+ return Optional.empty();
+ }
+
+ return Optional.of(Pair.newPair(Math.max(backupEndTs, effectiveStart), currentTime));
+ }
+
+ /**
+ * Checks if any backup (excluding the current backup) can cover the specified PITR window for
+ * the given table. A backup can cover the PITR window if it fully encompasses the target time
+ * range specified.
+ * @param allBackups List of all backups available.
+ * @param currentBackup The current backup that should not be considered for
+ * coverage.
+ * @param table The table for which we need to check backup coverage.
+ * @param targetWindow A pair representing the target PITR window (start and end
+ * times).
+ * @param continuousBackupStartTime When continuous backups started for the table.
+ * @param maxAllowedPITRTime The earliest timestamp from which PITR is supported in the
+ * cluster.
+ * @param currentTime Current time.
+ * @return {@code true} if any backup (excluding the current one) fully covers the target PITR
+ * window; {@code false} otherwise.
+ */
+ private boolean canAnyOtherBackupCover(List allBackups, BackupInfo currentBackup,
+ TableName table, Pair targetWindow, long continuousBackupStartTime,
+ long maxAllowedPITRTime, long currentTime) {
+
+ long targetStart = targetWindow.getFirst();
+ long targetEnd = targetWindow.getSecond();
+
+ // Iterate through all backups (including the current one)
+ for (BackupInfo backup : allBackups) {
+ // Skip if the backup is not full or doesn't contain the table
+ if (!BackupType.FULL.equals(backup.getType())) continue;
+ if (!backup.getTableNames().contains(table)) continue;
+
+ // Skip the current backup itself
+ if (backup.equals(currentBackup)) continue;
+
+ // Get the covered PITR window for this backup
+ Optional> coveredWindow = getCoveredPitrWindowForTable(backup,
+ continuousBackupStartTime, maxAllowedPITRTime, currentTime);
+
+ if (coveredWindow.isPresent()) {
+ Pair covered = coveredWindow.get();
+
+ // The backup must fully cover the target window
+ if (covered.getFirst() <= targetStart && covered.getSecond() >= targetEnd) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+ /**
+ * Cleans up Write-Ahead Logs (WALs) that are no longer required for PITR after a successful
+ * backup deletion. If no full backups are present, all WALs are deleted, tables are removed
+ * from continuous backup metadata, and the associated replication peer is disabled.
+ */
+ private void cleanUpUnusedBackupWALs() throws IOException {
+ Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
+ String backupWalDir = conf.get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
+
+ if (Strings.isNullOrEmpty(backupWalDir)) {
+ System.out.println("No WAL directory specified for continuous backup. Skipping cleanup.");
+ return;
+ }
+
+ try (Admin admin = conn.getAdmin();
+ BackupSystemTable sysTable = new BackupSystemTable(conn)) {
+ // Get list of tables under continuous backup
+ Map continuousBackupTables = sysTable.getContinuousBackupTableSet();
+ if (continuousBackupTables.isEmpty()) {
+ System.out.println("No continuous backups configured. Skipping WAL cleanup.");
+ return;
+ }
+
+ // Find the earliest timestamp after which WALs are still needed
+ long cutoffTimestamp = determineWALCleanupCutoffTime(sysTable);
+ if (cutoffTimestamp == 0) {
+ // No full backup exists. PITR cannot function without a base full backup.
+ // Clean up all WALs, remove tables from backup metadata, and disable the replication
+ // peer.
+ System.out
+ .println("No full backups found. Cleaning up all WALs and disabling replication peer.");
+
+ disableContinuousBackupReplicationPeer(admin);
+ removeAllTablesFromContinuousBackup(sysTable);
+ deleteAllBackupWALFiles(conf, backupWalDir);
+ return;
+ }
+
+ // Update metadata before actual cleanup to avoid inconsistencies
+ updateBackupTableStartTimes(sysTable, cutoffTimestamp);
+
+ // Delete WAL files older than cutoff timestamp
+ deleteOldWALFiles(conf, backupWalDir, cutoffTimestamp);
+
+ }
+ }
+
+ /**
+ * Determines the cutoff time for cleaning WAL files.
+ * @param sysTable Backup system table
+ * @return cutoff timestamp or 0 if not found
+ */
+ long determineWALCleanupCutoffTime(BackupSystemTable sysTable) throws IOException {
+ List backupInfos = sysTable.getBackupInfos(BackupState.COMPLETE);
+ Collections.reverse(backupInfos); // Start from oldest
+
+ for (BackupInfo backupInfo : backupInfos) {
+ if (BackupType.FULL.equals(backupInfo.getType())) {
+ return backupInfo.getStartTs();
+ }
+ }
+ return 0;
+ }
+
+ private void disableContinuousBackupReplicationPeer(Admin admin) throws IOException {
+ for (ReplicationPeerDescription peer : admin.listReplicationPeers()) {
+ if (peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER) && peer.isEnabled()) {
+ admin.disableReplicationPeer(CONTINUOUS_BACKUP_REPLICATION_PEER);
+ System.out.println("Disabled replication peer: " + CONTINUOUS_BACKUP_REPLICATION_PEER);
+ break;
+ }
+ }
+ }
+
+ /**
+ * Updates the start time for continuous backups if older than cutoff timestamp.
+ * @param sysTable Backup system table
+ * @param cutoffTimestamp Timestamp before which WALs are no longer needed
+ */
+ void updateBackupTableStartTimes(BackupSystemTable sysTable, long cutoffTimestamp)
+ throws IOException {
+
+ Map backupTables = sysTable.getContinuousBackupTableSet();
+ Set tablesToUpdate = new HashSet<>();
+
+ for (Map.Entry entry : backupTables.entrySet()) {
+ if (entry.getValue() < cutoffTimestamp) {
+ tablesToUpdate.add(entry.getKey());
+ }
+ }
+
+ if (!tablesToUpdate.isEmpty()) {
+ sysTable.updateContinuousBackupTableSet(tablesToUpdate, cutoffTimestamp);
+ }
+ }
+
+ private void removeAllTablesFromContinuousBackup(BackupSystemTable sysTable)
+ throws IOException {
+ Map allTables = sysTable.getContinuousBackupTableSet();
+ if (!allTables.isEmpty()) {
+ sysTable.removeContinuousBackupTableSet(allTables.keySet());
+ System.out.println("Removed all tables from continuous backup metadata.");
+ }
+ }
+
+ private void deleteAllBackupWALFiles(Configuration conf, String backupWalDir)
+ throws IOException {
+ try {
+ BackupFileSystemManager manager =
+ new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
+ FileSystem fs = manager.getBackupFs();
+ Path walDir = manager.getWalsDir();
+ Path bulkloadDir = manager.getBulkLoadFilesDir();
+
+ // Delete contents under WAL directory
+ if (fs.exists(walDir)) {
+ FileStatus[] walContents = fs.listStatus(walDir);
+ for (FileStatus item : walContents) {
+ fs.delete(item.getPath(), true); // recursive delete of each child
+ }
+ System.out.println("Deleted all contents under WAL directory: " + walDir);
+ }
+
+ // Delete contents under bulk load directory
+ if (fs.exists(bulkloadDir)) {
+ FileStatus[] bulkContents = fs.listStatus(bulkloadDir);
+ for (FileStatus item : bulkContents) {
+ fs.delete(item.getPath(), true); // recursive delete of each child
+ }
+ System.out.println("Deleted all contents under Bulk Load directory: " + bulkloadDir);
+ }
+
+ } catch (IOException e) {
+ System.out.println("WARNING: Failed to delete contents under backup directories: "
+ + backupWalDir + ". Error: " + e.getMessage());
+ throw e;
+ }
+ }
+
+ /**
+ * Cleans up old WAL and bulk-loaded files based on the determined cutoff timestamp.
+ */
+ void deleteOldWALFiles(Configuration conf, String backupWalDir, long cutoffTime)
+ throws IOException {
+ System.out.println("Starting WAL cleanup in backup directory: " + backupWalDir
+ + " with cutoff time: " + cutoffTime);
+
+ BackupFileSystemManager manager =
+ new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
+ FileSystem fs = manager.getBackupFs();
+ Path walDir = manager.getWalsDir();
+ Path bulkloadDir = manager.getBulkLoadFilesDir();
+
+ SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ System.out.println("Listing directories under: " + walDir);
+
+ FileStatus[] directories = fs.listStatus(walDir);
+
+ for (FileStatus dirStatus : directories) {
+ if (!dirStatus.isDirectory()) {
+ continue; // Skip files, we only want directories
+ }
+
+ Path dirPath = dirStatus.getPath();
+ String dirName = dirPath.getName();
+
+ try {
+ long dayStart = parseDayDirectory(dirName, dateFormat);
+ System.out
+ .println("Checking WAL directory: " + dirName + " (Start Time: " + dayStart + ")");
+
+ // If WAL files of that day are older than cutoff time, delete them
+ if (dayStart + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
+ System.out.println("Deleting outdated WAL directory: " + dirPath);
+ fs.delete(dirPath, true);
+ Path bulkloadPath = new Path(bulkloadDir, dirName);
+ System.out.println("Deleting corresponding bulk-load directory: " + bulkloadPath);
+ fs.delete(bulkloadPath, true);
+ }
+ } catch (ParseException e) {
+ System.out.println("WARNING: Failed to parse directory name '" + dirName
+ + "'. Skipping. Error: " + e.getMessage());
+ } catch (IOException e) {
+ System.err.println("WARNING: Failed to delete directory '" + dirPath
+ + "'. Skipping. Error: " + e.getMessage());
+ }
+ }
+
+ System.out.println("Completed WAL cleanup for backup directory: " + backupWalDir);
+ }
+
+ private long parseDayDirectory(String dayDir, SimpleDateFormat dateFormat)
+ throws ParseException {
+ return dateFormat.parse(dayDir).getTime();
+ }
+
@Override
protected void printUsage() {
System.out.println(DELETE_CMD_USAGE);
Options options = new Options();
options.addOption(OPTION_KEEP, true, OPTION_KEEP_DESC);
options.addOption(OPTION_LIST, true, OPTION_BACKUP_LIST_DESC);
+ options.addOption(OPTION_FORCE_DELETE, false, OPTION_FORCE_DELETE_DESC);
HelpFormatter helpFormatter = new HelpFormatter();
helpFormatter.setLeftPadding(2);
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupImageAdapter.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupImageAdapter.java
new file mode 100644
index 000000000000..8b785a0f0504
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupImageAdapter.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.impl;
+
+import java.util.List;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Adapter that wraps a {@link BackupImage} to expose it as {@link PitrBackupMetadata}.
+ */
+@InterfaceAudience.Private
+public class BackupImageAdapter implements PitrBackupMetadata {
+ private final BackupImage image;
+
+ public BackupImageAdapter(BackupImage image) {
+ this.image = image;
+ }
+
+ @Override
+ public List getTableNames() {
+ return image.getTableNames();
+ }
+
+ @Override
+ public long getStartTs() {
+ return image.getStartTs();
+ }
+
+ @Override
+ public long getCompleteTs() {
+ return image.getCompleteTs();
+ }
+
+ @Override
+ public String getBackupId() {
+ return image.getBackupId();
+ }
+
+ @Override
+ public String getRootDir() {
+ return image.getRootDir();
+ }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupInfoAdapter.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupInfoAdapter.java
new file mode 100644
index 000000000000..967fae551cb5
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupInfoAdapter.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.impl;
+
+import java.util.List;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.BackupInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Adapter that wraps a {@link BackupInfo} to expose it as {@link PitrBackupMetadata}.
+ */
+@InterfaceAudience.Private
+public class BackupInfoAdapter implements PitrBackupMetadata {
+ private final BackupInfo info;
+
+ public BackupInfoAdapter(BackupInfo info) {
+ this.info = info;
+ }
+
+ @Override
+ public List getTableNames() {
+ return info.getTableNames();
+ }
+
+ @Override
+ public long getStartTs() {
+ return info.getStartTs();
+ }
+
+ @Override
+ public long getCompleteTs() {
+ return info.getCompleteTs();
+ }
+
+ @Override
+ public String getBackupId() {
+ return info.getBackupId();
+ }
+
+ @Override
+ public String getRootDir() {
+ return info.getBackupRootDir();
+ }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
index 810af8f032ce..c2ed4f7fa1fc 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
@@ -199,8 +199,8 @@ public void close() {
* @throws BackupException exception
*/
public BackupInfo createBackupInfo(String backupId, BackupType type, List tableList,
- String targetRootDir, int workers, long bandwidth, boolean noChecksumVerify)
- throws BackupException {
+ String targetRootDir, int workers, long bandwidth, boolean noChecksumVerify,
+ boolean continuousBackupEnabled) throws BackupException {
if (targetRootDir == null) {
throw new BackupException("Wrong backup request parameter: target backup root directory");
}
@@ -238,6 +238,7 @@ public BackupInfo createBackupInfo(String backupId, BackupType type, List readRegionServerLastLogRollResult() throws IOExcept
return systemTable.readRegionServerLastLogRollResult(backupInfo.getBackupRootDir());
}
+ public List readBulkloadRows(List tableList, long endTimestamp)
+ throws IOException {
+ return systemTable.readBulkloadRows(tableList, endTimestamp);
+ }
+
public List readBulkloadRows(List tableList) throws IOException {
return systemTable.readBulkloadRows(tableList);
}
@@ -427,4 +433,17 @@ public void addIncrementalBackupTableSet(Set tables) throws IOExcepti
public Connection getConnection() {
return conn;
}
+
+ /**
+ * Adds a set of tables to the global continuous backup set. Only tables that do not already have
+ * continuous backup enabled will be updated.
+ * @param tables set of tables to add to continuous backup
+ * @param startTimestamp timestamp indicating when continuous backup started for newly added
+ * tables
+ * @throws IOException if an error occurs while updating the backup system table
+ */
+ public void addContinuousBackupTableSet(Set tables, long startTimestamp)
+ throws IOException {
+ systemTable.addContinuousBackupTableSet(tables, startTimestamp);
+ }
}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
index f2ddcf5e7573..2f9c3171346a 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
@@ -169,6 +169,13 @@ public String toString() {
private final static byte[] ACTIVE_SESSION_NO = Bytes.toBytes("no");
private final static String INCR_BACKUP_SET = "incrbackupset:";
+ private final static String CONTINUOUS_BACKUP_SET = "continuousbackupset";
+ /**
+ * Row key identifier for storing the last replicated WAL timestamp in the backup system table for
+ * continuous backup.
+ */
+ private static final String CONTINUOUS_BACKUP_REPLICATION_TIMESTAMP_ROW =
+ "continuous_backup_last_replicated";
private final static String TABLE_RS_LOG_MAP_PREFIX = "trslm:";
private final static String RS_LOG_TS_PREFIX = "rslogts:";
@@ -373,26 +380,37 @@ public void deleteBulkLoadedRows(List rows) throws IOException {
}
/**
- * Reads all registered bulk loads.
+ * Reads the rows from backup table recording bulk loaded hfiles
*/
public List<BulkLoad> readBulkloadRows() throws IOException {
Scan scan = BackupSystemTable.createScanForOrigBulkLoadedFiles(null);
- return processBulkLoadRowScan(scan);
+ return processBulkLoadRowScan(scan, Long.MAX_VALUE);
}
/**
- * Reads the registered bulk loads for the given tables.
+ * Reads the rows from backup table recording bulk loaded hfiles
+ * @param tableList list of table names
*/
public List<BulkLoad> readBulkloadRows(Collection<TableName> tableList) throws IOException {
+ return readBulkloadRows(tableList, Long.MAX_VALUE);
+ }
+
+ /**
+ * Reads the rows from backup table recording bulk loaded hfiles
+ * @param tableList list of table names
+ * @param endTimestamp upper bound timestamp for bulkload entries retrieval
+ */
+ public List<BulkLoad> readBulkloadRows(Collection<TableName> tableList, long endTimestamp)
+ throws IOException {
List<BulkLoad> result = new ArrayList<>();
for (TableName table : tableList) {
Scan scan = BackupSystemTable.createScanForOrigBulkLoadedFiles(table);
- result.addAll(processBulkLoadRowScan(scan));
+ result.addAll(processBulkLoadRowScan(scan, endTimestamp));
}
return result;
}
- private List<BulkLoad> processBulkLoadRowScan(Scan scan) throws IOException {
+ private List<BulkLoad> processBulkLoadRowScan(Scan scan, long endTimestamp) throws IOException {
List<BulkLoad> result = new ArrayList<>();
try (Table bulkLoadTable = connection.getTable(bulkLoadTableName);
ResultScanner scanner = bulkLoadTable.getScanner(scan)) {
@@ -404,8 +422,10 @@ private List processBulkLoadRowScan(Scan scan) throws IOException {
String path = null;
String region = null;
byte[] row = null;
+ long timestamp = 0L;
for (Cell cell : res.listCells()) {
row = CellUtil.cloneRow(cell);
+ timestamp = cell.getTimestamp();
String rowStr = Bytes.toString(row);
region = BackupSystemTable.getRegionNameFromOrigBulkLoadRow(rowStr);
if (
@@ -425,8 +445,11 @@ private List processBulkLoadRowScan(Scan scan) throws IOException {
path = Bytes.toString(CellUtil.cloneValue(cell));
}
}
- result.add(new BulkLoad(table, region, fam, path, row));
- LOG.debug("Found bulk load entry for table {}, family {}: {}", table, fam, path);
+ LOG.debug("Found orig path {} for family {} of table {} and region {} with timestamp {}",
+ path, fam, table, region, timestamp);
+ if (timestamp <= endTimestamp) {
+ result.add(new BulkLoad(table, region, fam, path, row, timestamp));
+ }
}
}
return result;
@@ -892,6 +915,37 @@ public Set getIncrementalBackupTableSet(String backupRoot) throws IOE
}
}
+ /**
+ * Retrieves the current set of tables covered by continuous backup along with the timestamp
+ * indicating when continuous backup started for each table.
+ * @return a map where the key is the table name and the value is the timestamp representing the
+ * start time of continuous backup for that table.
+ * @throws IOException if an I/O error occurs while accessing the backup system table.
+ */
+ public Map<TableName, Long> getContinuousBackupTableSet() throws IOException {
+ LOG.trace("Retrieving continuous backup table set from the backup system table.");
+ Map<TableName, Long> tableMap = new TreeMap<>();
+
+ try (Table systemTable = connection.getTable(tableName)) {
+ Get getOperation = createGetForContinuousBackupTableSet();
+ Result result = systemTable.get(getOperation);
+
+ if (result.isEmpty()) {
+ return tableMap;
+ }
+
+ // Extract table names and timestamps from the result cells
+ List<Cell> cells = result.listCells();
+ for (Cell cell : cells) {
+ TableName tableName = TableName.valueOf(CellUtil.cloneQualifier(cell));
+ long timestamp = Bytes.toLong(CellUtil.cloneValue(cell));
+ tableMap.put(tableName, timestamp);
+ }
+ }
+
+ return tableMap;
+ }
+
/**
* Add tables to global incremental backup set
* @param tables set of tables
@@ -913,6 +967,170 @@ public void addIncrementalBackupTableSet(Set tables, String backupRoo
}
}
+ /**
+ * Add tables to the global continuous backup set. Only updates tables that are not already in the
+ * continuous backup set.
+ * @param tables set of tables to add
+ * @param startTimestamp timestamp indicating when continuous backup started
+ * @throws IOException if an error occurs while updating the backup system table
+ */
+ public void addContinuousBackupTableSet(Set<TableName> tables, long startTimestamp)
+ throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Add continuous backup table set to backup system table. tables ["
+ + StringUtils.join(tables, " ") + "]");
+ }
+ if (LOG.isDebugEnabled()) {
+ tables.forEach(table -> LOG.debug(Objects.toString(table)));
+ }
+
+ // Get existing continuous backup tables
+ Map<TableName, Long> existingTables = getContinuousBackupTableSet();
+
+ try (Table table = connection.getTable(tableName)) {
+ Put put = createPutForContinuousBackupTableSet(tables, existingTables, startTimestamp);
+ if (!put.isEmpty()) {
+ table.put(put);
+ }
+ }
+ }
+
+ /**
+ * Updates the system table with the new start timestamps for continuous backup tables.
+ * @param tablesToUpdate The set of tables that need their start timestamps updated.
+ * @param newStartTimestamp The new start timestamp to be set.
+ */
+ public void updateContinuousBackupTableSet(Set<TableName> tablesToUpdate, long newStartTimestamp)
+ throws IOException {
+ if (tablesToUpdate == null || tablesToUpdate.isEmpty()) {
+ LOG.warn("No tables provided for updating start timestamps.");
+ return;
+ }
+
+ try (Table table = connection.getTable(tableName)) {
+ Put put = new Put(rowkey(CONTINUOUS_BACKUP_SET));
+
+ for (TableName tableName : tablesToUpdate) {
+ put.addColumn(BackupSystemTable.META_FAMILY, Bytes.toBytes(tableName.getNameAsString()),
+ Bytes.toBytes(newStartTimestamp));
+ }
+
+ table.put(put);
+ LOG.info("Successfully updated start timestamps for {} tables in the backup system table.",
+ tablesToUpdate.size());
+ }
+ }
+
+ /**
+ * Removes tables from the global continuous backup set. Only removes entries that currently exist
+ * in the backup system table.
+ * @param tables set of tables to remove
+ * @throws IOException if an error occurs while updating the backup system table
+ */
+ public void removeContinuousBackupTableSet(Set<TableName> tables) throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Remove continuous backup table set from backup system table. tables ["
+ + StringUtils.join(tables, " ") + "]");
+ }
+ if (LOG.isDebugEnabled()) {
+ tables.forEach(table -> LOG.debug("Removing: " + table));
+ }
+
+ Map<TableName, Long> existingTables = getContinuousBackupTableSet();
+ Set<TableName> toRemove =
+ tables.stream().filter(existingTables::containsKey).collect(Collectors.toSet());
+
+ if (toRemove.isEmpty()) {
+ LOG.debug("No matching tables found to remove from continuous backup set.");
+ return;
+ }
+
+ try (Table table = connection.getTable(tableName)) {
+ Delete delete = createDeleteForContinuousBackupTableSet(toRemove);
+ table.delete(delete);
+ }
+ }
+
+ /**
+ * Updates the latest replicated WAL timestamp for a region server in the backup system table.
+ * This is used to track the replication checkpoint for continuous backup and PITR (Point-in-Time
+ * Restore).
+ * @param serverName the server for which the latest WAL timestamp is being recorded
+ * @param timestamp the timestamp (in milliseconds) of the last WAL entry replicated
+ * @throws IOException if an error occurs while writing to the backup system table
+ */
+ public void updateBackupCheckpointTimestamp(ServerName serverName, long timestamp)
+ throws IOException {
+
+ HBaseProtos.ServerName.Builder serverProto =
+ HBaseProtos.ServerName.newBuilder().setHostName(serverName.getHostname())
+ .setPort(serverName.getPort()).setStartCode(serverName.getStartCode());
+
+ try (Table table = connection.getTable(tableName)) {
+ Put put = createPutForBackupCheckpoint(serverProto.build().toByteArray(), timestamp);
+ if (!put.isEmpty()) {
+ table.put(put);
+ }
+ }
+ }
+
+ /**
+ * Retrieves the latest replicated WAL timestamps for all region servers from the backup system
+ * table. This is used to track the replication checkpoint state for continuous backup and PITR
+ * (Point-in-Time Restore).
+ * @return a map where the key is {@link ServerName} and the value is the latest replicated WAL
+ * timestamp in milliseconds
+ * @throws IOException if an error occurs while reading from the backup system table
+ */
+ public Map<ServerName, Long> getBackupCheckpointTimestamps() throws IOException {
+ LOG.trace("Fetching latest backup checkpoint timestamps for all region servers.");
+
+ Map<ServerName, Long> checkpointMap = new HashMap<>();
+
+ byte[] rowKey = rowkey(CONTINUOUS_BACKUP_REPLICATION_TIMESTAMP_ROW);
+ Get get = new Get(rowKey);
+ get.addFamily(BackupSystemTable.META_FAMILY);
+
+ try (Table table = connection.getTable(tableName)) {
+ Result result = table.get(get);
+
+ if (result.isEmpty()) {
+ LOG.debug("No checkpoint timestamps found in backup system table.");
+ return checkpointMap;
+ }
+
+ List<Cell> cells = result.listCells();
+ for (Cell cell : cells) {
+ try {
+ HBaseProtos.ServerName protoServer =
+ HBaseProtos.ServerName.parseFrom(CellUtil.cloneQualifier(cell));
+ ServerName serverName = ServerName.valueOf(protoServer.getHostName(),
+ protoServer.getPort(), protoServer.getStartCode());
+
+ long timestamp = Bytes.toLong(CellUtil.cloneValue(cell));
+ checkpointMap.put(serverName, timestamp);
+ } catch (IllegalArgumentException e) {
+ LOG.warn("Failed to parse server name or timestamp from cell: {}", cell, e);
+ }
+ }
+ }
+
+ return checkpointMap;
+ }
+
+ /**
+ * Constructs a {@link Put} operation to update the last replicated WAL timestamp for a given
+ * server in the backup system table.
+ * @param serverNameBytes the serialized server name as bytes
+ * @param timestamp the WAL entry timestamp to store
+ * @return a {@link Put} object ready to be written to the system table
+ */
+ private Put createPutForBackupCheckpoint(byte[] serverNameBytes, long timestamp) {
+ Put put = new Put(rowkey(CONTINUOUS_BACKUP_REPLICATION_TIMESTAMP_ROW));
+ put.addColumn(BackupSystemTable.META_FAMILY, serverNameBytes, Bytes.toBytes(timestamp));
+ return put;
+ }
+
/**
* Deletes incremental backup set for a backup destination
* @param backupRoot backup root
@@ -1241,6 +1459,18 @@ private Get createGetForIncrBackupTableSet(String backupRoot) throws IOException
return get;
}
+ /**
+ * Creates a Get operation to retrieve the continuous backup table set from the backup system
+ * table.
+ * @return a Get operation for retrieving the table set
+ */
+ private Get createGetForContinuousBackupTableSet() throws IOException {
+ Get get = new Get(rowkey(CONTINUOUS_BACKUP_SET));
+ get.addFamily(BackupSystemTable.META_FAMILY);
+ get.readVersions(1);
+ return get;
+ }
+
/**
* Creates Put to store incremental backup table set
* @param tables tables
@@ -1255,6 +1485,28 @@ private Put createPutForIncrBackupTableSet(Set tables, String backupR
return put;
}
+ /**
+ * Creates a Put operation to store the continuous backup table set. Only includes tables that are
+ * not already in the set.
+ * @param tables tables to add
+ * @param existingTables tables that already have continuous backup enabled
+ * @param startTimestamp timestamp indicating when continuous backup started
+ * @return put operation
+ */
+ private Put createPutForContinuousBackupTableSet(Set<TableName> tables,
+ Map<TableName, Long> existingTables, long startTimestamp) {
+ Put put = new Put(rowkey(CONTINUOUS_BACKUP_SET));
+
+ for (TableName table : tables) {
+ if (!existingTables.containsKey(table)) {
+ put.addColumn(BackupSystemTable.META_FAMILY, Bytes.toBytes(table.getNameAsString()),
+ Bytes.toBytes(startTimestamp));
+ }
+ }
+
+ return put;
+ }
+
/**
* Creates Delete for incremental backup table set
* @param backupRoot backup root
@@ -1266,6 +1518,19 @@ private Delete createDeleteForIncrBackupTableSet(String backupRoot) {
return delete;
}
+ /**
+ * Creates Delete for continuous backup table set
+ * @param tables tables to remove
+ * @return delete operation
+ */
+ private Delete createDeleteForContinuousBackupTableSet(Set<TableName> tables) {
+ Delete delete = new Delete(rowkey(CONTINUOUS_BACKUP_SET));
+ for (TableName tableName : tables) {
+ delete.addColumn(META_FAMILY, Bytes.toBytes(tableName.getNameAsString()));
+ }
+ return delete;
+ }
+
/**
* Creates Scan operation to load backup history
* @return scan operation
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BulkLoad.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BulkLoad.java
index 0f1e79c976bb..1befe7c469cc 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BulkLoad.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BulkLoad.java
@@ -34,14 +34,16 @@ public class BulkLoad {
private final String columnFamily;
private final String hfilePath;
private final byte[] rowKey;
+ private final long timestamp;
public BulkLoad(TableName tableName, String region, String columnFamily, String hfilePath,
- byte[] rowKey) {
+ byte[] rowKey, long timestamp) {
this.tableName = tableName;
this.region = region;
this.columnFamily = columnFamily;
this.hfilePath = hfilePath;
this.rowKey = rowKey;
+ this.timestamp = timestamp;
}
public TableName getTableName() {
@@ -64,6 +66,10 @@ public byte[] getRowKey() {
return rowKey;
}
+ public long getTimestamp() {
+ return timestamp;
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
@@ -75,19 +81,20 @@ public boolean equals(Object o) {
BulkLoad that = (BulkLoad) o;
return new EqualsBuilder().append(tableName, that.tableName).append(region, that.region)
.append(columnFamily, that.columnFamily).append(hfilePath, that.hfilePath)
- .append(rowKey, that.rowKey).isEquals();
+ .append(rowKey, that.rowKey).append(timestamp, that.timestamp).isEquals();
}
@Override
public int hashCode() {
return new HashCodeBuilder().append(tableName).append(region).append(columnFamily)
- .append(hfilePath).append(rowKey).toHashCode();
+ .append(hfilePath).append(rowKey).append(timestamp).toHashCode();
}
@Override
public String toString() {
return new ToStringBuilder(this, ToStringStyle.NO_CLASS_NAME_STYLE)
.append("tableName", tableName).append("region", region).append("columnFamily", columnFamily)
- .append("hfilePath", hfilePath).append("rowKey", rowKey).toString();
+ .append("hfilePath", hfilePath).append("rowKey", rowKey).append("timestamp", timestamp)
+ .toString();
}
}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/CustomBackupLocationPitrRestoreHandler.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/CustomBackupLocationPitrRestoreHandler.java
new file mode 100644
index 000000000000..1657b68d0234
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/CustomBackupLocationPitrRestoreHandler.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.impl;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.backup.HBackupFileSystem;
+import org.apache.hadoop.hbase.backup.PointInTimeRestoreRequest;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * PITR restore handler that retrieves backup metadata from a custom backup root directory.
+ *
+ * This implementation is used when the PITR request specifies a custom backup location via
+ * {@code backupRootDir}.
+ */
+@InterfaceAudience.Private
+public class CustomBackupLocationPitrRestoreHandler extends AbstractPitrRestoreHandler {
+
+ public CustomBackupLocationPitrRestoreHandler(Connection conn,
+ PointInTimeRestoreRequest request) {
+ super(conn, request);
+ }
+
+ /**
+ * Retrieves completed backup entries from the given custom backup root directory and converts
+ * them into {@link PitrBackupMetadata} using {@link BackupImageAdapter}.
+ * @param request the PITR request
+ * @return list of completed backup metadata entries from the custom location
+ * @throws IOException if reading from the custom backup directory fails
+ */
+ @Override
+ protected List<PitrBackupMetadata> getBackupMetadata(PointInTimeRestoreRequest request)
+ throws IOException {
+ return HBackupFileSystem
+ .getAllBackupImages(conn.getConfiguration(), new Path(request.getBackupRootDir())).stream()
+ .map(BackupImageAdapter::new).collect(Collectors.toList());
+ }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/DefaultPitrRestoreHandler.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/DefaultPitrRestoreHandler.java
new file mode 100644
index 000000000000..c6844ba96bd3
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/DefaultPitrRestoreHandler.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.impl;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.hadoop.hbase.backup.BackupInfo;
+import org.apache.hadoop.hbase.backup.PointInTimeRestoreRequest;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Default PITR restore handler that retrieves backup metadata from the system table.
+ *
+ * This implementation is used when no custom backup root directory is specified in the request.
+ */
+@InterfaceAudience.Private
+public class DefaultPitrRestoreHandler extends AbstractPitrRestoreHandler {
+
+ public DefaultPitrRestoreHandler(Connection conn, PointInTimeRestoreRequest request) {
+ super(conn, request);
+ }
+
+ /**
+ * Retrieves completed backup entries from the BackupSystemTable and converts them into
+ * {@link PitrBackupMetadata} using {@link BackupInfoAdapter}.
+ * @param request the PITR request
+ * @return list of completed backup metadata entries
+ * @throws IOException if reading from the backup system table fails
+ */
+ @Override
+ protected List<PitrBackupMetadata> getBackupMetadata(PointInTimeRestoreRequest request)
+ throws IOException {
+ try (BackupSystemTable table = new BackupSystemTable(conn)) {
+ return table.getBackupInfos(BackupInfo.BackupState.COMPLETE).stream()
+ .map(BackupInfoAdapter::new).collect(Collectors.toList());
+ }
+ }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
index 2293fd4f8149..63e26fdc245c 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
@@ -17,16 +17,26 @@
*/
package org.apache.hadoop.hbase.backup.impl;
+import static org.apache.hadoop.hbase.HConstants.REPLICATION_BULKLOAD_ENABLE_KEY;
+import static org.apache.hadoop.hbase.HConstants.REPLICATION_SCOPE_GLOBAL;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.BACKUP_ATTEMPTS_PAUSE_MS_KEY;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.BACKUP_MAX_ATTEMPTS_KEY;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_BACKUP_ATTEMPTS_PAUSE_MS;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_BACKUP_MAX_ATTEMPTS;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_CONTINUOUS_BACKUP_REPLICATION_ENDPOINT;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.JOB_NAME_CONF_KEY;
+import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.CONF_BACKUP_ROOT_DIR;
+import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.CONF_PEER_UUID;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.UUID;
+import java.util.stream.Collectors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupCopyJob;
import org.apache.hadoop.hbase.backup.BackupInfo;
@@ -37,7 +47,13 @@
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.replication.ReplicationException;
+import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
@@ -61,9 +77,9 @@ public FullTableBackupClient(final Connection conn, final String backupId, Backu
/**
* Do snapshot copy.
* @param backupInfo backup info
- * @throws Exception exception
+ * @throws IOException exception
*/
- protected void snapshotCopy(BackupInfo backupInfo) throws Exception {
+ protected void snapshotCopy(BackupInfo backupInfo) throws IOException {
LOG.info("Snapshot copy is starting.");
// set overall backup phase: snapshot_copy
@@ -131,72 +147,22 @@ protected void snapshotCopy(BackupInfo backupInfo) throws Exception {
@Override
public void execute() throws IOException {
try (Admin admin = conn.getAdmin()) {
- // Begin BACKUP
beginBackup(backupManager, backupInfo);
- String savedStartCode;
- boolean firstBackup;
- // do snapshot for full table backup
-
- savedStartCode = backupManager.readBackupStartCode();
- firstBackup = savedStartCode == null || Long.parseLong(savedStartCode) == 0L;
- if (firstBackup) {
- // This is our first backup. Let's put some marker to system table so that we can hold the
- // logs while we do the backup.
- backupManager.writeBackupStartCode(0L);
- }
- // We roll log here before we do the snapshot. It is possible there is duplicate data
- // in the log that is already in the snapshot. But if we do it after the snapshot, we
- // could have data loss.
- // A better approach is to do the roll log on each RS in the same global procedure as
- // the snapshot.
- LOG.info("Execute roll log procedure for full backup ...");
// Gather the bulk loads being tracked by the system, which can be deleted (since their data
// will be part of the snapshot being taken). We gather this list before taking the actual
// snapshots for the same reason as the log rolls.
List<BulkLoad> bulkLoadsToDelete = backupManager.readBulkloadRows(tableList);
- BackupUtils.logRoll(conn, backupInfo.getBackupRootDir(), conf);
-
- newTimestamps = backupManager.readRegionServerLastLogRollResult();
-
- // SNAPSHOT_TABLES:
- backupInfo.setPhase(BackupPhase.SNAPSHOT);
- for (TableName tableName : tableList) {
- String snapshotName = "snapshot_" + Long.toString(EnvironmentEdgeManager.currentTime())
- + "_" + tableName.getNamespaceAsString() + "_" + tableName.getQualifierAsString();
-
- snapshotTable(admin, tableName, snapshotName);
- backupInfo.setSnapshotName(tableName, snapshotName);
+ if (backupInfo.isContinuousBackupEnabled()) {
+ handleContinuousBackup(admin);
+ } else {
+ handleNonContinuousBackup(admin);
}
- // SNAPSHOT_COPY:
- // do snapshot copy
- LOG.debug("snapshot copy for " + backupId);
- snapshotCopy(backupInfo);
- // Updates incremental backup table set
- backupManager.addIncrementalBackupTableSet(backupInfo.getTables());
-
- // BACKUP_COMPLETE:
- // set overall backup status: complete. Here we make sure to complete the backup.
- // After this checkpoint, even if entering cancel process, will let the backup finished
- backupInfo.setState(BackupState.COMPLETE);
- // The table list in backupInfo is good for both full backup and incremental backup.
- // For incremental backup, it contains the incremental backup table set.
- backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
-
- Map<TableName, Map<String, Long>> newTableSetTimestampMap =
- backupManager.readLogTimestampMap();
-
- backupInfo.setTableSetTimestampMap(newTableSetTimestampMap);
- Long newStartCode =
- BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
- backupManager.writeBackupStartCode(newStartCode);
-
backupManager
.deleteBulkLoadedRows(bulkLoadsToDelete.stream().map(BulkLoad::getRowKey).toList());
- // backup complete
completeBackup(conn, backupInfo, BackupType.FULL, conf);
} catch (Exception e) {
failBackup(conn, backupInfo, backupManager, e, "Unexpected BackupException : ",
@@ -205,6 +171,187 @@ public void execute() throws IOException {
}
}
+ private void handleContinuousBackup(Admin admin) throws IOException {
+ backupInfo.setPhase(BackupInfo.BackupPhase.SETUP_WAL_REPLICATION);
+ long startTimestamp = startContinuousWALBackup(admin);
+ backupManager.addContinuousBackupTableSet(backupInfo.getTables(), startTimestamp);
+
+ // Updating the start time of this backup to reflect the actual beginning of the full backup.
+ // So far, we have only set up continuous WAL replication, but the full backup has not yet
+ // started.
+ // Setting the correct start time is crucial for Point-In-Time Recovery (PITR).
+ // When selecting a backup for PITR, we must ensure that the backup started **on or after** the
+ // starting time of the WALs. If WAL streaming began later, we couldn't guarantee that WALs
+ // exist for the entire period between the backup's start time and the desired PITR timestamp.
+ backupInfo.setStartTs(startTimestamp);
+
+ performBackupSnapshots(admin);
+
+ // set overall backup status: complete. Here we make sure to complete the backup.
+ // After this checkpoint, even if entering cancel process, will let the backup finished
+ backupInfo.setState(BackupState.COMPLETE);
+
+ if (!conf.getBoolean(REPLICATION_BULKLOAD_ENABLE_KEY, false)) {
+ System.out.println("WARNING: Bulkload replication is not enabled. "
+ + "Since continuous backup is using HBase replication, bulk loaded files won't be backed up as part of continuous backup. "
+ + "To ensure bulk-loaded files are backed up, enable bulkload replication "
+ + "(hbase.replication.bulkload.enabled=true) and configure a unique cluster ID using "
+ + "hbase.replication.cluster.id. This cluster ID is required by the replication framework "
+ + "to uniquely identify clusters, even if continuous backup itself does not directly rely on it.");
+ }
+ }
+
+ private void handleNonContinuousBackup(Admin admin) throws IOException {
+ initializeBackupStartCode(backupManager);
+ performLogRoll();
+ performBackupSnapshots(admin);
+ backupManager.addIncrementalBackupTableSet(backupInfo.getTables());
+
+ // set overall backup status: complete. Here we make sure to complete the backup.
+ // After this checkpoint, even if entering cancel process, will let the backup finished
+ backupInfo.setState(BackupState.COMPLETE);
+
+ updateBackupMetadata();
+ }
+
+ private void initializeBackupStartCode(BackupManager backupManager) throws IOException {
+ String savedStartCode;
+ boolean firstBackup;
+ // do snapshot for full table backup
+ savedStartCode = backupManager.readBackupStartCode();
+ firstBackup = savedStartCode == null || Long.parseLong(savedStartCode) == 0L;
+ if (firstBackup) {
+ // This is our first backup. Let's put some marker to system table so that we can hold the
+ // logs while we do the backup.
+ backupManager.writeBackupStartCode(0L);
+ }
+ }
+
+ private void performLogRoll() throws IOException {
+ // We roll log here before we do the snapshot. It is possible there is duplicate data
+ // in the log that is already in the snapshot. But if we do it after the snapshot, we
+ // could have data loss.
+ // A better approach is to do the roll log on each RS in the same global procedure as
+ // the snapshot.
+ LOG.info("Execute roll log procedure for full backup ...");
+ BackupUtils.logRoll(conn, backupInfo.getBackupRootDir(), conf);
+ newTimestamps = backupManager.readRegionServerLastLogRollResult();
+ }
+
+ private void performBackupSnapshots(Admin admin) throws IOException {
+ backupInfo.setPhase(BackupPhase.SNAPSHOT);
+ performSnapshots(admin);
+ LOG.debug("Performing snapshot copy for backup ID: {}", backupInfo.getBackupId());
+ snapshotCopy(backupInfo);
+ }
+
+ private void performSnapshots(Admin admin) throws IOException {
+ backupInfo.setPhase(BackupPhase.SNAPSHOT);
+
+ for (TableName tableName : tableList) {
+ String snapshotName = String.format("snapshot_%d_%s_%s", EnvironmentEdgeManager.currentTime(),
+ tableName.getNamespaceAsString(), tableName.getQualifierAsString());
+ snapshotTable(admin, tableName, snapshotName);
+ backupInfo.setSnapshotName(tableName, snapshotName);
+ }
+ }
+
+ private void updateBackupMetadata() throws IOException {
+ // The table list in backupInfo is good for both full backup and incremental backup.
+ // For incremental backup, it contains the incremental backup table set.
+ backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
+ Map<TableName, Map<String, Long>> timestampMap = backupManager.readLogTimestampMap();
+ backupInfo.setTableSetTimestampMap(timestampMap);
+ Long newStartCode = BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(timestampMap));
+ backupManager.writeBackupStartCode(newStartCode);
+ }
+
+ private long startContinuousWALBackup(Admin admin) throws IOException {
+ enableTableReplication(admin);
+ if (continuousBackupReplicationPeerExists(admin)) {
+ updateContinuousBackupReplicationPeer(admin);
+ } else {
+ addContinuousBackupReplicationPeer(admin);
+ }
+ LOG.info("Continuous WAL Backup setup completed.");
+ return EnvironmentEdgeManager.getDelegate().currentTime();
+ }
+
+ private void enableTableReplication(Admin admin) throws IOException {
+ for (TableName table : tableList) {
+ TableDescriptor tableDescriptor = admin.getDescriptor(table);
+ TableDescriptorBuilder tableDescriptorBuilder =
+ TableDescriptorBuilder.newBuilder(tableDescriptor);
+
+ for (ColumnFamilyDescriptor cfDescriptor : tableDescriptor.getColumnFamilies()) {
+ if (cfDescriptor.getScope() != REPLICATION_SCOPE_GLOBAL) {
+ ColumnFamilyDescriptor newCfDescriptor = ColumnFamilyDescriptorBuilder
+ .newBuilder(cfDescriptor).setScope(REPLICATION_SCOPE_GLOBAL).build();
+
+ tableDescriptorBuilder.modifyColumnFamily(newCfDescriptor);
+ }
+ }
+
+ admin.modifyTable(tableDescriptorBuilder.build());
+ LOG.info("Enabled Global replication scope for table: {}", table);
+ }
+ }
+
+ private void updateContinuousBackupReplicationPeer(Admin admin) throws IOException {
+ Map<TableName, List<String>> tableMap = tableList.stream()
+ .collect(Collectors.toMap(tableName -> tableName, tableName -> new ArrayList<>()));
+
+ try {
+ if (!admin.isReplicationPeerEnabled(CONTINUOUS_BACKUP_REPLICATION_PEER)) {
+ admin.enableReplicationPeer(CONTINUOUS_BACKUP_REPLICATION_PEER);
+ }
+ admin.appendReplicationPeerTableCFs(CONTINUOUS_BACKUP_REPLICATION_PEER, tableMap);
+ LOG.info("Updated replication peer {} with table and column family map.",
+ CONTINUOUS_BACKUP_REPLICATION_PEER);
+ } catch (ReplicationException e) {
+ LOG.error("Error while updating the replication peer: {}. Error: {}",
+ CONTINUOUS_BACKUP_REPLICATION_PEER, e.getMessage(), e);
+ throw new IOException("Error while updating the continuous backup replication peer.", e);
+ }
+ }
+
+ private void addContinuousBackupReplicationPeer(Admin admin) throws IOException {
+ String backupWalDir = conf.get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
+
+ if (backupWalDir == null || backupWalDir.isEmpty()) {
+ String errorMsg = "WAL Directory is not specified for continuous backup.";
+ LOG.error(errorMsg);
+ throw new IOException(errorMsg);
+ }
+
+ Map<String, String> additionalArgs = new HashMap<>();
+ additionalArgs.put(CONF_PEER_UUID, UUID.randomUUID().toString());
+ additionalArgs.put(CONF_BACKUP_ROOT_DIR, backupWalDir);
+
+ Map<TableName, List<String>> tableMap = tableList.stream()
+ .collect(Collectors.toMap(tableName -> tableName, tableName -> new ArrayList<>()));
+
+ ReplicationPeerConfig peerConfig = ReplicationPeerConfig.newBuilder()
+ .setReplicationEndpointImpl(DEFAULT_CONTINUOUS_BACKUP_REPLICATION_ENDPOINT)
+ .setReplicateAllUserTables(false).setTableCFsMap(tableMap).putAllConfiguration(additionalArgs)
+ .build();
+
+ try {
+ admin.addReplicationPeer(CONTINUOUS_BACKUP_REPLICATION_PEER, peerConfig, true);
+ LOG.info("Successfully added replication peer with ID: {}",
+ CONTINUOUS_BACKUP_REPLICATION_PEER);
+ } catch (IOException e) {
+ LOG.error("Failed to add replication peer with ID: {}. Error: {}",
+ CONTINUOUS_BACKUP_REPLICATION_PEER, e.getMessage(), e);
+ throw e;
+ }
+ }
+
+ private boolean continuousBackupReplicationPeerExists(Admin admin) throws IOException {
+ return admin.listReplicationPeers().stream()
+ .anyMatch(peer -> peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER));
+ }
+
protected void snapshotTable(Admin admin, TableName tableName, String snapshotName)
throws IOException {
int maxAttempts = conf.getInt(BACKUP_MAX_ATTEMPTS_KEY, DEFAULT_BACKUP_MAX_ATTEMPTS);
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
index 4fac0ca3c93c..b3027f09a4ac 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
@@ -17,16 +17,19 @@
*/
package org.apache.hadoop.hbase.backup.impl;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.JOB_NAME_CONF_KEY;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.stream.Collectors;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileSystem;
@@ -49,6 +52,7 @@
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
+import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
@@ -63,6 +67,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hbase.thirdparty.com.google.common.base.Strings;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
@@ -74,6 +79,7 @@
@InterfaceAudience.Private
public class IncrementalTableBackupClient extends TableBackupClient {
private static final Logger LOG = LoggerFactory.getLogger(IncrementalTableBackupClient.class);
+ private static final String BULKLOAD_COLLECTOR_OUTPUT = "bulkload-collector-output";
protected IncrementalTableBackupClient() {
}
@@ -125,63 +131,89 @@ protected static int getIndex(TableName tbl, List sTableList) {
* the backup is marked as complete.
* @param tablesToBackup list of tables to be backed up
*/
- protected List<BulkLoad> handleBulkLoad(List<TableName> tablesToBackup) throws IOException {
+ protected List<BulkLoad> handleBulkLoad(List<TableName> tablesToBackup,
+ Map<TableName, List<String>> tablesToWALFileList, Map<TableName, Long> tablesToPrevBackupTs)
+ throws IOException {
Map<TableName, MergeSplitBulkloadInfo> toBulkload = new HashMap<>();
- List<BulkLoad> bulkLoads = backupManager.readBulkloadRows(tablesToBackup);
+ List<BulkLoad> bulkLoads = new ArrayList<>();
+
FileSystem tgtFs;
try {
tgtFs = FileSystem.get(new URI(backupInfo.getBackupRootDir()), conf);
} catch (URISyntaxException use) {
throw new IOException("Unable to get FileSystem", use);
}
+
Path rootdir = CommonFSUtils.getRootDir(conf);
Path tgtRoot = new Path(new Path(backupInfo.getBackupRootDir()), backupId);
- for (BulkLoad bulkLoad : bulkLoads) {
- TableName srcTable = bulkLoad.getTableName();
- MergeSplitBulkloadInfo bulkloadInfo =
- toBulkload.computeIfAbsent(srcTable, MergeSplitBulkloadInfo::new);
- String regionName = bulkLoad.getRegion();
- String fam = bulkLoad.getColumnFamily();
- String filename = FilenameUtils.getName(bulkLoad.getHfilePath());
+ if (!backupInfo.isContinuousBackupEnabled()) {
+ bulkLoads = backupManager.readBulkloadRows(tablesToBackup);
+ for (BulkLoad bulkLoad : bulkLoads) {
+ TableName srcTable = bulkLoad.getTableName();
+ if (!tablesToBackup.contains(srcTable)) {
+ LOG.debug("Skipping {} since it is not in tablesToBackup", srcTable);
+ continue;
+ }
- if (!tablesToBackup.contains(srcTable)) {
- LOG.debug("Skipping {} since it is not in tablesToBackup", srcTable);
- continue;
- }
- Path tblDir = CommonFSUtils.getTableDir(rootdir, srcTable);
- Path p = new Path(tblDir, regionName + Path.SEPARATOR + fam + Path.SEPARATOR + filename);
-
- String srcTableQualifier = srcTable.getQualifierAsString();
- String srcTableNs = srcTable.getNamespaceAsString();
- Path tgtFam = new Path(tgtRoot, srcTableNs + Path.SEPARATOR + srcTableQualifier
- + Path.SEPARATOR + regionName + Path.SEPARATOR + fam);
- if (!tgtFs.mkdirs(tgtFam)) {
- throw new IOException("couldn't create " + tgtFam);
- }
- Path tgt = new Path(tgtFam, filename);
+ MergeSplitBulkloadInfo bulkloadInfo =
+ toBulkload.computeIfAbsent(srcTable, MergeSplitBulkloadInfo::new);
+ String regionName = bulkLoad.getRegion();
+ String fam = bulkLoad.getColumnFamily();
+ String filename = FilenameUtils.getName(bulkLoad.getHfilePath());
+ Path tblDir = CommonFSUtils.getTableDir(rootdir, srcTable);
+ Path p = new Path(tblDir, regionName + Path.SEPARATOR + fam + Path.SEPARATOR + filename);
+ String srcTableQualifier = srcTable.getQualifierAsString();
+ String srcTableNs = srcTable.getNamespaceAsString();
+ Path tgtFam = new Path(tgtRoot, srcTableNs + Path.SEPARATOR + srcTableQualifier
+ + Path.SEPARATOR + regionName + Path.SEPARATOR + fam);
+ if (!tgtFs.mkdirs(tgtFam)) {
+ throw new IOException("couldn't create " + tgtFam);
+ }
- Path archiveDir = HFileArchiveUtil.getStoreArchivePath(conf, srcTable, regionName, fam);
- Path archive = new Path(archiveDir, filename);
+ Path tgt = new Path(tgtFam, filename);
+ Path archiveDir = HFileArchiveUtil.getStoreArchivePath(conf, srcTable, regionName, fam);
+ Path archive = new Path(archiveDir, filename);
- if (fs.exists(p)) {
- if (LOG.isTraceEnabled()) {
- LOG.trace("found bulk hfile {} in {} for {}", bulkLoad.getHfilePath(), p.getParent(),
- srcTableQualifier);
- LOG.trace("copying {} to {}", p, tgt);
+ if (fs.exists(p)) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("found bulk hfile {} in {} for {}", bulkLoad.getHfilePath(), p.getParent(),
+ srcTableQualifier);
+ LOG.trace("copying {} to {}", p, tgt);
+ }
+ bulkloadInfo.addActiveFile(p.toString());
+ } else if (fs.exists(archive)) {
+ LOG.debug("copying archive {} to {}", archive, tgt);
+ bulkloadInfo.addArchiveFiles(archive.toString());
}
- bulkloadInfo.addActiveFile(p.toString());
- } else if (fs.exists(archive)) {
- LOG.debug("copying archive {} to {}", archive, tgt);
- bulkloadInfo.addArchiveFiles(archive.toString());
}
- }
- for (MergeSplitBulkloadInfo bulkloadInfo : toBulkload.values()) {
- mergeSplitAndCopyBulkloadedHFiles(bulkloadInfo.getActiveFiles(),
- bulkloadInfo.getArchiveFiles(), bulkloadInfo.getSrcTable(), tgtFs);
- }
+ for (MergeSplitBulkloadInfo bulkloadInfo : toBulkload.values()) {
+ mergeSplitAndCopyBulkloadedHFiles(bulkloadInfo.getActiveFiles(),
+ bulkloadInfo.getArchiveFiles(), bulkloadInfo.getSrcTable(), tgtFs);
+ }
+ } else {
+ // Continuous incremental backup: run BulkLoadCollectorJob over backed-up WALs
+ Path collectorOutput = new Path(getBulkOutputDir(), BULKLOAD_COLLECTOR_OUTPUT);
+ for (TableName table : tablesToBackup) {
+ long startTs = tablesToPrevBackupTs.getOrDefault(table, 0L);
+ long endTs = backupInfo.getIncrCommittedWalTs();
+ List<String> walDirs = tablesToWALFileList.getOrDefault(table, new ArrayList<>());
+
+ List<Path> bulkloadPaths = BackupUtils.collectBulkFiles(conn, table, table, startTs, endTs,
+ collectorOutput, walDirs);
+
+ List<String> bulkLoadFiles =
+ bulkloadPaths.stream().map(Path::toString).collect(Collectors.toList());
+
+ if (bulkLoadFiles.isEmpty()) {
+ LOG.info("No bulk-load files found for table {}", table);
+ continue;
+ }
+ mergeSplitAndCopyBulkloadedHFiles(bulkLoadFiles, table, tgtFs);
+ }
+ }
return bulkLoads;
}
@@ -285,16 +317,35 @@ public void updateFileLists(List activeFiles, List archiveFiles)
*/
@Override
public void execute() throws IOException, ColumnFamilyMismatchException {
+ // tablesToWALFileList and tablesToPrevBackupTs are needed for "continuous" Incremental backup
+ Map<TableName, List<String>> tablesToWALFileList = new HashMap<>();
+ Map<TableName, Long> tablesToPrevBackupTs = new HashMap<>();
try {
Map<TableName, String> tablesToFullBackupIds = getFullBackupIds();
verifyCfCompatibility(backupInfo.getTables(), tablesToFullBackupIds);
// case PREPARE_INCREMENTAL:
+ if (backupInfo.isContinuousBackupEnabled()) {
+ // committedWALsTs is needed only for Incremental backups with continuous backup
+ // since these do not depend on log roll ts
+ long committedWALsTs = BackupUtils.getReplicationCheckpoint(conn);
+ backupInfo.setIncrCommittedWalTs(committedWALsTs);
+ }
beginBackup(backupManager, backupInfo);
backupInfo.setPhase(BackupPhase.PREPARE_INCREMENTAL);
- LOG.debug("For incremental backup, current table set is "
- + backupManager.getIncrementalBackupTableSet());
- newTimestamps = ((IncrementalBackupManager) backupManager).getIncrBackupLogFileMap();
+ // Non-continuous Backup incremental backup is controlled by 'incremental backup table set'
+ // and not by user provided backup table list. This is an optimization to avoid copying
+ // the same set of WALs for incremental backups of different tables at different times
+ // HBASE-14038
+ // Continuous-incremental backup backs up user provided table list/set
+ Set<TableName> currentTableSet;
+ if (backupInfo.isContinuousBackupEnabled()) {
+ currentTableSet = backupInfo.getTables();
+ } else {
+ currentTableSet = backupManager.getIncrementalBackupTableSet();
+ newTimestamps = ((IncrementalBackupManager) backupManager).getIncrBackupLogFileMap();
+ }
+ LOG.debug("For incremental backup, the current table set is {}", currentTableSet);
} catch (Exception e) {
// fail the overall backup and return
failBackup(conn, backupInfo, backupManager, e, "Unexpected Exception : ",
@@ -308,7 +359,7 @@ public void execute() throws IOException, ColumnFamilyMismatchException {
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
setupRegionLocator();
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
- convertWALsToHFiles();
+ convertWALsToHFiles(tablesToWALFileList, tablesToPrevBackupTs);
incrementalCopyHFiles(new String[] { getBulkOutputDir().toString() },
backupInfo.getBackupRootDir());
} catch (Exception e) {
@@ -321,23 +372,27 @@ public void execute() throws IOException, ColumnFamilyMismatchException {
// set overall backup status: complete. Here we make sure to complete the backup.
// After this checkpoint, even if entering cancel process, will let the backup finished
try {
- // Set the previousTimestampMap which is before this current log roll to the manifest.
- Map<TableName, Map<String, Long>> previousTimestampMap = backupManager.readLogTimestampMap();
- backupInfo.setIncrTimestampMap(previousTimestampMap);
-
- // The table list in backupInfo is good for both full backup and incremental backup.
- // For incremental backup, it contains the incremental backup table set.
- backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
-
- Map<TableName, Map<String, Long>> newTableSetTimestampMap =
- backupManager.readLogTimestampMap();
-
- backupInfo.setTableSetTimestampMap(newTableSetTimestampMap);
- Long newStartCode =
- BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
- backupManager.writeBackupStartCode(newStartCode);
+ if (!backupInfo.isContinuousBackupEnabled()) {
+ // Set the previousTimestampMap which is before this current log roll to the manifest.
+ Map<TableName, Map<String, Long>> previousTimestampMap =
+ backupManager.readLogTimestampMap();
+ backupInfo.setIncrTimestampMap(previousTimestampMap);
+
+ // The table list in backupInfo is good for both full backup and incremental backup.
+ // For incremental backup, it contains the incremental backup table set.
+ backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
+
+ Map<TableName, Map<String, Long>> newTableSetTimestampMap =
+ backupManager.readLogTimestampMap();
+
+ backupInfo.setTableSetTimestampMap(newTableSetTimestampMap);
+ Long newStartCode =
+ BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
+ backupManager.writeBackupStartCode(newStartCode);
+ }
- List<BulkLoad> bulkLoads = handleBulkLoad(backupInfo.getTableNames());
+ List<BulkLoad> bulkLoads =
+ handleBulkLoad(backupInfo.getTableNames(), tablesToWALFileList, tablesToPrevBackupTs);
// backup complete
completeBackup(conn, backupInfo, BackupType.INCREMENTAL, conf);
@@ -395,24 +450,60 @@ protected void deleteBulkLoadDirectory() throws IOException {
}
}
- protected void convertWALsToHFiles() throws IOException {
- // get incremental backup file list and prepare parameters for DistCp
- List<String> incrBackupFileList = backupInfo.getIncrBackupFileList();
- // Get list of tables in incremental backup set
- Set<TableName> tableSet = backupManager.getIncrementalBackupTableSet();
- // filter missing files out (they have been copied by previous backups)
- incrBackupFileList = filterMissingFiles(incrBackupFileList);
- List<String> tableList = new ArrayList<String>();
- for (TableName table : tableSet) {
- // Check if table exists
- if (tableExists(table, conn)) {
- tableList.add(table.getNameAsString());
- } else {
- LOG.warn("Table " + table + " does not exists. Skipping in WAL converter");
+ protected void convertWALsToHFiles(Map<TableName, List<String>> tablesToWALFileList,
+ Map<TableName, Long> tablesToPrevBackupTs) throws IOException {
+ long previousBackupTs = 0L;
+ long currentBackupTs = 0L;
+ if (backupInfo.isContinuousBackupEnabled()) {
+ String walBackupDir = conf.get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
+ if (Strings.isNullOrEmpty(walBackupDir)) {
+ throw new IOException(
+ "Incremental backup requires the WAL backup directory " + CONF_CONTINUOUS_BACKUP_WAL_DIR);
}
+ Path walBackupPath = new Path(walBackupDir);
+ Set<TableName> tableSet = backupInfo.getTables();
+ currentBackupTs = backupInfo.getIncrCommittedWalTs();
+ List<BackupInfo> backupInfos = backupManager.getBackupHistory(true);
+ for (TableName table : tableSet) {
+ for (BackupInfo backup : backupInfos) {
+ // find previous backup for this table
+ if (backup.getTables().contains(table)) {
+ LOG.info("Found previous backup of type {} with id {} for table {}", backup.getType(),
+ backup.getBackupId(), table.getNameAsString());
+ List<String> walBackupFileList;
+ if (backup.getType() == BackupType.FULL) {
+ previousBackupTs = backup.getStartTs();
+ } else {
+ previousBackupTs = backup.getIncrCommittedWalTs();
+ }
+ walBackupFileList =
+ BackupUtils.getValidWalDirs(conf, walBackupPath, previousBackupTs, currentBackupTs);
+ tablesToWALFileList.put(table, walBackupFileList);
+ tablesToPrevBackupTs.put(table, previousBackupTs);
+ walToHFiles(walBackupFileList, Arrays.asList(table.getNameAsString()),
+ previousBackupTs);
+ break;
+ }
+ }
+ }
+ } else {
+ // get incremental backup file list and prepare parameters for DistCp
+ List<String> incrBackupFileList = backupInfo.getIncrBackupFileList();
+ // Get list of tables in incremental backup set
+ Set<TableName> tableSet = backupManager.getIncrementalBackupTableSet();
+ // filter missing files out (they have been copied by previous backups)
+ incrBackupFileList = filterMissingFiles(incrBackupFileList);
+ List<String> tableList = new ArrayList<String>();
+ for (TableName table : tableSet) {
+ // Check if table exists
+ if (tableExists(table, conn)) {
+ tableList.add(table.getNameAsString());
+ } else {
+ LOG.warn("Table " + table + " does not exists. Skipping in WAL converter");
+ }
+ }
+ walToHFiles(incrBackupFileList, tableList, previousBackupTs);
}
- walToHFiles(incrBackupFileList, tableList);
-
}
protected boolean tableExists(TableName table, Connection conn) throws IOException {
@@ -421,7 +512,8 @@ protected boolean tableExists(TableName table, Connection conn) throws IOExcepti
}
}
- protected void walToHFiles(List<String> dirPaths, List<String> tableList) throws IOException {
+ protected void walToHFiles(List<String> dirPaths, List<String> tableList, long previousBackupTs)
+ throws IOException {
Tool player = new WALPlayer();
// Player reads all files in arbitrary directory structure and creates
@@ -435,9 +527,12 @@ protected void walToHFiles(List dirPaths, List tableList) throws
conf.set(WALPlayer.INPUT_FILES_SEPARATOR_KEY, ";");
conf.setBoolean(WALPlayer.MULTI_TABLES_SUPPORT, true);
conf.set(JOB_NAME_CONF_KEY, jobname);
-
boolean diskBasedSortingEnabledOriginalValue = HFileOutputFormat2.diskBasedSortingEnabled(conf);
conf.setBoolean(HFileOutputFormat2.DISK_BASED_SORTING_ENABLED_KEY, true);
+ if (backupInfo.isContinuousBackupEnabled()) {
+ conf.set(WALInputFormat.START_TIME_KEY, Long.toString(previousBackupTs));
+ conf.set(WALInputFormat.END_TIME_KEY, Long.toString(backupInfo.getIncrCommittedWalTs()));
+ }
String[] playerArgs = { dirs, StringUtils.join(tableList, ",") };
try {
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/PitrBackupMetadata.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/PitrBackupMetadata.java
new file mode 100644
index 000000000000..dc135ce79c08
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/PitrBackupMetadata.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.impl;
+
+import java.util.List;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.BackupInfo;
+import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * A unified abstraction over backup metadata used during Point-In-Time Restore (PITR).
+ *
+ * This interface allows the PITR algorithm to operate uniformly over different types of backup
+ * metadata sources, such as {@link BackupInfo} (system table) and {@link BackupImage} (custom
+ * backup location), without knowing their specific implementations.
+ */
+@InterfaceAudience.Private
+public interface PitrBackupMetadata {
+
+ /** Returns List of table names included in the backup */
+ List<TableName> getTableNames();
+
+ /** Returns Start timestamp of the backup */
+ long getStartTs();
+
+ /** Returns Completion timestamp of the backup */
+ long getCompleteTs();
+
+ /** Returns Unique identifier for the backup */
+ String getBackupId();
+
+ /** Returns Root directory where the backup is stored */
+ String getRootDir();
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
index 30c27f01faaf..9e31ca409ada 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
@@ -92,7 +92,7 @@ public void init(final Connection conn, final String backupId, BackupRequest req
this.fs = CommonFSUtils.getCurrentFileSystem(conf);
backupInfo = backupManager.createBackupInfo(backupId, request.getBackupType(), tableList,
request.getTargetRootDir(), request.getTotalTasks(), request.getBandwidth(),
- request.getNoChecksumVerify());
+ request.getNoChecksumVerify(), request.isContinuousBackupEnabled());
if (tableList == null || tableList.isEmpty()) {
this.tableList = new ArrayList<>(backupInfo.getTables());
}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/BulkLoadCollectorJob.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/BulkLoadCollectorJob.java
new file mode 100644
index 000000000000..cf19d2622216
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/BulkLoadCollectorJob.java
@@ -0,0 +1,399 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.mapreduce;
+
+import static org.apache.hadoop.hbase.mapreduce.WALPlayer.TABLES_KEY;
+import static org.apache.hadoop.hbase.mapreduce.WALPlayer.TABLE_MAP_KEY;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.util.BackupFileSystemManager;
+import org.apache.hadoop.hbase.backup.util.BulkLoadProcessor;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
+import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.wal.WALEdit;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * MapReduce job that scans WAL backups and extracts referenced bulk-load store-file paths.
+ *
+ * This job is intended to be used when you want a list of HFiles / store-files referenced by WAL
+ * bulk-load descriptors. It emits a de-duplicated list of full paths (one per line) by default
+ * using the {@link DedupReducer}.
+ *
+ *
+ * Usage (CLI):
+ * {@code BulkLoadCollector [ []]}
+ *
+ */
+@InterfaceAudience.Private
+public class BulkLoadCollectorJob extends Configured implements Tool {
+ private static final Logger LOG = LoggerFactory.getLogger(BulkLoadCollectorJob.class);
+
+ public static final String NAME = "BulkLoadCollector";
+ public static final String DEFAULT_REDUCERS = "1";
+
+ public BulkLoadCollectorJob() {
+ }
+
+ public BulkLoadCollectorJob(final Configuration c) {
+ super(c);
+ }
+
+ /**
+ * Mapper that extracts relative bulk-load paths from a WAL entry (via {@code BulkLoadProcessor}),
+ * resolves them to full paths (via
+ * {@code BackupFileSystemManager#resolveBulkLoadFullPath(Path, Path)}), and emits each full path
+ * as the map key (Text). Uses the same table-filtering semantics as other WAL mappers: if no
+ * tables are configured, all tables are processed; otherwise only the configured table set is
+ * processed. Map output: (Text fullPathString, NullWritable)
+ */
+ public static class BulkLoadCollectorMapper extends Mapper<WALKey, WALEdit, Text, NullWritable> {
+ private final Map<TableName, TableName> tables = new TreeMap<>();
+ private final Text out = new Text();
+
+ @Override
+ protected void map(WALKey key, WALEdit value, Context context)
+ throws IOException, InterruptedException {
+ if (key == null) {
+ if (LOG.isTraceEnabled()) LOG.trace("map: received null WALKey, skipping");
+ return;
+ }
+ if (value == null) {
+ if (LOG.isTraceEnabled())
+ LOG.trace("map: received null WALEdit for table={}, skipping", safeTable(key));
+ return;
+ }
+
+ TableName tname = key.getTableName();
+
+ // table filtering
+ if (!(tables.isEmpty() || tables.containsKey(tname))) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("map: skipping table={} because it is not in configured table list", tname);
+ }
+ return;
+ }
+
+ // Extract relative store-file paths referenced by this WALEdit.
+ // Delegates parsing to BulkLoadProcessor so parsing logic is centralized.
+ List<Path> relativePaths = BulkLoadProcessor.processBulkLoadFiles(key, value);
+ if (relativePaths.isEmpty()) return;
+
+ // Determine WAL input path for this split (used to compute date/prefix for full path)
+ Path walInputPath;
+ try {
+ walInputPath =
+ new Path(((WALInputFormat.WALSplit) context.getInputSplit()).getLogFileName());
+ } catch (ClassCastException cce) {
+ String splitClass =
+ (context.getInputSplit() == null) ? "null" : context.getInputSplit().getClass().getName();
+ LOG.warn(
+ "map: unexpected InputSplit type (not WALSplit) - cannot determine WAL input path; context.getInputSplit() class={}",
+ splitClass, cce);
+ throw new IOException("Unexpected InputSplit type: expected WALSplit but got " + splitClass,
+ cce);
+ }
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("map: walInputPath={} table={} relativePathsCount={}", walInputPath, tname,
+ relativePaths.size());
+ }
+
+ // Build full path for each relative path and emit it.
+ for (Path rel : relativePaths) {
+ Path full = BackupFileSystemManager.resolveBulkLoadFullPath(walInputPath, rel);
+ out.set(full.toString());
+ context.write(out, NullWritable.get());
+ context.getCounter("BulkCollector", "StoreFilesEmitted").increment(1);
+ }
+ }
+
+ @Override
+ protected void setup(Context context) throws IOException {
+ String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);
+ String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY);
+ if (tableMap == null) {
+ tableMap = tablesToUse;
+ }
+ if (tablesToUse == null) {
+ // user requested all tables; tables map remains empty to indicate "all"
+ return;
+ }
+
+ if (tablesToUse.length != tableMap.length) {
+ throw new IOException("Incorrect table mapping specified.");
+ }
+
+ int i = 0;
+ for (String table : tablesToUse) {
+ TableName from = TableName.valueOf(table);
+ TableName to = TableName.valueOf(tableMap[i++]);
+ tables.put(from, to);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("setup: configuring mapping {} -> {}", from, to);
+ }
+ }
+ }
+
+ private String safeTable(WALKey key) {
+ try {
+ return key == null ? "" : key.getTableName().toString();
+ } catch (Exception e) {
+ return "";
+ }
+ }
+ }
+
+ /**
+ * Reducer that deduplicates full-path keys emitted by the mappers. It writes each unique key
+ * exactly once. Reduce input: (Text fullPathString, Iterable<NullWritable>) Reduce output: (Text
+ * fullPathString, NullWritable)
+ */
+ public static class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
+ @Override
+ protected void reduce(Text key, Iterable<NullWritable> values, Context ctx)
+ throws IOException, InterruptedException {
+ // Write the unique path once.
+ ctx.write(key, NullWritable.get());
+ }
+ }
+
+ /**
+ * Create and configure a Job instance for bulk-file collection.
+ * @param args CLI args expected to be: inputDirs bulkFilesOut [tables] [tableMap]
+ * @throws IOException on misconfiguration
+ */
+ public Job createSubmittableJob(String[] args) throws IOException {
+ Configuration conf = getConf();
+
+ setupTime(conf, WALInputFormat.START_TIME_KEY);
+ setupTime(conf, WALInputFormat.END_TIME_KEY);
+
+ if (args == null || args.length < 2) {
+ throw new IOException(
+ "Usage: <wal-input-dirs> <bulk-files-output-dir> [<tables> [<tableMappings>]]");
+ }
+
+ String inputDirs = args[0];
+ String bulkFilesOut = args[1];
+
+ // tables are optional (args[2])
+ String[] tables = (args.length == 2) ? new String[] {} : args[2].split(",");
+ String[] tableMap;
+ if (args.length > 3) {
+ tableMap = args[3].split(",");
+ if (tableMap.length != tables.length) {
+ throw new IOException("The same number of tables and mapping must be provided.");
+ }
+ } else {
+ // if no mapping is specified, map each table to itself
+ tableMap = tables;
+ }
+
+ LOG.info("createSubmittableJob: inputDirs='{}' bulkFilesOut='{}' tables={} tableMap={}",
+ inputDirs, bulkFilesOut, String.join(",", tables), String.join(",", tableMap));
+
+ conf.setStrings(TABLES_KEY, tables);
+ conf.setStrings(TABLE_MAP_KEY, tableMap);
+ conf.set(FileInputFormat.INPUT_DIR, inputDirs);
+
+ // create and return the actual Job configured for bulk-file discovery
+ return BulkLoadCollectorJob.createSubmittableJob(conf, inputDirs, bulkFilesOut);
+ }
+
+ /**
+ * Low-level job wiring. Creates the Job instance and sets input, mapper, reducer and output.
+ * @param conf configuration used for the job
+ * @param inputDirs WAL input directories (comma-separated)
+ * @param bulkFilesOut output directory to write discovered full-paths
+ * @throws IOException on invalid args
+ */
+ private static Job createSubmittableJob(Configuration conf, String inputDirs, String bulkFilesOut)
+ throws IOException {
+ if (bulkFilesOut == null || bulkFilesOut.isEmpty()) {
+ throw new IOException("bulkFilesOut (output dir) must be provided.");
+ }
+ if (inputDirs == null || inputDirs.isEmpty()) {
+ throw new IOException("inputDirs (WAL input dir) must be provided.");
+ }
+
+ Job job = Job.getInstance(conf, NAME + "_" + EnvironmentEdgeManager.currentTime());
+ job.setJarByClass(BulkLoadCollectorJob.class);
+
+ // Input: use same WALInputFormat used by WALPlayer so we parse WALs consistently
+ job.setInputFormatClass(WALInputFormat.class);
+ FileInputFormat.setInputDirRecursive(job, true);
+ FileInputFormat.setInputPaths(job, inputDirs);
+
+ // Mapper: extract and emit full bulk-load file paths (Text, NullWritable)
+ job.setMapperClass(BulkLoadCollectorMapper.class);
+ job.setMapOutputKeyClass(Text.class);
+ job.setMapOutputValueClass(NullWritable.class);
+
+ // Reducer: deduplicate the full-path keys
+ job.setReducerClass(DedupReducer.class);
+ // default to a single reducer (single deduped file); callers can set mapreduce.job.reduces
+ int reducers = conf.getInt("mapreduce.job.reduces", Integer.parseInt(DEFAULT_REDUCERS));
+ job.setNumReduceTasks(reducers);
+
+ // Output: write plain text lines (one path per line)
+ job.setOutputFormatClass(TextOutputFormat.class);
+ FileOutputFormat.setOutputPath(job, new Path(bulkFilesOut));
+
+ LOG.info("createSubmittableJob: created job name='{}' reducers={}", job.getJobName(), reducers);
+
+ String codecCls = WALCellCodec.getWALCellCodecClass(conf).getName();
+ try {
+ TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+ Class.forName(codecCls));
+ } catch (Exception e) {
+ throw new IOException("Cannot determine wal codec class " + codecCls, e);
+ }
+ return job;
+ }
+
+ /**
+ * Parse a time option. Supports the user-friendly ISO-like format
+ * {@code yyyy-MM-dd'T'HH:mm:ss.SS} or milliseconds since epoch. If the option is not present,
+ * this method is a no-op.
+ * @param conf configuration containing option
+ * @param option key to read (e.g. WALInputFormat.START_TIME_KEY)
+ * @throws IOException on parse failure
+ */
+ private void setupTime(Configuration conf, String option) throws IOException {
+ String val = conf.get(option);
+ if (val == null) {
+ return;
+ }
+ long ms;
+ try {
+ // first try to parse in user-friendly form
+ ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime();
+ } catch (ParseException pe) {
+ try {
+ // then see if a number (milliseconds) was specified
+ ms = Long.parseLong(val);
+ } catch (NumberFormatException nfe) {
+ throw new IOException(
+ option + " must be specified either in the form 2001-02-20T16:35:06.99 "
+ + "or as number of milliseconds");
+ }
+ }
+ conf.setLong(option, ms);
+ }
+
+ /**
+ * CLI entry point.
+ * @param args job arguments (see {@link #usage(String)})
+ * @throws Exception on job failure
+ */
+ public static void main(String[] args) throws Exception {
+ int ret = ToolRunner.run(new BulkLoadCollectorJob(HBaseConfiguration.create()), args);
+ System.exit(ret);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length < 2) {
+ usage("Wrong number of arguments: " + args.length);
+ System.exit(-1);
+ }
+
+ Job job = createSubmittableJob(args);
+ return job.waitForCompletion(true) ? 0 : 1;
+ }
+
+ /**
+ * Print usage/help for the BulkLoadCollectorJob CLI/driver.
+ *
+ *
+ *
+ * args layout:
+ * args[0] = input directory (required)
+ * args[1] = output directory (required)
+ * args[2] = tables (comma-separated) (optional)
+ * args[3] = tableMappings (comma-separated) (optional; must match tables length)
+ *
+ */
+ private void usage(final String errorMsg) {
+ if (errorMsg != null && !errorMsg.isEmpty()) {
+ System.err.println("ERROR: " + errorMsg);
+ }
+
+ System.err.println(
+ "Usage: " + NAME + " <wal-input-dirs> <bulk-files-output-dir> [<tables> [<tableMappings>]]");
+ System.err.println(
+ " <wal-input-dirs> directory of WALs to scan (comma-separated list accepted)");
+ System.err.println(
+ " <bulk-files-output-dir> directory to write discovered store-file paths (output)");
+ System.err.println(
+ " <tables> optional comma-separated list of tables to include; if omitted, all tables are processed");
+ System.err.println(
+ " <tableMappings> optional comma-separated list of mapped target tables; must match number of tables");
+
+ System.err.println();
+ System.err.println("Time range options (either milliseconds or yyyy-MM-dd'T'HH:mm:ss.SS):");
+ System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
+ System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
+
+ System.err.println();
+ System.err.println("Configuration alternatives (can be provided via -D):");
+ System.err
+ .println(" -D" + TABLES_KEY + "=<table1,table2,...> (alternative to arg[2])");
+ System.err
+ .println(" -D" + TABLE_MAP_KEY + "=<mappedTable1,mappedTable2,...> (alternative to arg[3])");
+ System.err.println(
+ " -Dmapreduce.job.reduces=<n> (number of reducers; default 1)");
+ System.err.println();
+
+ System.err.println("Performance hints:");
+ System.err.println(" For large inputs consider disabling speculative execution:");
+ System.err
+ .println(" -Dmapreduce.map.speculative=false -Dmapreduce.reduce.speculative=false");
+
+ System.err.println();
+ System.err.println("Example:");
+ System.err.println(
+ " " + NAME + " /wals/input /out/bulkfiles ns:tbl1,ns:tbl2 ns:tbl1_mapped,ns:tbl2_mapped");
+ }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreJob.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreJob.java
index 7a2fce4c418a..4711cba46680 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreJob.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreJob.java
@@ -61,11 +61,10 @@ public void run(Path[] dirPaths, TableName[] tableNames, Path restoreRootDir,
String dirs = StringUtils.join(dirPaths, ",");
if (LOG.isDebugEnabled()) {
- LOG.debug("Restore " + (fullBackupRestore ? "full" : "incremental")
- + " backup from directory " + dirs + " from hbase tables "
- + StringUtils.join(tableNames, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND)
- + " to tables "
- + StringUtils.join(newTableNames, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND));
+ LOG.debug("Restore {} from directory {} from hbase tables {} to tables {}",
+ fullBackupRestore ? "full backup" : "incremental backup / bulkload files (as part of PITR)",
+ dirs, StringUtils.join(tableNames, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND),
+ StringUtils.join(newTableNames, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND));
}
for (int i = 0; i < tableNames.length; i++) {
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/BulkLoadUploadException.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/BulkLoadUploadException.java
new file mode 100644
index 000000000000..91a46c77e319
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/BulkLoadUploadException.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.replication;
+
+import java.io.IOException;
+import org.apache.yetus.audience.InterfaceAudience;
+
+@InterfaceAudience.Private
+public class BulkLoadUploadException extends IOException {
+ public BulkLoadUploadException(String message) {
+ super(message);
+ }
+
+ public BulkLoadUploadException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java
new file mode 100644
index 000000000000..19cd2733af7b
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java
@@ -0,0 +1,553 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.replication;
+
+import com.google.errorprone.annotations.RestrictedApi;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.util.BackupFileSystemManager;
+import org.apache.hadoop.hbase.backup.util.BackupUtils;
+import org.apache.hadoop.hbase.backup.util.BulkLoadProcessor;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.io.asyncfs.monitor.StreamSlowMonitor;
+import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
+import org.apache.hadoop.hbase.replication.BaseReplicationEndpoint;
+import org.apache.hadoop.hbase.replication.EmptyEntriesPolicy;
+import org.apache.hadoop.hbase.replication.ReplicationResult;
+import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.wal.FSHLogProvider;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * ContinuousBackupReplicationEndpoint is responsible for replicating WAL entries to a backup
+ * storage. It organizes WAL entries by day and periodically flushes the data, ensuring that WAL
+ * files do not exceed the configured size. The class includes mechanisms for handling the WAL
+ * files, performing bulk load backups, and ensuring that the replication process is safe.
+ */
+@InterfaceAudience.Private
+public class ContinuousBackupReplicationEndpoint extends BaseReplicationEndpoint {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ContinuousBackupReplicationEndpoint.class);
+ public static final String CONF_PEER_UUID = "hbase.backup.wal.replication.peerUUID";
+ public static final String CONF_BACKUP_ROOT_DIR = "hbase.backup.root.dir";
+ public static final String CONF_BACKUP_MAX_WAL_SIZE = "hbase.backup.max.wal.size";
+ public static final long DEFAULT_MAX_WAL_SIZE = 128 * 1024 * 1024;
+
+ public static final String CONF_STAGED_WAL_FLUSH_INITIAL_DELAY =
+ "hbase.backup.staged.wal.flush.initial.delay.seconds";
+ public static final int DEFAULT_STAGED_WAL_FLUSH_INITIAL_DELAY_SECONDS = 5 * 60; // 5 minutes
+ public static final String CONF_STAGED_WAL_FLUSH_INTERVAL =
+ "hbase.backup.staged.wal.flush.interval.seconds";
+ public static final int DEFAULT_STAGED_WAL_FLUSH_INTERVAL_SECONDS = 5 * 60; // 5 minutes
+ public static final int EXECUTOR_TERMINATION_TIMEOUT_SECONDS = 60; // TODO: configurable??
+
+ private final Map<Long, FSHLogProvider.Writer> walWriters = new ConcurrentHashMap<>();
+ private final ReentrantLock lock = new ReentrantLock();
+
+ private ReplicationSourceInterface replicationSource;
+ private Configuration conf;
+ private BackupFileSystemManager backupFileSystemManager;
+ private UUID peerUUID;
+ private String peerId;
+ private ScheduledExecutorService flushExecutor;
+
+ private long latestWALEntryTimestamp = -1L;
+
+ public static final long ONE_DAY_IN_MILLISECONDS = TimeUnit.DAYS.toMillis(1);
+ public static final String WAL_FILE_PREFIX = "wal_file.";
+
+ @Override
+ public void init(Context context) throws IOException {
+ super.init(context);
+ this.replicationSource = context.getReplicationSource();
+ this.peerId = context.getPeerId();
+ this.conf = HBaseConfiguration.create(context.getConfiguration());
+
+ initializePeerUUID();
+ initializeBackupFileSystemManager();
+ startWalFlushExecutor();
+ LOG.info("{} Initialization complete", Utils.logPeerId(peerId));
+ }
+
+ private void initializePeerUUID() throws IOException {
+ String peerUUIDStr = conf.get(CONF_PEER_UUID);
+ if (peerUUIDStr == null || peerUUIDStr.isEmpty()) {
+ throw new IOException("Peer UUID is not specified. Please configure " + CONF_PEER_UUID);
+ }
+ try {
+ this.peerUUID = UUID.fromString(peerUUIDStr);
+ LOG.info("{} Peer UUID initialized to {}", Utils.logPeerId(peerId), peerUUID);
+ } catch (IllegalArgumentException e) {
+ throw new IOException("Invalid Peer UUID format: " + peerUUIDStr, e);
+ }
+ }
+
+ private void initializeBackupFileSystemManager() throws IOException {
+ String backupRootDir = conf.get(CONF_BACKUP_ROOT_DIR);
+ if (backupRootDir == null || backupRootDir.isEmpty()) {
+ throw new IOException(
+ "Backup root directory is not specified. Configure " + CONF_BACKUP_ROOT_DIR);
+ }
+
+ try {
+ this.backupFileSystemManager = new BackupFileSystemManager(peerId, conf, backupRootDir);
+ LOG.info("{} BackupFileSystemManager initialized successfully for {}",
+ Utils.logPeerId(peerId), backupRootDir);
+ } catch (IOException e) {
+ throw new IOException("Failed to initialize BackupFileSystemManager", e);
+ }
+ }
+
+ private void startWalFlushExecutor() {
+ int initialDelay = conf.getInt(CONF_STAGED_WAL_FLUSH_INITIAL_DELAY,
+ DEFAULT_STAGED_WAL_FLUSH_INITIAL_DELAY_SECONDS);
+ int flushInterval =
+ conf.getInt(CONF_STAGED_WAL_FLUSH_INTERVAL, DEFAULT_STAGED_WAL_FLUSH_INTERVAL_SECONDS);
+
+ flushExecutor = Executors.newSingleThreadScheduledExecutor();
+ flushExecutor.scheduleAtFixedRate(this::flushAndBackupSafely, initialDelay, flushInterval,
+ TimeUnit.SECONDS);
+ LOG.info("{} Scheduled WAL flush executor started with initial delay {}s and interval {}s",
+ Utils.logPeerId(peerId), initialDelay, flushInterval);
+ }
+
+ private void flushAndBackupSafely() {
+ lock.lock();
+ try {
+ LOG.info("{} Periodic WAL flush triggered", Utils.logPeerId(peerId));
+ flushWriters();
+ replicationSource.persistOffsets();
+ LOG.info("{} Periodic WAL flush and offset persistence completed successfully",
+ Utils.logPeerId(peerId));
+ } catch (IOException e) {
+ LOG.error("{} Error during WAL flush: {}", Utils.logPeerId(peerId), e.getMessage(), e);
+ } finally {
+ lock.unlock();
+ }
+ }
+
+ private void flushWriters() throws IOException {
+ LOG.info("{} Flushing {} WAL writers", Utils.logPeerId(peerId), walWriters.size());
+ for (Map.Entry<Long, FSHLogProvider.Writer> entry : walWriters.entrySet()) {
+ FSHLogProvider.Writer writer = entry.getValue();
+ if (writer != null) {
+ LOG.debug("{} Closing WAL writer for day: {}", Utils.logPeerId(peerId), entry.getKey());
+ try {
+ writer.close();
+ LOG.debug("{} Successfully closed WAL writer for day: {}", Utils.logPeerId(peerId),
+ entry.getKey());
+ } catch (IOException e) {
+ LOG.error("{} Failed to close WAL writer for day: {}. Error: {}", Utils.logPeerId(peerId),
+ entry.getKey(), e.getMessage(), e);
+ throw e;
+ }
+ }
+ }
+ walWriters.clear();
+
+ // All received WAL entries have been flushed and persisted successfully.
+ // At this point, it's safe to record the latest replicated timestamp,
+ // as we are guaranteed that all entries up to that timestamp are durably stored.
+ // This checkpoint is essential for enabling consistent Point-in-Time Restore (PITR).
+ updateLastReplicatedTimestampForContinuousBackup();
+
+ LOG.info("{} WAL writers flushed and cleared", Utils.logPeerId(peerId));
+ }
+
+ @Override
+ public UUID getPeerUUID() {
+ return peerUUID;
+ }
+
+ @Override
+ public void start() {
+ LOG.info("{} Starting ContinuousBackupReplicationEndpoint", Utils.logPeerId(peerId));
+ startAsync();
+ }
+
+ @Override
+ protected void doStart() {
+ LOG.info("{} ContinuousBackupReplicationEndpoint started successfully.",
+ Utils.logPeerId(peerId));
+ notifyStarted();
+ }
+
+ @Override
+ public EmptyEntriesPolicy getEmptyEntriesPolicy() {
+ // Since this endpoint writes to S3 asynchronously, an empty entry batch
+ // does not guarantee that all previously submitted entries were persisted.
+ // Hence, avoid committing the WAL position.
+ return EmptyEntriesPolicy.SUBMIT;
+ }
+
+ @Override
+ public ReplicationResult replicate(ReplicateContext replicateContext) {
+ final List<WAL.Entry> entries = replicateContext.getEntries();
+ if (entries.isEmpty()) {
+ LOG.debug("{} No WAL entries to replicate", Utils.logPeerId(peerId));
+ return ReplicationResult.SUBMITTED;
+ }
+
+ LOG.debug("{} Received {} WAL entries for replication", Utils.logPeerId(peerId),
+ entries.size());
+
+ Map<Long, List<WAL.Entry>> groupedEntries = groupEntriesByDay(entries);
+ LOG.debug("{} Grouped WAL entries by day: {}", Utils.logPeerId(peerId),
+ groupedEntries.keySet());
+
+ lock.lock();
+ try {
+ for (Map.Entry<Long, List<WAL.Entry>> entry : groupedEntries.entrySet()) {
+ LOG.debug("{} Backing up {} WAL entries for day {}", Utils.logPeerId(peerId),
+ entry.getValue().size(), entry.getKey());
+ backupWalEntries(entry.getKey(), entry.getValue());
+ }
+
+ // Capture the timestamp of the last WAL entry processed. This is used as the replication
+ // checkpoint so that point-in-time restores know the latest consistent time up to which
+ // replication has
+ // occurred.
+ latestWALEntryTimestamp = entries.get(entries.size() - 1).getKey().getWriteTime();
+
+ if (isAnyWriterFull()) {
+ LOG.debug("{} Some WAL writers reached max size, triggering flush",
+ Utils.logPeerId(peerId));
+ flushWriters();
+ LOG.debug("{} Replication committed after WAL flush", Utils.logPeerId(peerId));
+ return ReplicationResult.COMMITTED;
+ }
+
+ LOG.debug("{} Replication submitted successfully", Utils.logPeerId(peerId));
+ return ReplicationResult.SUBMITTED;
+ } catch (IOException e) {
+ LOG.error("{} Replication failed. Error details: {}", Utils.logPeerId(peerId), e.getMessage(),
+ e);
+ return ReplicationResult.FAILED;
+ } finally {
+ lock.unlock();
+ }
+ }
+
+ /**
+ * Persists the latest replicated WAL entry timestamp in the backup system table. This checkpoint
+ * is critical for Continuous Backup and Point-in-Time Restore (PITR) to ensure restore operations
+ * only go up to a known safe point. The value is stored per region server using its ServerName as
+ * the key.
+ * @throws IOException if the checkpoint update fails
+ */
+ private void updateLastReplicatedTimestampForContinuousBackup() throws IOException {
+ try (final Connection conn = ConnectionFactory.createConnection(conf);
+ BackupSystemTable backupSystemTable = new BackupSystemTable(conn)) {
+ backupSystemTable.updateBackupCheckpointTimestamp(replicationSource.getServerWALsBelongTo(),
+ latestWALEntryTimestamp);
+ }
+ }
+
+ private Map<Long, List<WAL.Entry>> groupEntriesByDay(List<WAL.Entry> entries) {
+ return entries.stream().collect(
+ Collectors.groupingBy(entry -> (entry.getKey().getWriteTime() / ONE_DAY_IN_MILLISECONDS)
+ * ONE_DAY_IN_MILLISECONDS));
+ }
+
+ private boolean isAnyWriterFull() {
+ return walWriters.values().stream().anyMatch(this::isWriterFull);
+ }
+
+ private boolean isWriterFull(FSHLogProvider.Writer writer) {
+ long maxWalSize = conf.getLong(CONF_BACKUP_MAX_WAL_SIZE, DEFAULT_MAX_WAL_SIZE);
+ return writer.getLength() >= maxWalSize;
+ }
+
+ private void backupWalEntries(long day, List<WAL.Entry> walEntries) throws IOException {
+ LOG.debug("{} Starting backup of {} WAL entries for day {}", Utils.logPeerId(peerId),
+ walEntries.size(), day);
+
+ try {
+ FSHLogProvider.Writer walWriter = walWriters.computeIfAbsent(day, this::createWalWriter);
+
+ for (WAL.Entry entry : walEntries) {
+ walWriter.append(entry);
+ }
+
+ walWriter.sync(true);
+ } catch (UncheckedIOException e) {
+ String errorMsg = Utils.logPeerId(peerId) + " Failed to get or create WAL Writer for " + day;
+ LOG.error("{} Backup failed for day {}. Error: {}", Utils.logPeerId(peerId), day,
+ e.getMessage(), e);
+ throw new IOException(errorMsg, e);
+ }
+
+ List<Path> bulkLoadFiles = BulkLoadProcessor.processBulkLoadFiles(walEntries);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("{} Processed {} bulk load files for WAL entries", Utils.logPeerId(peerId),
+ bulkLoadFiles.size());
+ LOG.trace("{} Bulk load files: {}", Utils.logPeerId(peerId),
+ bulkLoadFiles.stream().map(Path::toString).collect(Collectors.joining(", ")));
+ }
+
+ uploadBulkLoadFiles(day, bulkLoadFiles);
+ }
+
+ private FSHLogProvider.Writer createWalWriter(long dayInMillis) {
+ String dayDirectoryName = BackupUtils.formatToDateString(dayInMillis);
+
+ FileSystem fs = backupFileSystemManager.getBackupFs();
+ Path walsDir = backupFileSystemManager.getWalsDir();
+
+ try {
+ // Create a directory for the day
+ Path dayDir = new Path(walsDir, dayDirectoryName);
+ fs.mkdirs(dayDir);
+
+ // Generate a unique WAL file name
+ long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
+ String walFileName = WAL_FILE_PREFIX + currentTime + "." + UUID.randomUUID();
+ Path walFilePath = new Path(dayDir, walFileName);
+
+ // Initialize the WAL writer
+ FSHLogProvider.Writer writer =
+ ObjectStoreProtobufWalWriter.class.getDeclaredConstructor().newInstance();
+ writer.init(fs, walFilePath, conf, true, WALUtil.getWALBlockSize(conf, fs, walFilePath),
+ StreamSlowMonitor.create(conf, walFileName));
+
+ LOG.info("{} WAL writer created: {}", Utils.logPeerId(peerId), walFilePath);
+ return writer;
+ } catch (Exception e) {
+ throw new UncheckedIOException(
+ Utils.logPeerId(peerId) + " Failed to initialize WAL Writer for day: " + dayDirectoryName,
+ new IOException(e));
+ }
+ }
+
+ @Override
+ public void stop() {
+ LOG.info("{} Stopping ContinuousBackupReplicationEndpoint...", Utils.logPeerId(peerId));
+ stopAsync();
+ }
+
+ @Override
+ protected void doStop() {
+ close();
+ LOG.info("{} ContinuousBackupReplicationEndpoint stopped successfully.",
+ Utils.logPeerId(peerId));
+ notifyStopped();
+ }
+
+ private void close() {
+ LOG.info("{} Closing WAL replication component...", Utils.logPeerId(peerId));
+ shutdownFlushExecutor();
+ lock.lock();
+ try {
+ flushWriters();
+ replicationSource.persistOffsets();
+ } catch (IOException e) {
+ LOG.error("{} Failed to Flush Open Wal Writers: {}", Utils.logPeerId(peerId), e.getMessage(),
+ e);
+ } finally {
+ lock.unlock();
+ LOG.info("{} WAL replication component closed.", Utils.logPeerId(peerId));
+ }
+ }
+
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java)")
+ void uploadBulkLoadFiles(long dayInMillis, List<Path> bulkLoadFiles)
+ throws BulkLoadUploadException {
+ if (bulkLoadFiles.isEmpty()) {
+ LOG.debug("{} No bulk load files to upload for {}", Utils.logPeerId(peerId), dayInMillis);
+ return;
+ }
+
+ LOG.debug("{} Starting upload of {} bulk load files", Utils.logPeerId(peerId),
+ bulkLoadFiles.size());
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("{} Bulk load files to upload: {}", Utils.logPeerId(peerId),
+ bulkLoadFiles.stream().map(Path::toString).collect(Collectors.joining(", ")));
+ }
+ String dayDirectoryName = BackupUtils.formatToDateString(dayInMillis);
+ Path bulkloadDir = new Path(backupFileSystemManager.getBulkLoadFilesDir(), dayDirectoryName);
+ try {
+ backupFileSystemManager.getBackupFs().mkdirs(bulkloadDir);
+ } catch (IOException e) {
+ throw new BulkLoadUploadException(
+ String.format("%s Failed to create bulkload directory in backupFS: %s",
+ Utils.logPeerId(peerId), bulkloadDir),
+ e);
+ }
+
+ for (Path file : bulkLoadFiles) {
+ Path sourcePath;
+ try {
+ sourcePath = getBulkLoadFileStagingPath(file);
+ } catch (FileNotFoundException fnfe) {
+ throw new BulkLoadUploadException(
+ String.format("%s Bulk load file not found: %s", Utils.logPeerId(peerId), file), fnfe);
+ } catch (IOException ioe) {
+ throw new BulkLoadUploadException(
+ String.format("%s Failed to resolve source path for: %s", Utils.logPeerId(peerId), file),
+ ioe);
+ }
+
+ Path destPath = new Path(bulkloadDir, file);
+
+ try {
+ LOG.debug("{} Copying bulk load file from {} to {}", Utils.logPeerId(peerId), sourcePath,
+ destPath);
+
+ copyWithCleanup(CommonFSUtils.getRootDirFileSystem(conf), sourcePath,
+ backupFileSystemManager.getBackupFs(), destPath, conf);
+
+ LOG.info("{} Bulk load file {} successfully backed up to {}", Utils.logPeerId(peerId), file,
+ destPath);
+ } catch (IOException e) {
+ throw new BulkLoadUploadException(
+ String.format("%s Failed to copy bulk load file %s to %s on day %s",
+ Utils.logPeerId(peerId), file, destPath, BackupUtils.formatToDateString(dayInMillis)),
+ e);
+ }
+ }
+
+ LOG.debug("{} Completed upload of bulk load files", Utils.logPeerId(peerId));
+ }
+
+ /**
+ * Copy a file with cleanup logic in case of failure. Always overwrite destination to avoid
+ * leaving corrupt partial files.
+ */
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use outside tests.",
+ link = "",
+ allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java)")
+ static void copyWithCleanup(FileSystem srcFS, Path src, FileSystem dstFS, Path dst,
+ Configuration conf) throws IOException {
+ try {
+ if (dstFS.exists(dst)) {
+ FileStatus srcStatus = srcFS.getFileStatus(src);
+ FileStatus dstStatus = dstFS.getFileStatus(dst);
+
+ if (srcStatus.getLen() == dstStatus.getLen()) {
+ LOG.info("Destination file {} already exists with same length ({}). Skipping copy.", dst,
+ dstStatus.getLen());
+ return; // Skip upload
+ } else {
+ LOG.warn(
+ "Destination file {} exists but length differs (src={}, dst={}). " + "Overwriting now.",
+ dst, srcStatus.getLen(), dstStatus.getLen());
+ }
+ }
+
+ // Always overwrite in case previous copy left partial data
+ FileUtil.copy(srcFS, src, dstFS, dst, false, true, conf);
+ } catch (IOException e) {
+ try {
+ if (dstFS.exists(dst)) {
+ dstFS.delete(dst, true);
+ LOG.warn("Deleted partial/corrupt destination file {} after copy failure", dst);
+ }
+ } catch (IOException cleanupEx) {
+ LOG.warn("Failed to cleanup destination file {} after copy failure", dst, cleanupEx);
+ }
+ throw e;
+ }
+ }
+
+ private Path getBulkLoadFileStagingPath(Path relativePathFromNamespace) throws IOException {
+ FileSystem rootFs = CommonFSUtils.getRootDirFileSystem(conf);
+ Path rootDir = CommonFSUtils.getRootDir(conf);
+ Path baseNSDir = new Path(HConstants.BASE_NAMESPACE_DIR);
+ Path baseNamespaceDir = new Path(rootDir, baseNSDir);
+ Path hFileArchiveDir =
+ new Path(rootDir, new Path(HConstants.HFILE_ARCHIVE_DIRECTORY, baseNSDir));
+
+ LOG.debug("{} Searching for bulk load file: {} in paths: {}, {}", Utils.logPeerId(peerId),
+ relativePathFromNamespace, baseNamespaceDir, hFileArchiveDir);
+
+ Path result =
+ findExistingPath(rootFs, baseNamespaceDir, hFileArchiveDir, relativePathFromNamespace);
+ LOG.debug("{} Bulk load file found at {}", Utils.logPeerId(peerId), result);
+ return result;
+ }
+
+ private static Path findExistingPath(FileSystem rootFs, Path baseNamespaceDir,
+ Path hFileArchiveDir, Path filePath) throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Checking for bulk load file at: {} and {}", new Path(baseNamespaceDir, filePath),
+ new Path(hFileArchiveDir, filePath));
+ }
+
+ for (Path candidate : new Path[] { new Path(baseNamespaceDir, filePath),
+ new Path(hFileArchiveDir, filePath) }) {
+ if (rootFs.exists(candidate)) {
+ return candidate;
+ }
+ }
+
+ throw new FileNotFoundException("Bulk load file not found at either: "
+ + new Path(baseNamespaceDir, filePath) + " or " + new Path(hFileArchiveDir, filePath));
+ }
+
+ private void shutdownFlushExecutor() {
+ if (flushExecutor != null) {
+ LOG.info("{} Initiating WAL flush executor shutdown.", Utils.logPeerId(peerId));
+
+ flushExecutor.shutdown();
+ try {
+ if (
+ !flushExecutor.awaitTermination(EXECUTOR_TERMINATION_TIMEOUT_SECONDS, TimeUnit.SECONDS)
+ ) {
+ LOG.warn("{} Flush executor did not terminate within timeout, forcing shutdown.",
+ Utils.logPeerId(peerId));
+ flushExecutor.shutdownNow();
+ }
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ flushExecutor.shutdownNow();
+ LOG.warn("{} Flush executor shutdown was interrupted.", Utils.logPeerId(peerId), e);
+ }
+ LOG.info("{} WAL flush thread stopped.", Utils.logPeerId(peerId));
+ }
+ }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/ObjectStoreProtobufWalWriter.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/ObjectStoreProtobufWalWriter.java
new file mode 100644
index 000000000000..27f4fbdc027e
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/ObjectStoreProtobufWalWriter.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.replication;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.asyncfs.monitor.StreamSlowMonitor;
+import org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter;
+import org.apache.hadoop.hbase.util.AtomicUtils;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * A custom implementation of {@link ProtobufLogWriter} that provides support for writing
+ * protobuf-based WAL (Write-Ahead Log) entries to object store-backed files.
+ *
+ * This class overrides the {@link ProtobufLogWriter#sync(boolean)} and
+ * {@link ProtobufLogWriter#initOutput(FileSystem, Path, boolean, int, short, long, StreamSlowMonitor, boolean)}
+ * methods to ensure compatibility with object stores, while ignoring specific capability checks
+ * such as HFLUSH and HSYNC. These checks are often not supported by some object stores, and
+ * bypassing them ensures smooth operation in such environments.
+ *
+ */
+@InterfaceAudience.Private
+public class ObjectStoreProtobufWalWriter extends ProtobufLogWriter {
+ private final AtomicLong syncedLength = new AtomicLong(0);
+
+ @Override
+ public void sync(boolean forceSync) throws IOException {
+ FSDataOutputStream fsDataOutputstream = this.output;
+ if (fsDataOutputstream == null) {
+ return; // Presume closed
+ }
+ // Special case for Hadoop S3: Unlike traditional file systems, where flush() ensures data is
+ // durably written, in Hadoop S3, flush() only writes data to the internal buffer and does not
+ // immediately persist it to S3. The actual upload to S3 happens asynchronously, typically when
+ // a block is full or when close() is called, which finalizes the upload process.
+ fsDataOutputstream.flush();
+ AtomicUtils.updateMax(this.syncedLength, fsDataOutputstream.getPos());
+ }
+
+ @Override
+ protected void initOutput(FileSystem fs, Path path, boolean overwritable, int bufferSize,
+ short replication, long blockSize, StreamSlowMonitor monitor, boolean noLocalWrite)
+ throws IOException {
+ try {
+ super.initOutput(fs, path, overwritable, bufferSize, replication, blockSize, monitor,
+ noLocalWrite);
+ } catch (CommonFSUtils.StreamLacksCapabilityException e) {
+ // Ignore capability check for HFLUSH and HSYNC capabilities
+ // Some object stores may not support these capabilities, so we bypass the exception handling
+ // to ensure compatibility with such stores.
+ }
+ }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/Utils.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/Utils.java
new file mode 100644
index 000000000000..69365674acca
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/Utils.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.replication;
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+@InterfaceAudience.Private
+public final class Utils {
+ private Utils() {
+ }
+
+ public static String logPeerId(String peerId) {
+ return "[Source for peer " + peerId + "]:";
+ }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupFileSystemManager.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupFileSystemManager.java
new file mode 100644
index 000000000000..a616eb69e47f
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupFileSystemManager.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.util;
+
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.backup.replication.Utils;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Initializes and organizes backup directories for continuous Write-Ahead Logs (WALs) and
+ * bulk-loaded files within the specified backup root directory.
+ */
+@InterfaceAudience.Private
+public class BackupFileSystemManager {
+  private static final Logger LOG = LoggerFactory.getLogger(BackupFileSystemManager.class);
+
+  public static final String WALS_DIR = "WALs";
+  public static final String BULKLOAD_FILES_DIR = "bulk-load-files";
+  private final String peerId;
+  private final FileSystem backupFs;
+  private final Path backupRootDir;
+  private final Path walsDir;
+  private final Path bulkLoadFilesDir;
+
+  public BackupFileSystemManager(String peerId, Configuration conf, String backupRootDirStr)
+    throws IOException {
+    this.peerId = peerId;
+    this.backupRootDir = new Path(backupRootDirStr);
+    this.backupFs = FileSystem.get(backupRootDir.toUri(), conf);
+    this.walsDir = createDirectory(WALS_DIR);
+    this.bulkLoadFilesDir = createDirectory(BULKLOAD_FILES_DIR);
+  }
+
+  private Path createDirectory(String dirName) throws IOException {
+    Path dirPath = new Path(backupRootDir, dirName);
+    backupFs.mkdirs(dirPath);
+    LOG.info("{} Initialized directory: {}", Utils.logPeerId(peerId), dirPath);
+    return dirPath;
+  }
+
+  public Path getWalsDir() {
+    return walsDir;
+  }
+
+  public Path getBulkLoadFilesDir() {
+    return bulkLoadFilesDir;
+  }
+
+  public FileSystem getBackupFs() {
+    return backupFs;
+  }
+
+  public static final class WalPathInfo {
+    private final Path prefixBeforeWALs;
+    private final String dateSegment;
+
+    public WalPathInfo(Path prefixBeforeWALs, String dateSegment) {
+      this.prefixBeforeWALs = prefixBeforeWALs;
+      this.dateSegment = dateSegment;
+    }
+
+    public Path getPrefixBeforeWALs() {
+      return prefixBeforeWALs;
+    }
+
+    public String getDateSegment() {
+      return dateSegment;
+    }
+  }
+
+  /**
+   * Validate the walPath has the expected structure: {@code .../WALs/<date>/<wal-file>} and return
+   * WalPathInfo(prefixBeforeWALs, dateSegment).
+   * @throws IOException if the path is not in expected format
+   */
+  public static WalPathInfo extractWalPathInfo(Path walPath) throws IOException {
+    if (walPath == null) {
+      throw new IllegalArgumentException("walPath must not be null");
+    }
+
+    Path dateDir = walPath.getParent(); // .../WALs/<date>
+    if (dateDir == null) {
+      throw new IOException("Invalid WAL path: missing date directory. Path: " + walPath);
+    }
+
+    Path walsDir = dateDir.getParent(); // .../WALs
+    if (walsDir == null) {
+      throw new IOException("Invalid WAL path: missing WALs directory. Path: " + walPath);
+    }
+
+    String walsDirName = walsDir.getName();
+    if (!WALS_DIR.equals(walsDirName)) {
+      throw new IOException("Invalid WAL path: expected '" + WALS_DIR + "' segment but found '"
+        + walsDirName + "'. Path: " + walPath);
+    }
+
+    String dateSegment = dateDir.getName();
+    if (dateSegment == null || dateSegment.isEmpty()) {
+      throw new IOException("Invalid WAL path: date segment is empty. Path: " + walPath);
+    }
+
+    Path prefixBeforeWALs = walsDir.getParent(); // might be null if path is like "/WALs/..."
+    return new WalPathInfo(prefixBeforeWALs, dateSegment);
+  }
+
+  /**
+   * Resolve the full bulk-load file path corresponding to a relative bulk-load path referenced from
+   * a WAL file path. For a WAL path like: /some/prefix/.../WALs/23-08-2025/some-wal-file and a
+   * relative bulk path like: namespace/table/region/family/file, this returns:
+   * /some/prefix/.../bulk-load-files/23-08-2025/namespace/table/region/family/file
+   * @param walPath the Path to the WAL file (must contain the {@link #WALS_DIR} segment
+   *          followed by date)
+   * @param relativeBulkPath the relative bulk-load file Path
+   * @return resolved full Path for the bulk-load file
+   * @throws IOException if the WAL path does not contain the expected segments
+   */
+  public static Path resolveBulkLoadFullPath(Path walPath, Path relativeBulkPath)
+    throws IOException {
+    WalPathInfo info = extractWalPathInfo(walPath);
+
+    Path prefixBeforeWALs = info.getPrefixBeforeWALs();
+    String dateSegment = info.getDateSegment();
+
+    Path full; // Build final path:
+    // <prefixBeforeWALs>/bulk-load-files/<dateSegment>/<relativeBulkPath>
+    if (prefixBeforeWALs == null || prefixBeforeWALs.toString().isEmpty()) {
+      full = new Path(BULKLOAD_FILES_DIR, new Path(dateSegment, relativeBulkPath));
+    } else {
+      full = new Path(new Path(prefixBeforeWALs, BULKLOAD_FILES_DIR),
+        new Path(dateSegment, relativeBulkPath));
+    }
+    return full;
+  }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
index 183cc2054f1a..11e22efac97a 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
@@ -17,17 +17,28 @@
*/
package org.apache.hadoop.hbase.backup.util;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
+import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
+import static org.apache.hadoop.hbase.backup.util.BackupFileSystemManager.WALS_DIR;
+import static org.apache.hadoop.hbase.replication.regionserver.ReplicationMarkerChore.REPLICATION_MARKER_ENABLED_DEFAULT;
+import static org.apache.hadoop.hbase.replication.regionserver.ReplicationMarkerChore.REPLICATION_MARKER_ENABLED_KEY;
+
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URLDecoder;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
+import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.TimeZone;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
@@ -39,6 +50,7 @@
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.ServerName;
@@ -56,6 +68,11 @@
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
+import org.apache.hadoop.hbase.replication.ReplicationException;
+import org.apache.hadoop.hbase.replication.ReplicationGroupOffset;
+import org.apache.hadoop.hbase.replication.ReplicationQueueId;
+import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
+import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
import org.apache.hadoop.hbase.tool.BulkLoadHFiles;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -67,6 +84,7 @@
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.base.Splitter;
+import org.apache.hbase.thirdparty.com.google.common.base.Strings;
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
@@ -79,6 +97,7 @@ public final class BackupUtils {
private static final Logger LOG = LoggerFactory.getLogger(BackupUtils.class);
public static final String LOGNAME_SEPARATOR = ".";
public static final int MILLISEC_IN_HOUR = 3600000;
+ public static final String DATE_FORMAT = "yyyy-MM-dd";
private BackupUtils() {
throw new AssertionError("Instantiating utility class...");
@@ -821,4 +840,233 @@ private static void logRollV2(Connection conn, String backupRootDir) throws IOEx
}
}
}
+
+ /**
+ * Calculates the replication checkpoint timestamp used for continuous backup.
+ *
+ * A replication checkpoint is the earliest timestamp across all region servers such that every
+ * WAL entry before that point is known to be replicated to the target system. This is essential
+ * for features like Point-in-Time Restore (PITR) and incremental backups, where we want to
+ * confidently restore data to a consistent state without missing updates.
+ *
+ * The checkpoint is calculated using a combination of:
+ *
+ * - The start timestamps of WAL files currently being replicated for each server.
+ * - The latest successfully replicated timestamp recorded by the replication marker chore.
+ *
+ *
+ * We combine these two sources to handle the following challenges:
+ *
+ * - Stale WAL start times: If replication traffic is low or WALs are long-lived, the
+ * replication offset may point to the same WAL for a long time, resulting in stale timestamps
+ * that underestimate progress. This could delay PITR unnecessarily.
+ * - Limitations of marker-only tracking: The replication marker chore stores the last
+ * successfully replicated timestamp per region server in a system table. However, this data may
+ * become stale if the server goes offline or region ownership changes. For example, if a region
+ * initially belonged to rs1 and was later moved to rs4 due to re-balancing, rs1’s marker would
+ * persist even though it no longer holds any regions. Relying solely on these stale markers could
+ * lead to incorrect or outdated checkpoints.
+ *
+ *
+ * To handle these limitations, the method:
+ *
+ * - Verifies that the continuous backup peer exists to ensure replication is enabled.
+ * - Retrieves WAL replication queue information for the peer, collecting WAL start times per
+ * region server. This gives us a lower bound for replication progress.
+ * - Reads the marker chore's replicated timestamps from the backup system table.
+ * - For servers found in both sources, if the marker timestamp is more recent than the WAL's
+ * start timestamp, we use the marker (since replication has progressed beyond the WAL).
+ * - We discard marker entries for region servers that are not present in WAL queues, assuming
+ * those servers are no longer relevant (e.g., decommissioned or reassigned).
+ * - The checkpoint is the minimum of all chosen timestamps — i.e., the slowest replicating
+ * region server.
+ * - Finally, we persist the updated marker information to include any newly participating
+ * region servers.
+ *
+ *
+ * Note: If the replication marker chore is disabled, we fall back to using only the WAL start
+ * times. This ensures correctness but may lead to conservative checkpoint estimates during idle
+ * periods.
+ * @param conn the HBase connection
+ * @return the calculated replication checkpoint timestamp
+ * @throws IOException if reading replication queues or updating the backup system table fails
+ */
+  public static long getReplicationCheckpoint(Connection conn) throws IOException {
+    Configuration conf = conn.getConfiguration();
+    long checkpoint = EnvironmentEdgeManager.getDelegate().currentTime();
+
+    // Step 1: Ensure the continuous backup replication peer exists
+    if (!continuousBackupReplicationPeerExists(conn.getAdmin())) {
+      String msg = "Replication peer '" + CONTINUOUS_BACKUP_REPLICATION_PEER
+        + "' not found. Continuous backup not enabled.";
+      LOG.error(msg);
+      throw new IOException(msg);
+    }
+
+    // Step 2: Get all replication queues for the continuous backup peer
+    ReplicationQueueStorage queueStorage =
+      ReplicationStorageFactory.getReplicationQueueStorage(conn, conf);
+
+    List<ReplicationQueueId> queueIds;
+    try {
+      queueIds = queueStorage.listAllQueueIds(CONTINUOUS_BACKUP_REPLICATION_PEER);
+    } catch (ReplicationException e) {
+      String msg = "Failed to retrieve replication queue IDs for peer '"
+        + CONTINUOUS_BACKUP_REPLICATION_PEER + "'";
+      LOG.error(msg, e);
+      throw new IOException(msg, e);
+    }
+
+    if (queueIds.isEmpty()) {
+      String msg = "Replication peer '" + CONTINUOUS_BACKUP_REPLICATION_PEER + "' has no queues. "
+        + "This may indicate that continuous backup replication is not initialized correctly.";
+      LOG.error(msg);
+      throw new IOException(msg);
+    }
+
+    // Step 3: Build a map of ServerName -> WAL start timestamp (lowest seen per server)
+    Map<ServerName, Long> serverToCheckpoint = new HashMap<>();
+    for (ReplicationQueueId queueId : queueIds) {
+      Map<String, ReplicationGroupOffset> offsets;
+      try {
+        offsets = queueStorage.getOffsets(queueId);
+      } catch (ReplicationException e) {
+        String msg = "Failed to fetch WAL offsets for replication queue: " + queueId;
+        LOG.error(msg, e);
+        throw new IOException(msg, e);
+      }
+
+      for (ReplicationGroupOffset offset : offsets.values()) {
+        String walFile = offset.getWal();
+        long ts = AbstractFSWALProvider.getTimestamp(walFile); // WAL creation time
+        ServerName server = queueId.getServerName();
+        // Store the minimum timestamp per server (ts - 1 to avoid edge boundary issues)
+        serverToCheckpoint.merge(server, ts - 1, Math::min);
+      }
+    }
+
+    // Step 4: If replication markers are enabled, overlay fresher timestamps from backup system
+    // table
+    boolean replicationMarkerEnabled =
+      conf.getBoolean(REPLICATION_MARKER_ENABLED_KEY, REPLICATION_MARKER_ENABLED_DEFAULT);
+    if (replicationMarkerEnabled) {
+      try (BackupSystemTable backupSystemTable = new BackupSystemTable(conn)) {
+        Map<ServerName, Long> markerTimestamps = backupSystemTable.getBackupCheckpointTimestamps();
+
+        for (Map.Entry<ServerName, Long> entry : markerTimestamps.entrySet()) {
+          ServerName server = entry.getKey();
+          long markerTs = entry.getValue();
+
+          // If marker timestamp is newer, override
+          if (serverToCheckpoint.containsKey(server)) {
+            long current = serverToCheckpoint.get(server);
+            if (markerTs > current) {
+              serverToCheckpoint.put(server, markerTs);
+            }
+          } else {
+            // This server is no longer active (e.g., RS moved or removed); skip
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Skipping replication marker timestamp for inactive server: {}", server);
+            }
+          }
+        }
+
+        // Step 5: Persist current server timestamps into backup system table
+        for (Map.Entry<ServerName, Long> entry : serverToCheckpoint.entrySet()) {
+          backupSystemTable.updateBackupCheckpointTimestamp(entry.getKey(), entry.getValue());
+        }
+      }
+    } else {
+      LOG.warn(
+        "Replication marker chore is disabled. Using WAL-based timestamps only for checkpoint calculation.");
+    }
+
+    // Step 6: Calculate final checkpoint as minimum timestamp across all active servers
+    for (long ts : serverToCheckpoint.values()) {
+      checkpoint = Math.min(checkpoint, ts);
+    }
+
+    return checkpoint;
+  }
+
+  private static boolean continuousBackupReplicationPeerExists(Admin admin) throws IOException {
+    return admin.listReplicationPeers().stream()
+      .anyMatch(peer -> peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER));
+  }
+
+  /**
+   * Convert a millisecond timestamp to its "yyyy-MM-dd" day string, evaluated in UTC.
+   */
+  public static String formatToDateString(long dayInMillis) {
+    SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    return dateFormat.format(new Date(dayInMillis));
+  }
+
+  /**
+   * Fetches bulkload filepaths based on the given time range from backup WAL directory.
+   */
+  public static List<Path> collectBulkFiles(Connection conn, TableName sourceTable,
+    TableName targetTable, long startTime, long endTime, Path restoreRootDir,
+    List<String> walDirs) throws IOException {
+
+    if (walDirs.isEmpty()) {
+      String walBackupDir = conn.getConfiguration().get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
+      if (Strings.isNullOrEmpty(walBackupDir)) {
+        throw new IOException(
+          "WAL backup directory is not configured " + CONF_CONTINUOUS_BACKUP_WAL_DIR);
+      }
+      Path walDirPath = new Path(walBackupDir);
+      walDirs =
+        BackupUtils.getValidWalDirs(conn.getConfiguration(), walDirPath, startTime, endTime);
+    }
+
+    if (walDirs.isEmpty()) {
+      LOG.warn("No valid WAL directories found for range {} - {}. Skipping bulk-file collection.",
+        startTime, endTime);
+      return Collections.emptyList();
+    }
+
+    LOG.info(
+      "Starting WAL bulk-file collection for source: {}, target: {}, time range: {} - {}, WAL "
+        + "backup dir: {}, restore root: {}",
+      sourceTable, targetTable, startTime, endTime, walDirs, restoreRootDir);
+    String walDirsCsv = String.join(",", walDirs);
+
+    return BulkFilesCollector.collectFromWalDirs(HBaseConfiguration.create(conn.getConfiguration()),
+      walDirsCsv, restoreRootDir, sourceTable, targetTable, startTime, endTime);
+  }
+
+  /**
+   * Fetches valid WAL directories based on the given time range.
+   */
+  public static List<String> getValidWalDirs(Configuration conf, Path walBackupDir,
+    long startTime, long endTime) throws IOException {
+    FileSystem backupFs = FileSystem.get(walBackupDir.toUri(), conf);
+    FileStatus[] dayDirs = backupFs.listStatus(new Path(walBackupDir, WALS_DIR));
+
+    List<String> validDirs = new ArrayList<>();
+    SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); // day dirs are named in UTC, see formatToDateString
+
+    for (FileStatus dayDir : dayDirs) {
+      if (!dayDir.isDirectory()) {
+        continue; // Skip files, only process directories
+      }
+
+      String dirName = dayDir.getPath().getName();
+      try {
+        Date dirDate = dateFormat.parse(dirName);
+        long dirStartTime = dirDate.getTime(); // Start of that day (00:00:00)
+        long dirEndTime = dirStartTime + ONE_DAY_IN_MILLISECONDS - 1; // End time of day (23:59:59)
+
+        // Check if this day's WAL files overlap with the required time range
+        if (dirEndTime >= startTime && dirStartTime <= endTime) {
+          validDirs.add(dayDir.getPath().toString());
+        }
+      } catch (ParseException e) {
+        LOG.warn("Skipping invalid directory name: {}", dirName, e);
+      }
+    }
+    return validDirs;
+  }
}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BulkFilesCollector.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BulkFilesCollector.java
new file mode 100644
index 000000000000..718a662abb7b
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BulkFilesCollector.java
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.util;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.mapreduce.BulkLoadCollectorJob;
+import org.apache.hadoop.hbase.mapreduce.WALInputFormat;
+import org.apache.hadoop.hbase.mapreduce.WALPlayer;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.util.Tool;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility to run BulkLoadCollectorJob over a comma-separated list of WAL directories and return a
+ * deduplicated list of discovered bulk-load file paths.
+ */
+@InterfaceAudience.Private
+public final class BulkFilesCollector {
+
+ private static final Logger LOG = LoggerFactory.getLogger(BulkFilesCollector.class);
+
+  private BulkFilesCollector() {
+    /* utility class: static helpers only */ }
+
+  /**
+   * Convenience overload: collector will create and configure BulkLoadCollectorJob internally.
+   * @param conf cluster/configuration used to initialize job and access FS
+   * @param walDirsCsv comma-separated WAL directories
+   * @param restoreRootDir parent path under which temporary output dir will be created
+   * @param sourceTable source table name (for args/logging)
+   * @param targetTable target table name (for args/logging)
+   * @param startTime start time (ms) to set in the job config (WALInputFormat.START_TIME_KEY)
+   * @param endTime end time (ms) to set in the job config (WALInputFormat.END_TIME_KEY)
+   * @return deduplicated list of Paths discovered by the collector
+   * @throws IOException on IO or job failure
+   */
+  public static List<Path> collectFromWalDirs(Configuration conf, String walDirsCsv,
+    Path restoreRootDir, TableName sourceTable, TableName targetTable, long startTime,
+    long endTime) throws IOException {
+
+    // prepare job Tool
+    Configuration jobConf = new Configuration(conf);
+    if (startTime > 0) jobConf.setLong(WALInputFormat.START_TIME_KEY, startTime);
+    if (endTime > 0) jobConf.setLong(WALInputFormat.END_TIME_KEY, endTime);
+
+    // ignore empty WAL files by default to make collection robust
+    jobConf.setBoolean(WALPlayer.IGNORE_EMPTY_FILES, true);
+
+    BulkLoadCollectorJob bulkCollector = new BulkLoadCollectorJob();
+    bulkCollector.setConf(jobConf);
+
+    return collectFromWalDirs(conf, walDirsCsv, restoreRootDir, sourceTable, targetTable,
+      bulkCollector);
+  }
+
+  /**
+   * Primary implementation: runs the provided Tool (BulkLoadCollectorJob) with args
+   * {@code "<walDirsCsv> <outputDir> <sourceTable> <targetTable>"} and returns deduped Paths.
+   */
+  public static List<Path> collectFromWalDirs(Configuration conf, String walDirsCsv,
+    Path restoreRootDir, TableName sourceTable, TableName targetTable, Tool bulkCollector)
+    throws IOException {
+
+    if (walDirsCsv == null || walDirsCsv.trim().isEmpty()) {
+      throw new IOException(
+        "walDirsCsv must be a non-empty comma-separated list of WAL directories");
+    }
+
+    List<String> walDirs =
+      Arrays.stream(walDirsCsv.split(",")).map(String::trim).filter(s -> !s.isEmpty()).toList();
+
+    if (walDirs.isEmpty()) {
+      throw new IOException("walDirsCsv did not contain any entries: '" + walDirsCsv + "'");
+    }
+
+    List<String> existing = new ArrayList<>();
+    for (String d : walDirs) {
+      Path p = new Path(d);
+      try {
+        FileSystem fsForPath = p.getFileSystem(conf);
+        if (fsForPath.exists(p)) {
+          existing.add(d);
+        } else {
+          LOG.debug("WAL dir does not exist: {}", d);
+        }
+      } catch (IOException e) {
+        // If getting FS or checking existence fails, treat as missing but log the cause.
+        LOG.warn("Error checking WAL dir {}: {}", d, e.toString());
+      }
+    }
+
+    // If any of the provided walDirs are missing, fail with an informative message.
+    List<String> missing = new ArrayList<>(walDirs);
+    missing.removeAll(existing);
+
+    if (!missing.isEmpty()) {
+      throw new IOException(
+        "Some of the provided WAL paths do not exist: " + String.join(", ", missing));
+    }
+
+    // Create unique temporary output dir under restoreRootDir, e.g.
+    // <restoreRootDir>/_wal_collect_<tableQualifier><timestamp>
+    final String unique = String.format("_wal_collect_%s%d", sourceTable.getQualifierAsString(),
+      EnvironmentEdgeManager.currentTime());
+    final Path bulkFilesOut = new Path(restoreRootDir, unique);
+
+    FileSystem fs = bulkFilesOut.getFileSystem(conf);
+
+    try {
+      // If bulkFilesOut exists for some reason, delete it.
+      if (fs.exists(bulkFilesOut)) {
+        LOG.info("Temporary bulkload file collect output directory {} already exists - deleting.",
+          bulkFilesOut);
+        fs.delete(bulkFilesOut, true);
+      }
+
+      final String[] args = new String[] { walDirsCsv, bulkFilesOut.toString(),
+        sourceTable.getNameAsString(), targetTable.getNameAsString() };
+
+      LOG.info("Running bulk collector Tool with args: {}", (Object) args);
+
+      int exitCode;
+      try {
+        exitCode = bulkCollector.run(args);
+      } catch (Exception e) {
+        LOG.error("Error during BulkLoadCollectorJob for {}: {}", sourceTable, e.getMessage(), e);
+        throw new IOException("Exception during BulkLoadCollectorJob collect", e);
+      }
+
+      if (exitCode != 0) {
+        throw new IOException("Bulk collector Tool returned non-zero exit code: " + exitCode);
+      }
+
+      LOG.info("BulkLoadCollectorJob collect completed successfully for {}", sourceTable);
+
+      // read and dedupe
+      List<Path> results = readBulkFilesListFromOutput(fs, bulkFilesOut);
+      LOG.info("BulkFilesCollector: discovered {} unique bulk-load files", results.size());
+      return results;
+    } finally {
+      // best-effort cleanup
+      try {
+        if (fs.exists(bulkFilesOut)) {
+          boolean deleted = fs.delete(bulkFilesOut, true);
+          if (!deleted) {
+            LOG.warn("Could not delete temporary bulkFilesOut directory {}", bulkFilesOut);
+          } else {
+            LOG.debug("Deleted temporary bulkFilesOut directory {}", bulkFilesOut);
+          }
+        }
+      } catch (IOException ioe) {
+        LOG.warn("Exception while deleting temporary bulkload file collect output dir {}: {}",
+          bulkFilesOut, ioe.getMessage(), ioe);
+      }
+    }
+  }
+
+  // reads all non-hidden files under bulkFilesOut, collects lines in insertion order, returns Paths
+  private static List<Path> readBulkFilesListFromOutput(FileSystem fs, Path bulkFilesOut)
+    throws IOException {
+    if (!fs.exists(bulkFilesOut)) {
+      LOG.warn("BulkFilesCollector: bulkFilesOut directory does not exist: {}", bulkFilesOut);
+      return new ArrayList<>();
+    }
+
+    RemoteIterator<LocatedFileStatus> it = fs.listFiles(bulkFilesOut, true);
+    Set<String> dedupe = new LinkedHashSet<>();
+
+    while (it.hasNext()) {
+      LocatedFileStatus status = it.next();
+      Path p = status.getPath();
+      String name = p.getName();
+      // skip hidden/system files like _SUCCESS or _logs
+      if (name.startsWith("_") || name.startsWith(".")) continue;
+
+      try (FSDataInputStream in = fs.open(p);
+        BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
+        String line;
+        while ((line = br.readLine()) != null) {
+          line = line.trim();
+          if (line.isEmpty()) continue;
+          dedupe.add(line);
+        }
+      }
+    }
+
+    List<Path> result = new ArrayList<>(dedupe.size());
+    for (String s : dedupe)
+      result.add(new Path(s));
+
+    LOG.info("Collected {} unique bulk-load store files.", result.size());
+    return result;
+  }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BulkLoadProcessor.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BulkLoadProcessor.java
new file mode 100644
index 000000000000..4ab8bfb104e4
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BulkLoadProcessor.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALEdit;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
+
+/**
+ * Processes bulk load files from Write-Ahead Log (WAL) entries.
+ *
+ * Used by backup/restore and replication flows to discover HFiles referenced by bulk-load WALEdits.
+ * Returned {@link Path}s are constructed from the namespace/table/region/family/file components.
+ *
+ */
+@InterfaceAudience.Private
+public final class BulkLoadProcessor {
+  private BulkLoadProcessor() {
+  }
+
+  /**
+   * Extract bulk-load file {@link Path}s from a list of {@link WAL.Entry}.
+   * @param walEntries list of WAL entries.
+   * @return list of Paths in discovery order; empty list if none
+   * @throws IOException if descriptor parsing fails
+   */
+  public static List<Path> processBulkLoadFiles(List<WAL.Entry> walEntries) throws IOException {
+    List<Path> bulkLoadFilePaths = new ArrayList<>();
+
+    for (WAL.Entry entry : walEntries) {
+      bulkLoadFilePaths.addAll(processBulkLoadFiles(entry.getKey(), entry.getEdit()));
+    }
+    return bulkLoadFilePaths;
+  }
+
+  /**
+   * Extract bulk-load file {@link Path}s from a single WAL entry.
+   * @param key WALKey providing the table (namespace and qualifier) for the edit; must be non-null
+   * @param edit WALEdit scanned for bulk-load marker cells; must be non-null
+   * @return list of Paths referenced by bulk-load descriptor(s) in this edit; may be empty or
+   *         contain duplicates
+   * @throws IOException if descriptor parsing fails
+   */
+  public static List<Path> processBulkLoadFiles(WALKey key, WALEdit edit) throws IOException {
+    List<Path> bulkLoadFilePaths = new ArrayList<>();
+
+    for (Cell cell : edit.getCells()) {
+      if (CellUtil.matchingQualifier(cell, WALEdit.BULK_LOAD)) {
+        TableName tableName = key.getTableName();
+        String namespace = tableName.getNamespaceAsString();
+        String table = tableName.getQualifierAsString();
+        bulkLoadFilePaths.addAll(processBulkLoadDescriptor(cell, namespace, table));
+      }
+    }
+
+    return bulkLoadFilePaths;
+  }
+
+  private static List<Path> processBulkLoadDescriptor(Cell cell, String namespace, String table)
+    throws IOException {
+    List<Path> bulkLoadFilePaths = new ArrayList<>();
+    WALProtos.BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cell);
+
+    if (bld == null || !bld.getReplicate() || bld.getEncodedRegionName() == null) {
+      return bulkLoadFilePaths; // Skip if not replicable
+    }
+
+    String regionName = bld.getEncodedRegionName().toStringUtf8();
+    for (WALProtos.StoreDescriptor storeDescriptor : bld.getStoresList()) {
+      bulkLoadFilePaths
+        .addAll(processStoreDescriptor(storeDescriptor, namespace, table, regionName));
+    }
+
+    return bulkLoadFilePaths;
+  }
+
+  private static List<Path> processStoreDescriptor(WALProtos.StoreDescriptor storeDescriptor,
+    String namespace, String table, String regionName) {
+    List<Path> paths = new ArrayList<>();
+    String columnFamily = storeDescriptor.getFamilyName().toStringUtf8();
+
+    for (String storeFile : storeDescriptor.getStoreFileList()) {
+      paths.add(new Path(namespace,
+        new Path(table, new Path(regionName, new Path(columnFamily, storeFile)))));
+    }
+
+    return paths;
+  }
+}
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/PITRTestUtil.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/PITRTestUtil.java
new file mode 100644
index 000000000000..24f5237866db
--- /dev/null
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/PITRTestUtil.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup;
+
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_ENABLE_CONTINUOUS_BACKUP;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_PITR_BACKUP_PATH;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_TABLE;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_TABLE_MAPPING;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_TO_DATETIME;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.tool.BulkLoadHFiles;
+import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@InterfaceAudience.Private
+public final class PITRTestUtil {
+ private static final Logger LOG = LoggerFactory.getLogger(PITRTestUtil.class);
+ private static final int DEFAULT_WAIT_FOR_REPLICATION_MS = 30_000;
+
+ private PITRTestUtil() {
+ // Utility class
+ }
+
+ public static String[] buildPITRArgs(TableName[] sourceTables, TableName[] targetTables,
+ long endTime, String backupRootDir) {
+ String sourceTableNames =
+ Arrays.stream(sourceTables).map(TableName::getNameAsString).collect(Collectors.joining(","));
+ String targetTableNames =
+ Arrays.stream(targetTables).map(TableName::getNameAsString).collect(Collectors.joining(","));
+
+ List<String> args = new ArrayList<>();
+ args.add("-" + OPTION_TABLE);
+ args.add(sourceTableNames);
+ args.add("-" + OPTION_TABLE_MAPPING);
+ args.add(targetTableNames);
+ args.add("-" + OPTION_TO_DATETIME);
+ args.add(String.valueOf(endTime));
+
+ if (backupRootDir != null) {
+ args.add("-" + OPTION_PITR_BACKUP_PATH);
+ args.add(backupRootDir);
+ }
+
+ return args.toArray(new String[0]);
+ }
+
+ public static String[] buildBackupArgs(String backupType, TableName[] tables,
+ boolean continuousEnabled, String backupRootDir) {
+ String tableNames =
+ Arrays.stream(tables).map(TableName::getNameAsString).collect(Collectors.joining(","));
+
+ List<String> args = new ArrayList<>(
+ Arrays.asList("create", backupType, backupRootDir, "-" + OPTION_TABLE, tableNames));
+
+ if (continuousEnabled) {
+ args.add("-" + OPTION_ENABLE_CONTINUOUS_BACKUP);
+ }
+
+ return args.toArray(new String[0]);
+ }
+
+ public static void loadRandomData(HBaseTestingUtil testUtil, TableName tableName, byte[] family,
+ int totalRows) throws IOException {
+ try (Table table = testUtil.getConnection().getTable(tableName)) {
+ testUtil.loadRandomRows(table, family, 32, totalRows);
+ }
+ }
+
+ public static void waitForReplication() {
+ try {
+ LOG.info("Waiting for replication to complete for {} ms", DEFAULT_WAIT_FOR_REPLICATION_MS);
+ Thread.sleep(DEFAULT_WAIT_FOR_REPLICATION_MS);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException("Interrupted while waiting for replication", e);
+ }
+ }
+
+ public static int getRowCount(HBaseTestingUtil testUtil, TableName tableName) throws IOException {
+ try (Table table = testUtil.getConnection().getTable(tableName)) {
+ return HBaseTestingUtil.countRows(table);
+ }
+ }
+
+ public static void generateHFiles(Path outputDir, Configuration conf, String cfName)
+ throws IOException {
+ String hFileName = "MyHFile";
+ int numRows = 1000;
+
+ FileSystem fs = FileSystem.get(conf);
+ outputDir = outputDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
+
+ byte[] from = Bytes.toBytes(cfName + "begin");
+ byte[] to = Bytes.toBytes(cfName + "end");
+
+ Path familyDir = new Path(outputDir, cfName);
+ HFileTestUtil.createHFile(conf, fs, new Path(familyDir, hFileName), Bytes.toBytes(cfName),
+ Bytes.toBytes("qualifier"), from, to, numRows);
+ }
+
+ public static void bulkLoadHFiles(TableName tableName, Path inputDir, Connection conn,
+ Configuration conf) throws IOException {
+ conf.setBoolean(BulkLoadHFilesTool.BULK_LOAD_HFILES_BY_FAMILY, true);
+
+ try (Table table = conn.getTable(tableName)) {
+ BulkLoadHFiles loader = new BulkLoadHFilesTool(conf);
+ loader.bulkLoad(table.getName(), inputDir);
+ } finally {
+ conf.setBoolean(BulkLoadHFilesTool.BULK_LOAD_HFILES_BY_FAMILY, false);
+ }
+ }
+}
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
index bfc61010257e..aeabc8698cd8 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
@@ -17,9 +17,18 @@
*/
package org.apache.hadoop.hbase.backup;
+import static org.apache.hadoop.hbase.HConstants.REPLICATION_BULKLOAD_ENABLE_KEY;
+import static org.apache.hadoop.hbase.HConstants.REPLICATION_CLUSTER_ID;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
+import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.CONF_BACKUP_MAX_WAL_SIZE;
+import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.CONF_STAGED_WAL_FLUSH_INITIAL_DELAY;
+import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.CONF_STAGED_WAL_FLUSH_INTERVAL;
+import static org.apache.hadoop.hbase.mapreduce.WALPlayer.IGNORE_EMPTY_FILES;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -40,6 +49,7 @@
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
import org.apache.hadoop.hbase.backup.impl.BackupManager;
+import org.apache.hadoop.hbase.backup.impl.BackupManifest;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.backup.impl.FullTableBackupClient;
import org.apache.hadoop.hbase.backup.impl.IncrementalBackupManager;
@@ -164,7 +174,7 @@ public void execute() throws IOException {
// copy out the table and region info files for each table
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
- convertWALsToHFiles();
+ convertWALsToHFiles(new HashMap<>(), new HashMap<>());
incrementalCopyHFiles(new String[] { getBulkOutputDir().toString() },
backupInfo.getBackupRootDir());
failStageIf(Stage.stage_2);
@@ -189,7 +199,7 @@ public void execute() throws IOException {
BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
backupManager.writeBackupStartCode(newStartCode);
- handleBulkLoad(backupInfo.getTableNames());
+ handleBulkLoad(backupInfo.getTableNames(), new HashMap<>(), new HashMap<>());
failStageIf(Stage.stage_4);
// backup complete
@@ -291,6 +301,13 @@ public static void setUpHelper() throws Exception {
BACKUP_ROOT_DIR = Path.SEPARATOR + "backupUT";
BACKUP_REMOTE_ROOT_DIR = Path.SEPARATOR + "backupUT";
+ conf1.set(CONF_BACKUP_MAX_WAL_SIZE, "10240");
+ conf1.set(CONF_STAGED_WAL_FLUSH_INITIAL_DELAY, "10");
+ conf1.set(CONF_STAGED_WAL_FLUSH_INTERVAL, "10");
+ conf1.setBoolean(REPLICATION_BULKLOAD_ENABLE_KEY, true);
+ conf1.set(REPLICATION_CLUSTER_ID, "clusterId1");
+ conf1.setBoolean(IGNORE_EMPTY_FILES, true);
+
if (secure) {
// set the always on security provider
UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
@@ -401,15 +418,30 @@ protected BackupRequest createBackupRequest(BackupType type, List<TableName> tab
return request;
}
+ protected BackupRequest createBackupRequest(BackupType type, List<TableName> tables,
+ String rootDir, boolean noChecksumVerify, boolean isContinuousBackupEnabled) {
+ BackupRequest.Builder builder = new BackupRequest.Builder();
+ return builder.withBackupType(type).withTableList(tables).withTargetRootDir(rootDir)
+ .withNoChecksumVerify(noChecksumVerify).withContinuousBackupEnabled(isContinuousBackupEnabled)
+ .build();
+ }
+
protected String backupTables(BackupType type, List<TableName> tables, String path)
throws IOException {
+ return backupTables(type, tables, path, false);
+ }
+
+ protected String backupTables(BackupType type, List<TableName> tables, String path,
+ boolean isContinuousBackup) throws IOException {
Connection conn = null;
BackupAdmin badmin = null;
String backupId;
try {
conn = ConnectionFactory.createConnection(conf1);
badmin = new BackupAdminImpl(conn);
- BackupRequest request = createBackupRequest(type, new ArrayList<>(tables), path);
+
+ BackupRequest request =
+ createBackupRequest(type, new ArrayList<>(tables), path, false, isContinuousBackup);
backupId = badmin.backupTables(request);
} finally {
if (badmin != null) {
@@ -541,4 +573,20 @@ protected void dumpBackupDir() throws IOException {
LOG.debug(Objects.toString(it.next().getPath()));
}
}
+
+ BackupManifest getLatestBackupManifest(List<BackupInfo> backups) throws IOException {
+ BackupInfo newestBackup = backups.get(0);
+ return HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR),
+ newestBackup.getBackupId());
+ }
+
+ void deleteContinuousBackupReplicationPeerIfExists(Admin admin) throws IOException {
+ if (
+ admin.listReplicationPeers().stream()
+ .anyMatch(peer -> peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER))
+ ) {
+ admin.disableReplicationPeer(CONTINUOUS_BACKUP_REPLICATION_PEER);
+ admin.removeReplicationPeer(CONTINUOUS_BACKUP_REPLICATION_PEER);
+ }
+ }
}
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDelete.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDelete.java
index 785859c52805..31eaaff50512 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDelete.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDelete.java
@@ -19,6 +19,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
@@ -32,7 +33,6 @@
import org.apache.hadoop.hbase.util.EnvironmentEdge;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.util.ToolRunner;
-import org.junit.Assert;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@@ -138,7 +138,7 @@ public long currentTime() {
assertTrue(ret == 0);
} catch (Exception e) {
LOG.error("failed", e);
- Assert.fail(e.getMessage());
+ fail(e.getMessage());
}
String output = baos.toString();
LOG.info(baos.toString());
@@ -154,7 +154,7 @@ public long currentTime() {
assertTrue(ret == 0);
} catch (Exception e) {
LOG.error("failed", e);
- Assert.fail(e.getMessage());
+ fail(e.getMessage());
}
output = baos.toString();
LOG.info(baos.toString());
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java
new file mode 100644
index 000000000000..c6c6f5e9799e
--- /dev/null
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup;
+
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
+import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
+import static org.apache.hadoop.hbase.backup.util.BackupFileSystemManager.BULKLOAD_FILES_DIR;
+import static org.apache.hadoop.hbase.backup.util.BackupFileSystemManager.WALS_DIR;
+import static org.apache.hadoop.hbase.backup.util.BackupUtils.DATE_FORMAT;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
+import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.util.BackupFileSystemManager;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
+
+@Category(LargeTests.class)
+public class TestBackupDeleteWithCleanup extends TestBackupBase {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestBackupDeleteWithCleanup.class);
+
+ String backupWalDirName = "TestBackupDeleteWithCleanup";
+
+ private FileSystem fs;
+ private Path backupWalDir;
+ private BackupSystemTable backupSystemTable;
+
+ @Before
+ public void setUpTest() throws Exception {
+ Path root = TEST_UTIL.getDataTestDirOnTestFS();
+ backupWalDir = new Path(root, backupWalDirName);
+ conf1.set(CONF_CONTINUOUS_BACKUP_WAL_DIR, backupWalDir.toString());
+ fs = FileSystem.get(conf1);
+ fs.mkdirs(backupWalDir);
+ backupSystemTable = new BackupSystemTable(TEST_UTIL.getConnection());
+ }
+
+ @After
+ public void tearDownTest() throws Exception {
+ if (backupSystemTable != null) {
+ backupSystemTable.close();
+ }
+ if (fs != null && backupWalDir != null) {
+ fs.delete(backupWalDir, true);
+ }
+
+ EnvironmentEdgeManager.reset();
+ }
+
+ @Test
+ public void testBackupDeleteWithCleanupLogic() throws Exception {
+ // Step 1: Setup Backup Folders
+ long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
+ setupBackupFolders(currentTime);
+
+ // Log the directory structure before cleanup
+ logDirectoryStructure(fs, backupWalDir, "Directory structure BEFORE cleanup:");
+
+ // Step 2: Simulate Backup Creation
+ backupSystemTable.addContinuousBackupTableSet(Set.of(table1),
+ currentTime - (2 * ONE_DAY_IN_MILLISECONDS));
+
+ EnvironmentEdgeManager
+ .injectEdge(() -> System.currentTimeMillis() - (2 * ONE_DAY_IN_MILLISECONDS));
+
+ String backupId = fullTableBackup(Lists.newArrayList(table1));
+ assertTrue(checkSucceeded(backupId));
+ String anotherBackupId = fullTableBackup(Lists.newArrayList(table1));
+ assertTrue(checkSucceeded(anotherBackupId));
+
+ // Step 3: Run Delete Command
+ deleteBackup(backupId);
+
+ // Log the directory structure after cleanup
+ logDirectoryStructure(fs, backupWalDir, "Directory structure AFTER cleanup:");
+
+ // Step 4: Verify Cleanup
+ verifyBackupCleanup(fs, backupWalDir, currentTime);
+
+ // Step 5: Verify System Table Update
+ verifySystemTableUpdate(backupSystemTable, currentTime);
+
+ // Cleanup
+ deleteBackup(anotherBackupId);
+ }
+
+ @Test
+ public void testSingleBackupForceDelete() throws Exception {
+ // Step 1: Setup Backup Folders
+ long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
+ setupBackupFolders(currentTime);
+
+ // Log the directory structure before cleanup
+ logDirectoryStructure(fs, backupWalDir, "Directory structure BEFORE cleanup:");
+
+ // Step 2: Simulate Backup Creation
+ backupSystemTable.addContinuousBackupTableSet(Set.of(table1),
+ currentTime - (2 * ONE_DAY_IN_MILLISECONDS));
+
+ EnvironmentEdgeManager
+ .injectEdge(() -> System.currentTimeMillis() - (2 * ONE_DAY_IN_MILLISECONDS));
+
+ String backupId = fullTableBackupWithContinuous(Lists.newArrayList(table1));
+ assertTrue(checkSucceeded(backupId));
+
+ assertTrue("Backup replication peer should be enabled after the backup",
+ continuousBackupReplicationPeerExistsAndEnabled());
+
+ // Step 3: Run Delete Command
+ deleteBackup(backupId);
+
+ // Log the directory structure after cleanup
+ logDirectoryStructure(fs, backupWalDir, "Directory structure AFTER cleanup:");
+
+ // Step 4: Verify CONTINUOUS_BACKUP_REPLICATION_PEER is disabled
+ assertFalse("Backup replication peer should be disabled or removed",
+ continuousBackupReplicationPeerExistsAndEnabled());
+
+ // Step 5: Verify that system table is updated to remove all the tables
+ Set<TableName> remainingTables = backupSystemTable.getContinuousBackupTableSet().keySet();
+ assertTrue("System table should have no tables after all full backups are clear",
+ remainingTables.isEmpty());
+
+ // Step 6: Verify that the backup WAL directory is empty
+ assertTrue("WAL backup directory should be empty after force delete",
+ areWalAndBulkloadDirsEmpty(conf1, backupWalDir.toString()));
+
+ // Step 7: Take new full backup with continuous backup enabled
+ String backupIdContinuous = fullTableBackupWithContinuous(Lists.newArrayList(table1));
+
+ // Step 8: Verify CONTINUOUS_BACKUP_REPLICATION_PEER is enabled again
+ assertTrue("Backup replication peer should be re-enabled after new backup",
+ continuousBackupReplicationPeerExistsAndEnabled());
+
+ // And system table has new entry
+ Set<TableName> newTables = backupSystemTable.getContinuousBackupTableSet().keySet();
+ assertTrue("System table should contain the table after new backup",
+ newTables.contains(table1));
+
+ // Cleanup
+ deleteBackup(backupIdContinuous);
+ }
+
+ private void setupBackupFolders(long currentTime) throws IOException {
+ setupBackupFolders(fs, backupWalDir, currentTime);
+ }
+
+ public static void setupBackupFolders(FileSystem fs, Path backupWalDir, long currentTime)
+ throws IOException {
+ Path walsDir = new Path(backupWalDir, WALS_DIR);
+ Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
+
+ fs.mkdirs(walsDir);
+ fs.mkdirs(bulkLoadDir);
+
+ SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
+
+ for (int i = 0; i < 5; i++) {
+ String dateStr = dateFormat.format(new Date(currentTime - (i * ONE_DAY_IN_MILLISECONDS)));
+ fs.mkdirs(new Path(walsDir, dateStr));
+ fs.mkdirs(new Path(bulkLoadDir, dateStr));
+ }
+ }
+
+ private static void verifyBackupCleanup(FileSystem fs, Path backupWalDir, long currentTime)
+ throws IOException {
+ Path walsDir = new Path(backupWalDir, WALS_DIR);
+ Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
+ SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
+
+ // Expect folders older than 3 days to be deleted
+ for (int i = 3; i < 5; i++) {
+ String oldDateStr = dateFormat.format(new Date(currentTime - (i * ONE_DAY_IN_MILLISECONDS)));
+ Path walPath = new Path(walsDir, oldDateStr);
+ Path bulkLoadPath = new Path(bulkLoadDir, oldDateStr);
+ assertFalse("Old WAL directory (" + walPath + ") should be deleted, but it exists!",
+ fs.exists(walPath));
+ assertFalse("Old BulkLoad directory (" + bulkLoadPath + ") should be deleted, but it exists!",
+ fs.exists(bulkLoadPath));
+ }
+
+ // Expect folders within the last 3 days to exist
+ for (int i = 0; i < 3; i++) {
+ String recentDateStr =
+ dateFormat.format(new Date(currentTime - (i * ONE_DAY_IN_MILLISECONDS)));
+ Path walPath = new Path(walsDir, recentDateStr);
+ Path bulkLoadPath = new Path(bulkLoadDir, recentDateStr);
+
+ assertTrue("Recent WAL directory (" + walPath + ") should exist, but it is missing!",
+ fs.exists(walPath));
+ assertTrue(
+ "Recent BulkLoad directory (" + bulkLoadPath + ") should exist, but it is missing!",
+ fs.exists(bulkLoadPath));
+ }
+ }
+
+ private void verifySystemTableUpdate(BackupSystemTable backupSystemTable, long currentTime)
+ throws IOException {
+ Map<TableName, Long> updatedTables = backupSystemTable.getContinuousBackupTableSet();
+
+ for (Map.Entry | |