Skip to content

Commit 5d815b8

Browse files
vinayakphegde authored and anmolnar committed
HBASE-29524 Handle bulk-loaded HFiles in delete and cleanup process (#7239)
Signed-off-by: Tak Lon (Stephen) Wu <taklwu@apache.org> Reviewed by: Kota-SH <shanmukhaharipriya@gmail.com>
1 parent 176e8c6 commit 5d815b8

File tree

3 files changed

+37
-5
lines changed

3 files changed

+37
-5
lines changed

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,7 @@ private void deleteAllBackupWALFiles(Configuration conf, String backupWalDir)
10041004
new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
10051005
FileSystem fs = manager.getBackupFs();
10061006
Path walDir = manager.getWalsDir();
1007+
Path bulkloadDir = manager.getBulkLoadFilesDir();
10071008

10081009
// Delete contents under WAL directory
10091010
if (fs.exists(walDir)) {
@@ -1014,6 +1015,15 @@ private void deleteAllBackupWALFiles(Configuration conf, String backupWalDir)
10141015
System.out.println("Deleted all contents under WAL directory: " + walDir);
10151016
}
10161017

1018+
// Delete contents under bulk load directory
1019+
if (fs.exists(bulkloadDir)) {
1020+
FileStatus[] bulkContents = fs.listStatus(bulkloadDir);
1021+
for (FileStatus item : bulkContents) {
1022+
fs.delete(item.getPath(), true); // recursive delete of each child
1023+
}
1024+
System.out.println("Deleted all contents under Bulk Load directory: " + bulkloadDir);
1025+
}
1026+
10171027
} catch (IOException e) {
10181028
System.out.println("WARNING: Failed to delete contents under backup directories: "
10191029
+ backupWalDir + ". Error: " + e.getMessage());
@@ -1022,7 +1032,7 @@ private void deleteAllBackupWALFiles(Configuration conf, String backupWalDir)
10221032
}
10231033

10241034
/**
1025-
* Cleans up old WAL files based on the determined cutoff timestamp.
1035+
* Cleans up old WAL and bulk-loaded files based on the determined cutoff timestamp.
10261036
*/
10271037
void deleteOldWALFiles(Configuration conf, String backupWalDir, long cutoffTime)
10281038
throws IOException {
@@ -1033,6 +1043,7 @@ void deleteOldWALFiles(Configuration conf, String backupWalDir, long cutoffTime)
10331043
new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
10341044
FileSystem fs = manager.getBackupFs();
10351045
Path walDir = manager.getWalsDir();
1046+
Path bulkloadDir = manager.getBulkLoadFilesDir();
10361047

10371048
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
10381049
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -1058,6 +1069,9 @@ void deleteOldWALFiles(Configuration conf, String backupWalDir, long cutoffTime)
10581069
if (dayStart + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
10591070
System.out.println("Deleting outdated WAL directory: " + dirPath);
10601071
fs.delete(dirPath, true);
1072+
Path bulkloadPath = new Path(bulkloadDir, dirName);
1073+
System.out.println("Deleting corresponding bulk-load directory: " + bulkloadPath);
1074+
fs.delete(bulkloadPath, true);
10611075
}
10621076
} catch (ParseException e) {
10631077
System.out.println("WARNING: Failed to parse directory name '" + dirName

hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
2121
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
22+
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.BULKLOAD_FILES_DIR;
2223
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR;
2324
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
2425
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
@@ -164,7 +165,7 @@ public void testSingleBackupForceDelete() throws Exception {
164165

165166
// Step 6: Verify that the backup WAL directory is empty
166167
assertTrue("WAL backup directory should be empty after force delete",
167-
isWalDirsEmpty(conf1, backupWalDir.toString()));
168+
areWalAndBulkloadDirsEmpty(conf1, backupWalDir.toString()));
168169

169170
// Step 7: Take new full backup with continuous backup enabled
170171
String backupIdContinuous = fullTableBackupWithContinuous(Lists.newArrayList(table1));
@@ -189,38 +190,49 @@ private void setupBackupFolders(long currentTime) throws IOException {
189190
public static void setupBackupFolders(FileSystem fs, Path backupWalDir, long currentTime)
190191
throws IOException {
191192
Path walsDir = new Path(backupWalDir, WALS_DIR);
193+
Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
192194

193195
fs.mkdirs(walsDir);
196+
fs.mkdirs(bulkLoadDir);
194197

195198
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
196199

197200
for (int i = 0; i < 5; i++) {
198201
String dateStr = dateFormat.format(new Date(currentTime - (i * ONE_DAY_IN_MILLISECONDS)));
199202
fs.mkdirs(new Path(walsDir, dateStr));
203+
fs.mkdirs(new Path(bulkLoadDir, dateStr));
200204
}
201205
}
202206

203207
private static void verifyBackupCleanup(FileSystem fs, Path backupWalDir, long currentTime)
204208
throws IOException {
205209
Path walsDir = new Path(backupWalDir, WALS_DIR);
210+
Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
206211
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
207212

208213
// Expect folders older than 3 days to be deleted
209214
for (int i = 3; i < 5; i++) {
210215
String oldDateStr = dateFormat.format(new Date(currentTime - (i * ONE_DAY_IN_MILLISECONDS)));
211216
Path walPath = new Path(walsDir, oldDateStr);
217+
Path bulkLoadPath = new Path(bulkLoadDir, oldDateStr);
212218
assertFalse("Old WAL directory (" + walPath + ") should be deleted, but it exists!",
213219
fs.exists(walPath));
220+
assertFalse("Old BulkLoad directory (" + bulkLoadPath + ") should be deleted, but it exists!",
221+
fs.exists(bulkLoadPath));
214222
}
215223

216224
// Expect folders within the last 3 days to exist
217225
for (int i = 0; i < 3; i++) {
218226
String recentDateStr =
219227
dateFormat.format(new Date(currentTime - (i * ONE_DAY_IN_MILLISECONDS)));
220228
Path walPath = new Path(walsDir, recentDateStr);
229+
Path bulkLoadPath = new Path(bulkLoadDir, recentDateStr);
221230

222231
assertTrue("Recent WAL directory (" + walPath + ") should exist, but it is missing!",
223232
fs.exists(walPath));
233+
assertTrue(
234+
"Recent BulkLoad directory (" + bulkLoadPath + ") should exist, but it is missing!",
235+
fs.exists(bulkLoadPath));
224236
}
225237
}
226238

@@ -264,15 +276,16 @@ private boolean continuousBackupReplicationPeerExistsAndEnabled() throws IOExcep
264276
peer -> peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER) && peer.isEnabled());
265277
}
266278

267-
private static boolean isWalDirsEmpty(Configuration conf, String backupWalDir)
279+
private static boolean areWalAndBulkloadDirsEmpty(Configuration conf, String backupWalDir)
268280
throws IOException {
269281
BackupFileSystemManager manager =
270282
new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
271283

272284
FileSystem fs = manager.getBackupFs();
273285
Path walDir = manager.getWalsDir();
286+
Path bulkloadDir = manager.getBulkLoadFilesDir();
274287

275-
return isDirectoryEmpty(fs, walDir);
288+
return isDirectoryEmpty(fs, walDir) && isDirectoryEmpty(fs, bulkloadDir);
276289
}
277290

278291
private static boolean isDirectoryEmpty(FileSystem fs, Path dirPath) throws IOException {

hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/impl/TestBackupCommands.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
2121
import static org.apache.hadoop.hbase.backup.TestBackupDeleteWithCleanup.logDirectoryStructure;
2222
import static org.apache.hadoop.hbase.backup.TestBackupDeleteWithCleanup.setupBackupFolders;
23+
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.BULKLOAD_FILES_DIR;
2324
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR;
2425
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
2526
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
@@ -134,7 +135,7 @@ public void testDeleteOldWALFilesOfCleanupCommand() throws IOException {
134135
fs.mkdirs(backupWalDir);
135136

136137
long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
137-
setupBackupFolders(fs, backupWalDir, currentTime); // Create 5 days of WALs folders
138+
setupBackupFolders(fs, backupWalDir, currentTime); // Create 5 days of WAL/bulkload-files folder
138139

139140
logDirectoryStructure(fs, backupWalDir, "Before cleanup:");
140141

@@ -154,18 +155,22 @@ public void testDeleteOldWALFilesOfCleanupCommand() throws IOException {
154155
private static void verifyCleanupOutcome(FileSystem fs, Path backupWalDir, long currentTime,
155156
long cutoffTime) throws IOException {
156157
Path walsDir = new Path(backupWalDir, WALS_DIR);
158+
Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
157159
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
158160
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
159161

160162
for (int i = 0; i < 5; i++) {
161163
long dayTime = currentTime - (i * ONE_DAY_IN_MILLISECONDS);
162164
String dayDir = dateFormat.format(new Date(dayTime));
163165
Path walPath = new Path(walsDir, dayDir);
166+
Path bulkPath = new Path(bulkLoadDir, dayDir);
164167

165168
if (dayTime + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
166169
assertFalse("Old WAL dir should be deleted: " + walPath, fs.exists(walPath));
170+
assertFalse("Old BulkLoad dir should be deleted: " + bulkPath, fs.exists(bulkPath));
167171
} else {
168172
assertTrue("Recent WAL dir should exist: " + walPath, fs.exists(walPath));
173+
assertTrue("Recent BulkLoad dir should exist: " + bulkPath, fs.exists(bulkPath));
169174
}
170175
}
171176
}

0 commit comments

Comments (0)