Skip to content

Commit

Permalink
HBASE-27305 add an option to skip file splitting when bulkload hfiles (
Browse files Browse the repository at this point in the history
…#4709)

Co-authored-by: huiruan <huiruan@tencent.com>
Signed-off-by: Duo Zhang <zhangduo@apache.org>
(cherry picked from commit 00a719e)

Conflicts:
	hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
  • Loading branch information
frostruan authored and Apache9 committed Aug 22, 2022
1 parent 1bd0b58 commit f66d67b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
BulkLoadHFiles.IGNORE_UNMATCHED_CF_CONF_KEY;
public final static String ALWAYS_COPY_FILES = BulkLoadHFiles.ALWAYS_COPY_FILES;

/**
 * Configuration key for a boolean option that is read during initialization with a default of
 * {@code false}. When it is set to {@code true}, the bulk load throws an {@link IOException}
 * for an HFile whose last key is not in the same region as its first key, instead of going on
 * to split that HFile.
 */
public static final String FAIL_IF_NEED_SPLIT_HFILE =
"hbase.loadincremental.fail.if.need.split.hfile";

// We use a '.' prefix which is ignored when walking directory trees
// above. It is invalid family name.
static final String TMP_DIR = ".tmp";
Expand All @@ -162,6 +165,8 @@ public class LoadIncrementalHFiles extends Configured implements Tool {

private boolean replicate = true;

// Value of the FAIL_IF_NEED_SPLIT_HFILE setting; false (the default) keeps the HFile-splitting
// behavior, true makes the load fail when an HFile would have to be split.
private boolean failIfNeedSplitHFile = false;

/**
* Represents an HFile waiting to be loaded. A queue is used in this class in order to support
* the case where a region has split during the process of the load. When this happens, the HFile
Expand Down Expand Up @@ -195,6 +200,7 @@ public void initialize() {
assignSeqIds = conf.getBoolean(ASSIGN_SEQ_IDS, true);
maxFilesPerRegionPerFamily = conf.getInt(MAX_FILES_PER_REGION_PER_FAMILY, 32);
bulkLoadByFamily = conf.getBoolean(BulkLoadHFiles.BULK_LOAD_HFILES_BY_FAMILY, false);
failIfNeedSplitHFile = conf.getBoolean(FAIL_IF_NEED_SPLIT_HFILE, false);
nrThreads =
conf.getInt("hbase.loadincremental.threads.max", Runtime.getRuntime().availableProcessors());
numRetries = new AtomicInteger(0);
Expand Down Expand Up @@ -803,6 +809,11 @@ CacheConfig.DISABLED, true, getConf())) {
Bytes.compareTo(last.get(), startEndKeys.getSecond()[firstKeyRegionIdx]) < 0
|| Bytes.equals(startEndKeys.getSecond()[firstKeyRegionIdx], HConstants.EMPTY_BYTE_ARRAY);
if (!lastKeyInRange) {
if (failIfNeedSplitHFile) {
throw new IOException(
"The key range of hfile=" + hfilePath + " fits into no region. " + "And because "
+ FAIL_IF_NEED_SPLIT_HFILE + " was set to true, we just skip the next steps.");
}
int lastKeyRegionIdx = getRegionIndex(startEndKeys, last.get());
int splitIdx = (firstKeyRegionIdx + lastKeyRegionIdx) >>> 1;
// make sure the splitPoint is valid in case region overlap occur, maybe the splitPoint bigger
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

Expand Down Expand Up @@ -793,4 +794,29 @@ protected List<LoadQueueItem> tryAtomicRegionLoad(Connection connection, TableNa
util.getConfiguration().setBoolean(BulkLoadHFiles.BULK_LOAD_HFILES_BY_FAMILY, false);
}
}

/**
 * Verifies that, with {@code FAIL_IF_NEED_SPLIT_HFILE} enabled, running the bulk load tool on an
 * HFile after the target table has been split throws an {@link IOException}, and that every
 * region of the table still has exactly one store file for the family afterwards.
 *
 * @throws IOException if table creation, data loading, HFile creation or the split request fails
 */
@Test
public void testFailIfNeedSplitHFile() throws IOException {
  TableName tableName = TableName.valueOf(tn.getMethodName());

  // The Table handle is only needed to seed the table, so scope it with try-with-resources
  // instead of leaking it for the rest of the test.
  try (Table table = util.createTable(tableName, FAMILY)) {
    util.loadTable(table, FAMILY);
  }

  // HFile to bulk load; presumably "aaa".."zzz" is its key range and 1000 its row count
  // (see HFileTestUtil.createHFile) -- wide enough to cross the region boundary created below.
  FileSystem fs = util.getTestFileSystem();
  Path sfPath = new Path(fs.getWorkingDirectory(), new Path(Bytes.toString(FAMILY), "file"));
  HFileTestUtil.createHFile(util.getConfiguration(), fs, sfPath, FAMILY, QUALIFIER,
    Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

  // Split the table and wait until more than one region is reported.
  util.getAdmin().split(tableName);
  util.waitFor(10000, 1000, () -> util.getAdmin().getRegions(tableName).size() > 1);

  // Enable the option on a copy of the configuration so the shared test config is untouched.
  Configuration config = new Configuration(util.getConfiguration());
  config.setBoolean(BulkLoadHFilesTool.FAIL_IF_NEED_SPLIT_HFILE, true);
  BulkLoadHFilesTool tool = new BulkLoadHFilesTool(config);

  String[] args = new String[] { fs.getWorkingDirectory().toString(), tableName.toString() };
  assertThrows(IOException.class, () -> tool.run(args));

  // Each region must still hold a single store file; presumably this means the failed bulk load
  // added nothing to any region.
  util.getHBaseCluster().getRegions(tableName)
    .forEach(r -> assertEquals(1, r.getStore(FAMILY).getStorefiles().size()));
}
}

0 comments on commit f66d67b

Please sign in to comment.