From 51576ae45a97036e9b0a578d742d27c89ea0e808 Mon Sep 17 00:00:00 2001 From: Ruanhui <32773751+frostruan@users.noreply.github.com> Date: Mon, 22 Aug 2022 21:14:30 +0800 Subject: [PATCH] HBASE-27305 add an option to skip file splitting when bulkload hfiles (#4709) Co-authored-by: huiruan Signed-off-by: Duo Zhang (cherry picked from commit 00a719e76f16b7380f6695fc986f003e0e5f47fe) Conflicts: hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java --- .../hbase/tool/LoadIncrementalHFiles.java | 11 ++++++++ .../hbase/tool/TestLoadIncrementalHFiles.java | 26 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java index 7842e21d076f..5e3f2e9468f4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java @@ -141,6 +141,9 @@ public class LoadIncrementalHFiles extends Configured implements Tool { BulkLoadHFiles.IGNORE_UNMATCHED_CF_CONF_KEY; public final static String ALWAYS_COPY_FILES = BulkLoadHFiles.ALWAYS_COPY_FILES; + public static final String FAIL_IF_NEED_SPLIT_HFILE = + "hbase.loadincremental.fail.if.need.split.hfile"; + // We use a '.' prefix which is ignored when walking directory trees // above. It is invalid family name. static final String TMP_DIR = ".tmp"; @@ -162,6 +165,8 @@ public class LoadIncrementalHFiles extends Configured implements Tool { private boolean replicate = true; + private boolean failIfNeedSplitHFile = false; + /** * Represents an HFile waiting to be loaded. An queue is used in this class in order to support * the case where a region has split during the process of the load. 
When this happens, the HFile @@ -195,6 +200,7 @@ public void initialize() { assignSeqIds = conf.getBoolean(ASSIGN_SEQ_IDS, true); maxFilesPerRegionPerFamily = conf.getInt(MAX_FILES_PER_REGION_PER_FAMILY, 32); bulkLoadByFamily = conf.getBoolean(BulkLoadHFiles.BULK_LOAD_HFILES_BY_FAMILY, false); + failIfNeedSplitHFile = conf.getBoolean(FAIL_IF_NEED_SPLIT_HFILE, false); nrThreads = conf.getInt("hbase.loadincremental.threads.max", Runtime.getRuntime().availableProcessors()); numRetries = new AtomicInteger(0); @@ -803,6 +809,11 @@ CacheConfig.DISABLED, true, getConf())) { Bytes.compareTo(last.get(), startEndKeys.getSecond()[firstKeyRegionIdx]) < 0 || Bytes.equals(startEndKeys.getSecond()[firstKeyRegionIdx], HConstants.EMPTY_BYTE_ARRAY); if (!lastKeyInRange) { + if (failIfNeedSplitHFile) { + throw new IOException( + "The key range of hfile=" + hfilePath + " fits into no region. " + "And because " + + FAIL_IF_NEED_SPLIT_HFILE + " was set to true, we just skip the next steps."); + } int lastKeyRegionIdx = getRegionIndex(startEndKeys, last.get()); int splitIdx = (firstKeyRegionIdx + lastKeyRegionIdx) >>> 1; // make sure the splitPoint is valid in case region overlap occur, maybe the splitPoint bigger diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java index a842c1d223fc..cef666942f87 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -793,4 +794,29 @@ protected List tryAtomicRegionLoad(Connection connection, TableNa 
util.getConfiguration().setBoolean(BulkLoadHFiles.BULK_LOAD_HFILES_BY_FAMILY, false);
     }
   }
+
+  @Test
+  public void testFailIfNeedSplitHFile() throws IOException {
+    TableName tableName = TableName.valueOf(tn.getMethodName());
+    try (Table table = util.createTable(tableName, FAMILY)) {
+      util.loadTable(table, FAMILY);
+    }
+    // Stage an HFile built from keys "aaa".."zzz"; presumably it straddles the split point below.
+    FileSystem fs = util.getTestFileSystem();
+    Path sfPath = new Path(fs.getWorkingDirectory(), new Path(Bytes.toString(FAMILY), "file"));
+    HFileTestUtil.createHFile(util.getConfiguration(), fs, sfPath, FAMILY, QUALIFIER,
+      Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
+    // Request a split and wait until the table reports more than one region.
+    util.getAdmin().split(tableName);
+    util.waitFor(10000, 1000, () -> util.getAdmin().getRegions(tableName).size() > 1);
+    // Work on a copy of the configuration so the flag does not leak into other tests.
+    Configuration config = new Configuration(util.getConfiguration());
+    config.setBoolean(BulkLoadHFilesTool.FAIL_IF_NEED_SPLIT_HFILE, true);
+    BulkLoadHFilesTool tool = new BulkLoadHFilesTool(config);
+    // The load must fail with an IOException and each region must still have one store file.
+    String[] args = new String[] { fs.getWorkingDirectory().toString(), tableName.toString() };
+    assertThrows(IOException.class, () -> tool.run(args));
+    util.getHBaseCluster().getRegions(tableName)
+      .forEach(r -> assertEquals(1, r.getStore(FAMILY).getStorefiles().size()));
+  }
 }