From 739a1548261e64d333fc42ae58b0ed3021e4c5b7 Mon Sep 17 00:00:00 2001
From: Kai33
Date: Fri, 21 Dec 2018 00:19:28 +0800
Subject: [PATCH] Fix DistCp not reassemble chunks when blocks per chunk > 0

---
Reviewer notes. Text between the "---" separator above and the first
"diff --git" line is commentary and does not become part of the commit.

What the patch changes:
  * DistCpConstants.java: adds the constant CONF_LABEL_BLOCKS_PER_CHUNK with
    the value "distcp.blocks.per.chunk".
  * DistCpOptionSwitch.java: the first argument of BLOCKS_PER_CHUNK changes
    from the empty string "" to that constant. The new test reads the value
    back through BLOCKS_PER_CHUNK.getConfigLabel(), so this argument is
    presumably the switch's config label (constructor not visible here).
  * TestDistCpOptions.java: adds testAppendToConf(). It builds DistCpOptions
    with withBlocksPerChunk(999), pre-sets the configuration key "" to a
    sentinel string, calls options.appendToConf(config), then asserts that
    (1) getInt(BLOCKS_PER_CHUNK label, 0) returns 999 and
    (2) the key "" still holds the sentinel, i.e. appendToConf did not
        overwrite the value stored under the empty key.

NOTE(review): this copy of the patch arrived with its line breaks collapsed.
The layout below (blank context lines, indentation, where long statements
wrap) is reconstructed from the hunk line counts and should be checked
against the target tree before applying.
NOTE(review): in the DistCpOptionSwitch hunk the context string
" blocks to be transferred in parallel, and " begins with a bare space; an
angle-bracketed placeholder may have been stripped in transit. Confirm.

 .../apache/hadoop/tools/DistCpConstants.java  |  3 +++
 .../hadoop/tools/DistCpOptionSwitch.java      |  2 +-
 .../hadoop/tools/TestDistCpOptions.java       | 24 +++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
index 212256ccfd702..87d0c66592459 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
@@ -123,6 +123,9 @@ private DistCpConstants() {
   public static final String CONF_LABEL_COPY_BUFFER_SIZE =
       "distcp.copy.buffer.size";
 
+  /* DistCp Blocks Per Chunk */
+  public static final String CONF_LABEL_BLOCKS_PER_CHUNK = "distcp.blocks.per.chunk";
+
   /**
    * Constants for DistCp return code to shell / consumer of ToolRunner's run
    */
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
index 3ce12b264d328..e57e413de3323 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
@@ -192,7 +192,7 @@ public enum DistCpOptionSwitch {
       new Option("sizelimit", true, "(Deprecated!) Limit number of files " +
               "copied to <= n bytes")),
 
-  BLOCKS_PER_CHUNK("",
+  BLOCKS_PER_CHUNK(DistCpConstants.CONF_LABEL_BLOCKS_PER_CHUNK,
       new Option("blocksperchunk", true, "If set to a positive value, files"
           + "with more blocks than this value will be split into chunks of "
           + " blocks to be transferred in parallel, and "
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java
index dd8ec697b6502..2dc5717929d08 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java
@@ -20,6 +20,7 @@
 
 import java.util.Collections;
 
+import org.apache.hadoop.conf.Configuration;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -533,4 +534,27 @@ public void testVerboseLog() {
     builder.withLogPath(logPath).withVerboseLog(true);
     Assert.assertTrue(builder.build().shouldVerboseLog());
   }
+
+  @Test
+  public void testAppendToConf() {
+    final int expectedBlocksPerChunk = 999;
+    final String expectedValForEmptyConfigKey = "VALUE_OF_EMPTY_CONFIG_KEY";
+
+    DistCpOptions options = new DistCpOptions.Builder(
+        Collections.singletonList(new Path("hdfs://localhost:8020/source")),
+        new Path("hdfs://localhost:8020/target/"))
+        .withBlocksPerChunk(expectedBlocksPerChunk)
+        .build();
+
+    Configuration config = new Configuration();
+    config.set("", expectedValForEmptyConfigKey);
+
+    options.appendToConf(config);
+    Assert.assertEquals(expectedBlocksPerChunk,
+        config.getInt(DistCpOptionSwitch.BLOCKS_PER_CHUNK.getConfigLabel(), 0));
+    Assert.assertEquals("Some DistCpOptionSwitch's config label is empty! " +
+        "Pls ensure the config label is provided when append options to config, " +
+        "otherwise it may not be fetched properly",
+        expectedValForEmptyConfigKey, config.get(""));
+  }
 }