diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java index 141f45d61f372..d86160e89bb23 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java @@ -153,6 +153,14 @@ public int run(String[] argv) { Job job = null; try { job = execute(); + if (job != null) { + Long bytesCopied = job.getCounters(). + findCounter(CopyMapper.Counter.BYTESCOPIED).getValue(); + LOG.info("distcp copied {} number of bytes", bytesCopied); + //Set the config label so that consumer such as hive can see this distcp counter + getConf().set(DistCpConstants.CONF_LABEL_DISTCP_TOTAL_BYTES_COPIED, + String.valueOf(bytesCopied)); + } } catch (InvalidInputException e) { LOG.error("Invalid input: ", e); return DistCpConstants.INVALID_ARGUMENT; @@ -165,6 +173,8 @@ public int run(String[] argv) { } catch (XAttrsNotSupportedException e) { LOG.error("XAttrs not supported on at least one file system: ", e); return DistCpConstants.XATTRS_NOT_SUPPORTED; + } catch (IOException e) { + LOG.error("Exception encountered while retrieving distcp counter from job", e); } catch (Exception e) { LOG.error("Exception encountered ", e); return DistCpConstants.UNKNOWN_ERROR; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java index 6838d4f775753..ebcb31e26d311 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java @@ -116,6 +116,11 @@ private DistCpConstants() { */ public static final String CONF_LABEL_DISTCP_JOB_ID = "distcp.job.id"; + /** + * DistCp Counter for consumers of the Distcp. + */ + public static final String CONF_LABEL_DISTCP_TOTAL_BYTES_COPIED = "distcp.total.bytes.copied"; + /* Meta folder where the job's intermediate data is kept */ public static final String CONF_LABEL_META_FOLDER = "distcp.meta.folder"; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java index 64c6800f9446a..caabf295301de 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java @@ -19,6 +19,7 @@ package org.apache.hadoop.tools; import static org.apache.hadoop.test.GenericTestUtils.getMethodName; +import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_DISTCP_TOTAL_BYTES_COPIED; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; @@ -32,6 +33,7 @@ import java.util.List; import java.util.Random; +import org.apache.commons.lang3.math.NumberUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -452,7 +454,12 @@ public void testDistcpLargeFile() throws Exception { }; LOG.info("_____ running distcp: " + args[0] + " " + args[1]); - ToolRunner.run(conf, new DistCp(), args); + DistCp distcpTool = new DistCp(); + ToolRunner.run(conf, distcpTool, args); + final long bytesCopied = NumberUtils.toLong(distcpTool.getConf(). + get(CONF_LABEL_DISTCP_TOTAL_BYTES_COPIED), 0); + assertEquals("Bytes copied by distcp tool does not match source file length", + srcLen, bytesCopied); String realTgtPath = testDst; FileStatus[] dststat = getFileStatus(fs, realTgtPath, srcfiles);