From b0742511a7d46fc56698a3276db5949aaf505c70 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Tue, 7 Feb 2023 21:01:16 +0530 Subject: [PATCH 01/11] HDFS-16911.Distcp with snapshot diff to support Ozone filesystem --- .../hdfs/protocol/SnapshotDiffReport.java | 4 + .../java/org/apache/hadoop/tools/DistCp.java | 12 +- .../apache/hadoop/tools/DistCpConstants.java | 2 + .../org/apache/hadoop/tools/DistCpSync.java | 106 ++++++++++++------ 4 files changed, 87 insertions(+), 37 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java index e6f20c9ce1b82..d3ca4925df9ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java @@ -282,4 +282,8 @@ public String toString() { } return str.toString(); } + + public String getToSnapshot() { + return toSnapshot; + } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java index 141f45d61f372..754968ff2b75c 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java @@ -43,6 +43,7 @@ import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.classification.VisibleForTesting; @@ -84,7 +85,16 @@ private void prepareFileListing(Job job) throws Exception { if (context.shouldUseSnapshotDiff()) { // When "-diff" or "-rdiff" is passed, do sync() first, then // create copyListing based on snapshot diff. - DistCpSync distCpSync = new DistCpSync(context, job.getConfiguration()); + DistCpSync distCpSync; + Class distcpSyncClass = DistCpSync.getClass(job.getConfiguration()); + if (!distcpSyncClass.equals(DistCpSync.class)) { + // get the impl class + distCpSync = ReflectionUtils.newInstance(distcpSyncClass, + job.getConfiguration()); + distCpSync.init(context, job.getConfiguration()); + } else { + distCpSync = new DistCpSync(context, job.getConfiguration()); + } if (distCpSync.sync()) { createInputFileListingWithDiff(job, distCpSync); } else { diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java index 289d552b86219..05b1644bca440 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java @@ -122,6 +122,8 @@ private DistCpConstants() { /* DistCp CopyListing class override param */ public static final String CONF_LABEL_COPY_LISTING_CLASS = "distcp.copy.listing.class"; + public static final String CONF_LABEL_DISTCP_SYNC_CLASS = "distcp.sync.class"; + /** * DistCp Filter class override param. */ diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index 1cf2d97ec1f38..aa8b38205eea8 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -51,7 +51,7 @@ * target since s1. All the files/directories in the target are the same with * source.s1 */ -class DistCpSync { +public class DistCpSync { private DistCpContext context; private Configuration conf; // diffMap maps snapshot diff op type to a list of diff ops. @@ -73,6 +73,19 @@ class DistCpSync { this.copyFilter.initialize(); } + public void init(DistCpContext context, Configuration conf) { + this.context = context; + this.conf = conf; + this.copyFilter = CopyFilter.getCopyFilter(conf); + this.copyFilter.initialize(); + } + + public static Class getClass(Configuration conf){ + return conf.getClass(DistCpConstants.CONF_LABEL_DISTCP_SYNC_CLASS, + DistCpSync.class,DistCpSync.class); + } + + @VisibleForTesting public void setCopyFilter(CopyFilter copyFilter) { this.copyFilter = copyFilter; @@ -82,6 +95,10 @@ private boolean isRdiff() { return context.shouldUseRdiff(); } + public DistCpSync() { + // empty constructor + } + /** * Check if three conditions are met before sync. * 1. Only one source directory. @@ -91,7 +108,7 @@ private boolean isRdiff() { * Throw exceptions if first two aren't met, and return false to fallback to * default distcp if the third condition isn't met. */ - private boolean preSyncCheck() throws IOException { + protected boolean preSyncCheck() throws IOException { List sourcePaths = context.getSourcePaths(); if (sourcePaths.size() != 1) { // we only support one source dir which must be a snapshottable directory @@ -106,20 +123,7 @@ private boolean preSyncCheck() throws IOException { final FileSystem snapshotDiffFs = isRdiff() ? tgtFs : srcFs; final Path snapshotDiffDir = isRdiff() ? targetDir : sourceDir; - // currently we require both the source and the target file system are - // DistributedFileSystem or (S)WebHdfsFileSystem. - if (!(srcFs instanceof DistributedFileSystem - || srcFs instanceof WebHdfsFileSystem)) { - throw new IllegalArgumentException("Unsupported source file system: " - + srcFs.getScheme() + "://. " + - "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); - } - if (!(tgtFs instanceof DistributedFileSystem - || tgtFs instanceof WebHdfsFileSystem)) { - throw new IllegalArgumentException("Unsupported target file system: " - + tgtFs.getScheme() + "://. " + - "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); - } + checkFilesystemSupport(srcFs, tgtFs); // make sure targetFS has no change between from and the current states if (!checkNoChange(tgtFs, targetDir)) { @@ -165,6 +169,23 @@ private boolean preSyncCheck() throws IOException { return true; } + protected void checkFilesystemSupport(FileSystem srcFs, FileSystem tgtFs) { + // currently we require both the source and the target file system are + // DistributedFileSystem or (S)WebHdfsFileSystem. + if (!(srcFs instanceof DistributedFileSystem + || srcFs instanceof WebHdfsFileSystem)) { + throw new IllegalArgumentException("Unsupported source file system: " + + srcFs.getScheme() + "://. " + + "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); + } + if (!(tgtFs instanceof DistributedFileSystem + || tgtFs instanceof WebHdfsFileSystem)) { + throw new IllegalArgumentException("Unsupported target file system: " + + tgtFs.getScheme() + "://. " + + "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); + } + } + public boolean sync() throws IOException { if (!preSyncCheck()) { return false; @@ -206,7 +227,7 @@ public boolean sync() throws IOException { * EnumMap whose key is DiffType, and value is a DiffInfo list. If there is * no entry for a given DiffType, the associated value will be an empty list. */ - private boolean getAllDiffs() throws IOException { + protected boolean getAllDiffs() throws IOException { Path ssDir = isRdiff()? context.getTargetPath() : context.getSourcePaths().get(0); @@ -215,17 +236,7 @@ private boolean getAllDiffs() throws IOException { FileSystem fs = ssDir.getFileSystem(conf); final String from = getSnapshotName(context.getFromSnapshot()); final String to = getSnapshotName(context.getToSnapshot()); - if (fs instanceof DistributedFileSystem) { - DistributedFileSystem dfs = (DistributedFileSystem)fs; - report = dfs.getSnapshotDiffReport(ssDir, from, to); - } else if (fs instanceof WebHdfsFileSystem) { - WebHdfsFileSystem webHdfs = (WebHdfsFileSystem)fs; - report = webHdfs.getSnapshotDiffReport(ssDir, from, to); - } else { - throw new IllegalArgumentException("Unsupported file system: " + - fs.getScheme() + "://. " + - "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); - } + report = getSnapshotDiffReport(ssDir, fs, from, to); this.diffMap = new EnumMap<>(SnapshotDiffReport.DiffType.class); for (SnapshotDiffReport.DiffType type : @@ -286,6 +297,23 @@ private boolean getAllDiffs() throws IOException { return false; } + protected SnapshotDiffReport getSnapshotDiffReport(Path ssDir, + FileSystem fs, String from, String to) throws IOException { + SnapshotDiffReport report; + if (fs instanceof DistributedFileSystem) { + DistributedFileSystem dfs = (DistributedFileSystem) fs; + report = dfs.getSnapshotDiffReport(ssDir, from, to); + } else if (fs instanceof WebHdfsFileSystem) { + WebHdfsFileSystem webHdfs = (WebHdfsFileSystem) fs; + report = webHdfs.getSnapshotDiffReport(ssDir, from, to); + } else { + throw new IllegalArgumentException("Unsupported file system: " + + fs.getScheme() + "://. " + + "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); + } + return report; + } + private String getSnapshotName(String name) { return Path.CUR_DIR.equals(name) ? "" : name; } @@ -324,17 +352,11 @@ private void deleteTargetTmpDir(FileSystem targetFs, * Compute the snapshot diff on the given file system. Return true if the diff * is empty, i.e., no changes have happened in the FS. */ - private boolean checkNoChange(FileSystem fs, Path path) { + protected boolean checkNoChange(FileSystem fs, Path path) { try { final String from = getSnapshotName(context.getFromSnapshot()); SnapshotDiffReport targetDiff = null; - if (fs instanceof DistributedFileSystem) { - DistributedFileSystem dfs = (DistributedFileSystem)fs; - targetDiff = dfs.getSnapshotDiffReport(path, from, ""); - } else { - WebHdfsFileSystem webHdfs = (WebHdfsFileSystem)fs; - targetDiff = webHdfs.getSnapshotDiffReport(path, from, ""); - } + targetDiff = getSnapshotDiffReport(path, fs, from, ""); if (!targetDiff.getDiffList().isEmpty()) { DistCp.LOG.warn("The target has been modified since snapshot " + context.getFromSnapshot()); @@ -490,6 +512,18 @@ private DiffInfo[] getCreateAndModifyDiffs() { return diffs.toArray(new DiffInfo[diffs.size()]); } + public DistCpContext getContext() { + return context; + } + + public EnumMap> getDiffMap() { + return diffMap; + } + + public CopyFilter getCopyFilter() { + return copyFilter; + } + /** * Probe for a path being a parent of another. * @return true if the parent's path matches the start of the child's From 5aabd75ccd8a767f9c0c811f0557d644a6620e9c Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Tue, 14 Mar 2023 16:31:37 +0530 Subject: [PATCH 02/11] use fs.hasPathCapability to check FS compatibility with snapshots and call respective fs impl --- .../hdfs/protocol/SnapshotDiffReport.java | 9 +- .../java/org/apache/hadoop/tools/DistCp.java | 12 +- .../apache/hadoop/tools/DistCpConstants.java | 2 - .../org/apache/hadoop/tools/DistCpSync.java | 123 ++++++++---------- .../apache/hadoop/tools/TestDistCpSync.java | 66 ++++++++++ 5 files changed, 127 insertions(+), 85 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java index d3ca4925df9ff..b4c7f7ad0ded8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java @@ -253,6 +253,11 @@ public String getFromSnapshot() { return fromSnapshot; } + /** @return {@link #toSnapshot} */ + public String getToSnapshot() { + return toSnapshot; + } + /** @return {@link #toSnapshot} */ public String getLaterSnapshotName() { return toSnapshot; @@ -282,8 +287,4 @@ public String toString() { } return str.toString(); } - - public String getToSnapshot() { - return toSnapshot; - } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java index 754968ff2b75c..141f45d61f372 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java @@ -43,7 +43,6 @@ import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.classification.VisibleForTesting; @@ -85,16 +84,7 @@ private void prepareFileListing(Job job) throws Exception { if (context.shouldUseSnapshotDiff()) { // When "-diff" or "-rdiff" is passed, do sync() first, then // create copyListing based on snapshot diff. - DistCpSync distCpSync; - Class distcpSyncClass = DistCpSync.getClass(job.getConfiguration()); - if (!distcpSyncClass.equals(DistCpSync.class)) { - // get the impl class - distCpSync = ReflectionUtils.newInstance(distcpSyncClass, - job.getConfiguration()); - distCpSync.init(context, job.getConfiguration()); - } else { - distCpSync = new DistCpSync(context, job.getConfiguration()); - } + DistCpSync distCpSync = new DistCpSync(context, job.getConfiguration()); if (distCpSync.sync()) { createInputFileListingWithDiff(job, distCpSync); } else { diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java index 05b1644bca440..289d552b86219 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java @@ -122,8 +122,6 @@ private DistCpConstants() { /* DistCp CopyListing class override param */ public static final String CONF_LABEL_COPY_LISTING_CLASS = "distcp.copy.listing.class"; - public static final String CONF_LABEL_DISTCP_SYNC_CLASS = "distcp.sync.class"; - /** * DistCp Filter class override param. */ diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index aa8b38205eea8..2a369c49115d1 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -20,18 +20,19 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSUtilClient; -import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; -import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.tools.CopyListing.InvalidInputException; import java.io.FileNotFoundException; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.Arrays; import java.util.List; import java.util.Random; @@ -51,7 +52,7 @@ * target since s1. All the files/directories in the target are the same with * source.s1 */ -public class DistCpSync { +class DistCpSync { private DistCpContext context; private Configuration conf; // diffMap maps snapshot diff op type to a list of diff ops. @@ -73,19 +74,6 @@ public class DistCpSync { this.copyFilter.initialize(); } - public void init(DistCpContext context, Configuration conf) { - this.context = context; - this.conf = conf; - this.copyFilter = CopyFilter.getCopyFilter(conf); - this.copyFilter.initialize(); - } - - public static Class getClass(Configuration conf){ - return conf.getClass(DistCpConstants.CONF_LABEL_DISTCP_SYNC_CLASS, - DistCpSync.class,DistCpSync.class); - } - - @VisibleForTesting public void setCopyFilter(CopyFilter copyFilter) { this.copyFilter = copyFilter; @@ -95,10 +83,6 @@ private boolean isRdiff() { return context.shouldUseRdiff(); } - public DistCpSync() { - // empty constructor - } - /** * Check if three conditions are met before sync. * 1. Only one source directory. @@ -108,7 +92,7 @@ public DistCpSync() { * Throw exceptions if first two aren't met, and return false to fallback to * default distcp if the third condition isn't met. */ - protected boolean preSyncCheck() throws IOException { + private boolean preSyncCheck() throws IOException { List sourcePaths = context.getSourcePaths(); if (sourcePaths.size() != 1) { // we only support one source dir which must be a snapshottable directory @@ -123,7 +107,7 @@ protected boolean preSyncCheck() throws IOException { final FileSystem snapshotDiffFs = isRdiff() ? tgtFs : srcFs; final Path snapshotDiffDir = isRdiff() ? targetDir : sourceDir; - checkFilesystemSupport(srcFs, tgtFs); + checkFilesystemSupport(sourceDir,targetDir,srcFs, tgtFs); // make sure targetFS has no change between from and the current states if (!checkNoChange(tgtFs, targetDir)) { @@ -169,20 +153,27 @@ protected boolean preSyncCheck() throws IOException { return true; } - protected void checkFilesystemSupport(FileSystem srcFs, FileSystem tgtFs) { - // currently we require both the source and the target file system are - // DistributedFileSystem or (S)WebHdfsFileSystem. - if (!(srcFs instanceof DistributedFileSystem - || srcFs instanceof WebHdfsFileSystem)) { - throw new IllegalArgumentException("Unsupported source file system: " - + srcFs.getScheme() + "://. " + - "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); + /** + * Check if the source and target filesystems support snapshots. + */ + protected void checkFilesystemSupport(Path sourceDir, Path targetDir, + FileSystem srcFs, FileSystem tgtFs) throws IOException { + if (!srcFs.hasPathCapability(sourceDir, + CommonPathCapabilities.FS_SNAPSHOTS)) { + throw new IllegalArgumentException( + "The source file system " + srcFs.getScheme() + " does not support snapshot."); + } + if (!tgtFs.hasPathCapability(targetDir, + CommonPathCapabilities.FS_SNAPSHOTS)) { + throw new IllegalArgumentException( + "The target file system " + tgtFs.getScheme() + " does not support snapshot."); } - if (!(tgtFs instanceof DistributedFileSystem - || tgtFs instanceof WebHdfsFileSystem)) { - throw new IllegalArgumentException("Unsupported target file system: " - + tgtFs.getScheme() + "://. " + - "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); + try { + getSnapshotDiffReportMethod(srcFs); + getSnapshotDiffReportMethod(tgtFs); + } catch (NoSuchMethodException e) { + throw new IllegalArgumentException( + "The file system does not support getSnapshotDiffReport", e); } } @@ -227,16 +218,15 @@ public boolean sync() throws IOException { * EnumMap whose key is DiffType, and value is a DiffInfo list. If there is * no entry for a given DiffType, the associated value will be an empty list. */ - protected boolean getAllDiffs() throws IOException { + private boolean getAllDiffs() throws IOException { Path ssDir = isRdiff()? context.getTargetPath() : context.getSourcePaths().get(0); try { - SnapshotDiffReport report = null; - FileSystem fs = ssDir.getFileSystem(conf); final String from = getSnapshotName(context.getFromSnapshot()); final String to = getSnapshotName(context.getToSnapshot()); - report = getSnapshotDiffReport(ssDir, fs, from, to); + SnapshotDiffReport report = + getSnapshotDiffReport(ssDir.getFileSystem(conf), ssDir, from, to); this.diffMap = new EnumMap<>(SnapshotDiffReport.DiffType.class); for (SnapshotDiffReport.DiffType type : @@ -297,21 +287,31 @@ protected boolean getAllDiffs() throws IOException { return false; } - protected SnapshotDiffReport getSnapshotDiffReport(Path ssDir, - FileSystem fs, String from, String to) throws IOException { - SnapshotDiffReport report; - if (fs instanceof DistributedFileSystem) { - DistributedFileSystem dfs = (DistributedFileSystem) fs; - report = dfs.getSnapshotDiffReport(ssDir, from, to); - } else if (fs instanceof WebHdfsFileSystem) { - WebHdfsFileSystem webHdfs = (WebHdfsFileSystem) fs; - report = webHdfs.getSnapshotDiffReport(ssDir, from, to); - } else { - throw new IllegalArgumentException("Unsupported file system: " + - fs.getScheme() + "://. " + - "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); + /** + * Check if the filesystem implementation has a method named + * getSnapshotDiffReport. + */ + private static Method getSnapshotDiffReportMethod(FileSystem fs) + throws NoSuchMethodException { + return fs.getClass().getMethod( + "getSnapshotDiffReport", Path.class, String.class, String.class); + } + + + private static SnapshotDiffReport getSnapshotDiffReport( + final FileSystem fs, + final Path snapshotDir, + final String fromSnapshot, + final String toSnapshot) throws IOException { + try { + return (SnapshotDiffReport) getSnapshotDiffReportMethod(fs).invoke( + fs, snapshotDir, fromSnapshot, toSnapshot); + } catch (InvocationTargetException e) { + throw new IOException(e.getCause()); + } catch (NoSuchMethodException|IllegalAccessException e) { + throw new IllegalArgumentException( + "failed to invoke getSnapshotDiffReport.", e); } - return report; } private String getSnapshotName(String name) { @@ -352,11 +352,10 @@ private void deleteTargetTmpDir(FileSystem targetFs, * Compute the snapshot diff on the given file system. Return true if the diff * is empty, i.e., no changes have happened in the FS. */ - protected boolean checkNoChange(FileSystem fs, Path path) { + private boolean checkNoChange(FileSystem fs, Path path) { try { final String from = getSnapshotName(context.getFromSnapshot()); - SnapshotDiffReport targetDiff = null; - targetDiff = getSnapshotDiffReport(path, fs, from, ""); + SnapshotDiffReport targetDiff = getSnapshotDiffReport(fs, path, from, ""); if (!targetDiff.getDiffList().isEmpty()) { DistCp.LOG.warn("The target has been modified since snapshot " + context.getFromSnapshot()); @@ -512,18 +511,6 @@ private DiffInfo[] getCreateAndModifyDiffs() { return diffs.toArray(new DiffInfo[diffs.size()]); } - public DistCpContext getContext() { - return context; - } - - public EnumMap> getDiffMap() { - return diffMap; - } - - public CopyFilter getCopyFilter() { - return copyFilter; - } - /** * Probe for a path being a parent of another. * @return true if the parent's path matches the start of the child's diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index 93796e752ebc9..1a0ee93640c98 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -23,6 +23,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -38,6 +40,7 @@ import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.tools.mapred.CopyMapper; import org.junit.After; import org.junit.Assert; @@ -47,6 +50,7 @@ import java.io.IOException; import java.io.FileWriter; import java.io.BufferedWriter; +import java.net.URI; import java.nio.file.Files; import java.util.Arrays; import java.util.ArrayList; @@ -56,6 +60,8 @@ import java.util.Map; import java.util.regex.Pattern; +import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; + public class TestDistCpSync { private MiniDFSCluster cluster; private final Configuration conf = new HdfsConfiguration(); @@ -89,6 +95,7 @@ public void setUp() throws Exception { conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, target.toString()); conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, target.toString()); + conf.setClass("fs.dummy.impl", DummyFs.class, FileSystem.class); } @After @@ -1276,4 +1283,63 @@ private void snapshotDiffWithPaths(Path sourceFSPath, verifyCopyByFs(sourceFS, targetFS, sourceFS.getFileStatus(sourceFSPath), targetFS.getFileStatus(targetFSPath), false); } + + @Test + public void testSyncSnapshotDiffWithLocalFileSystem() throws Exception { + String[] args = new String[]{"-update", "-diff", "s1", "s2", + "file:///source", "file:///target"}; + LambdaTestUtils.intercept( + IllegalArgumentException.class, + "The source file system file does not support snapshot", + () -> new DistCp(conf, OptionsParser.parse(args)).execute()); + } + + @Test + public void testSyncSnapshotDiffWithDummyFileSystem() throws Exception { + String[] args = + new String[] { "-update", "-diff", "s1", "s2", "dummy:///source", + "dummy:///target" }; + try { + FileSystem dummyFs = FileSystem.get(URI.create("dummy:///"), conf); + Assert.assertTrue(dummyFs instanceof DummyFs); + new DistCp(conf, OptionsParser.parse(args)).execute(); + } catch (Exception e) { + // can expect other exceptions as source and target paths + // are not created, assert if the exception is not arising + // due to the filesystem not supporting snapshots. + Assert.assertFalse(e.getMessage().contains("does not support snapshot")); + } + } + + public static class DummyFs extends RawLocalFileSystem { + public DummyFs() { + super(); + } + + public URI getUri() { + return URI.create("dummy:///"); + } + + @Override + public boolean hasPathCapability(Path path, String capability) + throws IOException { + switch (validatePathCapabilityArgs(makeQualified(path), capability)) { + case CommonPathCapabilities.FS_SNAPSHOTS: + return true; + default: + return super.hasPathCapability(path, capability); + } + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + return new FileStatus(); + } + + public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir, + final String fromSnapshot, final String toSnapshot) { + return new SnapshotDiffReport(snapshotDir.getName(), fromSnapshot, + toSnapshot, Collections.EMPTY_LIST); + } + } } From fd34ce6ced0729fcae98848598ea11fb7fc47221 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Tue, 14 Mar 2023 16:44:00 +0530 Subject: [PATCH 03/11] add javadoc --- .../src/main/java/org/apache/hadoop/tools/DistCpSync.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index 2a369c49115d1..6294b055f0f81 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -156,7 +156,7 @@ private boolean preSyncCheck() throws IOException { /** * Check if the source and target filesystems support snapshots. */ - protected void checkFilesystemSupport(Path sourceDir, Path targetDir, + private void checkFilesystemSupport(Path sourceDir, Path targetDir, FileSystem srcFs, FileSystem tgtFs) throws IOException { if (!srcFs.hasPathCapability(sourceDir, CommonPathCapabilities.FS_SNAPSHOTS)) { @@ -297,7 +297,10 @@ private static Method getSnapshotDiffReportMethod(FileSystem fs) "getSnapshotDiffReport", Path.class, String.class, String.class); } - + /** + * Get the snapshotDiff b/w the fromSnapshot & toSnapshot for the given + * filesystem. + */ private static SnapshotDiffReport getSnapshotDiffReport( final FileSystem fs, final Path snapshotDir, From 6fd1f57bc530397d93785ccaac90fb4ebc15109e Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Tue, 14 Mar 2023 23:41:14 +0530 Subject: [PATCH 04/11] fix javac warning --- .../src/test/java/org/apache/hadoop/tools/TestDistCpSync.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index 1a0ee93640c98..aab7318d98c14 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -1339,7 +1339,7 @@ public FileStatus getFileStatus(Path f) throws IOException { public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir, final String fromSnapshot, final String toSnapshot) { return new SnapshotDiffReport(snapshotDir.getName(), fromSnapshot, - toSnapshot, Collections.EMPTY_LIST); + toSnapshot, new ArrayList()); } } } From 66a3e14a519ba58d059795f4ad6181b0a731fe41 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Thu, 16 Mar 2023 21:02:34 +0530 Subject: [PATCH 05/11] address comments --- .../org/apache/hadoop/tools/DistCpSync.java | 18 +++++++++++++++--- .../apache/hadoop/tools/TestDistCpSync.java | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index 6294b055f0f81..dc1375aaddf05 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -161,20 +161,32 @@ private void checkFilesystemSupport(Path sourceDir, Path targetDir, if (!srcFs.hasPathCapability(sourceDir, CommonPathCapabilities.FS_SNAPSHOTS)) { throw new IllegalArgumentException( - "The source file system " + srcFs.getScheme() + " does not support snapshot."); + "The source file system " + srcFs.getScheme() + + " does not support snapshot."); } if (!tgtFs.hasPathCapability(targetDir, CommonPathCapabilities.FS_SNAPSHOTS)) { throw new IllegalArgumentException( - "The target file system " + tgtFs.getScheme() + " does not support snapshot."); + "The target file system " + tgtFs.getScheme() + + " does not support snapshot."); } try { getSnapshotDiffReportMethod(srcFs); + } catch (NoSuchMethodException e) { + throw new IllegalArgumentException( + "The source file system " + srcFs.getScheme() + + " does not support getSnapshotDiffReport", + e); + } + try { getSnapshotDiffReportMethod(tgtFs); } catch (NoSuchMethodException e) { throw new IllegalArgumentException( - "The file system does not support getSnapshotDiffReport", e); + "The target file system " + tgtFs.getScheme() + + " does not support getSnapshotDiffReport", + e); } + } public boolean sync() throws IOException { diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index aab7318d98c14..fdda300aa0f81 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -1305,7 +1305,7 @@ public void testSyncSnapshotDiffWithDummyFileSystem() throws Exception { new DistCp(conf, OptionsParser.parse(args)).execute(); } catch (Exception e) { // can expect other exceptions as source and target paths - // are not created, assert if the exception is not arising + // are not created, assert that the exception is not arising // due to the filesystem not supporting snapshots. Assert.assertFalse(e.getMessage().contains("does not support snapshot")); } From ac44e1ccb1b7b0cb04a1929e1b0f8863d8c4f969 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Thu, 16 Mar 2023 21:06:59 +0530 Subject: [PATCH 06/11] remove getter --- .../org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java | 5 ----- .../test/java/org/apache/hadoop/tools/TestDistCpSync.java | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java index b4c7f7ad0ded8..e6f20c9ce1b82 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java @@ -253,11 +253,6 @@ public String getFromSnapshot() { return fromSnapshot; } - /** @return {@link #toSnapshot} */ - public String getToSnapshot() { - return toSnapshot; - } - /** @return {@link #toSnapshot} */ public String getLaterSnapshotName() { return toSnapshot; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index fdda300aa0f81..eaa0442c88011 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -1307,7 +1307,8 @@ public void testSyncSnapshotDiffWithDummyFileSystem() throws Exception { // can expect other exceptions as source and target paths // are not created, assert that the exception is not arising // due to the filesystem not supporting snapshots. - Assert.assertFalse(e.getMessage().contains("does not support snapshot")); + Assert.assertFalse(e.getMessage().contains("does not support snapshot") + || e.getMessage().contains("does not support getSnapshotDiffReport")); } } From 57bb1d1c36cbab18fccb73c739e06563c3934290 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Thu, 23 Mar 2023 00:02:38 +0530 Subject: [PATCH 07/11] throw UnsupportedOperationException instead --- .../src/main/java/org/apache/hadoop/tools/DistCpSync.java | 8 ++++---- .../test/java/org/apache/hadoop/tools/TestDistCpSync.java | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index dc1375aaddf05..8e5d1ad5cd244 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -160,20 +160,20 @@ private void checkFilesystemSupport(Path sourceDir, Path targetDir, FileSystem srcFs, FileSystem tgtFs) throws IOException { if (!srcFs.hasPathCapability(sourceDir, CommonPathCapabilities.FS_SNAPSHOTS)) { - throw new IllegalArgumentException( + throw new UnsupportedOperationException( "The source file system " + srcFs.getScheme() + " does not support snapshot."); } if (!tgtFs.hasPathCapability(targetDir, CommonPathCapabilities.FS_SNAPSHOTS)) { - throw new IllegalArgumentException( + throw new UnsupportedOperationException( "The target file system " + tgtFs.getScheme() + " does not support snapshot."); } try { getSnapshotDiffReportMethod(srcFs); } catch (NoSuchMethodException e) { - throw new IllegalArgumentException( + throw new UnsupportedOperationException( "The source file system " + srcFs.getScheme() + " does not support getSnapshotDiffReport", e); @@ -181,7 +181,7 @@ private void checkFilesystemSupport(Path sourceDir, Path targetDir, try { getSnapshotDiffReportMethod(tgtFs); } catch (NoSuchMethodException e) { - throw new IllegalArgumentException( + throw new UnsupportedOperationException( "The target file system " + tgtFs.getScheme() + " does not support getSnapshotDiffReport", e); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index eaa0442c88011..9e776b0dced90 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -1303,12 +1303,13 @@ public void testSyncSnapshotDiffWithDummyFileSystem() throws Exception { FileSystem dummyFs = FileSystem.get(URI.create("dummy:///"), conf); Assert.assertTrue(dummyFs instanceof DummyFs); new DistCp(conf, OptionsParser.parse(args)).execute(); + } catch (UnsupportedOperationException e) { + Assert.fail("Dummy FS supports snapshots," + + "did not expect UnsupportedOperationException"); } catch (Exception e) { // can expect other exceptions as source and target paths - // are not created, assert that the exception is not arising + // are not created, assert if the exception is not arising // due to the filesystem not supporting snapshots. - Assert.assertFalse(e.getMessage().contains("does not support snapshot") - || e.getMessage().contains("does not support getSnapshotDiffReport")); } } From 345eac1bded740bf7ad00b40d24baa9b57d083ae Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Thu, 23 Mar 2023 00:04:35 +0530 Subject: [PATCH 08/11] throw UnsupportedOperationException instead --- .../src/test/java/org/apache/hadoop/tools/TestDistCpSync.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index 9e776b0dced90..60ef59bb793a8 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -1308,8 +1308,7 @@ public void testSyncSnapshotDiffWithDummyFileSystem() throws Exception { "did not expect UnsupportedOperationException"); } catch (Exception e) { // can expect other exceptions as source and target paths - // are not created, assert if the exception is not arising - // due to the filesystem not supporting snapshots. + // are not created. } } From 89da1b092bc82fa7f8e6f78065994296f214a350 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Thu, 23 Mar 2023 08:34:43 +0530 Subject: [PATCH 09/11] fix test --- .../src/test/java/org/apache/hadoop/tools/TestDistCpSync.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index 60ef59bb793a8..986657913fd19 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -1289,7 +1289,7 @@ public void testSyncSnapshotDiffWithLocalFileSystem() throws Exception { String[] args = new String[]{"-update", "-diff", "s1", "s2", "file:///source", "file:///target"}; LambdaTestUtils.intercept( - IllegalArgumentException.class, + UnsupportedOperationException.class, "The source file system file does not support snapshot", () -> new DistCp(conf, OptionsParser.parse(args)).execute()); } From 2a3a07a8c12e3f5d8dd2bbabb183cc9ffe582c2f Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Fri, 24 Mar 2023 02:37:06 +0530 Subject: [PATCH 10/11] address comments --- .../test/java/org/apache/hadoop/tools/TestDistCpSync.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index 986657913fd19..0fbcd6571c6e9 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -61,6 +61,7 @@ import java.util.regex.Pattern; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; public class TestDistCpSync { private MiniDFSCluster cluster; @@ -1295,17 +1296,16 @@ public void testSyncSnapshotDiffWithLocalFileSystem() throws Exception { } @Test - public void testSyncSnapshotDiffWithDummyFileSystem() throws Exception { + public void testSyncSnapshotDiffWithDummyFileSystem() { String[] args = new String[] { "-update", "-diff", "s1", "s2", "dummy:///source", "dummy:///target" }; try { FileSystem dummyFs = FileSystem.get(URI.create("dummy:///"), conf); - Assert.assertTrue(dummyFs instanceof DummyFs); + assertThat(dummyFs).isInstanceOf(DummyFs.class); new DistCp(conf, OptionsParser.parse(args)).execute(); } catch (UnsupportedOperationException e) { - Assert.fail("Dummy FS supports snapshots," + - "did not expect UnsupportedOperationException"); + throw e; } catch (Exception e) { // can expect other exceptions as source and target paths // are not created. From a66c5f4108953a655a8cb70aa78d17574d1bf81d Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Sat, 25 Mar 2023 20:48:49 +0530 Subject: [PATCH 11/11] address comment --- .../src/main/java/org/apache/hadoop/tools/DistCpSync.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index 8e5d1ad5cd244..dbc86fd0b4722 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -325,7 +325,7 @@ private static SnapshotDiffReport getSnapshotDiffReport( throw new IOException(e.getCause()); } catch (NoSuchMethodException|IllegalAccessException e) { throw new IllegalArgumentException( - "failed to invoke getSnapshotDiffReport.", e); + "Failed to invoke getSnapshotDiffReport.", e); } }