From da1a2dcb8ba2070622bf841f7efe6f3433159666 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 15 Mar 2023 13:55:20 +0000 Subject: [PATCH] HADOOP-18425. ABFS rename resilience through etags If "fs.azure.enable.rename.resilience" is true, then do a HEAD of the source file before the rename, which can then be used to recover from the failure, as the manifest committer does (HADOOP-18163). Change-Id: Ia417f1501f7274662eb9ff919c6378fb913b476b HADOOP-18425. ABFS rename resilience through etags only get the etag on HNS stores Change-Id: I9faffa78294e1782f0b2db3d1c997ec3fe53637c --- .../hadoop/fs/azurebfs/AbfsConfiguration.java | 7 ++++ .../fs/azurebfs/AzureBlobFileSystem.java | 33 +++++++++++++++++-- .../azurebfs/constants/ConfigurationKeys.java | 3 ++ .../constants/FileSystemConfigurations.java | 1 + .../fs/azurebfs/services/AbfsClient.java | 8 ++--- 5 files changed, 44 insertions(+), 8 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 80f803d80dab0..1cc7e5ec7e19e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -328,6 +328,10 @@ public class AbfsConfiguration{ FS_AZURE_ENABLE_ABFS_LIST_ITERATOR, DefaultValue = DEFAULT_ENABLE_ABFS_LIST_ITERATOR) private boolean enableAbfsListIterator; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = + FS_AZURE_ABFS_RENAME_RESILIENCE, DefaultValue = DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE) + private boolean renameResilience; + public AbfsConfiguration(final Configuration rawConfig, String accountName) throws IllegalAccessException, InvalidConfigurationValueException, IOException { this.rawConfig = ProviderUtils.excludeIncompatibleCredentialProviders( @@ -1130,4 +1134,7 @@ public void setEnableAbfsListIterator(boolean enableAbfsListIterator) { this.enableAbfsListIterator = enableAbfsListIterator; } + public boolean getRenameResilience() { + return renameResilience; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 5534b5fb44a51..9facc44cd3b31 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -46,6 +46,9 @@ import javax.annotation.Nullable; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.EtagSource; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -154,6 +157,11 @@ public class AzureBlobFileSystem extends FileSystem /** Rate limiting for operations which use it to throttle their IO. */ private RateLimiting rateLimiting; + /** + * Enable resilient rename. + */ + private boolean renameResilience; + @Override public void initialize(URI uri, Configuration configuration) throws IOException { @@ -226,6 +234,8 @@ public void initialize(URI uri, Configuration configuration) } rateLimiting = RateLimitingFactory.create(abfsConfiguration.getRateLimit()); + + renameResilience = abfsConfiguration.getRenameResilience(); LOG.debug("Initializing AzureBlobFileSystem for {} complete", uri); } @@ -442,10 +452,13 @@ public boolean rename(final Path src, final Path dst) throws IOException { } // Non-HNS account need to check dst status on driver side. - if (!abfsStore.getIsNamespaceEnabled(tracingContext) && dstFileStatus == null) { + final boolean isNamespaceEnabled = abfsStore.getIsNamespaceEnabled(tracingContext); + if (!isNamespaceEnabled && dstFileStatus == null) { dstFileStatus = tryGetFileStatus(qualifiedDstPath, tracingContext); } + FileStatus sourceFileStatus = null; + try { String sourceFileName = src.getName(); Path adjustedDst = dst; @@ -459,10 +472,24 @@ public boolean rename(final Path src, final Path dst) throws IOException { qualifiedDstPath = makeQualified(adjustedDst); - abfsStore.rename(qualifiedSrcPath, qualifiedDstPath, tracingContext, null); + String etag = null; + if (renameResilience && isNamespaceEnabled) { + // for resilient rename on an HNS store, get the etag before + // attempting the rename, and pass it down + sourceFileStatus = abfsStore.getFileStatus(qualifiedSrcPath, tracingContext); + etag = ((EtagSource) sourceFileStatus).getEtag(); + } + boolean recovered = abfsStore.rename(qualifiedSrcPath, qualifiedDstPath, tracingContext, + etag); + if (recovered) { + LOG.info("Recovered from rename failure of {} to {}", + qualifiedSrcPath, qualifiedDstPath); + } return true; } catch (AzureBlobFileSystemException ex) { - LOG.debug("Rename operation failed. ", ex); + LOG.debug("Rename {} to {} failed. source {} dest {}", + qualifiedSrcPath, qualifiedDstPath, + sourceFileStatus, dstFileStatus, ex); checkException( src, ex, diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index a59f76b6d0fe0..9b1ff0d9ef1fe 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -233,6 +233,9 @@ public final class ConfigurationKeys { /** Key for rate limit capacity, as used by IO operations which try to throttle themselves. */ public static final String FS_AZURE_ABFS_IO_RATE_LIMIT = "fs.azure.io.rate.limit"; + /** Add extra resilience to rename failures, at the expense of performance. */ + public static final String FS_AZURE_ABFS_RENAME_RESILIENCE = "fs.azure.enable.rename.resilience"; + public static String accountProperty(String property, String account) { return property + "." + account; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 9994d9f5207f3..fcbeb1ab5387e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -118,6 +118,7 @@ public final class FileSystemConfigurations { public static final int STREAM_ID_LEN = 12; public static final boolean DEFAULT_ENABLE_ABFS_LIST_ITERATOR = true; + public static final boolean DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE = true; /** * Limit of queued block upload operations before writes diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 25562660ae231..eb181ba571955 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -560,6 +560,7 @@ public AbfsClientRenameResult renamePath( if (!op.hasResult()) { throw e; } + LOG.debug("Rename of {} to {} failed, attempting recovery", source, destination, e); // ref: HADOOP-18242. Rename failure occurring due to a rare case of // tracking metadata being in incomplete state. @@ -574,18 +575,15 @@ public AbfsClientRenameResult renamePath( // then we can retry the rename operation. AbfsRestOperation sourceStatusOp = getPathStatus(source, false, tracingContext); - isMetadataIncompleteState = true; // Extract the sourceEtag, using the status Op, and set it // for future rename recovery. AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult(); String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult); renamePath(source, destination, continuation, tracingContext, - sourceEtagAfterFailure, isMetadataIncompleteState); + sourceEtagAfterFailure, true); } // if we get out of the condition without a successful rename, then // it isn't metadata incomplete state issue. - isMetadataIncompleteState = false; - boolean etagCheckSucceeded = renameIdempotencyCheckOp( source, sourceEtag, op, destination, tracingContext); @@ -594,7 +592,7 @@ public AbfsClientRenameResult renamePath( // throw back the exception throw e; } - return new AbfsClientRenameResult(op, true, isMetadataIncompleteState); + return new AbfsClientRenameResult(op, true, false); } }