From 69e50c7b4499bffc1eb372799ccba3f26c5fe54e Mon Sep 17 00:00:00 2001
From: Mehakmeet Singh
Date: Tue, 15 Nov 2022 19:58:41 +0530
Subject: [PATCH 1/3] HADOOP-18528. Disable abfs prefetching by default (#5134)

Disables block prefetching on ABFS InputStreams, by setting
fs.azure.enable.readahead to false in core-default.xml and the matching
java constant. This prevents HADOOP-18521 (ABFS ReadBufferManager buffer
sharing across concurrent HTTP requests). Once a fix for that is
committed, this change can be reverted.

Contributed by Mehakmeet Singh.
---
 .../hadoop-common/src/main/resources/core-default.xml      | 7 +++++++
 .../fs/azurebfs/constants/FileSystemConfigurations.java    | 2 +-
 .../fs/azurebfs/services/AbfsInputStreamContext.java       | 2 +-
 .../azurebfs/contract/ITestAbfsFileSystemContractSeek.java | 2 ++
 .../hadoop/fs/azurebfs/services/TestAbfsInputStream.java   | 2 ++
 5 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 4d5b8234056f9..047c5482062be 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -2166,6 +2166,13 @@ The switch to turn S3A auditing on or off.
   <description>The AbstractFileSystem for gs: uris.</description>
 </property>
 
+<property>
+  <name>fs.azure.enable.readahead</name>
+  <value>false</value>
+  <description>Disable readahead/prefetching in AbfsInputStream.
+    See HADOOP-18521</description>
+</property>
+
 <property>
   <name>io.seqfile.compress.blocksize</name>
   <value>1000000</value>
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
index 42f3b7503e03d..097285bb48fbc 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
@@ -106,7 +106,7 @@ public final class FileSystemConfigurations {
   public static final boolean DEFAULT_ABFS_LATENCY_TRACK = false;
   public static final long DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS = 120;
 
-  public static final boolean DEFAULT_ENABLE_READAHEAD = true;
+  public static final boolean DEFAULT_ENABLE_READAHEAD = false;
   public static final String DEFAULT_FS_AZURE_USER_AGENT_PREFIX = EMPTY_STRING;
   public static final String DEFAULT_VALUE_UNKNOWN = "UNKNOWN";
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java
index e258958b1a111..f6b330934cf5e 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java
@@ -35,7 +35,7 @@ public class AbfsInputStreamContext extends AbfsStreamContext {
 
   private boolean tolerateOobAppends;
 
-  private boolean isReadAheadEnabled = true;
+  private boolean isReadAheadEnabled = false;
 
   private boolean alwaysReadBufferSize;
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java
index f7fe5039799d7..aaf47f7a9c871 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java
@@ -34,6 +34,7 @@
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_AHEAD_RANGE;
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_BUFFER_SIZE;
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_READAHEAD;
 import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
@@ -68,6 +69,7 @@ protected Configuration createConfiguration() {
   protected AbstractFSContract createContract(final Configuration conf) {
     conf.setInt(AZURE_READ_AHEAD_RANGE, MIN_BUFFER_SIZE);
     conf.setInt(AZURE_READ_BUFFER_SIZE, MIN_BUFFER_SIZE);
+    conf.setBoolean(FS_AZURE_ENABLE_READAHEAD, true);
     return new AbfsFileSystemContract(conf, isSecure);
   }
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java
index b5ae9b737842d..69795ee5bd857 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java
@@ -106,6 +106,7 @@ private AbfsClient getMockAbfsClient() {
   private AbfsInputStream getAbfsInputStream(AbfsClient mockAbfsClient,
       String fileName) throws IOException {
     AbfsInputStreamContext inputStreamContext = new AbfsInputStreamContext(-1);
+    inputStreamContext.isReadAheadEnabled(true);
     // Create AbfsInputStream with the client instance
     AbfsInputStream inputStream = new AbfsInputStream(
         mockAbfsClient,
@@ -131,6 +132,7 @@ public AbfsInputStream getAbfsInputStream(AbfsClient abfsClient,
       boolean alwaysReadBufferSize,
       int readAheadBlockSize) throws IOException {
     AbfsInputStreamContext inputStreamContext = new AbfsInputStreamContext(-1);
+    inputStreamContext.isReadAheadEnabled(true);
     // Create AbfsInputStream with the client instance
     AbfsInputStream inputStream = new AbfsInputStream(
         abfsClient,
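Since the default is now false, a job that still wants prefetching (for
example, once the HADOOP-18521 fix has landed) has to opt back in through
configuration. Below is a minimal sketch of that opt-in, assuming only the
fs.azure.enable.readahead key shown in the patch; the class name and the
abfs:// URI are placeholders, not part of the patch:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ReadaheadOptIn {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Override the new core-default.xml value of false for this job only.
        conf.setBoolean("fs.azure.enable.readahead", true);
        // Placeholder URI: substitute a real container and storage account.
        FileSystem fs = new Path("abfs://container@account.dfs.core.windows.net/")
            .getFileSystem(conf);
        // Streams opened through this FileSystem will prefetch again, which
        // is only advisable once the HADOOP-18521 fix is in place.
      }
    }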
From cd929457c93f4c2460909ab65d2d3cc4e2f817ab Mon Sep 17 00:00:00 2001
From: Lei Yang
Date: Tue, 15 Nov 2022 15:06:37 -0800
Subject: [PATCH 2/3] HDFS-16836: StandbyCheckpointer shouldn't trigger
 rollback fs image after RU is finalized (#5135)

Co-authored-by: Lei Yang
---
 .../hdfs/server/namenode/FSEditLogLoader.java |  1 +
 .../hadoop/hdfs/TestRollingUpgrade.java       | 36 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index a065fe6c0cfa8..efbc3b20c6032 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -912,6 +912,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
       fsNamesys.getFSImage().updateStorageVersion();
       fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
           NameNodeFile.IMAGE);
+      fsNamesys.setNeedRollbackFsImage(false);
       break;
     }
     case OP_ADD_CACHE_DIRECTIVE: {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
index 6e7014c42eb13..bb5da24a682f5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
@@ -33,6 +33,9 @@
 import javax.management.ReflectionException;
 import javax.management.openmbean.CompositeDataSupport;
 
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
 import org.junit.Rule;
 import org.junit.rules.TemporaryFolder;
 import org.slf4j.Logger;
@@ -720,6 +723,39 @@ static void queryForPreparation(DistributedFileSystem dfs) throws IOException,
     }
   }
 
+  @Test
+  public void testEditLogTailerRollingUpgrade() throws IOException, InterruptedException {
+    Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 1);
+
+    HAUtil.setAllowStandbyReads(conf, true);
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(MiniDFSNNTopology.simpleHATopology())
+        .numDataNodes(0)
+        .build();
+    cluster.waitActive();
+
+    cluster.transitionToActive(0);
+
+    NameNode nn1 = cluster.getNameNode(0);
+    NameNode nn2 = cluster.getNameNode(1);
+    try {
+      // RU start should trigger rollback image in standbycheckpointer
+      nn1.getRpcServer().rollingUpgrade(HdfsConstants.RollingUpgradeAction.PREPARE);
+      HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
+      Assert.assertTrue(nn2.getNamesystem().isNeedRollbackFsImage());
+
+      // RU finalize should reset rollback image flag in standbycheckpointer
+      nn1.getRpcServer().rollingUpgrade(HdfsConstants.RollingUpgradeAction.FINALIZE);
+      HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
+      Assert.assertFalse(nn2.getNamesystem().isNeedRollbackFsImage());
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
   /**
    * In non-HA setup, after rolling upgrade prepare, the Secondary NN should
    * still be able to do checkpoint
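A note on the mechanism in the patch above: the standby NameNode's
StandbyCheckpointer keeps saving special rollback images for as long as the
namesystem reports that one is needed. Before this fix, replaying the
finalize op renamed the rollback image but never cleared that flag, so the
standby kept triggering rollback checkpoints after the rolling upgrade was
over. The sketch below only illustrates that flag-driven choice, assuming a
hypothetical saveNamespace() helper; it is not the actual StandbyCheckpointer
source. Only isNeedRollbackFsImage(), NameNodeFile.IMAGE_ROLLBACK and
NameNodeFile.IMAGE come from the patch:

    // Illustrative sketch, not real Hadoop source.
    void doCheckpoint(FSNamesystem namesystem) throws IOException {
      // FSEditLogLoader now clears this flag when the finalize op is
      // replayed, so the standby stops producing rollback images once
      // the rolling upgrade is finalized.
      boolean needRollbackImage = namesystem.isNeedRollbackFsImage();
      NameNodeFile imageType =
          needRollbackImage ? NameNodeFile.IMAGE_ROLLBACK : NameNodeFile.IMAGE;
      saveNamespace(namesystem, imageType); // hypothetical helper
    }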
From 142df247edab25f0f1e829ea8559fb42a925af7b Mon Sep 17 00:00:00 2001
From: Szilard Nemeth
Date: Wed, 16 Nov 2022 13:07:05 +0100
Subject: [PATCH 3/3] YARN-11369. Commons.compress throws an
 IllegalArgumentException with large uids after 1.21.

Contributed by Benjamin Teke
---
 .../org/apache/hadoop/mapred/uploader/FrameworkUploader.java | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java
index 862d68ebc0aaa..52b6dde3794d8 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java
@@ -331,6 +331,8 @@ void buildPackage()
     LOG.info("Compressing tarball");
     try (TarArchiveOutputStream out = new TarArchiveOutputStream(
         targetStream)) {
+      // Workaround for the compress issue present from 1.21: COMPRESS-587
+      out.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR);
       for (String fullPath : filteredInputFiles) {
         LOG.info("Adding " + fullPath);
         File file = new File(fullPath);
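The two added lines are the entire fix. Starting with commons-compress 1.21,
TarArchiveEntry populates the real uid/gid of the underlying file, and
TarArchiveOutputStream rejects entries whose uid or gid does not fit the
traditional octal header field (values above 2097151) unless a big-number
mode is set; BIGNUMBER_STAR writes such values using star/GNU extensions
(see COMPRESS-587). A self-contained sketch of the same workaround outside
Hadoop, where the file names are made up:

    import java.io.File;
    import java.io.FileOutputStream;
    import java.nio.file.Files;
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

    public class BigUidTar {
      public static void main(String[] args) throws Exception {
        File input = new File("some-file.bin"); // placeholder input file
        try (TarArchiveOutputStream out = new TarArchiveOutputStream(
            new FileOutputStream("archive.tar"))) {
          // Without this, an entry owned by a uid/gid > 2097151 makes
          // putArchiveEntry() throw on commons-compress 1.21+.
          out.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR);
          TarArchiveEntry entry = new TarArchiveEntry(input, input.getName());
          out.putArchiveEntry(entry);
          Files.copy(input.toPath(), out);
          out.closeArchiveEntry();
        }
      }
    }

BIGNUMBER_POSIX would also avoid the exception; it records oversized values
in PAX extended headers instead, which is the more portable choice when all
consumers of the archive understand PAX.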