From 6d23e36ef0a695085b348ac0f51cf4d30e17b5df Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Tue, 16 Jan 2024 17:06:28 -0600 Subject: [PATCH] HADOOP-19015. Increase fs.s3a.connection.maximum to 500 to minimize risk of Timeout waiting for connection from pool. (#6372) HADOOP-19015. Increase fs.s3a.connection.maximum to 500 to minimize the risk of Timeout waiting for connection from the pool Contributed By: Mukund Thakur Conflicts: hadoop-common-project/hadoop-common/src/main/resources/core-default.xml hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md --- .../hadoop-common/src/main/resources/core-default.xml | 2 +- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 8 +++++++- .../src/site/markdown/tools/hadoop-aws/performance.md | 6 +++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 1501ee4bc268a..e8aec82bde995 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1362,7 +1362,7 @@ fs.s3a.connection.maximum - 96 + 500 Controls the maximum number of simultaneous connections to S3. This must be bigger than the value of fs.s3a.threads.max so as to stop threads being blocked waiting for new HTTPS connections. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 32f108f79808f..4fa9578ef387e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -152,7 +152,13 @@ private Constants() { // number of simultaneous connections to s3 public static final String MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum"; - public static final int DEFAULT_MAXIMUM_CONNECTIONS = 96; + + /** + * Default value for {@link #MAXIMUM_CONNECTIONS}: {@value}. + * Future releases are likely to increase this value. + * Keep in sync with the value in {@code core-default.xml} + */ + public static final int DEFAULT_MAXIMUM_CONNECTIONS = 500; /** * Configuration option to configure expiration time of diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 88e6e8a0b2115..45244d9c7814e 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -211,7 +211,7 @@ for parallel IO (especially uploads) by setting the properties | property | meaning | default | |----------|---------|---------| | `fs.s3a.threads.max`| Threads in the AWS transfer manager| 10 | -| `fs.s3a.connection.maximum`| Maximum number of HTTP connections | 10| +| `fs.s3a.connection.maximum`| Maximum number of HTTP connections | 500 | We recommend using larger values for processes which perform a lot of IO: `DistCp`, Spark Workers and similar. @@ -219,11 +219,11 @@ a lot of IO: `DistCp`, Spark Workers and similar. ```xml fs.s3a.threads.max - 20 + 10 fs.s3a.connection.maximum - 20 + 500 ```