diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 1501ee4bc268a..e8aec82bde995 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1362,7 +1362,7 @@ fs.s3a.connection.maximum - 96 + 500 Controls the maximum number of simultaneous connections to S3. This must be bigger than the value of fs.s3a.threads.max so as to stop threads being blocked waiting for new HTTPS connections. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 32f108f79808f..4fa9578ef387e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -152,7 +152,13 @@ private Constants() { // number of simultaneous connections to s3 public static final String MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum"; - public static final int DEFAULT_MAXIMUM_CONNECTIONS = 96; + + /** + * Default value for {@link #MAXIMUM_CONNECTIONS}: {@value}. + * Future releases are likely to increase this value. 
+ * Keep in sync with the value in {@code core-default.xml}. + */ + public static final int DEFAULT_MAXIMUM_CONNECTIONS = 500; /** * Configuration option to configure expiration time of diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 88e6e8a0b2115..45244d9c7814e 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -211,7 +211,7 @@ for parallel IO (especially uploads) by setting the properties | property | meaning | default | |----------|---------|---------| | `fs.s3a.threads.max`| Threads in the AWS transfer manager| 10 | -| `fs.s3a.connection.maximum`| Maximum number of HTTP connections | 10| +| `fs.s3a.connection.maximum`| Maximum number of HTTP connections | 500 | We recommend using larger values for processes which perform a lot of IO: `DistCp`, Spark Workers and similar. @@ -219,11 +219,11 @@ a lot of IO: `DistCp`, Spark Workers and similar. ```xml <property> <name>fs.s3a.threads.max</name> - <value>20</value> + <value>10</value> </property> <property> <name>fs.s3a.connection.maximum</name> - <value>20</value> + <value>500</value> </property> ```