diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 628b688dedba..2edd80db2637 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -529,16 +529,6 @@ private[spark] object SparkHadoopUtil extends Logging {
     if (conf.getOption("spark.hadoop.fs.s3a.downgrade.syncable.exceptions").isEmpty) {
       hadoopConf.set("fs.s3a.downgrade.syncable.exceptions", "true", setBySpark)
     }
-    // In Hadoop 3.3.1, AWS region handling with the default "" endpoint only works
-    // in EC2 deployments or when the AWS CLI is installed.
-    // The workaround is to set the name of the S3 endpoint explicitly,
-    // if not already set. See HADOOP-17771.
-    if (hadoopConf.get("fs.s3a.endpoint", "").isEmpty &&
-      hadoopConf.get("fs.s3a.endpoint.region") == null) {
-      // set to US central endpoint which can also connect to buckets
-      // in other regions at the expense of a HEAD request during fs creation
-      hadoopConf.set("fs.s3a.endpoint", "s3.amazonaws.com", setBySpark)
-    }
   }
 
   private def appendSparkHiveConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = {
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala
index 2326d10d4164..9a81cb947257 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala
@@ -39,19 +39,6 @@ class SparkHadoopUtilSuite extends SparkFunSuite {
     assertConfigMatches(hadoopConf, "orc.filterPushdown", "true", SOURCE_SPARK_HADOOP)
     assertConfigMatches(hadoopConf, "fs.s3a.downgrade.syncable.exceptions", "true",
       SET_TO_DEFAULT_VALUES)
-    assertConfigMatches(hadoopConf, "fs.s3a.endpoint", "s3.amazonaws.com", SET_TO_DEFAULT_VALUES)
-  }
-
-  /**
-   * An empty S3A endpoint will be overridden just as a null value
-   * would.
-   */
-  test("appendSparkHadoopConfigs with S3A endpoint set to empty string") {
-    val sc = new SparkConf()
-    val hadoopConf = new Configuration(false)
-    sc.set("spark.hadoop.fs.s3a.endpoint", "")
-    new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
-    assertConfigMatches(hadoopConf, "fs.s3a.endpoint", "s3.amazonaws.com", SET_TO_DEFAULT_VALUES)
   }
 
   /**
@@ -61,28 +48,8 @@ class SparkHadoopUtilSuite extends SparkFunSuite {
     val sc = new SparkConf()
     val hadoopConf = new Configuration(false)
     sc.set("spark.hadoop.fs.s3a.downgrade.syncable.exceptions", "false")
-    sc.set("spark.hadoop.fs.s3a.endpoint", "s3-eu-west-1.amazonaws.com")
     new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
     assertConfigValue(hadoopConf, "fs.s3a.downgrade.syncable.exceptions", "false")
-    assertConfigValue(hadoopConf, "fs.s3a.endpoint",
-      "s3-eu-west-1.amazonaws.com")
-  }
-
-  /**
-   * If the endpoint region is set (even to a blank string) in
-   * "spark.hadoop.fs.s3a.endpoint.region" then the endpoint is not set,
-   * even when the s3a endpoint is "".
-   * This supports a feature in hadoop 3.3.1 where this configuration
-   * pair triggers a revert to the "SDK to work out the region" algorithm,
-   * which works on EC2 deployments.
-   */
-  test("appendSparkHadoopConfigs with S3A endpoint region set to an empty string") {
-    val sc = new SparkConf()
-    val hadoopConf = new Configuration(false)
-    sc.set("spark.hadoop.fs.s3a.endpoint.region", "")
-    new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
-    // the endpoint value will not have been set
-    assertConfigValue(hadoopConf, "fs.s3a.endpoint", null)
   }
 
   /**