10 changes: 0 additions & 10 deletions core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -529,16 +529,6 @@ private[spark] object SparkHadoopUtil extends Logging {
     if (conf.getOption("spark.hadoop.fs.s3a.downgrade.syncable.exceptions").isEmpty) {
       hadoopConf.set("fs.s3a.downgrade.syncable.exceptions", "true", setBySpark)
     }
-    // In Hadoop 3.3.1, AWS region handling with the default "" endpoint only works
-    // in EC2 deployments or when the AWS CLI is installed.
-    // The workaround is to set the name of the S3 endpoint explicitly,
-    // if not already set. See HADOOP-17771.
-    if (hadoopConf.get("fs.s3a.endpoint", "").isEmpty &&
-        hadoopConf.get("fs.s3a.endpoint.region") == null) {
-      // set to US central endpoint which can also connect to buckets
-      // in other regions at the expense of a HEAD request during fs creation
-      hadoopConf.set("fs.s3a.endpoint", "s3.amazonaws.com", setBySpark)
-    }
   }
 
   private def appendSparkHiveConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = {
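Note: with this fixup removed, Spark no longer injects fs.s3a.endpoint = s3.amazonaws.com when neither the endpoint nor the region is configured. A minimal sketch (not part of this PR; the app name and option values are illustrative assumptions) of how a job outside EC2 can pin the endpoint or region explicitly:

import org.apache.spark.SparkConf

// Hypothetical job configuration: either of the two settings below avoids
// relying on the default the removed code used to inject.
val conf = new SparkConf()
  .setAppName("s3a-example") // hypothetical app name
  // Pin the endpoint explicitly (the value the removed code used to set) ...
  .set("spark.hadoop.fs.s3a.endpoint", "s3.amazonaws.com")
  // ... or name the region and let the AWS SDK derive the endpoint.
  .set("spark.hadoop.fs.s3a.endpoint.region", "us-east-1")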
33 changes: 0 additions & 33 deletions core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala
@@ -39,19 +39,6 @@ class SparkHadoopUtilSuite extends SparkFunSuite {
assertConfigMatches(hadoopConf, "orc.filterPushdown", "true", SOURCE_SPARK_HADOOP)
assertConfigMatches(hadoopConf, "fs.s3a.downgrade.syncable.exceptions", "true",
SET_TO_DEFAULT_VALUES)
assertConfigMatches(hadoopConf, "fs.s3a.endpoint", "s3.amazonaws.com", SET_TO_DEFAULT_VALUES)
}

/**
* An empty S3A endpoint will be overridden just as a null value
* would.
*/
test("appendSparkHadoopConfigs with S3A endpoint set to empty string") {
val sc = new SparkConf()
val hadoopConf = new Configuration(false)
sc.set("spark.hadoop.fs.s3a.endpoint", "")
new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
assertConfigMatches(hadoopConf, "fs.s3a.endpoint", "s3.amazonaws.com", SET_TO_DEFAULT_VALUES)
}

/**
Expand All @@ -61,28 +48,8 @@ class SparkHadoopUtilSuite extends SparkFunSuite {
     val sc = new SparkConf()
     val hadoopConf = new Configuration(false)
     sc.set("spark.hadoop.fs.s3a.downgrade.syncable.exceptions", "false")
-    sc.set("spark.hadoop.fs.s3a.endpoint", "s3-eu-west-1.amazonaws.com")
     new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
     assertConfigValue(hadoopConf, "fs.s3a.downgrade.syncable.exceptions", "false")
-    assertConfigValue(hadoopConf, "fs.s3a.endpoint",
-      "s3-eu-west-1.amazonaws.com")
   }
 
-  /**
-   * If the endpoint region is set (even to a blank string) in
-   * "spark.hadoop.fs.s3a.endpoint.region" then the endpoint is not set,
-   * even when the s3a endpoint is "".
-   * This supports a feature in hadoop 3.3.1 where this configuration
-   * pair triggers a revert to the "SDK to work out the region" algorithm,
-   * which works on EC2 deployments.
-   */
-  test("appendSparkHadoopConfigs with S3A endpoint region set to an empty string") {
-    val sc = new SparkConf()
-    val hadoopConf = new Configuration(false)
-    sc.set("spark.hadoop.fs.s3a.endpoint.region", "")
-    new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
-    // the endpoint value will not have been set
-    assertConfigValue(hadoopConf, "fs.s3a.endpoint", null)
-  }

   /**
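Note: the deleted test above documents the Hadoop 3.3.1 behaviour that this PR now leaves entirely to Hadoop: setting fs.s3a.endpoint.region to an empty string while the endpoint is unset tells the AWS SDK to work out the region itself (which works on EC2 deployments). A minimal sketch, assuming those semantics, of how that path looks after this change:

import org.apache.hadoop.conf.Configuration

val hadoopConf = new Configuration(false)
// Empty region + unset endpoint: defer region resolution to the AWS SDK.
hadoopConf.set("fs.s3a.endpoint.region", "")
// After this PR, Spark no longer injects s3.amazonaws.com as a fallback,
// so the endpoint genuinely stays unset.
assert(hadoopConf.get("fs.s3a.endpoint") == null)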