10 changes: 0 additions & 10 deletions core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -529,16 +529,6 @@ private[spark] object SparkHadoopUtil extends Logging {
     if (conf.getOption("spark.hadoop.fs.s3a.downgrade.syncable.exceptions").isEmpty) {
       hadoopConf.set("fs.s3a.downgrade.syncable.exceptions", "true", setBySpark)
     }
-    // In Hadoop 3.3.1, AWS region handling with the default "" endpoint only works
-    // in EC2 deployments or when the AWS CLI is installed.
-    // The workaround is to set the name of the S3 endpoint explicitly,
-    // if not already set. See HADOOP-17771.
-    if (hadoopConf.get("fs.s3a.endpoint", "").isEmpty &&
-        hadoopConf.get("fs.s3a.endpoint.region") == null) {
-      // set to US central endpoint which can also connect to buckets
-      // in other regions at the expense of a HEAD request during fs creation
-      hadoopConf.set("fs.s3a.endpoint", "s3.amazonaws.com", setBySpark)
-    }
   }
 
   private def appendSparkHiveConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = {
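Note: with this fixup removed, Spark no longer injects fs.s3a.endpoint = s3.amazonaws.com when neither the endpoint nor the region is configured. A minimal sketch (not part of this PR; the app name and option values are illustrative assumptions) of how a job outside EC2 can pin the endpoint or region explicitly:

import org.apache.spark.SparkConf

// Hypothetical job configuration: either of the two settings below avoids
// relying on the default the removed code used to inject.
val conf = new SparkConf()
  .setAppName("s3a-example") // hypothetical app name
  // Pin the endpoint explicitly (the value the removed code used to set) ...
  .set("spark.hadoop.fs.s3a.endpoint", "s3.amazonaws.com")
  // ... or name the region and let the AWS SDK derive the endpoint.
  .set("spark.hadoop.fs.s3a.endpoint.region", "us-east-1")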
33 changes: 0 additions & 33 deletions core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala
@@ -39,19 +39,6 @@ class SparkHadoopUtilSuite extends SparkFunSuite {
assertConfigMatches(hadoopConf, "orc.filterPushdown", "true", SOURCE_SPARK_HADOOP)
assertConfigMatches(hadoopConf, "fs.s3a.downgrade.syncable.exceptions", "true",
SET_TO_DEFAULT_VALUES)
assertConfigMatches(hadoopConf, "fs.s3a.endpoint", "s3.amazonaws.com", SET_TO_DEFAULT_VALUES)
}

/**
* An empty S3A endpoint will be overridden just as a null value
* would.
*/
test("appendSparkHadoopConfigs with S3A endpoint set to empty string") {
val sc = new SparkConf()
val hadoopConf = new Configuration(false)
sc.set("spark.hadoop.fs.s3a.endpoint", "")
new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
assertConfigMatches(hadoopConf, "fs.s3a.endpoint", "s3.amazonaws.com", SET_TO_DEFAULT_VALUES)
}

/**
Expand All @@ -61,28 +48,8 @@ class SparkHadoopUtilSuite extends SparkFunSuite {
     val sc = new SparkConf()
     val hadoopConf = new Configuration(false)
     sc.set("spark.hadoop.fs.s3a.downgrade.syncable.exceptions", "false")
-    sc.set("spark.hadoop.fs.s3a.endpoint", "s3-eu-west-1.amazonaws.com")
     new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
     assertConfigValue(hadoopConf, "fs.s3a.downgrade.syncable.exceptions", "false")
-    assertConfigValue(hadoopConf, "fs.s3a.endpoint",
-      "s3-eu-west-1.amazonaws.com")
   }
 
-  /**
-   * If the endpoint region is set (even to a blank string) in
-   * "spark.hadoop.fs.s3a.endpoint.region" then the endpoint is not set,
-   * even when the s3a endpoint is "".
-   * This supports a feature in hadoop 3.3.1 where this configuration
-   * pair triggers a revert to the "SDK to work out the region" algorithm,
-   * which works on EC2 deployments.
-   */
-  test("appendSparkHadoopConfigs with S3A endpoint region set to an empty string") {
-    val sc = new SparkConf()
-    val hadoopConf = new Configuration(false)
-    sc.set("spark.hadoop.fs.s3a.endpoint.region", "")
-    new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
-    // the endpoint value will not have been set
-    assertConfigValue(hadoopConf, "fs.s3a.endpoint", null)
-  }

   /**
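Note: the deleted test above documents the Hadoop 3.3.1 behaviour that this PR now leaves entirely to Hadoop: setting fs.s3a.endpoint.region to an empty string while the endpoint is unset tells the AWS SDK to work out the region itself (which works on EC2 deployments). A minimal sketch, assuming those semantics, of how that path looks after this change:

import org.apache.hadoop.conf.Configuration

val hadoopConf = new Configuration(false)
// Empty region + unset endpoint: defer region resolution to the AWS SDK.
hadoopConf.set("fs.s3a.endpoint.region", "")
// After this PR, Spark no longer injects s3.amazonaws.com as a fallback,
// so the endpoint genuinely stays unset.
assert(hadoopConf.get("fs.s3a.endpoint") == null)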