From e693a3086d977aa858dead57fe17ef2eb3bca812 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Thu, 15 Jan 2026 20:57:43 +0800
Subject: [PATCH 1/5] [WIP][SPARK-54276][BUILD] Bump Hadoop 3.4.3 RC0

---
 .../spark/internal/config/package.scala       |  3 ++-
 dev/deps/spark-deps-hadoop-3-hive-2.3         | 20 +++++++++----------
 docs/building-spark.md                        |  2 +-
 pom.xml                                       | 12 ++++++++++-
 project/SparkBuild.scala                      |  1 +
 .../kubernetes/integration-tests/README.md    |  4 ++--
 .../apache/spark/sql/internal/SQLConf.scala   |  3 ++-
 .../hive/client/IsolatedClientLoader.scala    |  2 +-
 8 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index 331d798a3d76..81f4fd56e243 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -2771,7 +2771,8 @@ package object config {
     .version("2.3.0")
     .stringConf
     .toSequence
-    .createWithDefault(Nil)
+    .createWithDefault(
+      Seq("https://repository.apache.org/content/repositories/orgapachehadoop-1461/"))
 
   private[spark] val APP_ATTEMPT_ID =
     ConfigBuilder("spark.app.attempt.id")
diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index 5fbda29ad4df..6671425e61fa 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -67,16 +67,16 @@ gcs-connector/hadoop3-2.2.31/shaded/gcs-connector-hadoop3-2.2.31-shaded.jar
 gmetric4j/1.0.10//gmetric4j-1.0.10.jar
 gson/2.13.2//gson-2.13.2.jar
 guava/33.4.8-jre//guava-33.4.8-jre.jar
-hadoop-aliyun/3.4.2//hadoop-aliyun-3.4.2.jar
-hadoop-annotations/3.4.2//hadoop-annotations-3.4.2.jar
-hadoop-aws/3.4.2//hadoop-aws-3.4.2.jar
-hadoop-azure-datalake/3.4.2//hadoop-azure-datalake-3.4.2.jar
-hadoop-azure/3.4.2//hadoop-azure-3.4.2.jar
-hadoop-client-api/3.4.2//hadoop-client-api-3.4.2.jar
-hadoop-client-runtime/3.4.2//hadoop-client-runtime-3.4.2.jar
-hadoop-cloud-storage/3.4.2//hadoop-cloud-storage-3.4.2.jar
-hadoop-huaweicloud/3.4.2//hadoop-huaweicloud-3.4.2.jar
-hadoop-shaded-guava/1.4.0//hadoop-shaded-guava-1.4.0.jar
+hadoop-aliyun/3.4.3//hadoop-aliyun-3.4.3.jar
+hadoop-annotations/3.4.3//hadoop-annotations-3.4.3.jar
+hadoop-aws/3.4.3//hadoop-aws-3.4.3.jar
+hadoop-azure-datalake/3.4.3//hadoop-azure-datalake-3.4.3.jar
+hadoop-azure/3.4.3//hadoop-azure-3.4.3.jar
+hadoop-client-api/3.4.3//hadoop-client-api-3.4.3.jar
+hadoop-client-runtime/3.4.3//hadoop-client-runtime-3.4.3.jar
+hadoop-cloud-storage/3.4.3//hadoop-cloud-storage-3.4.3.jar
+hadoop-huaweicloud/3.4.3//hadoop-huaweicloud-3.4.3.jar
+hadoop-shaded-guava/1.5.0//hadoop-shaded-guava-1.5.0.jar
 hive-beeline/2.3.10//hive-beeline-2.3.10.jar
 hive-cli/2.3.10//hive-cli-2.3.10.jar
 hive-common/2.3.10//hive-common-2.3.10.jar
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 1a2da3b01726..9d31d52b60c6 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -83,7 +83,7 @@ You can enable the `yarn` profile and specify the exact version of Hadoop to com
 
 Example:
 
-    ./build/mvn -Pyarn -Dhadoop.version=3.4.1 -DskipTests clean package
+    ./build/mvn -Pyarn -Dhadoop.version=3.4.3 -DskipTests clean package
 
 ## Building With Hive and JDBC Support
 
diff --git a/pom.xml b/pom.xml
index 79353b016cff..6f02f2afb2a3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -127,7 +127,7 @@
     2.0.17
     2.25.3
-    <hadoop.version>3.4.2</hadoop.version>
+    <hadoop.version>3.4.3</hadoop.version>
     4.33.0
     3.11.4
@@ -391,6 +391,16 @@
        <enabled>false</enabled>
       </snapshots>
     </repository>
+    <repository>
+      <id>hadoop-staging</id>
+      <url>https://repository.apache.org/content/repositories/orgapachehadoop-1461</url>
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+    </repository>
   </repositories>
   <pluginRepositories>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 1cbb58fd5b29..ee92d9e36ac1 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -304,6 +304,7 @@ object SparkBuild extends PomBuild {
       // Google Mirror of Maven Central, placed first so that it's used instead of flaky Maven Central.
       // See https://storage-download.googleapis.com/maven-central/index.html for more info.
       "gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/",
+      "hadoop-staging" at "https://repository.apache.org/content/repositories/orgapachehadoop-1461/",
       "jitpack" at "https://jitpack.io",
       DefaultMavenRepository,
       Resolver.mavenLocal,
diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md
index e23c4ad7ed28..d7950eb673ad 100644
--- a/resource-managers/kubernetes/integration-tests/README.md
+++ b/resource-managers/kubernetes/integration-tests/README.md
@@ -136,8 +136,8 @@ properties to Maven. For example:
 
     mvn integration-test -am -pl :spark-kubernetes-integration-tests_2.13 \
         -Pkubernetes -Pkubernetes-integration-tests \
-        -Phadoop-3 -Dhadoop.version=3.4.0 \
-        -Dspark.kubernetes.test.sparkTgz=spark-4.1.0-SNAPSHOT-bin-example.tgz \
+        -Phadoop-3 -Dhadoop.version=3.4.3 \
+        -Dspark.kubernetes.test.sparkTgz=spark-4.2.0-SNAPSHOT-bin-example.tgz \
         -Dspark.kubernetes.test.imageTag=sometag \
         -Dspark.kubernetes.test.imageRepo=docker.io/somerepo \
         -Dspark.kubernetes.test.namespace=spark-int-tests \
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index c8eddffe5c65..8f1920feef3b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -5516,7 +5516,8 @@ object SQLConf {
     .stringConf
     .createWithDefault(
       sys.env.getOrElse("DEFAULT_ARTIFACT_REPOSITORY",
-        "https://maven-central.storage-download.googleapis.com/maven2/"))
+        "https://maven-central.storage-download.googleapis.com/maven2/," +
+          "https://repository.apache.org/content/repositories/orgapachehadoop-1461"))
 
   val LEGACY_FROM_DAYTIME_STRING =
     buildConf("spark.sql.legacy.fromDayTimeString.enabled")
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index c439dfbd9169..8460bdd43fb0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -65,7 +65,7 @@ private[hive] object IsolatedClientLoader extends Logging {
       case e: RuntimeException if e.getMessage.contains("hadoop") =>
         // If the error message contains hadoop, it is probably because the hadoop
         // version cannot be resolved.
-        val fallbackVersion = "3.4.2"
+        val fallbackVersion = "3.4.3"
         logWarning(log"Failed to resolve Hadoop artifacts for the version " +
          log"${MDC(HADOOP_VERSION, hadoopVersion)}. We will change the hadoop version from " +
          log"${MDC(HADOOP_VERSION, hadoopVersion)} to " +

From dc806e579ae5b9fa57a28c674581cec07ec150ac Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Wed, 28 Jan 2026 17:43:37 +0800
Subject: [PATCH 2/5] test java 25

---
 .github/workflows/build_main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml
index 97c19d92eeae..684b025e6463 100644
--- a/.github/workflows/build_main.yml
+++ b/.github/workflows/build_main.yml
@@ -32,3 +32,5 @@ jobs:
     uses: ./.github/workflows/build_and_test.yml
     secrets:
       codecov_token: ${{ secrets.CODECOV_TOKEN }}
+    with:
+      java: 25

From f2f9d4cf0f43e3d769f1f252a9564bd4d1e245a0 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Wed, 28 Jan 2026 18:33:40 +0800
Subject: [PATCH 3/5] -Dio.netty.noUnsafe=false

---
 R/run-tests.sh                                             | 4 ++--
 .../java/org/apache/spark/launcher/JavaModuleOptions.java  | 1 +
 pom.xml                                                    | 1 +
 project/SparkBuild.scala                                   | 1 +
 sql/catalyst/pom.xml                                       | 2 +-
 sql/connect/bin/spark-connect-scala-client                 | 1 +
 sql/core/pom.xml                                           | 2 +-
 7 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/R/run-tests.sh b/R/run-tests.sh
index 3a90b44c2b65..59186fd3a74f 100755
--- a/R/run-tests.sh
+++ b/R/run-tests.sh
@@ -30,9 +30,9 @@ if [[ $(echo $SPARK_AVRO_JAR_PATH | wc -l) -eq 1 ]]; then
 fi
 
 if [ -z "$SPARK_JARS" ]; then
-  SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+  SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Xss4M" --conf spark.executor.extraJavaOptions="-Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
 else
-  SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+  SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Xss4M" --conf spark.executor.extraJavaOptions="-Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
 fi
 
 FAILED=$((PIPESTATUS[0]||$FAILED))
diff --git a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java
index b42c7906fe18..49c24bc88783 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java
@@ -47,6 +47,7 @@ public class JavaModuleOptions {
       "-Dio.netty.tryReflectionSetAccessible=true",
"-Dio.netty.allocator.type=pooled", "-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE", + "-Dio.netty.noUnsafe=false", "--enable-native-access=ALL-UNNAMED"}; /** diff --git a/pom.xml b/pom.xml index 6f02f2afb2a3..36ca17d12eae 100644 --- a/pom.xml +++ b/pom.xml @@ -335,6 +335,7 @@ -Dio.netty.tryReflectionSetAccessible=true -Dio.netty.allocator.type=pooled -Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE + -Dio.netty.noUnsafe=false --enable-native-access=ALL-UNNAMED 2.7.12 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index ee92d9e36ac1..97b0e8cca4b5 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1880,6 +1880,7 @@ object TestSettings { "-Dio.netty.tryReflectionSetAccessible=true", "-Dio.netty.allocator.type=pooled", "-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE", + "-Dio.netty.noUnsafe=false", "--enable-native-access=ALL-UNNAMED").mkString(" ") s"-Xmx$heapSize -Xss4m -XX:MaxMetaspaceSize=$metaspaceSize -XX:ReservedCodeCacheSize=128m -Dfile.encoding=UTF-8 $extraTestJavaArgs" .split(" ").toSeq diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 9b7fb89ddd9e..fc4ed86bcabb 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -173,7 +173,7 @@ org.scalatest scalatest-maven-plugin - -ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs} -Dio.netty.tryReflectionSetAccessible=true + -ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs} diff --git a/sql/connect/bin/spark-connect-scala-client b/sql/connect/bin/spark-connect-scala-client index 4d508e626df7..91d9edcd04dc 100755 --- a/sql/connect/bin/spark-connect-scala-client +++ b/sql/connect/bin/spark-connect-scala-client @@ -71,6 +71,7 @@ JVM_ARGS="-XX:+IgnoreUnrecognizedVMOptions \ --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \ -Djdk.reflect.useDirectMethodHandle=false \ -Dio.netty.tryReflectionSetAccessible=true \ + -Dio.netty.noUnsafe=false --enable-native-access=ALL-UNNAMED \ $SCJVM_ARGS" diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 7f321677ff2f..af1ee6e095ea 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -345,7 +345,7 @@ scalatest-maven-plugin - -ea -Xmx6g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs} -Dio.netty.tryReflectionSetAccessible=true + -ea -Xmx6g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs} From 0a2125304346edc0609912e5c2238c89e8848b8c Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Wed, 28 Jan 2026 20:07:49 +0800 Subject: [PATCH 4/5] sun.io.serialization.extendedDebugInfo --- .../serializer/SerializationDebugger.scala | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala index da08635eca4c..85b39f649004 100644 --- a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala +++ b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala @@ -20,14 +20,12 @@ package org.apache.spark.serializer import java.io._ import java.lang.invoke.MethodHandles import java.lang.reflect.{Field, Method} -import java.security.{AccessController, PrivilegedAction} import scala.annotation.tailrec import scala.collection.mutable import scala.util.control.NonFatal import org.apache.spark.internal.Logging -import org.apache.spark.util.SparkClassUtils private[spark] object 
 private[spark] object SerializationDebugger extends Logging {
@@ -69,14 +67,19 @@ private[spark] object SerializationDebugger extends Logging {
     new SerializationDebugger().visit(obj, List.empty)
   }
 
-  private[serializer] var enableDebugging: Boolean = {
-    val lookup = MethodHandles.lookup()
-    val clazz = SparkClassUtils.classForName("sun.security.action.GetBooleanAction")
-    val constructor = clazz.getConstructor(classOf[String])
-    val mh = lookup.unreflectConstructor(constructor)
-    val action = mh.invoke("sun.io.serialization.extendedDebugInfo")
-      .asInstanceOf[PrivilegedAction[Boolean]]
-    !AccessController.doPrivileged(action).booleanValue()
+  private[serializer] var enableDebugging: Boolean = try {
+    // Try to access the private static boolean ObjectOutputStream.extendedDebugInfo
+    // to avoid handling SecurityManager changes across different versions of JDKs.
+    // See details in JEP 486: Permanently Disable the Security Manager (JDK 24).
+    val clazz = classOf[ObjectOutputStream]
+    val lookup = MethodHandles.privateLookupIn(clazz, MethodHandles.lookup())
+    val vh = lookup.findStaticVarHandle(clazz, "extendedDebugInfo", java.lang.Boolean.TYPE)
+    !vh.get().asInstanceOf[Boolean]
+  } catch {
+    case NonFatal(_) =>
+      // Fall back to a plain system property read, which should work on modern JDKs.
+      // https://github.com/openjdk/jdk/commit/9b0ab92b16f682e65e9847e8127b6ce09fc5759c
+      !java.lang.Boolean.getBoolean("sun.io.serialization.extendedDebugInfo")
   }
 
   private class SerializationDebugger {

From b1479e379729c494ce48ce29e63b498624838c6d Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Thu, 29 Jan 2026 01:06:54 +0800
Subject: [PATCH 5/5] adapt ServiceLoader behavior change

---
 .../deploy/security/HadoopDelegationTokenManager.scala | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala
index bc7715941547..bc9cd509454b 100644
--- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala
@@ -270,7 +270,11 @@ private[spark] class HadoopDelegationTokenManager(
     val providers = mutable.ArrayBuffer[HadoopDelegationTokenProvider]()
 
     val iterator = loader.iterator
-    while (iterator.hasNext) {
+
+    // By contract, either ServiceLoader's iterator.hasNext or iterator.next may throw an exception.
+    def hasNext = try iterator.hasNext catch { case _: Throwable => true }
+
+    while (hasNext) {
       try {
         providers += iterator.next
       } catch {
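
The guarded iteration in PATCH 5 can be tried outside Spark. The sketch below is a minimal illustration, not the patch itself: `ServiceLoaderSketch`, `TokenProvider`, and `loadProviders` are hypothetical names standing in for Spark's `HadoopDelegationTokenManager` and `HadoopDelegationTokenProvider`. It mirrors the patch's approach of wrapping both `hasNext` and `next`, since on newer JDKs either call may throw (for example a `ServiceConfigurationError` when a registered provider class cannot be loaded), and a single unguarded `hasNext` would otherwise abort loading of all remaining providers.

    import java.util.ServiceLoader
    import scala.collection.mutable

    object ServiceLoaderSketch {
      // Hypothetical stand-in for HadoopDelegationTokenProvider.
      trait TokenProvider { def serviceName: String }

      // Collect every provider that loads cleanly, skipping the ones that fail.
      def loadProviders(loader: ServiceLoader[TokenProvider]): Seq[TokenProvider] = {
        val providers = mutable.ArrayBuffer[TokenProvider]()
        val iterator = loader.iterator
        // A throwing hasNext() is treated as "there may be more"; the guarded
        // next() below then surfaces (and skips) the provider that failed to load.
        def hasNext: Boolean = try iterator.hasNext catch { case _: Throwable => true }
        while (hasNext) {
          try {
            providers += iterator.next()
          } catch {
            case _: Throwable => // skip the broken provider and keep scanning
          }
        }
        providers.toSeq
      }

      def main(args: Array[String]): Unit = {
        // Assumes implementations are registered under META-INF/services on the classpath;
        // with no registrations this simply prints an empty list.
        val found = loadProviders(ServiceLoader.load(classOf[TokenProvider]))
        println(found.map(_.serviceName))
      }
    }
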