2 changes: 2 additions & 0 deletions .github/workflows/build_main.yml
@@ -32,3 +32,5 @@ jobs:
uses: ./.github/workflows/build_and_test.yml
secrets:
codecov_token: ${{ secrets.CODECOV_TOKEN }}
with:
java: 25
4 changes: 2 additions & 2 deletions R/run-tests.sh
@@ -30,9 +30,9 @@ if [[ $(echo $SPARK_AVRO_JAR_PATH | wc -l) -eq 1 ]]; then
fi

if [ -z "$SPARK_JARS" ]; then
SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Xss4M" --conf spark.executor.extraJavaOptions="-Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
else
SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Xss4M" --conf spark.executor.extraJavaOptions="-Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
fi

FAILED=$((PIPESTATUS[0]||$FAILED))

@@ -270,7 +270,11 @@ private[spark] class HadoopDelegationTokenManager(
val providers = mutable.ArrayBuffer[HadoopDelegationTokenProvider]()

val iterator = loader.iterator
while (iterator.hasNext) {

// By contract, either ServiceLoader's iterator.hasNext or iterator.next may throw an exception.
def hasNext = try iterator.hasNext catch { case _: Throwable => true }

while (hasNext) {
try {
providers += iterator.next
} catch {
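For context, `ServiceLoader`'s lazy iterator may throw a `ServiceConfigurationError` from `hasNext` as well as from `next` (for example, when a provider class fails to link under a newer JDK), so treating a throwing `hasNext` as "possibly more entries" lets discovery skip a broken provider instead of aborting the whole scan. A minimal self-contained sketch of the same pattern, with an illustrative helper name rather than the Spark method:

```scala
import java.util.ServiceLoader
import scala.collection.mutable

object LenientServiceLoading {
  // Illustrative helper: collect every provider that loads cleanly, skipping
  // entries whose classes fail to link or instantiate under the running JDK.
  def loadAllLenient[T](loader: ServiceLoader[T]): Seq[T] = {
    val iterator = loader.iterator()
    val found = mutable.ArrayBuffer[T]()
    // hasNext may itself throw (e.g. ServiceConfigurationError); assume more
    // entries remain and let next() surface, and step past, the broken one.
    def hasNext = try iterator.hasNext catch { case _: Throwable => true }
    while (hasNext) {
      try found += iterator.next()
      catch { case _: Throwable => () } // skip the unloadable provider
    }
    found.toSeq
  }
}
```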

@@ -2771,7 +2771,8 @@ package object config {
.version("2.3.0")
.stringConf
.toSequence
.createWithDefault(Nil)
.createWithDefault(
Seq("https://repository.apache.org/content/repositories/orgapachehadoop-1461/"))

private[spark] val APP_ATTEMPT_ID =
ConfigBuilder("spark.app.attempt.id")
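The entry changed here sits just above `APP_ATTEMPT_ID` in core's config package object; going by the `2.3.0` version tag it is presumably `spark.jars.repositories`, though the conf name itself is above the visible hunk, so that is an assumption. The `.stringConf.toSequence` chain surfaces a comma-separated value as `Seq[String]`; a sketch of the same builder shape with a hypothetical key:

```scala
import org.apache.spark.internal.config.ConfigBuilder

object StagingRepoConf {
  // Hypothetical conf for illustration only; same builder pattern as above.
  // A comma-separated string is surfaced as Seq[String], here defaulting to
  // the Hadoop staging repository used to test the RC artifacts.
  val EXTRA_REPOSITORIES = ConfigBuilder("spark.test.extraRepositories")
    .version("4.2.0") // placeholder version
    .stringConf
    .toSequence
    .createWithDefault(
      Seq("https://repository.apache.org/content/repositories/orgapachehadoop-1461/"))
}
```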

@@ -20,14 +20,12 @@ package org.apache.spark.serializer
import java.io._
import java.lang.invoke.MethodHandles
import java.lang.reflect.{Field, Method}
import java.security.{AccessController, PrivilegedAction}

import scala.annotation.tailrec
import scala.collection.mutable
import scala.util.control.NonFatal

import org.apache.spark.internal.Logging
import org.apache.spark.util.SparkClassUtils

private[spark] object SerializationDebugger extends Logging {

@@ -69,14 +67,19 @@ private[spark] object SerializationDebugger extends Logging {
new SerializationDebugger().visit(obj, List.empty)
}

private[serializer] var enableDebugging: Boolean = {
val lookup = MethodHandles.lookup()
val clazz = SparkClassUtils.classForName("sun.security.action.GetBooleanAction")
val constructor = clazz.getConstructor(classOf[String])
val mh = lookup.unreflectConstructor(constructor)
val action = mh.invoke("sun.io.serialization.extendedDebugInfo")
.asInstanceOf[PrivilegedAction[Boolean]]
!AccessController.doPrivileged(action).booleanValue()
private[serializer] var enableDebugging: Boolean = try {
// Try to access the private static boolean ObjectOutputStream.extendedDebugInfo
// to avoid handling SecurityManager changes across different JDK versions.
// See JEP 486: Permanently Disable the Security Manager (JDK 24).
val clazz = classOf[ObjectOutputStream]
val lookup = MethodHandles.privateLookupIn(clazz, MethodHandles.lookup())
val vh = lookup.findStaticVarHandle(clazz, "extendedDebugInfo", java.lang.Boolean.TYPE)
!vh.get().asInstanceOf[Boolean]
} catch {
case NonFatal(_) =>
// Fallback to the plain system property read which should work on modern JDKs.
// https://github.com/openjdk/jdk/commit/9b0ab92b16f682e65e9847e8127b6ce09fc5759c
!java.lang.Boolean.getBoolean("sun.io.serialization.extendedDebugInfo")
}

private class SerializationDebugger {
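The removed initializer depended on `sun.security.action.GetBooleanAction` plus `AccessController.doPrivileged`, both gone with the Security Manager (JEP 486, JDK 24). The replacement reads `ObjectOutputStream`'s private static `extendedDebugInfo` flag through a `VarHandle`; `privateLookupIn` into `java.io` presumably succeeds only when that package is opened to the caller (Spark's module options already include `--add-opens=java.base/java.io=ALL-UNNAMED`), and the `NonFatal` fallback covers everything else. A standalone sketch of the same read:

```scala
import java.io.ObjectOutputStream
import java.lang.invoke.MethodHandles
import scala.util.control.NonFatal

object ExtendedDebugInfoProbe {
  // Same read as in the diff above, as a standalone probe. If java.base/java.io
  // is not opened to this module, privateLookupIn throws and we fall back to
  // the plain system-property read.
  def extendedDebugInfo: Boolean =
    try {
      val clazz = classOf[ObjectOutputStream]
      val lookup = MethodHandles.privateLookupIn(clazz, MethodHandles.lookup())
      val vh =
        lookup.findStaticVarHandle(clazz, "extendedDebugInfo", java.lang.Boolean.TYPE)
      vh.get().asInstanceOf[Boolean]
    } catch {
      case NonFatal(_) =>
        java.lang.Boolean.getBoolean("sun.io.serialization.extendedDebugInfo")
    }
}
```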
20 changes: 10 additions & 10 deletions dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -67,16 +67,16 @@ gcs-connector/hadoop3-2.2.31/shaded/gcs-connector-hadoop3-2.2.31-shaded.jar
gmetric4j/1.0.10//gmetric4j-1.0.10.jar
gson/2.13.2//gson-2.13.2.jar
guava/33.4.8-jre//guava-33.4.8-jre.jar
hadoop-aliyun/3.4.2//hadoop-aliyun-3.4.2.jar
hadoop-annotations/3.4.2//hadoop-annotations-3.4.2.jar
hadoop-aws/3.4.2//hadoop-aws-3.4.2.jar
hadoop-azure-datalake/3.4.2//hadoop-azure-datalake-3.4.2.jar
hadoop-azure/3.4.2//hadoop-azure-3.4.2.jar
hadoop-client-api/3.4.2//hadoop-client-api-3.4.2.jar
hadoop-client-runtime/3.4.2//hadoop-client-runtime-3.4.2.jar
hadoop-cloud-storage/3.4.2//hadoop-cloud-storage-3.4.2.jar
hadoop-huaweicloud/3.4.2//hadoop-huaweicloud-3.4.2.jar
hadoop-shaded-guava/1.4.0//hadoop-shaded-guava-1.4.0.jar
hadoop-aliyun/3.4.3//hadoop-aliyun-3.4.3.jar
hadoop-annotations/3.4.3//hadoop-annotations-3.4.3.jar
hadoop-aws/3.4.3//hadoop-aws-3.4.3.jar
hadoop-azure-datalake/3.4.3//hadoop-azure-datalake-3.4.3.jar
hadoop-azure/3.4.3//hadoop-azure-3.4.3.jar
hadoop-client-api/3.4.3//hadoop-client-api-3.4.3.jar
hadoop-client-runtime/3.4.3//hadoop-client-runtime-3.4.3.jar
hadoop-cloud-storage/3.4.3//hadoop-cloud-storage-3.4.3.jar
hadoop-huaweicloud/3.4.3//hadoop-huaweicloud-3.4.3.jar
hadoop-shaded-guava/1.5.0//hadoop-shaded-guava-1.5.0.jar
hive-beeline/2.3.10//hive-beeline-2.3.10.jar
hive-cli/2.3.10//hive-cli-2.3.10.jar
hive-common/2.3.10//hive-common-2.3.10.jar
2 changes: 1 addition & 1 deletion docs/building-spark.md
@@ -83,7 +83,7 @@ You can enable the `yarn` profile and specify the exact version of Hadoop to com

Example:

./build/mvn -Pyarn -Dhadoop.version=3.4.1 -DskipTests clean package
./build/mvn -Pyarn -Dhadoop.version=3.4.3 -DskipTests clean package

## Building With Hive and JDBC Support


@@ -47,6 +47,7 @@ public class JavaModuleOptions {
"-Dio.netty.tryReflectionSetAccessible=true",
"-Dio.netty.allocator.type=pooled",
"-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE",
"-Dio.netty.noUnsafe=false",
"--enable-native-access=ALL-UNNAMED"};

/**
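`-Dio.netty.noUnsafe=false` presumably keeps Netty's `sun.misc.Unsafe`-based fast paths enabled on JDK 25, where newer Netty versions otherwise back away from Unsafe. A quick way to check that the flag took effect, using Netty's real capability probe `PlatformDependent` (the check class itself is illustrative):

```scala
import io.netty.util.internal.PlatformDependent

// Prints whether Netty ended up with usable Unsafe access under the current
// JVM flags; expect `true` with -Dio.netty.noUnsafe=false on a JDK that
// still permits sun.misc.Unsafe.
object NettyUnsafeCheck extends App {
  println(s"hasUnsafe = ${PlatformDependent.hasUnsafe}")
}
```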
13 changes: 12 additions & 1 deletion pom.xml
@@ -127,7 +127,7 @@
<slf4j.version>2.0.17</slf4j.version>
<log4j.version>2.25.3</log4j.version>
<!-- make sure to update IsolatedClientLoader whenever this version is changed -->
<hadoop.version>3.4.2</hadoop.version>
<hadoop.version>3.4.3</hadoop.version>
<!-- SPARK-41247: When updating `protobuf.version`, also need to update `protoVersion` in `SparkBuild.scala` -->
<protobuf.version>4.33.0</protobuf.version>
<protoc-jar-maven-plugin.version>3.11.4</protoc-jar-maven-plugin.version>
@@ -335,6 +335,7 @@
-Dio.netty.tryReflectionSetAccessible=true
-Dio.netty.allocator.type=pooled
-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE
-Dio.netty.noUnsafe=false
--enable-native-access=ALL-UNNAMED
</extraJavaTestArgs>
<mariadb.java.client.version>2.7.12</mariadb.java.client.version>
@@ -391,6 +392,16 @@
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>hadoop-staging</id>
<url>https://repository.apache.org/content/repositories/orgapachehadoop-1461</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
2 changes: 2 additions & 0 deletions project/SparkBuild.scala
@@ -304,6 +304,7 @@ object SparkBuild extends PomBuild {
// Google Mirror of Maven Central, placed first so that it's used instead of flaky Maven Central.
// See https://storage-download.googleapis.com/maven-central/index.html for more info.
"gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/",
"hadoop-staging" at "https://repository.apache.org/content/repositories/orgapachehadoop-1461/",
"jitpack" at "https://jitpack.io",
DefaultMavenRepository,
Resolver.mavenLocal,
@@ -1879,6 +1880,7 @@ object TestSettings {
"-Dio.netty.tryReflectionSetAccessible=true",
"-Dio.netty.allocator.type=pooled",
"-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE",
"-Dio.netty.noUnsafe=false",
"--enable-native-access=ALL-UNNAMED").mkString(" ")
s"-Xmx$heapSize -Xss4m -XX:MaxMetaspaceSize=$metaspaceSize -XX:ReservedCodeCacheSize=128m -Dfile.encoding=UTF-8 $extraTestJavaArgs"
.split(" ").toSeq
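sbt consults resolvers in order, so listing the staging repository here makes the Hadoop 3.4.3 RC artifacts resolvable ahead of their appearance on Maven Central. For a local experiment outside this build, a minimal `build.sbt` equivalent might look like this (a sketch, to be removed once the release lands on Central):

```scala
// build.sbt sketch: put the Hadoop staging repo on the resolver chain so the
// hadoop-* 3.4.3 RC artifacts resolve.
resolvers += "hadoop-staging" at
  "https://repository.apache.org/content/repositories/orgapachehadoop-1461/"
libraryDependencies += "org.apache.hadoop" % "hadoop-client-api" % "3.4.3"
```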
4 changes: 2 additions & 2 deletions resource-managers/kubernetes/integration-tests/README.md
@@ -136,8 +136,8 @@ properties to Maven. For example:

mvn integration-test -am -pl :spark-kubernetes-integration-tests_2.13 \
-Pkubernetes -Pkubernetes-integration-tests \
-Phadoop-3 -Dhadoop.version=3.4.0 \
-Dspark.kubernetes.test.sparkTgz=spark-4.1.0-SNAPSHOT-bin-example.tgz \
-Phadoop-3 -Dhadoop.version=3.4.3 \
-Dspark.kubernetes.test.sparkTgz=spark-4.2.0-SNAPSHOT-bin-example.tgz \
-Dspark.kubernetes.test.imageTag=sometag \
-Dspark.kubernetes.test.imageRepo=docker.io/somerepo \
-Dspark.kubernetes.test.namespace=spark-int-tests \
2 changes: 1 addition & 1 deletion sql/catalyst/pom.xml
@@ -173,7 +173,7 @@
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<argLine>-ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs} -Dio.netty.tryReflectionSetAccessible=true</argLine>
<argLine>-ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs}</argLine>
</configuration>
</plugin>
</plugins>

@@ -5516,7 +5516,8 @@ object SQLConf {
.stringConf
.createWithDefault(
sys.env.getOrElse("DEFAULT_ARTIFACT_REPOSITORY",
"https://maven-central.storage-download.googleapis.com/maven2/"))
"https://maven-central.storage-download.googleapis.com/maven2/," +
"https://repository.apache.org/content/repositories/orgapachehadoop-1461"))

val LEGACY_FROM_DAYTIME_STRING =
buildConf("spark.sql.legacy.fromDayTimeString.enabled")
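The default grows from one URL to two joined by a comma, which suggests `DEFAULT_ARTIFACT_REPOSITORY` is consumed as a comma-separated list. A small sketch of that parse, an assumption for illustration rather than the actual Spark consumer code:

```scala
object RepoListParse extends App {
  // The default is one string holding a comma-separated list; a consumer
  // would presumably split it like this.
  val defaultArtifactRepos =
    "https://maven-central.storage-download.googleapis.com/maven2/," +
      "https://repository.apache.org/content/repositories/orgapachehadoop-1461"
  val repos: Seq[String] =
    defaultArtifactRepos.split(",").map(_.trim).filter(_.nonEmpty).toSeq
  repos.foreach(println)
}
```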
1 change: 1 addition & 0 deletions sql/connect/bin/spark-connect-scala-client
@@ -71,6 +71,7 @@ JVM_ARGS="-XX:+IgnoreUnrecognizedVMOptions \
--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \
-Djdk.reflect.useDirectMethodHandle=false \
-Dio.netty.tryReflectionSetAccessible=true \
-Dio.netty.noUnsafe=false \
--enable-native-access=ALL-UNNAMED \
$SCJVM_ARGS"

2 changes: 1 addition & 1 deletion sql/core/pom.xml
@@ -345,7 +345,7 @@
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<!-- SPARK-54830: Increase `-Xmx` to 6g to prevent OOM during testing, ensuring consistency with the SQL module configuration in `SparkBuild.scala`.-->
<argLine>-ea -Xmx6g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs} -Dio.netty.tryReflectionSetAccessible=true</argLine>
<argLine>-ea -Xmx6g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs}</argLine>
</configuration>
</plugin>
<plugin>

@@ -65,7 +65,7 @@ private[hive] object IsolatedClientLoader extends Logging {
case e: RuntimeException if e.getMessage.contains("hadoop") =>
// If the error message contains hadoop, it is probably because the hadoop
// version cannot be resolved.
val fallbackVersion = "3.4.2"
val fallbackVersion = "3.4.3"
logWarning(log"Failed to resolve Hadoop artifacts for the version " +
log"${MDC(HADOOP_VERSION, hadoopVersion)}. We will change the hadoop version from " +
log"${MDC(HADOOP_VERSION, hadoopVersion)} to " +
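This keeps the fallback in step with the pom's `hadoop.version`, as the comment there asks ("make sure to update IsolatedClientLoader whenever this version is changed"). Reduced to a sketch, the retry has this shape (the real method logs through MDC and re-runs artifact resolution rather than taking a resolver function):

```scala
object HadoopVersionFallback {
  // Retry resolution with the build's pinned Hadoop version when the
  // requested one cannot be resolved.
  def resolveWithFallback(
      hadoopVersion: String,
      resolve: String => Seq[java.net.URL]): Seq[java.net.URL] = {
    try resolve(hadoopVersion) catch {
      case e: RuntimeException
          if e.getMessage != null && e.getMessage.contains("hadoop") =>
        val fallbackVersion = "3.4.3" // must track hadoop.version in pom.xml
        resolve(fallbackVersion)
    }
  }
}
```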