From 0df3af8b76da7e8b0e53f889611ad2d7f8f19d33 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Wed, 30 Jul 2025 10:21:10 -0700 Subject: [PATCH] [SPARK-53020][DEPLOY] JPMS args should also apply to non-SparkSubmit process ### What changes were proposed in this pull request? Currently, JPMS args are only applied to `SparkSubmit`-based processes; for non-`SparkSubmit` processes, e.g. Spark History Server, BeeLine, JPMS args are not applied automatically. For example, when trying to run Spark History Server with Java 24, I see [JEP 472](https://openjdk.org/jeps/472) complaints: ``` $ spark-4.1.0-SNAPSHOT-bin-hadoop3 SPARK_NO_DAEMONIZE=1 sbin/start-history-server.sh starting org.apache.spark.deploy.history.HistoryServer, logging to /Users/chengpan/app/spark-4.1.0-SNAPSHOT-bin-hadoop3/logs/spark-chengpan-org.apache.spark.deploy.history.HistoryServer-1-H27212-MAC-01.local.out Spark Command: /Users/chengpan/.sdkman/candidates/java/24.0.2-tem/bin/java -cp /Users/chengpan/app/spark-4.1.0-SNAPSHOT-bin-hadoop3/conf/:/Users/chengpan/app/spark-4.1.0-SNAPSHOT-bin-hadoop3/jars/slf4j-api-2.0.17.jar:/Users/chengpan/app/spark-4.1.0-SNAPSHOT-bin-hadoop3/jars/* -Xmx1g org.apache.spark.deploy.history.HistoryServer ======================================== Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties 25/07/30 21:28:39 INFO HistoryServer: Started daemon with process name: 54576H27212-MAC-01.local 25/07/30 21:28:39 INFO SignalUtils: Registering signal handler for TERM 25/07/30 21:28:39 INFO SignalUtils: Registering signal handler for HUP 25/07/30 21:28:39 INFO SignalUtils: Registering signal handler for INT WARNING: A restricted method in java.lang.System has been called WARNING: java.lang.System::loadLibrary has been called by org.apache.hadoop.util.NativeCodeLoader in an unnamed module (file:/Users/chengpan/app/spark-4.1.0-SNAPSHOT-bin-hadoop3/jars/hadoop-client-api-3.4.1.jar) WARNING: Use --enable-native-access=ALL-UNNAMED to avoid a warning for callers in 
this module WARNING: Restricted methods will be blocked in a future release unless native access is enabled 25/07/30 21:28:39 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties ... ``` ### Why are the changes needed? I think the JPMS args defined at `JavaModuleOptions.DEFAULT_MODULE_OPTIONS` are intended to be applied to all Spark processes, so I would classify this as a bug. ### Does this PR introduce _any_ user-facing change? It seems not, as I haven't seen Spark daemon process failures caused by missing JPMS args in our real use cases. ### How was this patch tested? A unit test is added. Also verified the Spark History Server start command: ``` spark-4.1.0-SNAPSHOT-bin-SPARK-53020 SPARK_NO_DAEMONIZE=1 sbin/start-history-server.sh starting org.apache.spark.deploy.history.HistoryServer, logging to /Users/chengpan/app/spark-4.1.0-SNAPSHOT-bin-SPARK-53020/logs/spark-chengpan-org.apache.spark.deploy.history.HistoryServer-1-H27212-MAC-01.local.out Spark Command: /Users/chengpan/.sdkman/candidates/java/24.0.2-tem/bin/java -cp /Users/chengpan/app/spark-4.1.0-SNAPSHOT-bin-SPARK-53020/conf/:/Users/chengpan/app/spark-4.1.0-SNAPSHOT-bin-SPARK-53020/jars/slf4j-api-2.0.17.jar:/Users/chengpan/app/spark-4.1.0-SNAPSHOT-bin-SPARK-53020/jars/* -Xmx1g -XX:+IgnoreUnrecognizedVMOptions --add-modules=jdk.incubator.vector --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED 
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false -Dio.netty.tryReflectionSetAccessible=true --enable-native-access=ALL-UNNAMED org.apache.spark.deploy.history.HistoryServer ======================================== WARNING: Using incubator modules: jdk.incubator.vector WARNING: package sun.security.action not in java.base Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties 25/07/30 22:06:38 INFO HistoryServer: Started daemon with process name: 60622H27212-MAC-01.local 25/07/30 22:06:38 INFO SignalUtils: Registering signal handler for TERM 25/07/30 22:06:38 INFO SignalUtils: Registering signal handler for HUP 25/07/30 22:06:38 INFO SignalUtils: Registering signal handler for INT 25/07/30 22:06:38 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties ... ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51725 from pan3793/SPARK-53020. Authored-by: Cheng Pan Signed-off-by: Dongjoon Hyun --- .../java/org/apache/spark/launcher/SparkClassCommandBuilder.java | 1 + .../org/apache/spark/launcher/SparkClassCommandBuilderSuite.java | 1 + 2 files changed, 2 insertions(+) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java index a9daf0e25722a..89ccafd96ba60 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java @@ -113,6 +113,7 @@ public List buildCommand(Map env) String mem = firstNonEmpty(memKey != null ? 
System.getenv(memKey) : null, DEFAULT_MEM); cmd.add("-Xmx" + mem); + addOptionString(cmd, JavaModuleOptions.defaultModuleOptions()); cmd.add(className); cmd.addAll(classArgs); return cmd; diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkClassCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkClassCommandBuilderSuite.java index 3f6d66bb5c968..2a48774d58843 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/SparkClassCommandBuilderSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/SparkClassCommandBuilderSuite.java @@ -33,6 +33,7 @@ public void testBeelineBuilder() throws Exception { SparkClassCommandBuilder builder = new SparkClassCommandBuilder("org.apache.hive.beeline.BeeLine", args); List strings = builder.buildCommand(new HashMap<>()); + assertTrue(strings.containsAll(Arrays.asList(JavaModuleOptions.defaultModuleOptionArray()))); assertTrue(strings.contains("-DmyKey=yourValue")); assertTrue(strings.contains("myBeelineArg")); }