From b6b69f9439e67b2dbcdfe8a8917c6a97934e34a8 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 16 Mar 2026 01:03:28 +0800 Subject: [PATCH 1/4] Remove default jdk.reflect.useDirectMethodHandle=false --- .../src/main/scala/org/apache/spark/util/ClosureCleaner.scala | 4 +++- .../java/org/apache/spark/launcher/JavaModuleOptions.java | 1 - pom.xml | 1 - project/SparkBuild.scala | 1 - sql/connect/bin/spark-connect-scala-client | 1 - 5 files changed, 3 insertions(+), 5 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/util/ClosureCleaner.scala b/common/utils/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index b23d13c32a104..c63a15710dee1 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/ClosureCleaner.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -622,7 +622,9 @@ private[spark] object ClosureCleaner extends Logging { lambdaProxy: SerializedLambda): Option[F] = { val javaVersion = Runtime.version().feature() val useClone = System.getProperty("spark.cloneBasedClosureCleaner.enabled") == "true" || - System.getenv("SPARK_CLONE_BASED_CLOSURE_CLEANER") == "1" || javaVersion >= 22 + System.getenv("SPARK_CLONE_BASED_CLOSURE_CLEANER") == "1" || javaVersion >= 22 || + (javaVersion >= 18 && + System.getProperty("jdk.reflect.useDirectMethodHandle", "true") == "true") if (useClone) { val factory = makeClonedIndyLambdaFactory(indyLambda.getClass, lambdaProxy) diff --git a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java index 49c24bc887838..ec3c030723ce3 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java +++ b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java @@ -43,7 +43,6 @@ public class JavaModuleOptions { "--add-opens=java.base/sun.security.action=ALL-UNNAMED", "--add-opens=java.base/sun.util.calendar=ALL-UNNAMED", "--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED", - "-Djdk.reflect.useDirectMethodHandle=false", "-Dio.netty.tryReflectionSetAccessible=true", "-Dio.netty.allocator.type=pooled", "-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE", diff --git a/pom.xml b/pom.xml index 84b2eb54f36c9..41ba850767093 100644 --- a/pom.xml +++ b/pom.xml @@ -331,7 +331,6 @@ --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED - -Djdk.reflect.useDirectMethodHandle=false -Dio.netty.tryReflectionSetAccessible=true -Dio.netty.allocator.type=pooled -Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 80f383e7d2a6c..2e2dfb37c07e2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1872,7 +1872,6 @@ object TestSettings { "--add-opens=java.base/sun.nio.cs=ALL-UNNAMED", "--add-opens=java.base/sun.security.action=ALL-UNNAMED", "--add-opens=java.base/sun.util.calendar=ALL-UNNAMED", - "-Djdk.reflect.useDirectMethodHandle=false", "-Dio.netty.tryReflectionSetAccessible=true", "-Dio.netty.allocator.type=pooled", "-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE", diff --git a/sql/connect/bin/spark-connect-scala-client b/sql/connect/bin/spark-connect-scala-client index 085f7e92f3c21..019a42a2ba473 100755 --- a/sql/connect/bin/spark-connect-scala-client +++ b/sql/connect/bin/spark-connect-scala-client @@ -69,7 +69,6 @@ JVM_ARGS="-XX:+IgnoreUnrecognizedVMOptions \ --add-opens=java.base/sun.security.action=ALL-UNNAMED \ --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \ --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \ - -Djdk.reflect.useDirectMethodHandle=false \ -Dio.netty.tryReflectionSetAccessible=true \ -Dio.netty.allocator.type=pooled \ -Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE \ From 8ff4c6f822474a23dbd4dcf91eb38af9620df6d1 Mon Sep 17 00:00:00 2001 From: pan3793 Date: Sun, 15 Mar 2026 18:27:52 +0000 Subject: [PATCH 2/4] Benchmark results for org.apache.spark.sql.execution.datasources.json.JsonBenchmark (JDK 21, Scala 2.13, split 1 of 1) --- .../JsonBenchmark-jdk21-results.txt | 102 +++++++++--------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt b/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt index 1972518ef5182..2d49ed10a6edd 100644 --- a/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt @@ -7,124 +7,124 @@ OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor JSON schema inferring: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 2414 2427 11 2.1 482.8 1.0X -UTF-8 is set 4866 4910 39 1.0 973.2 0.5X +No encoding 2505 2612 96 2.0 501.0 1.0X +UTF-8 is set 5362 5380 16 0.9 1072.3 0.5X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor count a short column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 2224 2247 31 2.2 444.8 1.0X -UTF-8 is set 4653 4660 8 1.1 930.5 0.5X +No encoding 1952 1958 5 2.6 390.4 1.0X +UTF-8 is set 4416 4420 5 1.1 883.3 0.4X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor count a wide column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 4649 4735 95 0.2 4648.6 1.0X -UTF-8 is set 4502 4511 14 0.2 4501.5 1.0X +No encoding 4546 4551 5 0.2 4545.6 1.0X +UTF-8 is set 4369 4375 10 0.2 4369.2 1.0X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor select wide row: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 10646 10693 56 0.0 212923.7 1.0X -UTF-8 is set 11982 12025 38 0.0 239644.0 0.9X +No encoding 10036 10149 153 0.0 200713.2 1.0X +UTF-8 is set 10794 10832 34 0.0 215870.3 0.9X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor Select a subset of 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns 1900 1907 7 0.5 1899.8 1.0X -Select 1 column 1224 1233 8 0.8 1224.0 1.6X +Select 10 columns 1664 1665 1 0.6 1663.8 1.0X +Select 1 column 1123 1125 3 0.9 1123.1 1.5X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor creation of JSON parser per line: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Short column without encoding 667 670 3 1.5 666.8 1.0X -Short column with UTF-8 1180 1184 6 0.8 1180.1 0.6X -Wide column without encoding 5505 5523 20 0.2 5504.9 0.1X -Wide column with UTF-8 9119 9135 19 0.1 9118.6 0.1X +Short column without encoding 583 586 3 1.7 583.3 1.0X +Short column with UTF-8 1118 1124 7 0.9 1118.2 0.5X +Wide column without encoding 5316 5335 18 0.2 5316.3 0.1X +Wide column with UTF-8 8905 8913 7 0.1 8904.8 0.1X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor JSON functions: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 60 65 4 16.6 60.4 1.0X -from_json 1186 1192 5 0.8 1186.4 0.1X -json_tuple 1048 1050 2 1.0 1048.5 0.1X -get_json_object wholestage off 1057 1065 7 0.9 1057.3 0.1X -get_json_object wholestage on 1013 1015 3 1.0 1013.1 0.1X +Text read 68 70 2 14.6 68.3 1.0X +from_json 1085 1089 5 0.9 1084.6 0.1X +json_tuple 1060 1061 1 0.9 1059.6 0.1X +get_json_object wholestage off 1070 1071 2 0.9 1070.1 0.1X +get_json_object wholestage on 990 994 4 1.0 989.6 0.1X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor Dataset of json strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 234 239 8 21.4 46.8 1.0X -schema inferring 1981 1986 5 2.5 396.1 0.1X -parsing 2912 2920 7 1.7 582.4 0.1X +Text read 240 241 1 20.8 48.1 1.0X +schema inferring 1831 1836 4 2.7 366.2 0.1X +parsing 2510 2518 11 2.0 502.1 0.1X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor Json files in the per-line mode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 575 585 17 8.7 115.0 1.0X -Schema inferring 2448 2450 3 2.0 489.5 0.2X -Parsing without charset 3051 3055 7 1.6 610.1 0.2X -Parsing with UTF-8 5749 5755 8 0.9 1149.7 0.1X +Text read 618 623 5 8.1 123.5 1.0X +Schema inferring 2382 2384 2 2.1 476.5 0.3X +Parsing without charset 2665 2670 5 1.9 533.0 0.2X +Parsing with UTF-8 5166 5180 12 1.0 1033.2 0.1X OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 102 108 4 9.8 102.4 1.0X -to_json(timestamp) 543 545 2 1.8 543.3 0.2X -write timestamps to files 579 584 5 1.7 579.2 0.2X -Create a dataset of dates 114 124 10 8.7 114.5 0.9X -to_json(date) 419 420 1 2.4 418.7 0.2X -write dates to files 413 415 1 2.4 413.2 0.2X +Create a dataset of timestamps 105 108 3 9.5 105.5 1.0X +to_json(timestamp) 555 557 2 1.8 555.1 0.2X +write timestamps to files 618 626 12 1.6 617.8 0.2X +Create a dataset of dates 112 117 4 8.9 112.2 0.9X +to_json(date) 420 420 0 2.4 420.3 0.3X +write dates to files 401 403 2 2.5 401.4 0.3X OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 149 154 8 6.7 149.1 1.0X -read timestamps from files 1100 1110 11 0.9 1099.5 0.1X -infer timestamps from files 2051 2059 12 0.5 2051.0 0.1X -read date text from files 139 141 2 7.2 139.3 1.1X -read date from files 743 747 6 1.3 743.1 0.2X -timestamp strings 142 145 3 7.1 141.7 1.1X -parse timestamps from Dataset[String] 1346 1349 4 0.7 1345.9 0.1X -infer timestamps from Dataset[String] 2268 2269 3 0.4 2267.5 0.1X -date strings 191 192 1 5.2 191.1 0.8X -parse dates from Dataset[String] 995 997 2 1.0 995.5 0.1X -from_json(timestamp) 1807 1809 2 0.6 1807.4 0.1X -from_json(date) 1469 1483 15 0.7 1469.2 0.1X -infer error timestamps from Dataset[String] with default format 1426 1429 3 0.7 1425.8 0.1X -infer error timestamps from Dataset[String] with user-provided format 1411 1417 7 0.7 1411.4 0.1X -infer error timestamps from Dataset[String] with legacy format 1431 1433 2 0.7 1430.7 0.1X +read timestamp text from files 149 153 7 6.7 148.5 1.0X +read timestamps from files 1052 1059 7 1.0 1051.7 0.1X +infer timestamps from files 1976 1980 4 0.5 1976.4 0.1X +read date text from files 154 157 3 6.5 153.5 1.0X +read date from files 647 655 6 1.5 647.4 0.2X +timestamp strings 146 147 0 6.8 146.5 1.0X +parse timestamps from Dataset[String] 1226 1228 2 0.8 1226.2 0.1X +infer timestamps from Dataset[String] 2150 2159 12 0.5 2149.5 0.1X +date strings 198 200 2 5.0 198.3 0.7X +parse dates from Dataset[String] 920 923 3 1.1 920.4 0.2X +from_json(timestamp) 1749 1751 4 0.6 1748.6 0.1X +from_json(date) 1464 1468 4 0.7 1464.3 0.1X +infer error timestamps from Dataset[String] with default format 1358 1362 3 0.7 1358.0 0.1X +infer error timestamps from Dataset[String] with user-provided format 1357 1360 3 0.7 1356.8 0.1X +infer error timestamps from Dataset[String] with legacy format 1384 1389 5 0.7 1383.5 0.1X OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 6121 6125 4 0.0 61205.5 1.0X -pushdown disabled 5970 5973 5 0.0 59698.6 1.0X -w/ filters 689 701 10 0.1 6894.7 8.9X +w/o filters 6285 6289 4 0.0 62852.4 1.0X +pushdown disabled 6190 6193 5 0.0 61896.8 1.0X +w/ filters 599 602 6 0.2 5989.5 10.5X OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure AMD EPYC 7763 64-Core Processor Partial JSON results: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -parse invalid JSON 2494 2510 14 0.0 249367.6 1.0X +parse invalid JSON 2393 2422 43 0.0 239282.1 1.0X From b97ac28720d6c2b06f98181700e8c54c6d97fef5 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 16 Mar 2026 04:17:45 +0800 Subject: [PATCH 3/4] trigger java 21 GHA --- .github/workflows/build_main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 9ef52f326375b..75f48ba0f6d89 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -30,3 +30,5 @@ jobs: packages: write name: Run uses: ./.github/workflows/build_and_test.yml + with: + java: 21 \ No newline at end of file From 42971b748b63630b328e4d82a3b11c4f8659b6bf Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 16 Mar 2026 08:20:47 +0800 Subject: [PATCH 4/4] Revert "trigger java 21 GHA" This reverts commit b97ac28720d6c2b06f98181700e8c54c6d97fef5. --- .github/workflows/build_main.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 75f48ba0f6d89..9ef52f326375b 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -30,5 +30,3 @@ jobs: packages: write name: Run uses: ./.github/workflows/build_and_test.yml - with: - java: 21 \ No newline at end of file