Skip to content

Commit

Permalink
[SPARK-44125][R] Support Java 21 in SparkR
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

This PR aims to support Java 21 in SparkR. The Arrow-related issues will be fixed when we upgrade the Arrow library. Also, the following JIRA is created to re-enable them even in Java 21.
- SPARK-44127 Reenable `test_sparkSQL_arrow.R` in Java 21

### Why are the changes needed?

To be ready for Java 21.

### Does this PR introduce _any_ user-facing change?

No, this is additional support.

### How was this patch tested?

Pass the CIs and do manual tests.

```
$ java -version
openjdk version "21-ea" 2023-09-19
OpenJDK Runtime Environment (build 21-ea+27-2343)
OpenJDK 64-Bit Server VM (build 21-ea+27-2343, mixed mode, sharing)

$ build/sbt test:package -Psparkr -Phive

$ R/install-dev.sh; R/run-tests.sh
...
══ Skipped ═════════════════════════════════════════════════════════════════════
1. createDataFrame/collect Arrow optimization ('test_sparkSQL_arrow.R:29:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

2. createDataFrame/collect Arrow optimization - many partitions (partition order test) ('test_sparkSQL_arrow.R:47:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

3. createDataFrame/collect Arrow optimization - type specification ('test_sparkSQL_arrow.R:54:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

4. dapply() Arrow optimization ('test_sparkSQL_arrow.R:79:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

5. dapply() Arrow optimization - type specification ('test_sparkSQL_arrow.R:114:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

6. dapply() Arrow optimization - type specification (date and timestamp) ('test_sparkSQL_arrow.R:144:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

7. gapply() Arrow optimization ('test_sparkSQL_arrow.R:154:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

8. gapply() Arrow optimization - type specification ('test_sparkSQL_arrow.R:198:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

9. gapply() Arrow optimization - type specification (date and timestamp) ('test_sparkSQL_arrow.R:231:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

10. Arrow optimization - unsupported types ('test_sparkSQL_arrow.R:243:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

11. SPARK-32478: gapply() Arrow optimization - error message for schema mismatch ('test_sparkSQL_arrow.R:255:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

12. SPARK-43789: Automatically pick the number of partitions based on Arrow batch size ('test_sparkSQL_arrow.R:265:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE

13. sparkJars tag in SparkContext ('test_Windows.R:22:5') - Reason: This test is only for Windows, skipped

══ DONE ════════════════════════════════════════════════════════════════════════
...
* DONE

Status: 2 NOTEs
See
  ‘/Users/dongjoon/APACHE/spark-merge/R/SparkR.Rcheck/00check.log’
for details.

+ popd
Tests passed.
```

Closes #41680 from dongjoon-hyun/SPARK-44125.

Authored-by: Dongjoon Hyun <dongjoon@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
  • Loading branch information
dongjoon-hyun authored and pull[bot] committed Apr 30, 2024
1 parent 37b67ae commit 7556aa1
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
6 changes: 4 additions & 2 deletions R/pkg/R/client.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,10 @@ checkJavaVersion <- function() {
}, javaVersionOut)

javaVersionStr <- strsplit(javaVersionFilter[[1]], '"', fixed = TRUE)[[1L]][2]
# javaVersionStr is of the form 1.8.0_92/9.0.x/11.0.x.
# We are using 8, 9, 10, 11 for sparkJavaVersion.
# javaVersionStr is of the form 1.8.0_92/11.0.x/17.0.x/21-ea/21
# We are using 8, 11, 17, and 21 for sparkJavaVersion.
javaVersionStr <- strsplit(javaVersionStr, "-ea", fixed = TRUE)[[1L]]

versions <- strsplit(javaVersionStr, ".", fixed = TRUE)[[1L]]
if ("1" == versions[1]) {
javaVersionNum <- as.integer(versions[2])
Expand Down
24 changes: 24 additions & 0 deletions R/pkg/tests/fulltests/test_sparkSQL_arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ sparkSession <- sparkR.session(

test_that("createDataFrame/collect Arrow optimization", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))

conf <- callJMethod(sparkSession, "conf")
arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.sparkr.enabled")[[1]]
Expand All @@ -43,12 +45,16 @@ test_that("createDataFrame/collect Arrow optimization", {

test_that("createDataFrame/collect Arrow optimization - many partitions (partition order test)", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
expect_equal(collect(createDataFrame(mtcars, numPartitions = 32)),
collect(createDataFrame(mtcars, numPartitions = 1)))
})

test_that("createDataFrame/collect Arrow optimization - type specification", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
rdf <- data.frame(list(list(a = 1,
b = "a",
c = TRUE,
Expand All @@ -73,6 +79,8 @@ test_that("createDataFrame/collect Arrow optimization - type specification", {

test_that("dapply() Arrow optimization", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
df <- createDataFrame(mtcars)

conf <- callJMethod(sparkSession, "conf")
Expand Down Expand Up @@ -107,6 +115,8 @@ test_that("dapply() Arrow optimization", {

test_that("dapply() Arrow optimization - type specification", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
# Note that regular dapply() seems not supporting date and timestamps
# whereas Arrow-optimized dapply() does.
rdf <- data.frame(list(list(a = 1,
Expand Down Expand Up @@ -136,6 +146,8 @@ test_that("dapply() Arrow optimization - type specification", {

test_that("dapply() Arrow optimization - type specification (date and timestamp)", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
rdf <- data.frame(list(list(a = as.Date("1990-02-24"),
b = as.POSIXct("1990-02-24 12:34:56"))))
df <- createDataFrame(rdf)
Expand All @@ -145,6 +157,8 @@ test_that("dapply() Arrow optimization - type specification (date and timestamp)

test_that("gapply() Arrow optimization", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
df <- createDataFrame(mtcars)

conf <- callJMethod(sparkSession, "conf")
Expand Down Expand Up @@ -188,6 +202,8 @@ test_that("gapply() Arrow optimization", {

test_that("gapply() Arrow optimization - type specification", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
# Note that regular gapply() seems not supporting date and timestamps
# whereas Arrow-optimized gapply() does.
rdf <- data.frame(list(list(a = 1,
Expand Down Expand Up @@ -220,6 +236,8 @@ test_that("gapply() Arrow optimization - type specification", {

test_that("gapply() Arrow optimization - type specification (date and timestamp)", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
rdf <- data.frame(list(list(a = as.Date("1990-02-24"),
b = as.POSIXct("1990-02-24 12:34:56"))))
df <- createDataFrame(rdf)
Expand All @@ -231,6 +249,8 @@ test_that("gapply() Arrow optimization - type specification (date and timestamp)

test_that("Arrow optimization - unsupported types", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))

expect_error(checkSchemaInArrow(structType("a FLOAT")), "not support float type")
expect_error(checkSchemaInArrow(structType("a BINARY")), "not support binary type")
Expand All @@ -242,6 +262,8 @@ test_that("Arrow optimization - unsupported types", {

test_that("SPARK-32478: gapply() Arrow optimization - error message for schema mismatch", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
df <- createDataFrame(list(list(a = 1L, b = "a")))

expect_error(
Expand All @@ -251,6 +273,8 @@ test_that("SPARK-32478: gapply() Arrow optimization - error message for schema m

test_that("SPARK-43789: Automatically pick the number of partitions based on Arrow batch size", {
skip_if_not_installed("arrow")
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))

conf <- callJMethod(sparkSession, "conf")
maxRecordsPerBatch <- sparkR.conf("spark.sql.execution.arrow.maxRecordsPerBatch")[[1]]
Expand Down

0 comments on commit 7556aa1

Please sign in to comment.