From c63e37ee64581abe5d3c639508627233acb2fd70 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 3 Jun 2022 10:25:40 -0700 Subject: [PATCH] [SPARK-39372][R] Support R 4.2.0 ### What changes were proposed in this pull request? This PR proposes: - Updates AppVeyor to use the latest R version 4.2.0. - Uses the correct way of checking if an object is a matrix: `is.matrix`. After R 4.2.0, `class(upperBoundsOnCoefficients) != "matrix")` fails: ``` -- 1. Error (test_mllib_classification.R:245:3): spark.logit ------------------- Error in `if (class(upperBoundsOnCoefficients) != "matrix") { stop("upperBoundsOnCoefficients must be a matrix.") }`: the condition has length > 1 ``` This fixes `spark.logit` when `lowerBoundsOnCoefficients` or `upperBoundsOnCoefficients` is specified. - Explicitly use the first element in `is.na` comparison. From R 4.2.0, it throws an exception as below: ``` Error in if (is.na(c(1, 2))) print("abc") : the condition has length > 1 ``` Previously it was a warning. This fixes `createDataFrame` or `as.DataFrame` when the data type is a nested complex type. ### Why are the changes needed? To support/test the latest R. R community tends to use the latest versions aggressively. ### Does this PR introduce _any_ user-facing change? Yes, after this PR, we officially support R 4.2.0 in SparkR. ### How was this patch tested? CI in this PR should test it out. Closes #36758 from HyukjinKwon/upgrade-r-appveyor. 
Lead-authored-by: Hyukjin Kwon Co-authored-by: Hyukjin Kwon Signed-off-by: Dongjoon Hyun --- R/pkg/R/mllib_classification.R | 4 ++-- R/pkg/R/serialize.R | 7 ++++++- dev/appveyor-install-dependencies.ps1 | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R index 093467ecf7d28..7204f8bb7dff4 100644 --- a/R/pkg/R/mllib_classification.R +++ b/R/pkg/R/mllib_classification.R @@ -322,7 +322,7 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula") } if (!is.null(lowerBoundsOnCoefficients)) { - if (class(lowerBoundsOnCoefficients) != "matrix") { + if (!is.matrix(lowerBoundsOnCoefficients)) { stop("lowerBoundsOnCoefficients must be a matrix.") } row <- nrow(lowerBoundsOnCoefficients) @@ -331,7 +331,7 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula") } if (!is.null(upperBoundsOnCoefficients)) { - if (class(upperBoundsOnCoefficients) != "matrix") { + if (!is.matrix(upperBoundsOnCoefficients)) { stop("upperBoundsOnCoefficients must be a matrix.") } diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R index 7760d9be16f0b..85c318f30c338 100644 --- a/R/pkg/R/serialize.R +++ b/R/pkg/R/serialize.R @@ -58,7 +58,12 @@ writeObject <- function(con, object, writeType = TRUE) { # Checking types is needed here, since 'is.na' only handles atomic vectors, # lists and pairlists if (type %in% c("integer", "character", "logical", "double", "numeric")) { - if (is.na(object)) { + if (is.na(object[[1]])) { # Uses the first element for now to keep the behavior same as R before # 4.2.0. This is wrong because we should differentiate c(NA) from a # single NA as the former means array(null) and the latter means null # in Spark SQL. However, it requires non-trivial comparison to distinguish # both in R. We should ideally fix this. 
object <- NULL type <- "NULL" } diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index d469c98fdb3a2..19b49b90b3859 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -129,7 +129,7 @@ $env:PATH = "$env:HADOOP_HOME\bin;" + $env:PATH Pop-Location # ========================== R -$rVer = "4.0.2" +$rVer = "4.2.0" $rToolsVer = "4.0.2" InstallR