Skip to content

Commit

Permalink
[SPARK-24197][SPARKR][SQL] Adding array_sort function to SparkR
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

This PR adds the array_sort function to SparkR.

## How was this patch tested?

Tests added into R/pkg/tests/fulltests/test_sparkSQL.R

## Example
```
> df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))
> head(collect(select(df, array_sort(df[[1]]))))
```
Result:
```
   array_sort(_1)
1     1, 2, 3, NA
2 4, 5, 6, NA, NA
```

Author: Marek Novotny <mn.mikke@gmail.com>

Closes #21294 from mn-mikke/SPARK-24197.
  • Loading branch information
mn-mikke authored and HyukjinKwon committed May 11, 2018
1 parent a4206d5 commit 75cf369
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 7 deletions.
1 change: 1 addition & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ exportMethods("%<=>%",
"array_max",
"array_min",
"array_position",
"array_sort",
"asc",
"ascii",
"asin",
Expand Down
21 changes: 18 additions & 3 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ NULL
#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
#' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
#' head(select(tmp, array_position(tmp$v1, 21)))
#' head(select(tmp, array_position(tmp$v1, 21), array_sort(tmp$v1)))
#' head(select(tmp, flatten(tmp$v1)))
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
#' head(tmp2)
Expand Down Expand Up @@ -3043,6 +3043,20 @@ setMethod("array_position",
column(jc)
})

#' @details
#' \code{array_sort}: Sorts the input array in ascending order. The elements of the input array
#' must be orderable. NA elements will be placed at the end of the returned array.
#'
#' @rdname column_collection_functions
#' @aliases array_sort array_sort,Column-method
#' @note array_sort since 2.4.0
setMethod("array_sort",
          signature(x = "Column"),
          function(x) {
            # Invoke the static array_sort on org.apache.spark.sql.functions with
            # this column's underlying jc slot, then hand the result to column().
            column(
              callJStatic("org.apache.spark.sql.functions", "array_sort", x@jc)
            )
          })

#' @details
#' \code{flatten}: Transforms an array of arrays into a single array.
#'
Expand Down Expand Up @@ -3125,8 +3139,9 @@ setMethod("size",
})

#' @details
#' \code{sort_array}: Sorts the input array in ascending or descending order according
#' to the natural ordering of the array elements.
#' \code{sort_array}: Sorts the input array in ascending or descending order according to
#' the natural ordering of the array elements. NA elements will be placed at the beginning of
#' the returned array in ascending order or at the end of the returned array in descending order.
#'
#' @rdname column_collection_functions
#' @param asc a logical flag indicating the sorting order.
Expand Down
4 changes: 4 additions & 0 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,10 @@ setGeneric("array_min", function(x) { standardGeneric("array_min") })
#' @name NULL
setGeneric("array_position", function(x, value) { standardGeneric("array_position") })

# S4 generic for array_sort; takes a single argument x and dispatches via standardGeneric.
#' @rdname column_collection_functions
#' @name NULL
setGeneric("array_sort", function(x) { standardGeneric("array_sort") })

#' @rdname column_string_functions
#' @name NULL
setGeneric("ascii", function(x) { standardGeneric("ascii") })
Expand Down
13 changes: 9 additions & 4 deletions R/pkg/tests/fulltests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -1479,8 +1479,7 @@ test_that("column functions", {
df5 <- createDataFrame(list(list(a = "010101")))
expect_equal(collect(select(df5, conv(df5$a, 2, 16)))[1, 1], "15")

# Test array_contains(), array_max(), array_min(), array_position(), element_at()
# and sort_array()
# Test array_contains(), array_max(), array_min(), array_position() and element_at()
df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
expect_equal(result, c(TRUE, FALSE))
Expand All @@ -1497,10 +1496,16 @@ test_that("column functions", {
result <- collect(select(df, element_at(df[[1]], 1L)))[[1]]
expect_equal(result, c(1, 6))

# Test array_sort() and sort_array()
df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))

result <- collect(select(df, array_sort(df[[1]])))[[1]]
expect_equal(result, list(list(1L, 2L, 3L, NA), list(4L, 5L, 6L, NA, NA)))

result <- collect(select(df, sort_array(df[[1]], FALSE)))[[1]]
expect_equal(result, list(list(3L, 2L, 1L), list(6L, 5L, 4L)))
expect_equal(result, list(list(3L, 2L, 1L, NA), list(6L, 5L, 4L, NA, NA)))
result <- collect(select(df, sort_array(df[[1]])))[[1]]
expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
expect_equal(result, list(list(NA, 1L, 2L, 3L), list(NA, NA, 4L, 5L, 6L)))

# Test flatten()
df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
Expand Down

0 comments on commit 75cf369

Please sign in to comment.