Skip to content

Commit

Permalink
[SPARK-13389][SPARKR] SparkR support first/last with ignore NAs
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

SparkR support first/last with ignore NAs

cc sun-rui felixcheung shivaram

## How was this patch tested?

unit tests

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #11267 from yanboliang/spark-13389.
  • Loading branch information
yanboliang authored and shivaram committed Mar 11, 2016
1 parent c3a6269 commit 4d535d1
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 10 deletions.
40 changes: 32 additions & 8 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -536,15 +536,27 @@ setMethod("factorial",
#'
#' Aggregate function: returns the first value in a group.
#'
#' The function by default returns the first values it sees. It will return the first non-missing
#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
#'
#' @rdname first
#' @name first
#' @family agg_funcs
#' @export
#' @examples \dontrun{first(df$c)}
#' @examples
#' \dontrun{
#' first(df$c)
#' first(df$c, TRUE)
#' }
setMethod("first",
          signature(x = "characterOrColumn"),
          function(x, na.rm = FALSE) {
            # Accept either a Column object or a column name given as a
            # character string; for a Column we pass its underlying Java
            # column reference (x@jc) to the JVM, otherwise the name itself.
            # inherits() is preferred over class(x) == "Column" for S4/S3
            # class checks.
            col <- if (inherits(x, "Column")) {
              x@jc
            } else {
              x
            }
            # na.rm maps to Spark's ignoreNulls flag on functions.first().
            jc <- callJStatic("org.apache.spark.sql.functions", "first", col, na.rm)
            column(jc)
          })

Expand Down Expand Up @@ -663,15 +675,27 @@ setMethod("kurtosis",
#'
#' Aggregate function: returns the last value in a group.
#'
#' The function by default returns the last values it sees. It will return the last non-missing
#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
#'
#' @rdname last
#' @name last
#' @family agg_funcs
#' @export
#' @examples \dontrun{last(df$c)}
#' @examples
#' \dontrun{
#' last(df$c)
#' last(df$c, TRUE)
#' }
setMethod("last",
          signature(x = "characterOrColumn"),
          function(x, na.rm = FALSE) {
            # Accept either a Column object or a column name given as a
            # character string; for a Column we pass its underlying Java
            # column reference (x@jc) to the JVM, otherwise the name itself.
            # inherits() is preferred over class(x) == "Column" for S4/S3
            # class checks.
            col <- if (inherits(x, "Column")) {
              x@jc
            } else {
              x
            }
            # na.rm maps to Spark's ignoreNulls flag on functions.last().
            jc <- callJStatic("org.apache.spark.sql.functions", "last", col, na.rm)
            column(jc)
          })

Expand Down
4 changes: 2 additions & 2 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })

# @rdname first
# @export
# Variadic so methods can add optional arguments (e.g. na.rm) without
# changing the generic's signature.
setGeneric("first", function(x, ...) { standardGeneric("first") })

# @rdname flatMap
# @export
Expand Down Expand Up @@ -889,7 +889,7 @@ setGeneric("lag", function(x, ...) { standardGeneric("lag") })

#' @rdname last
#' @export
#' @rdname last
#' @export
# Variadic so methods can add optional arguments (e.g. na.rm) without
# changing the generic's signature.
setGeneric("last", function(x, ...) { standardGeneric("last") })

#' @rdname last_day
#' @export
Expand Down
11 changes: 11 additions & 0 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -1076,6 +1076,17 @@ test_that("column functions", {
result <- collect(select(df, encode(df$a, "utf-8"), decode(df$c, "utf-8")))
expect_equal(result[[1]][[1]], bytes)
expect_equal(result[[2]], markUtf8("大千世界"))

# Test first(), last()
# NOTE(review): expected values assume the jsonPath fixture's record order --
# per these expectations, its first record has a missing age and 30 is the
# first non-missing age; confirm against the fixture file.
df <- read.json(sqlContext, jsonPath)
# Default first() takes the leading value even when it is NA ...
expect_equal(collect(select(df, first(df$age)))[[1]], NA)
# ... while na.rm = TRUE skips missing values to the first non-NA one.
expect_equal(collect(select(df, first(df$age, TRUE)))[[1]], 30)
# Same behavior when the column is named by a character string.
expect_equal(collect(select(df, first("age")))[[1]], NA)
expect_equal(collect(select(df, first("age", TRUE)))[[1]], 30)
# last(): the final record's age (19) is non-missing, so na.rm makes
# no difference for either calling convention.
expect_equal(collect(select(df, last(df$age)))[[1]], 19)
expect_equal(collect(select(df, last(df$age, TRUE)))[[1]], 19)
expect_equal(collect(select(df, last("age")))[[1]], 19)
expect_equal(collect(select(df, last("age", TRUE)))[[1]], 19)
})

test_that("column binary mathfunctions", {
Expand Down

0 comments on commit 4d535d1

Please sign in to comment.