-
Notifications
You must be signed in to change notification settings - Fork 28.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-9324][SPARK-9322][SPARK-9321][SPARKR] Some aliases for R-like functions in DataFrames #7764
Changes from 2 commits
b5aa988
d9307f8
c1b88bd
f51cbef
5e4a4d0
56016f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -255,6 +255,16 @@ setMethod("names", | |
columns(x) | ||
}) | ||
|
||
#' @rdname columns
setMethod("names<-",
          signature(x = "DataFrame"),
          function(x, value) {
            # A replacement function must return the modified object. The
            # original silently returned NULL when value was NULL, so
            # `names(df) <- NULL` would overwrite df with NULL; fail loudly
            # instead.
            if (is.null(value)) {
              stop("Cannot set the column names of a DataFrame to NULL")
            }
            # Rename columns by rebuilding the Spark-side DataFrame via toDF.
            sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
            dataFrame(sdf)
          })
|
||
#' Register Temporary Table | ||
#' | ||
#' Registers a DataFrame as a Temporary Table in the SQLContext | ||
|
@@ -473,6 +483,14 @@ setMethod("distinct", | |
dataFrame(sdf) | ||
}) | ||
|
||
#' @rdname unique
#' @aliases unique
setMethod("unique",
          signature(x = "DataFrame"),
          # R-style alias: unique() on a DataFrame is Spark's distinct().
          function(x) distinct(x))
|
||
#' Sample | ||
#' | ||
#' Return a sampled subset of this DataFrame using a random seed. | ||
|
@@ -534,6 +552,53 @@ setMethod("count", | |
callJMethod(x@sdf, "count") | ||
}) | ||
|
||
#' @rdname nrow
#' @aliases count
setMethod("nrow",
          signature(x = "DataFrame"),
          # R-style alias: nrow() on a DataFrame delegates to count().
          function(x) count(x))
|
||
#' Returns the number of columns in a DataFrame
#'
#' @param x a SparkSQL DataFrame
#'
#' @rdname ncol
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlContext, path)
#' ncol(df)
#' }
setMethod("ncol",
          signature(x = "DataFrame"),
          # The column count is the length of the column-name vector.
          function(x) length(columns(x)))
|
||
#' Returns the dimensions (number of rows and columns) of a DataFrame
#' @param x a SparkSQL DataFrame
#'
#' @rdname dim
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlContext, path)
#' dim(df)
#' }
setMethod("dim",
          signature(x = "DataFrame"),
          function(x) {
            # c(rows, cols), matching base::dim() on a data.frame. Note the
            # row count triggers a Spark job via count().
            c(count(x), ncol(x))
          })
|
||
#' Collects all the elements of a Spark DataFrame and coerces them into an R data.frame. | ||
#' | ||
#' @param x A SparkSQL DataFrame | ||
|
@@ -1231,6 +1296,24 @@ setMethod("unionAll", | |
dataFrame(unioned) | ||
}) | ||
|
||
# Dispatch rbind on all of `...` so any mix of DataFrames matches the method.
# TODO(review): move this generic declaration to generics.R with the others.
setGeneric("rbind", signature = "...")
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's move this to generics.R? |
||
|
||
#' Row-bind an arbitrary number of DataFrames
#'
#' Internal worker for the rbind method: folds the DataFrames in `...` into
#' repeated unionAll() calls. `deparse.level` is accepted only for signature
#' compatibility with base::rbind and is otherwise ignored.
rbind.SparkDataFrames <- function(x, ..., deparse.level = 1) {
  # NOTE(review): the base case relies on nargs() == 3 meaning "exactly two
  # DataFrames plus deparse.level" -- confirm the generic always supplies
  # deparse.level when dispatching. (Removed the unused `allargs` local.)
  if (nargs() == 3) {
    unionAll(x, ...)
  } else {
    # Recursively rbind the remaining DataFrames, then union with x.
    unionAll(x, Recall(..., deparse.level = 1))
  }
}
|
||
#' @rdname rbind
#' @aliases unionAll
# Register the worker as the rbind method when every argument in `...`
# is a DataFrame.
setMethod("rbind",
          signature(... = "DataFrame"),
          rbind.SparkDataFrames)
|
||
#' Intersect | ||
#' | ||
#' Return a new DataFrame containing rows only in both this DataFrame | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
typo:
number of