Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6fc97e0
Support for collect() on Columns
Feb 23, 2016
fbf9b02
Removed drop=F from other PR
Feb 24, 2016
04e728b
Removed NA default value for df in column() method
Feb 24, 2016
2d9ee18
Added docs to head(Column)
Feb 24, 2016
dc3df19
Still can't recreate build issue. Added default value df=NULL
Feb 24, 2016
1e06d3c
Noticed collate order is automatically changed after running install-…
Feb 24, 2016
d697d44
Still can't recreate build issue. Added minimal test case to debug it
Feb 24, 2016
24b6154
Reverted removing spaces in test cases. This works in my environment,…
Feb 24, 2016
6a38a3c
Added docs for collect(Column)
Feb 24, 2016
bf4df26
Handled case of 'orphan' Columns with no parent DataFrame
Mar 30, 2016
c86bebb
Added tests for orphan Columns
Mar 30, 2016
bf6c456
Merged with the base branch
Mar 30, 2016
e5659ee
Fixed style issues
Mar 30, 2016
97920c6
Merged upstream branch
May 6, 2016
9c1661f
pkg/R/columnR
May 6, 2016
6cbebc1
Merge branch 'master' of https://github.com/apache/spark into SPARK-9325
May 6, 2016
8f906a2
Merged with upstream
Oct 11, 2016
ed0abf2
Removed method collect()
Oct 11, 2016
d2470fa
Removed spark.init() call, fixed docs
Oct 12, 2016
bf739e4
Used a singleton to generate empty DataFrame
Oct 12, 2016
20e53e8
Minor docs change
Oct 12, 2016
266d5ff
Added missing bracket
Oct 12, 2016
257fa86
Added documentation for parameter df of class Column
Oct 12, 2016
0691c32
Fixed docs issues
Oct 13, 2016
445407c
Removed commented code
Oct 13, 2016
1ace2e5
Fixed docs issues. Renamed parameter n as num for method head to have…
Oct 14, 2016
2bfb8a6
Style fixes
Oct 14, 2016
e0bba0a
Fixed docs issues
Oct 14, 2016
b25deb1
Cosmetic changes
Oct 19, 2016
76061ad
More cosmetics
Oct 19, 2016
ed1b382
More cosmetics
Oct 21, 2016
1338d71
Fixed style issues
Oct 21, 2016
c0f1906
Added test for Columns with no parent DataFrame
Oct 25, 2016
777aee3
Moved head documentation to generics.R
Oct 25, 2016
619f23b
Moved documentation for show method to generics.R
Oct 25, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 9 additions & 40 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -223,25 +223,7 @@ setMethod("showDF",
cat(s)
})

#' show
#'
#' Print class and type information of a Spark object.
#'
#' @param object a Spark object. Can be a SparkDataFrame, Column, GroupedData, WindowSpec.
#'
#' @family SparkDataFrame functions
#' @rdname show
#' @aliases show,SparkDataFrame-method
#' @name show
#' @export
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' show(df)
#'}
#' @note show(SparkDataFrame) since 1.4.0
setMethod("show", "SparkDataFrame",
function(object) {
cols <- lapply(dtypes(object), function(l) {
Expand Down Expand Up @@ -1166,28 +1148,7 @@ setMethod("take",
collect(limited)
})

#' Head
#'
#' Return the first \code{num} rows of a SparkDataFrame as a R data.frame. If \code{num} is not
#' specified, then head() returns the first 6 rows as with R data.frame.
#'
#' @param x a SparkDataFrame.
#' @param num the number of rows to return. Default is 6.
#' @return A data.frame.
#'
#' @family SparkDataFrame functions
#' @aliases head,SparkDataFrame-method
#' @rdname head
#' @name head
#' @export
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' head(df)
#' }
#' @note head since 1.4.0
setMethod("head",
signature(x = "SparkDataFrame"),
function(x, num = 6L) {
Expand Down Expand Up @@ -1679,7 +1640,7 @@ setMethod("foreachPartition",
############################## SELECT ##################################

getColumn <- function(x, c) {
column(callJMethod(x@sdf, "col", c))
column(callJMethod(x@sdf, "col", c), x)
}

#' @param name name of a Column (without being wrapped by \code{""}).
Expand Down Expand Up @@ -3321,3 +3282,11 @@ setMethod("randomSplit",
}
sapply(sdfs, dataFrame)
})

# A global singleton for an empty SparkR DataFrame.
#
# Lazily builds a SparkDataFrame from data.frame(0) on first call and caches
# it as .sparkREnv$.emptyDataFrame; every later call returns the cached
# object, so only one JVM-side DataFrame is ever created for this purpose.
# NOTE(review): exists() is called without inherits = FALSE, so it also
# searches enclosing environments of .sparkREnv — presumably the name only
# ever lives in .sparkREnv itself; confirm.
getEmptySparkRDataFrame <- function() {
if (!exists(".emptyDataFrame", envir = .sparkREnv)) {
.sparkREnv$.emptyDataFrame <- as.DataFrame(data.frame(0))
}
.sparkREnv$.emptyDataFrame
}
78 changes: 54 additions & 24 deletions R/pkg/R/column.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
setOldClass("SparkDataFrame")
setClassUnion("SparkDataFrameOrNull", c("SparkDataFrame", "NULL"))

# Column Class

Expand All @@ -29,38 +31,66 @@ setOldClass("jobj")
#' @rdname column
#'
#' @slot jc reference to JVM SparkDataFrame column
#' @slot df the parent SparkDataFrame of the Column object
#' @export
#' @note Column since 1.4.0
setClass("Column",
slots = list(jc = "jobj"))
slots = list(jc = "jobj", df = "SparkDataFrameOrNull"))

#' A set of operations working with SparkDataFrame columns
#' @rdname columnfunctions
#' @name columnfunctions
NULL

setMethod("initialize", "Column", function(.Object, jc) {
setMethod("initialize", "Column", function(.Object, jc, df) {
.Object@jc <- jc

# Some Column objects don't have any referencing DataFrame. In such case, df will be NULL.
if (missing(df)) {
df <- NULL
}
.Object@df <- df
.Object
})

#' @rdname show
#' @aliases show,Column-method
#' @note show(Column) since 1.4.0
#
# Print up to MAX_ELEMENTS values of a Column by fetching them through
# head(). An "orphan" Column (no parent SparkDataFrame) yields length-0
# head() output; in that case only the column's JVM toString() name and an
# "<Empty column>" marker are printed. When exactly MAX_ELEMENTS values come
# back, a trailing note warns the display may be truncated.
setMethod("show", signature = "Column", function(object) {
  MAX_ELEMENTS <- 20
  head.df <- head(object, MAX_ELEMENTS)

  if (length(head.df) == 0) {
    colname <- callJMethod(object@jc, "toString")
    cat(paste0(colname, "\n"))
    cat(paste0("<Empty column>\n"))
  } else {
    show(head.df)
  }
  if (length(head.df) == MAX_ELEMENTS) {
    cat(paste0("\b...\nDisplaying up to ", as.character(MAX_ELEMENTS), " elements only."))
  }
})

#' @rdname head
#' @param x a Column.
#' @param num the number of values to return. Default is 6.
#' @return A vector of up to \code{num} values of the column, or
#'   \code{character(0)} when the Column has no parent SparkDataFrame.
setMethod("head",
signature(x = "Column"),
function(x, num = 6L) {
# An "orphan" Column carries no parent SparkDataFrame, so there is no
# data to retrieve; return an empty vector instead of failing.
if (is.null(x@df)) {
character(0)
} else {
# Materialize the column via its parent DataFrame, then drop the
# single-column data.frame down to a plain vector with [, 1].
head(select(x@df, x), num)[, 1]
}
})

#' @rdname column
#' @name column
#' @param df the parent SparkDataFrame. This is used to retrieve the contents of the column through method head.
#' @aliases column,jobj-method
setMethod("column",
signature(x = "jobj"),
function(x) {
new("Column", x)
})

#' @rdname show
#' @name show
#' @aliases show,Column-method
#' @export
#' @note show(Column) since 1.4.0
setMethod("show", "Column",
function(object) {
cat("Column", callJMethod(object@jc, "toString"), "\n")
function(x, df) {
if (missing(df)) {
df <- NULL
}
new("Column", jc = x, df = df)
})

operators <- list(
Expand Down Expand Up @@ -93,15 +123,15 @@ createOperator <- function(op) {
callJMethod(e1@jc, operators[[op]], e2)
}
}
column(jc)
column(jc, e1@df)
})
}

createColumnFunction1 <- function(name) {
setMethod(name,
signature(x = "Column"),
function(x) {
column(callJMethod(x@jc, name))
column(callJMethod(x@jc, name), x@df)
})
}

Expand All @@ -113,7 +143,7 @@ createColumnFunction2 <- function(name) {
data <- data@jc
}
jc <- callJMethod(x@jc, name, data)
column(jc)
column(jc, x@df)
})
}

Expand Down Expand Up @@ -148,7 +178,7 @@ setMethod("alias",
signature(object = "Column"),
function(object, data) {
if (is.character(data)) {
column(callJMethod(object@jc, "as", data))
column(callJMethod(object@jc, "as", data), object@df)
} else {
stop("data should be character")
}
Expand All @@ -170,7 +200,7 @@ setMethod("alias",
setMethod("substr", signature(x = "Column"),
function(x, start, stop) {
jc <- callJMethod(x@jc, "substr", as.integer(start - 1), as.integer(stop - start + 1))
column(jc)
column(jc, x@df)
})

#' startsWith
Expand Down Expand Up @@ -227,7 +257,7 @@ setMethod("between", signature(x = "Column"),
function(x, bounds) {
if (is.vector(bounds) && length(bounds) == 2) {
jc <- callJMethod(x@jc, "between", bounds[1], bounds[2])
column(jc)
column(jc, x@df)
} else {
stop("bounds should be a vector of lower and upper bounds")
}
Expand All @@ -253,7 +283,7 @@ setMethod("cast",
signature(x = "Column"),
function(x, dataType) {
if (is.character(dataType)) {
column(callJMethod(x@jc, "cast", dataType))
column(callJMethod(x@jc, "cast", dataType), x@df)
} else {
stop("dataType should be character")
}
Expand All @@ -278,7 +308,7 @@ setMethod("%in%",
signature(x = "Column"),
function(x, table) {
jc <- callJMethod(x@jc, "isin", as.list(table))
return(column(jc))
column(jc, x@df)
})

#' otherwise
Expand All @@ -300,5 +330,5 @@ setMethod("otherwise",
function(x, value) {
value <- if (class(value) == "Column") { value@jc } else { value }
jc <- callJMethod(x@jc, "otherwise", value)
column(jc)
column(jc, x@df)
})
Loading