Skip to content

Commit

Permalink
Merge in master (again)
Browse files Browse the repository at this point in the history
  • Loading branch information
holdenk committed Oct 1, 2015
2 parents 8ca0fa9 + 02026a8 commit 6f66f2c
Show file tree
Hide file tree
Showing 783 changed files with 24,096 additions and 11,682 deletions.
12 changes: 0 additions & 12 deletions .rat-excludes
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,8 @@ TAGS
RELEASE
control
docs
docker.properties.template
fairscheduler.xml.template
spark-defaults.conf.template
log4j.properties
log4j.properties.template
metrics.properties
metrics.properties.template
slaves
slaves.template
spark-env.sh
spark-env.cmd
spark-env.sh.template
log4j-defaults.properties
log4j-defaults-repl.properties
bootstrap-tooltip.js
jquery-1.11.1.min.js
d3.min.js
Expand Down
699 changes: 20 additions & 679 deletions LICENSE

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -572,3 +572,38 @@ Copyright 2009-2013 The Apache Software Foundation

Apache Avro IPC
Copyright 2009-2013 The Apache Software Foundation


Vis.js
Copyright 2010-2015 Almende B.V.

Vis.js is dual licensed under both

* The Apache 2.0 License
http://www.apache.org/licenses/LICENSE-2.0

and

* The MIT License
http://opensource.org/licenses/MIT

Vis.js may be distributed under either license.


Vis.js uses and redistributes the following third-party libraries:

- component-emitter
https://github.com/component/emitter
The MIT License

- hammer.js
http://hammerjs.github.io/
The MIT License

- moment.js
http://momentjs.com/
The MIT License

- keycharm
https://github.com/AlexDM0/keycharm
The MIT License
2 changes: 1 addition & 1 deletion R/pkg/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SparkR
Type: Package
Title: R frontend for Spark
Version: 1.5.0
Version: 1.6.0
Date: 2013-09-09
Author: The Apache Software Foundation
Maintainer: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
Expand Down
4 changes: 4 additions & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,11 @@ exportMethods("arrange",
"selectExpr",
"show",
"showDF",
"subset",
"summarize",
"summary",
"take",
"transform",
"unionAll",
"unique",
"unpersist",
Expand Down Expand Up @@ -245,3 +247,5 @@ export("structField",
"structType.jobj",
"structType.structField",
"print.structType")

export("as.data.frame")
121 changes: 93 additions & 28 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ setMethod("names<-",
signature(x = "DataFrame"),
function(x, value) {
if (!is.null(value)) {
sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
sdf <- callJMethod(x@sdf, "toDF", as.list(value))
dataFrame(sdf)
}
})
Expand Down Expand Up @@ -843,10 +843,10 @@ setMethod("groupBy",
function(x, ...) {
cols <- list(...)
if (length(cols) >= 1 && class(cols[[1]]) == "character") {
sgd <- callJMethod(x@sdf, "groupBy", cols[[1]], listToSeq(cols[-1]))
sgd <- callJMethod(x@sdf, "groupBy", cols[[1]], cols[-1])
} else {
jcol <- lapply(cols, function(c) { c@jc })
sgd <- callJMethod(x@sdf, "groupBy", listToSeq(jcol))
sgd <- callJMethod(x@sdf, "groupBy", jcol)
}
groupedData(sgd)
})
Expand Down Expand Up @@ -987,7 +987,7 @@ setMethod("$<-", signature(x = "DataFrame"),

setClassUnion("numericOrcharacter", c("numeric", "character"))

#' @rdname select
#' @rdname subset
#' @name [[
setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
function(x, i) {
Expand All @@ -998,7 +998,7 @@ setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
getColumn(x, i)
})

#' @rdname select
#' @rdname subset
#' @name [
setMethod("[", signature(x = "DataFrame", i = "missing"),
function(x, i, j, ...) {
Expand All @@ -1012,20 +1012,51 @@ setMethod("[", signature(x = "DataFrame", i = "missing"),
select(x, j)
})

#' @rdname select
#' @rdname subset
#' @name [
setMethod("[", signature(x = "DataFrame", i = "Column"),
function(x, i, j, ...) {
# It could handle i as "character" but it seems confusing and not required
# https://stat.ethz.ch/R-manual/R-devel/library/base/html/Extract.data.frame.html
filtered <- filter(x, i)
if (!missing(j)) {
filtered[, j]
filtered[, j, ...]
} else {
filtered
}
})

#' Subset
#'
#' Return the rows of a DataFrame that meet a condition, optionally restricted
#' to a selection of columns.
#'
#' @param x A DataFrame
#' @param subset A logical expression to filter on rows
#' @param select expression for the single Column or a list of columns to select from the DataFrame
#' @param ... further arguments, forwarded unchanged to the `[` method
#' @return A new DataFrame containing only the rows that meet the condition with selected columns
#' @export
#' @rdname subset
#' @name subset
#' @aliases [
#' @family subsetting functions
#' @examples
#' \dontrun{
#'   # Columns can be selected using `[[` and `[`
#'   df[[2]] == df[["age"]]
#'   df[,2] == df[,"age"]
#'   df[,c("name", "age")]
#'   # Or to filter rows
#'   df[df$age > 20,]
#'   # DataFrame can be subset on both rows and Columns
#'   df[df$name == "Smith", c(1,2)]
#'   df[df$age %in% c(19, 30), 1:2]
#'   subset(df, df$age %in% c(19, 30), 1:2)
#'   subset(df, df$age %in% c(19), select = c(1,2))
#' }
setMethod("subset",
          signature(x = "DataFrame"),
          function(x, subset, select, ...) {
            # Delegate to the `[` method: `subset` is passed as the row
            # index (i) and `select` as the column index (j).
            result <- x[subset, select, ...]
            result
          })

#' Select
#'
#' Selects a set of columns with names or Column expressions.
Expand All @@ -1034,26 +1065,21 @@ setMethod("[", signature(x = "DataFrame", i = "Column"),
#' @return A new DataFrame with selected columns
#' @export
#' @rdname select
#' @name select
#' @family subsetting functions
#' @examples
#' \dontrun{
#' select(df, "*")
#' select(df, "col1", "col2")
#' select(df, df$name, df$age + 1)
#' select(df, c("col1", "col2"))
#' select(df, list(df$name, df$age + 1))
#' # Columns can also be selected using `[[` and `[`
#' df[[2]] == df[["age"]]
#' df[,2] == df[,"age"]
#' df[,c("name", "age")]
#' # Similar to R data frames columns can also be selected using `$`
#' df$age
#' # It can also be subset on rows and Columns
#' df[df$name == "Smith", c(1,2)]
#' df[df$age %in% c(19, 30), 1:2]
#' }
setMethod("select", signature(x = "DataFrame", col = "character"),
function(x, col, ...) {
sdf <- callJMethod(x@sdf, "select", col, toSeq(...))
sdf <- callJMethod(x@sdf, "select", col, list(...))
dataFrame(sdf)
})

Expand All @@ -1064,7 +1090,7 @@ setMethod("select", signature(x = "DataFrame", col = "Column"),
jcols <- lapply(list(col, ...), function(c) {
c@jc
})
sdf <- callJMethod(x@sdf, "select", listToSeq(jcols))
sdf <- callJMethod(x@sdf, "select", jcols)
dataFrame(sdf)
})

Expand All @@ -1080,7 +1106,7 @@ setMethod("select",
col(c)@jc
}
})
sdf <- callJMethod(x@sdf, "select", listToSeq(cols))
sdf <- callJMethod(x@sdf, "select", cols)
dataFrame(sdf)
})

Expand All @@ -1107,7 +1133,7 @@ setMethod("selectExpr",
signature(x = "DataFrame", expr = "character"),
function(x, expr, ...) {
exprList <- list(expr, ...)
sdf <- callJMethod(x@sdf, "selectExpr", listToSeq(exprList))
sdf <- callJMethod(x@sdf, "selectExpr", exprList)
dataFrame(sdf)
})

Expand All @@ -1121,7 +1147,7 @@ setMethod("selectExpr",
#' @return A DataFrame with the new column added.
#' @rdname withColumn
#' @name withColumn
#' @aliases mutate
#' @aliases mutate transform
#' @export
#' @examples
#'\dontrun{
Expand All @@ -1141,11 +1167,12 @@ setMethod("withColumn",
#'
#' Return a new DataFrame with the specified columns added.
#'
#' @param x A DataFrame
#' @param .data A DataFrame
#' @param col a named argument of the form name = col
#' @return A new DataFrame with the new columns added.
#' @rdname withColumn
#' @name mutate
#' @aliases withColumn transform
#' @export
#' @examples
#'\dontrun{
Expand All @@ -1155,10 +1182,12 @@ setMethod("withColumn",
#' df <- jsonFile(sqlContext, path)
#' newDF <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2)
#' names(newDF) # Will contain newCol, newCol2
#' newDF2 <- transform(df, newCol = df$col1 / 5, newCol2 = df$col1 * 2)
#' }
setMethod("mutate",
signature(x = "DataFrame"),
function(x, ...) {
signature(.data = "DataFrame"),
function(.data, ...) {
x <- .data
cols <- list(...)
stopifnot(length(cols) > 0)
stopifnot(class(cols[[1]]) == "Column")
Expand All @@ -1173,6 +1202,16 @@ setMethod("mutate",
do.call(select, c(x, x$"*", cols))
})

#' Transform
#'
#' Forwards the DataFrame and every additional argument to mutate and
#' returns whatever mutate returns.
#'
#' @export
#' @rdname withColumn
#' @name transform
#' @aliases withColumn mutate
setMethod("transform",
          signature(`_data` = "DataFrame"),
          # The formal is named `_data` (hence the backticks); the call is
          # handed to mutate as-is.
          function(`_data`, ...) mutate(`_data`, ...))

#' WithColumnRenamed
#'
#' Rename an existing column in a DataFrame.
Expand Down Expand Up @@ -1272,12 +1311,12 @@ setMethod("arrange",
signature(x = "DataFrame", col = "characterOrColumn"),
function(x, col, ...) {
if (class(col) == "character") {
sdf <- callJMethod(x@sdf, "sort", col, toSeq(...))
sdf <- callJMethod(x@sdf, "sort", col, list(...))
} else if (class(col) == "Column") {
jcols <- lapply(list(col, ...), function(c) {
c@jc
})
sdf <- callJMethod(x@sdf, "sort", listToSeq(jcols))
sdf <- callJMethod(x@sdf, "sort", jcols)
}
dataFrame(sdf)
})
Expand All @@ -1300,6 +1339,7 @@ setMethod("orderBy",
#' @return A DataFrame containing only the rows that meet the condition.
#' @rdname filter
#' @name filter
#' @family subsetting functions
#' @export
#' @examples
#'\dontrun{
Expand Down Expand Up @@ -1624,7 +1664,7 @@ setMethod("describe",
signature(x = "DataFrame", col = "character"),
function(x, col, ...) {
colList <- list(col, ...)
sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
sdf <- callJMethod(x@sdf, "describe", colList)
dataFrame(sdf)
})

Expand All @@ -1634,7 +1674,7 @@ setMethod("describe",
signature(x = "DataFrame"),
function(x) {
colList <- as.list(c(columns(x)))
sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
sdf <- callJMethod(x@sdf, "describe", colList)
dataFrame(sdf)
})

Expand Down Expand Up @@ -1691,7 +1731,7 @@ setMethod("dropna",

naFunctions <- callJMethod(x@sdf, "na")
sdf <- callJMethod(naFunctions, "drop",
as.integer(minNonNulls), listToSeq(as.list(cols)))
as.integer(minNonNulls), as.list(cols))
dataFrame(sdf)
})

Expand Down Expand Up @@ -1775,7 +1815,7 @@ setMethod("fillna",
sdf <- if (length(cols) == 0) {
callJMethod(naFunctions, "fill", value)
} else {
callJMethod(naFunctions, "fill", value, listToSeq(as.list(cols)))
callJMethod(naFunctions, "fill", value, as.list(cols))
}
dataFrame(sdf)
})
Expand Down Expand Up @@ -1808,3 +1848,28 @@ setMethod("crosstab",
sct <- callJMethod(statFunctions, "crosstab", col1, col2)
collect(dataFrame(sct))
})


#' Download data from a DataFrame into a data.frame
#'
#' This function downloads the contents of a DataFrame into an R data.frame.
#' Since data.frames are held in memory, ensure that you have enough memory
#' in your system to accommodate the contents.
#'
#' @param x a DataFrame
#' @param ... must be empty; supplying any additional argument raises an error
#' @return a data.frame
#' @rdname as.data.frame
#' @examples \dontrun{
#'
#' irisDF <- createDataFrame(sqlContext, iris)
#' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ])
#' }
setMethod("as.data.frame",
          signature(x = "DataFrame"),
          function(x, ...) {
            # Extra parameters are not supported: reject them before
            # collecting anything.
            extras <- list(...)
            if (length(extras) > 0) {
              stop(paste("Unused argument(s): ", paste(extras, collapse=", ")))
            }
            collect(x)
          })
Loading

0 comments on commit 6f66f2c

Please sign in to comment.