Commit 2be1e76

Merge branch 'master' of https://github.com/apache/spark into master_nravi

nishkamravi2 committed Apr 24, 2015
2 parents: 1c13b79 + 336f7f5
Showing 401 changed files with 11,270 additions and 4,949 deletions.
22 changes: 13 additions & 9 deletions CONTRIBUTING.md
@@ -1,12 +1,16 @@
 ## Contributing to Spark

-Contributions via GitHub pull requests are gladly accepted from their original
-author. Along with any pull requests, please state that the contribution is
-your original work and that you license the work to the project under the
-project's open source license. Whether or not you state this explicitly, by
-submitting any copyrighted material via pull request, email, or other means
-you agree to license the material under the project's open source license and
-warrant that you have the legal authority to do so.
+*Before opening a pull request*, review the
+[Contributing to Spark wiki](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark).
+It lists steps that are required before creating a PR. In particular, consider:
+
+- Is the change important and ready enough to ask the community to spend time reviewing?
+- Have you searched for existing, related JIRAs and pull requests?
+- Is this a new feature that can stand alone as a package on http://spark-packages.org ?
+- Is the change being proposed clearly explained and motivated?

-Please see the [Contributing to Spark wiki page](https://cwiki.apache.org/SPARK/Contributing+to+Spark)
-for more information.
+When you contribute code, you affirm that the contribution is your original work and that you
+license the work to the project under the project's open source license. Whether or not you
+state this explicitly, by submitting any copyrighted material via pull request, email, or
+other means you agree to license the material under the project's open source license and
+warrant that you have the legal authority to do so.
2 changes: 1 addition & 1 deletion R/pkg/DESCRIPTION
@@ -19,7 +19,7 @@ Collate:
     'jobj.R'
     'RDD.R'
     'pairRDD.R'
-    'SQLTypes.R'
+    'schema.R'
     'column.R'
     'group.R'
     'DataFrame.R'
20 changes: 17 additions & 3 deletions R/pkg/NAMESPACE
@@ -5,6 +5,7 @@ exportMethods(
               "aggregateByKey",
               "aggregateRDD",
               "cache",
+              "cartesian",
               "checkpoint",
               "coalesce",
               "cogroup",
@@ -28,6 +29,7 @@ exportMethods(
               "fullOuterJoin",
               "glom",
               "groupByKey",
+              "intersection",
               "join",
               "keyBy",
               "keys",
@@ -52,11 +54,14 @@ exportMethods(
               "reduceByKeyLocally",
               "repartition",
               "rightOuterJoin",
+              "sampleByKey",
               "sampleRDD",
               "saveAsTextFile",
               "saveAsObjectFile",
               "sortBy",
               "sortByKey",
+              "subtract",
+              "subtractByKey",
               "sumRDD",
               "take",
               "takeOrdered",
@@ -95,6 +100,7 @@ exportClasses("DataFrame")
 exportMethods("columns",
               "distinct",
               "dtypes",
+              "except",
               "explain",
               "filter",
               "groupBy",
@@ -118,7 +124,6 @@ exportMethods("columns",
               "show",
               "showDF",
               "sortDF",
-              "subtract",
               "toJSON",
               "toRDD",
               "unionAll",
@@ -178,5 +183,14 @@ exportMethods("columns",
        "toDF",
        "uncacheTable")

-export("print.structType",
-       "print.structField")
+export("sparkRSQL.init",
+       "sparkRHive.init")
+
+export("structField",
+       "structField.jobj",
+       "structField.character",
+       "print.structField",
+       "structType",
+       "structType.jobj",
+       "structType.structField",
+       "print.structType")
26 changes: 17 additions & 9 deletions R/pkg/R/DataFrame.R
@@ -17,7 +17,7 @@

 # DataFrame.R - DataFrame class and methods implemented in S4 OO classes

-#' @include generics.R jobj.R SQLTypes.R RDD.R pairRDD.R column.R group.R
+#' @include generics.R jobj.R schema.R RDD.R pairRDD.R column.R group.R
 NULL

 setOldClass("jobj")
@@ -790,9 +790,12 @@ setMethod("$", signature(x = "DataFrame"),

 setMethod("$<-", signature(x = "DataFrame"),
           function(x, name, value) {
-            stopifnot(class(value) == "Column")
+            stopifnot(class(value) == "Column" || is.null(value))
             cols <- columns(x)
             if (name %in% cols) {
+              if (is.null(value)) {
+                cols <- Filter(function(c) { c != name }, cols)
+              }
               cols <- lapply(cols, function(c) {
                 if (c == name) {
                   alias(value, name)
@@ -802,6 +805,9 @@ setMethod("$<-", signature(x = "DataFrame"),
               })
               nx <- select(x, cols)
             } else {
+              if (is.null(value)) {
+                return(x)
+              }
               nx <- withColumn(x, name, value)
             }
             x@sdf <- nx@sdf
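The two hunks above extend the `$<-` method so that assigning NULL drops a column, mirroring base R data.frame semantics, while assigning a Column still adds or replaces one; assigning NULL to a nonexistent column is a no-op. A brief usage sketch (the DataFrame df and the column names here are hypothetical):

    df$age2 <- df$age * 2   # add or replace a column from a Column expression
    df$age2 <- NULL         # previously an error; now drops the "age2" column
    df$nosuch <- NULL       # column absent: returns the DataFrame unchanged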
@@ -1141,29 +1147,31 @@ setMethod("intersect",
             dataFrame(intersected)
           })

-#' Subtract
+#' except
 #'
 #' Return a new DataFrame containing rows in this DataFrame
 #' but not in another DataFrame. This is equivalent to `EXCEPT` in SQL.
 #'
 #' @param x A Spark DataFrame
 #' @param y A Spark DataFrame
-#' @return A DataFrame containing the result of the subtract operation.
-#' @rdname subtract
+#' @return A DataFrame containing the result of the except operation.
+#' @rdname except
 #' @export
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
 #' sqlCtx <- sparkRSQL.init(sc)
 #' df1 <- jsonFile(sqlCtx, path)
 #' df2 <- jsonFile(sqlCtx, path2)
-#' subtractDF <- subtract(df, df2)
+#' exceptDF <- except(df, df2)
 #' }
-setMethod("subtract",
+#' @rdname except
+#' @export
+setMethod("except",
           signature(x = "DataFrame", y = "DataFrame"),
           function(x, y) {
-            subtracted <- callJMethod(x@sdf, "except", y@sdf)
-            dataFrame(subtracted)
+            excepted <- callJMethod(x@sdf, "except", y@sdf)
+            dataFrame(excepted)
           })

 #' Save the contents of the DataFrame to a data source
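With this rename, DataFrame set difference is exposed as except (matching SQL EXCEPT), while the subtract name is reserved for the RDD method per the NAMESPACE changes above. A short sketch, assuming two DataFrames loaded from JSON as in the roxygen example:

    df1 <- jsonFile(sqlCtx, path)
    df2 <- jsonFile(sqlCtx, path2)
    exceptDF <- except(df1, df2)   # rows of df1 that do not appear in df2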
