Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869
Browse files Browse the repository at this point in the history
  • Loading branch information
lianhuiwang committed May 3, 2015
2 parents 20402cd + 49549d5 commit 150907b
Show file tree
Hide file tree
Showing 798 changed files with 36,532 additions and 8,406 deletions.
4 changes: 4 additions & 0 deletions .rat-excludes
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ TAGS
RELEASE
control
docs
docker.properties.template
fairscheduler.xml.template
spark-defaults.conf.template
log4j.properties
Expand All @@ -30,6 +31,9 @@ log4j-defaults.properties
bootstrap-tooltip.js
jquery-1.11.1.min.js
sorttable.js
vis.min.js
vis.min.css
vis.map
.*avsc
.*txt
.*json
Expand Down
22 changes: 13 additions & 9 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
## Contributing to Spark

Contributions via GitHub pull requests are gladly accepted from their original
author. Along with any pull requests, please state that the contribution is
your original work and that you license the work to the project under the
project's open source license. Whether or not you state this explicitly, by
submitting any copyrighted material via pull request, email, or other means
you agree to license the material under the project's open source license and
warrant that you have the legal authority to do so.
*Before opening a pull request*, review the
[Contributing to Spark wiki](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark).
It lists steps that are required before creating a PR. In particular, consider:

- Is the change important and ready enough to ask the community to spend time reviewing?
- Have you searched for existing, related JIRAs and pull requests?
- Is this a new feature that can stand alone as a package on http://spark-packages.org ?
- Is the change being proposed clearly explained and motivated?

Please see the [Contributing to Spark wiki page](https://cwiki.apache.org/SPARK/Contributing+to+Spark)
for more information.
When you contribute code, you affirm that the contribution is your original work and that you
license the work to the project under the project's open source license. Whether or not you
state this explicitly, by submitting any copyrighted material via pull request, email, or
other means you agree to license the material under the project's open source license and
warrant that you have the legal authority to do so.
1 change: 1 addition & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,7 @@ BSD-style licenses
The following components are provided under a BSD-style license. See project link for details.

(BSD 3 Clause) core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model)
(BSD 3-clause style license) jblas (org.jblas:jblas:1.2.3 - http://jblas.org/)
(BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
(BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org)
Expand Down
6 changes: 3 additions & 3 deletions R/pkg/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@ License: Apache License (== 2.0)
Collate:
'generics.R'
'jobj.R'
'SQLTypes.R'
'RDD.R'
'pairRDD.R'
'schema.R'
'column.R'
'group.R'
'DataFrame.R'
'SQLContext.R'
'backend.R'
'broadcast.R'
'client.R'
'context.R'
'deserialize.R'
'serialize.R'
'sparkR.R'
'backend.R'
'client.R'
'utils.R'
'zzz.R'
21 changes: 18 additions & 3 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ exportMethods(
"aggregateByKey",
"aggregateRDD",
"cache",
"cartesian",
"checkpoint",
"coalesce",
"cogroup",
Expand All @@ -28,6 +29,7 @@ exportMethods(
"fullOuterJoin",
"glom",
"groupByKey",
"intersection",
"join",
"keyBy",
"keys",
Expand All @@ -52,11 +54,14 @@ exportMethods(
"reduceByKeyLocally",
"repartition",
"rightOuterJoin",
"sampleByKey",
"sampleRDD",
"saveAsTextFile",
"saveAsObjectFile",
"sortBy",
"sortByKey",
"subtract",
"subtractByKey",
"sumRDD",
"take",
"takeOrdered",
Expand All @@ -66,6 +71,7 @@ exportMethods(
"unpersist",
"value",
"values",
"zipPartitions",
"zipRDD",
"zipWithIndex",
"zipWithUniqueId"
Expand Down Expand Up @@ -95,6 +101,7 @@ exportClasses("DataFrame")
exportMethods("columns",
"distinct",
"dtypes",
"except",
"explain",
"filter",
"groupBy",
Expand All @@ -118,7 +125,6 @@ exportMethods("columns",
"show",
"showDF",
"sortDF",
"subtract",
"toJSON",
"toRDD",
"unionAll",
Expand Down Expand Up @@ -178,5 +184,14 @@ export("cacheTable",
"toDF",
"uncacheTable")

export("print.structType",
"print.structField")
export("sparkRSQL.init",
"sparkRHive.init")

export("structField",
"structField.jobj",
"structField.character",
"print.structField",
"structType",
"structType.jobj",
"structType.structField",
"print.structType")
26 changes: 17 additions & 9 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

# DataFrame.R - DataFrame class and methods implemented in S4 OO classes

#' @include jobj.R SQLTypes.R RDD.R pairRDD.R column.R group.R
#' @include generics.R jobj.R schema.R RDD.R pairRDD.R column.R group.R
NULL

setOldClass("jobj")
Expand Down Expand Up @@ -790,9 +790,12 @@ setMethod("$", signature(x = "DataFrame"),

setMethod("$<-", signature(x = "DataFrame"),
function(x, name, value) {
stopifnot(class(value) == "Column")
stopifnot(class(value) == "Column" || is.null(value))
cols <- columns(x)
if (name %in% cols) {
if (is.null(value)) {
cols <- Filter(function(c) { c != name }, cols)
}
cols <- lapply(cols, function(c) {
if (c == name) {
alias(value, name)
Expand All @@ -802,6 +805,9 @@ setMethod("$<-", signature(x = "DataFrame"),
})
nx <- select(x, cols)
} else {
if (is.null(value)) {
return(x)
}
nx <- withColumn(x, name, value)
}
x@sdf <- nx@sdf
Expand Down Expand Up @@ -1141,29 +1147,31 @@ setMethod("intersect",
dataFrame(intersected)
})

#' Subtract
#' except
#'
#' Return a new DataFrame containing rows in this DataFrame
#' but not in another DataFrame. This is equivalent to `EXCEPT` in SQL.
#'
#' @param x A Spark DataFrame
#' @param y A Spark DataFrame
#' @return A DataFrame containing the result of the subtract operation.
#' @rdname subtract
#' @return A DataFrame containing the result of the except operation.
#' @rdname except
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' df1 <- jsonFile(sqlCtx, path)
#' df2 <- jsonFile(sqlCtx, path2)
#' subtractDF <- subtract(df1, df2)
#' exceptDF <- except(df1, df2)
#' }
setMethod("subtract",
#' @rdname except
#' @export
setMethod("except",
signature(x = "DataFrame", y = "DataFrame"),
function(x, y) {
subtracted <- callJMethod(x@sdf, "except", y@sdf)
dataFrame(subtracted)
excepted <- callJMethod(x@sdf, "except", y@sdf)
dataFrame(excepted)
})

#' Save the contents of the DataFrame to a data source
Expand Down
Loading

0 comments on commit 150907b

Please sign in to comment.