Merge branch 'apache/master' into yh/pod-template
yifeih committed Aug 22, 2018
2 parents 1da79a8 + a998e9d commit 8ef756e
Showing 181 changed files with 3,359 additions and 779 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -77,6 +77,7 @@ target/
unit-tests.log
work/
docs/.jekyll-metadata
+*.crc

# For Hive
TempStatsStore/
2 changes: 0 additions & 2 deletions LICENSE-binary
@@ -228,7 +228,6 @@ org.apache.xbean:xbean-asm5-shaded
com.squareup.okhttp3:logging-interceptor
com.squareup.okhttp3:okhttp
com.squareup.okio:okio
-net.java.dev.jets3t:jets3t
org.apache.spark:spark-catalyst_2.11
org.apache.spark:spark-kvstore_2.11
org.apache.spark:spark-launcher_2.11
@@ -447,7 +446,6 @@ org.slf4j:jul-to-slf4j
org.slf4j:slf4j-api
org.slf4j:slf4j-log4j12
com.github.scopt:scopt_2.11
-org.bouncycastle:bcprov-jdk15on

core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js
core/src/main/resources/org/apache/spark/ui/static/*dataTables*
2 changes: 0 additions & 2 deletions NOTICE
@@ -26,5 +26,3 @@ The following provides more details on the included cryptographic software:
This software uses Apache Commons Crypto (https://commons.apache.org/proper/commons-crypto/) to
support authentication, and encryption and decryption of data sent across the network between
services.
-
-This software includes Bouncy Castle (http://bouncycastle.org/) to support the jets3t library.
21 changes: 0 additions & 21 deletions NOTICE-binary
@@ -27,8 +27,6 @@ This software uses Apache Commons Crypto (https://commons.apache.org/proper/comm
support authentication, and encryption and decryption of data sent across the network between
services.

-This software includes Bouncy Castle (http://bouncycastle.org/) to support the jets3t library.
-

// ------------------------------------------------------------------
// NOTICE file corresponding to the section 4d of The Apache License,
@@ -1162,25 +1160,6 @@ NonlinearMinimizer class in package breeze.optimize.proximal is distributed with
2015, Debasish Das (Verizon), all rights reserved.


-=========================================================================
-== NOTICE file corresponding to section 4(d) of the Apache License, ==
-== Version 2.0, in this case for the distribution of jets3t. ==
-=========================================================================
-
-This product includes software developed by:
-
-The Apache Software Foundation (http://www.apache.org/).
-
-The ExoLab Project (http://www.exolab.org/)
-
-Sun Microsystems (http://www.sun.com/)
-
-Codehaus (http://castor.codehaus.org)
-
-Tatu Saloranta (http://wiki.fasterxml.com/TatuSaloranta)
-
-
-
stream-lib
Copyright 2016 AddThis

2 changes: 2 additions & 0 deletions R/pkg/NAMESPACE
@@ -117,6 +117,7 @@ exportMethods("arrange",
"dropna",
"dtypes",
"except",
"exceptAll",
"explain",
"fillna",
"filter",
@@ -131,6 +132,7 @@ exportMethods("arrange",
"hint",
"insertInto",
"intersect",
"intersectAll",
"isLocal",
"isStreaming",
"join",
59 changes: 58 additions & 1 deletion R/pkg/R/DataFrame.R
@@ -2848,6 +2848,35 @@ setMethod("intersect",
dataFrame(intersected)
})

+#' intersectAll
+#'
+#' Return a new SparkDataFrame containing rows in both this SparkDataFrame
+#' and another SparkDataFrame while preserving the duplicates.
+#' This is equivalent to \code{INTERSECT ALL} in SQL. Also as standard in
+#' SQL, this function resolves columns by position (not by name).
+#'
+#' @param x a SparkDataFrame.
+#' @param y a SparkDataFrame.
+#' @return A SparkDataFrame containing the result of the intersect all operation.
+#' @family SparkDataFrame functions
+#' @aliases intersectAll,SparkDataFrame,SparkDataFrame-method
+#' @rdname intersectAll
+#' @name intersectAll
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df1 <- read.json(path)
+#' df2 <- read.json(path2)
+#' intersectAllDF <- intersectAll(df1, df2)
+#' }
+#' @note intersectAll since 2.4.0
+setMethod("intersectAll",
+          signature(x = "SparkDataFrame", y = "SparkDataFrame"),
+          function(x, y) {
+            intersected <- callJMethod(x@sdf, "intersectAll", y@sdf)
+            dataFrame(intersected)
+          })
+
#' except
#'
#' Return a new SparkDataFrame containing rows in this SparkDataFrame
@@ -2867,7 +2896,6 @@ setMethod("intersect",
#' df2 <- read.json(path2)
#' exceptDF <- except(df, df2)
#' }
-#' @rdname except
#' @note except since 1.4.0
setMethod("except",
signature(x = "SparkDataFrame", y = "SparkDataFrame"),
@@ -2876,6 +2904,35 @@ setMethod("except",
dataFrame(excepted)
})

+#' exceptAll
+#'
+#' Return a new SparkDataFrame containing rows in this SparkDataFrame
+#' but not in another SparkDataFrame while preserving the duplicates.
+#' This is equivalent to \code{EXCEPT ALL} in SQL. Also as standard in
+#' SQL, this function resolves columns by position (not by name).
+#'
+#' @param x a SparkDataFrame.
+#' @param y a SparkDataFrame.
+#' @return A SparkDataFrame containing the result of the except all operation.
+#' @family SparkDataFrame functions
+#' @aliases exceptAll,SparkDataFrame,SparkDataFrame-method
+#' @rdname exceptAll
+#' @name exceptAll
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df1 <- read.json(path)
+#' df2 <- read.json(path2)
+#' exceptAllDF <- exceptAll(df1, df2)
+#' }
+#' @note exceptAll since 2.4.0
+setMethod("exceptAll",
+          signature(x = "SparkDataFrame", y = "SparkDataFrame"),
+          function(x, y) {
+            excepted <- callJMethod(x@sdf, "exceptAll", y@sdf)
+            dataFrame(excepted)
+          })
+
#' Save the contents of SparkDataFrame to a data source.
#'
#' The data source is specified by the \code{source} and a set of options (...).
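To make the new multiset semantics concrete, here is a short illustrative SparkR snippet (assuming an active sparkR.session(); the data mirrors the new test case further down this commit). Unlike intersect() and except(), duplicate rows survive in the result.

```r
df1 <- createDataFrame(list(list("a", 1), list("a", 1), list("a", 1),
                            list("a", 1), list("b", 3), list("c", 4)),
                       schema = c("a", "b"))
df2 <- createDataFrame(list(list("a", 1), list("a", 1), list("b", 3)),
                       schema = c("a", "b"))
collect(intersectAll(df1, df2))  # ("a", 1) twice and ("b", 3) once
collect(exceptAll(df1, df2))     # ("a", 1) twice and ("c", 4) once
```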
6 changes: 6 additions & 0 deletions R/pkg/R/generics.R
@@ -471,6 +471,9 @@ setGeneric("explain", function(x, ...) { standardGeneric("explain") })
#' @rdname except
setGeneric("except", function(x, y) { standardGeneric("except") })

+#' @rdname exceptAll
+setGeneric("exceptAll", function(x, y) { standardGeneric("exceptAll") })
+
#' @rdname nafunctions
setGeneric("fillna", function(x, value, cols = NULL) { standardGeneric("fillna") })

@@ -495,6 +498,9 @@ setGeneric("insertInto", function(x, tableName, ...) { standardGeneric("insertIn
#' @rdname intersect
setGeneric("intersect", function(x, y) { standardGeneric("intersect") })

+#' @rdname intersectAll
+setGeneric("intersectAll", function(x, y) { standardGeneric("intersectAll") })
+
#' @rdname isLocal
setGeneric("isLocal", function(x) { standardGeneric("isLocal") })

4 changes: 2 additions & 2 deletions R/pkg/tests/fulltests/test_mllib_classification.R
@@ -382,10 +382,10 @@ test_that("spark.mlp", {
trainidxs <- base::sample(nrow(data), nrow(data) * 0.7)
traindf <- as.DataFrame(data[trainidxs, ])
testdf <- as.DataFrame(rbind(data[-trainidxs, ], c(0, "the other")))
-model <- spark.mlp(traindf, clicked ~ ., layers = c(1, 3))
+model <- spark.mlp(traindf, clicked ~ ., layers = c(1, 2))
predictions <- predict(model, testdf)
expect_error(collect(predictions))
-model <- spark.mlp(traindf, clicked ~ ., layers = c(1, 3), handleInvalid = "skip")
+model <- spark.mlp(traindf, clicked ~ ., layers = c(1, 2), handleInvalid = "skip")
predictions <- predict(model, testdf)
expect_equal(class(collect(predictions)$clicked[1]), "list")

19 changes: 19 additions & 0 deletions R/pkg/tests/fulltests/test_sparkSQL.R
@@ -2482,6 +2482,25 @@ test_that("union(), unionByName(), rbind(), except(), and intersect() on a DataF
unlink(jsonPath2)
})

test_that("intersectAll() and exceptAll()", {
df1 <- createDataFrame(list(list("a", 1), list("a", 1), list("a", 1),
list("a", 1), list("b", 3), list("c", 4)),
schema = c("a", "b"))
df2 <- createDataFrame(list(list("a", 1), list("a", 1), list("b", 3)), schema = c("a", "b"))
intersectAllExpected <- data.frame("a" = c("a", "a", "b"), "b" = c(1, 1, 3),
stringsAsFactors = FALSE)
exceptAllExpected <- data.frame("a" = c("a", "a", "c"), "b" = c(1, 1, 4),
stringsAsFactors = FALSE)
intersectAllDf <- arrange(intersectAll(df1, df2), df1$a)
expect_is(intersectAllDf, "SparkDataFrame")
exceptAllDf <- arrange(exceptAll(df1, df2), df1$a)
expect_is(exceptAllDf, "SparkDataFrame")
intersectAllActual <- collect(intersectAllDf)
expect_identical(intersectAllActual, intersectAllExpected)
exceptAllActual <- collect(exceptAllDf)
expect_identical(exceptAllActual, exceptAllExpected)
})

test_that("withColumn() and withColumnRenamed()", {
df <- read.json(jsonPath)
newDF <- withColumn(df, "newAge", df$age + 2)
2 changes: 1 addition & 1 deletion R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -654,7 +654,7 @@ We use Titanic data set to show how to use `spark.mlp` in classification.
t <- as.data.frame(Titanic)
training <- createDataFrame(t)
# fit a Multilayer Perceptron Classification Model
-model <- spark.mlp(training, Survived ~ Age + Sex, blockSize = 128, layers = c(2, 3), solver = "l-bfgs", maxIter = 100, tol = 0.5, stepSize = 1, seed = 1, initialWeights = c( 0, 0, 0, 5, 5, 5, 9, 9, 9))
+model <- spark.mlp(training, Survived ~ Age + Sex, blockSize = 128, layers = c(2, 2), solver = "l-bfgs", maxIter = 100, tol = 0.5, stepSize = 1, seed = 1, initialWeights = c( 0, 0, 5, 5, 9, 9))
```

To avoid lengthy display, we only present partial results of the model summary. You can check the full result from your sparkR shell.
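A note on the arithmetic behind this hunk: the change appears to track the two-level Survived label. With layers = c(2, 2) the network takes 2 * 2 weights plus 2 biases = 6 initial values, which is why initialWeights shrinks from the 9 values (2 * 3 + 3) that the old c(2, 3) topology required.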
23 changes: 16 additions & 7 deletions bin/docker-image-tool.sh
@@ -71,6 +71,7 @@ function build {
)
local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}
+local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/spark/bindings/R/Dockerfile"}

docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
-t $(image_ref spark) \
@@ -79,11 +80,16 @@ function build {
docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-t $(image_ref spark-py) \
-f "$PYDOCKERFILE" .
+
+docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+  -t $(image_ref spark-r) \
+  -f "$RDOCKERFILE" .
}

function push {
docker push "$(image_ref spark)"
docker push "$(image_ref spark-py)"
docker push "$(image_ref spark-r)"
}

function usage {
@@ -97,12 +103,13 @@ Commands:
push Push a pre-built image to a registry. Requires a repository address to be provided.
Options:
-  -f file  Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
-  -p file  Dockerfile with Python baked in. By default builds the Dockerfile shipped with Spark.
-  -r repo  Repository address.
-  -t tag   Tag to apply to the built image, or to identify the image to be pushed.
-  -m       Use minikube's Docker daemon.
-  -n       Build docker image with --no-cache
+  -f file  Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
+  -p file  Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
+  -R file  Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
+  -r repo  Repository address.
+  -t tag   Tag to apply to the built image, or to identify the image to be pushed.
+  -m       Use minikube's Docker daemon.
+  -n       Build docker image with --no-cache
-b arg Build arg to build or push the image. For multiple build args, this option needs to
be used separately for each build arg.
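For orientation, a hypothetical pair of invocations exercising the new -R option (the repository, tag, and Dockerfile path below are invented for illustration, not taken from this commit):

```sh
# Build the JVM, PySpark, and SparkR images, overriding the R Dockerfile,
# then push all three under the same tag.
./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.4.0 -R custom/Dockerfile.r build
./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.4.0 push
```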
@@ -133,14 +140,16 @@ REPO=
TAG=
BASEDOCKERFILE=
PYDOCKERFILE=
+RDOCKERFILE=
NOCACHEARG=
BUILD_PARAMS=
-while getopts f:p:mr:t:n:b: option
+while getopts f:p:R:mr:t:n:b: option
do
case "${option}"
in
f) BASEDOCKERFILE=${OPTARG};;
p) PYDOCKERFILE=${OPTARG};;
+R) RDOCKERFILE=${OPTARG};;
r) REPO=${OPTARG};;
t) TAG=${OPTARG};;
n) NOCACHEARG="--no-cache";;
@@ -234,7 +234,7 @@ public void onComplete(String streamId) throws IOException {
callback.onSuccess(ByteBuffer.allocate(0));
} catch (Exception ex) {
IOException ioExc = new IOException("Failure post-processing complete stream;" +
" failing this rpc and leaving channel active");
" failing this rpc and leaving channel active", ex);
callback.onFailure(ioExc);
streamHandler.onFailure(streamId, ioExc);
}
@@ -42,7 +42,7 @@ public abstract class BlockTransferMessage implements Encodable {
/** Preceding every serialized message is its type, which allows us to deserialize it. */
public enum Type {
OPEN_BLOCKS(0), UPLOAD_BLOCK(1), REGISTER_EXECUTOR(2), STREAM_HANDLE(3), REGISTER_DRIVER(4),
-HEARTBEAT(5);
+HEARTBEAT(5), UPLOAD_BLOCK_STREAM(6);

private final byte id;

@@ -67,6 +67,7 @@ public static BlockTransferMessage fromByteBuffer(ByteBuffer msg) {
case 3: return StreamHandle.decode(buf);
case 4: return RegisterDriver.decode(buf);
case 5: return ShuffleServiceHeartbeat.decode(buf);
+case 6: return UploadBlockStream.decode(buf);
default: throw new IllegalArgumentException("Unknown message type: " + type);
}
}
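The two hunks above extend a simple tagged-union wire format: one leading byte names the message type, and fromByteBuffer dispatches on it to the matching decoder. A minimal standalone sketch of that pattern follows; the class and method names are illustrative, not Spark APIs.

```java
import java.nio.ByteBuffer;

// Sketch of the type-byte framing used by BlockTransferMessage: the
// encoder writes a one-byte tag ahead of the payload, and the decoder
// reads the tag back to pick the right deserializer.
final class TaggedFrame {
  static ByteBuffer encode(byte typeId, byte[] body) {
    ByteBuffer buf = ByteBuffer.allocate(1 + body.length);
    buf.put(typeId);   // tag first, payload second
    buf.put(body);
    buf.flip();        // rewind so readers start at the tag
    return buf;
  }

  static String decode(ByteBuffer msg) {
    byte type = msg.get();  // consume the tag, as fromByteBuffer does
    switch (type) {
      case 5: return "HEARTBEAT payload of " + msg.remaining() + " bytes";
      case 6: return "UPLOAD_BLOCK_STREAM payload of " + msg.remaining() + " bytes";
      default: throw new IllegalArgumentException("Unknown message type: " + type);
    }
  }
}
```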