Skip to content

Commit

Permalink
merge with master
Browse files Browse the repository at this point in the history
  • Loading branch information
yinxusen committed Oct 7, 2016
2 parents c28fdb8 + 2badb58 commit 265a62f
Show file tree
Hide file tree
Showing 1,754 changed files with 65,274 additions and 28,218 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@
.idea/
.idea_modules/
.project
.pydevproject
.scala_dependencies
.settings
/lib/
R-unit-tests.log
R/unit-tests.out
R/cran-check.out
R/pkg/vignettes/sparkr-vignettes.html
build/*.jar
build/apache-maven*
build/scala*
Expand Down Expand Up @@ -78,3 +81,7 @@ spark-warehouse/
.RData
.RHistory
.Rhistory
*.Rproj
*.Rproj.*

.Rproj.user
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ notifications:
# 5. Run maven install before running lint-java.
install:
- export MAVEN_SKIP_RC=1
- build/mvn -T 4 -q -DskipTests -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install
- build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install

# 6. Run lint-java.
script:
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ It lists steps that are required before creating a PR. In particular, consider:

- Is the change important and ready enough to ask the community to spend time reviewing?
- Have you searched for existing, related JIRAs and pull requests?
- Is this a new feature that can stand alone as a package on http://spark-packages.org ?
- Is this a new feature that can stand alone as a [third party project](https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects) ?
- Is the change being proposed clearly explained and motivated?

When you contribute code, you affirm that the contribution is your original work and that you
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
(New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
(The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
(The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.1 - http://py4j.sourceforge.net/)
(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.3 - http://py4j.sourceforge.net/)
(Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
(BSD licence) sbt and sbt-launch-lib.bash
(BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)
Expand Down
2 changes: 2 additions & 0 deletions R/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
lib
pkg/man
pkg/html
SparkR.Rcheck/
SparkR_*.tar.gz
12 changes: 11 additions & 1 deletion R/WINDOWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,23 @@ To build SparkR on Windows, the following steps are required

1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
include Rtools and R in `PATH`.

2. Install
[JDK7](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html) and set
`JAVA_HOME` in the system environment variables.

3. Download and install [Maven](http://maven.apache.org/download.html). Also include the `bin`
directory in Maven in `PATH`.

4. Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html).
5. Open a command shell (`cmd`) in the Spark directory and run `mvn -DskipTests -Psparkr package`

5. Open a command shell (`cmd`) in the Spark directory and build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run

```bash
mvn.cmd -DskipTests -Psparkr package
```

`.\build\mvn` is a shell script so `mvn.cmd` should be used directly on Windows.

## Unit tests

Expand Down
64 changes: 64 additions & 0 deletions R/check-cran.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -o pipefail
set -e

FWDIR="$(cd `dirname $0`; pwd)"
pushd $FWDIR > /dev/null

if [ ! -z "$R_HOME" ]
then
R_SCRIPT_PATH="$R_HOME/bin"
else
# if system wide R_HOME is not found, then exit
if [ ! `command -v R` ]; then
echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
exit 1
fi
R_SCRIPT_PATH="$(dirname $(which R))"
fi
echo "USING R_HOME = $R_HOME"

# Build the latest docs
$FWDIR/create-docs.sh

# Build a zip file containing the source package
"$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg

# Run check as-cran.
VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`

CRAN_CHECK_OPTIONS="--as-cran"

if [ -n "$NO_TESTS" ]
then
CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-tests"
fi

if [ -n "$NO_MANUAL" ]
then
CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-manual"
fi

echo "Running CRAN check with $CRAN_CHECK_OPTIONS options"

"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz

popd > /dev/null
30 changes: 28 additions & 2 deletions R/create-docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,26 @@
# limitations under the License.
#

# Script to create API docs for SparkR
# This requires `devtools` and `knitr` to be installed on the machine.
# Script to create API docs and vignettes for SparkR
# This requires `devtools`, `knitr` and `rmarkdown` to be installed on the machine.

# After running this script the html docs can be found in
# $SPARK_HOME/R/pkg/html
# The vignettes can be found in
# $SPARK_HOME/R/pkg/vignettes/sparkr_vignettes.html

set -o pipefail
set -e

# Figure out where the script is
export FWDIR="$(cd "`dirname "$0"`"; pwd)"
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"

# Required for setting SPARK_SCALA_VERSION
. "${SPARK_HOME}"/bin/load-spark-env.sh

echo "Using Scala $SPARK_SCALA_VERSION"

pushd $FWDIR

# Install the package (this will also generate the Rd files)
Expand All @@ -43,4 +52,21 @@ Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knit

popd

# Find Spark jars.
if [ -f "${SPARK_HOME}/RELEASE" ]; then
SPARK_JARS_DIR="${SPARK_HOME}/jars"
else
SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars"
fi

# Only create vignettes if Spark JARs exist
if [ -d "$SPARK_JARS_DIR" ]; then
# render creates SparkR vignettes
Rscript -e 'library(rmarkdown); paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); render("pkg/vignettes/sparkr-vignettes.Rmd"); .libPaths(paths)'

find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete
else
echo "Skipping R vignettes as Spark JARs not found in $SPARK_HOME"
fi

popd
5 changes: 5 additions & 0 deletions R/pkg/.Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.lintr$
^src-native$
^html$
23 changes: 15 additions & 8 deletions R/pkg/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
Package: SparkR
Type: Package
Title: R frontend for Spark
Title: R Frontend for Apache Spark
Version: 2.0.0
Date: 2013-09-09
Author: The Apache Software Foundation
Maintainer: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
Imports:
methods
Date: 2016-08-27
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
email = "shivaram@cs.berkeley.edu"),
person("Xiangrui", "Meng", role = "aut",
email = "meng@databricks.com"),
person("Felix", "Cheung", role = "aut",
email = "felixcheung@apache.org"),
person(family = "The Apache Software Foundation", role = c("aut", "cph")))
URL: http://www.apache.org/ http://spark.apache.org/
BugReports: https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingBugReports
Depends:
R (>= 3.0),
methods,
methods
Suggests:
testthat,
e1071,
survival
Description: R frontend for Spark
Description: The SparkR package provides an R frontend for Apache Spark.
License: Apache License (== 2.0)
Collate:
'schema.R'
Expand All @@ -33,6 +38,8 @@ Collate:
'context.R'
'deserialize.R'
'functions.R'
'install.R'
'jvm.R'
'mllib.R'
'serialize.R'
'sparkR.R'
Expand Down
43 changes: 38 additions & 5 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Imports from base R
importFrom(methods, setGeneric, setMethod, setOldClass)
# Do not include stats:: "rpois", "runif" - causes error at runtime
importFrom("methods", "setGeneric", "setMethod", "setOldClass")
importFrom("methods", "is", "new", "signature", "show")
importFrom("stats", "gaussian", "setNames")
importFrom("utils", "download.file", "packageVersion", "untar")

# Disable native libraries till we figure out how to package it
# See SPARKR-7839
Expand All @@ -11,8 +15,15 @@ export("sparkR.init")
export("sparkR.stop")
export("sparkR.session.stop")
export("sparkR.conf")
export("sparkR.version")
export("print.jobj")

export("sparkR.newJObject")
export("sparkR.callJMethod")
export("sparkR.callJStatic")

export("install.spark")

export("sparkRSQL.init",
"sparkRHive.init")

Expand All @@ -23,8 +34,16 @@ exportMethods("glm",
"summary",
"spark.kmeans",
"fitted",
"spark.mlp",
"spark.naiveBayes",
"spark.survreg")
"spark.survreg",
"spark.lda",
"spark.posterior",
"spark.perplexity",
"spark.isoreg",
"spark.gaussianMixture",
"spark.als",
"spark.kstest")

# Job group lifecycle management methods
export("setJobGroup",
Expand Down Expand Up @@ -317,14 +336,18 @@ export("as.DataFrame",
"read.parquet",
"read.text",
"spark.lapply",
"spark.addFile",
"spark.getSparkFilesRootDirectory",
"spark.getSparkFiles",
"sql",
"str",
"tableToDF",
"tableNames",
"tables",
"uncacheTable",
"print.summary.GeneralizedLinearRegressionModel",
"read.ml")
"read.ml",
"print.summary.KSTest")

export("structField",
"structField.jobj",
Expand All @@ -341,5 +364,15 @@ export("partitionBy",
"rowsBetween",
"rangeBetween")

export("window.partitionBy",
"window.orderBy")
export("windowPartitionBy",
"windowOrderBy")

S3method(print, jobj)
S3method(print, structField)
S3method(print, structType)
S3method(print, summary.GeneralizedLinearRegressionModel)
S3method(print, summary.KSTest)
S3method(structField, character)
S3method(structField, jobj)
S3method(structType, jobj)
S3method(structType, structField)

0 comments on commit 265a62f

Please sign in to comment.