Merge remote-tracking branch 'origin/master' into return-map-output-metadata
mccheah committed May 22, 2020
2 parents 90084ea + 2115c55 commit 6ecd3ad
Showing 1,150 changed files with 54,015 additions and 21,984 deletions.
29 changes: 29 additions & 0 deletions .asf.yaml
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# https://cwiki.apache.org/confluence/display/INFRA/.asf.yaml+features+for+git+repositories
---
github:
  description: "Apache Spark - A unified analytics engine for large-scale data processing"
  homepage: https://spark.apache.org/
  labels:
    - python
    - scala
    - r
    - java
    - big-data
    - jdbc
    - sql
    - spark
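Since .asf.yaml is consumed by ASF infrastructure on push, it is worth confirming the file parses before committing. A minimal scratch check, assuming Python with PyYAML is available locally:

# hypothetical pre-push sanity check; any parse error will raise here
python3 -c "import yaml; print(yaml.safe_load(open('.asf.yaml')))"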
4 changes: 3 additions & 1 deletion .github/PULL_REQUEST_TEMPLATE
@@ -29,9 +29,11 @@ Please clarify why the changes are needed. For instance,
-->


### Does this PR introduce any user-facing change?
### Does this PR introduce _any_ user-facing change?
<!--
Note that it means *any* user-facing change including all aspects such as the documentation fix.
If yes, please clarify the previous behavior and the change this PR proposes - provide the console output, description and/or an example to show the behavior difference if possible.
If possible, please also clarify if this is a user-facing change compared to the released Spark versions or within the unreleased branches such as master.
If no, write 'No'.
-->

133 changes: 133 additions & 0 deletions .github/autolabeler.yml
@@ -0,0 +1,133 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Bot page: https://github.com/apps/probot-autolabeler
# The matching patterns follow the .gitignore spec.
# See: https://git-scm.com/docs/gitignore#_pattern_format
# Also, note that the plugin uses 'ignore' package. See also
# https://github.com/kaelzhang/node-ignore
INFRA:
  - ".github/"
  - "appveyor.yml"
  - "/tools/"
  - "/dev/create-release/"
  - ".asf.yaml"
  - ".gitattributes"
  - ".gitignore"
  - "/dev/github_jira_sync.py"
  - "/dev/merge_spark_pr.py"
  - "/dev/run-tests-jenkins*"
BUILD:
  - "/dev/"
  - "!/dev/github_jira_sync.py"
  - "!/dev/merge_spark_pr.py"
  - "!/dev/run-tests-jenkins*"
  - "!/dev/.rat-excludes"
  - "/build/"
  - "/project/"
  - "/assembly/"
  - "*pom.xml"
  - "/bin/docker-image-tool.sh"
  - "/bin/find-spark-home*"
  - "scalastyle-config.xml"
DOCS:
  - "docs/"
  - "/README.md"
  - "/CONTRIBUTING.md"
EXAMPLES:
  - "examples/"
  - "/bin/run-example*"
CORE:
  - "/core/"
  - "!UI.scala"
  - "!ui/"
  - "/common/kvstore/"
  - "/common/network-common/"
  - "/common/network-shuffle/"
  - "/python/pyspark/*.py"
  - "/python/pyspark/tests/*.py"
SPARK SUBMIT:
  - "/bin/spark-submit*"
SPARK SHELL:
  - "/repl/"
  - "/bin/spark-shell*"
SQL:
  - "sql/"
  - "/common/unsafe/"
  - "!/python/pyspark/sql/avro/"
  - "!/python/pyspark/sql/streaming.py"
  - "!/python/pyspark/sql/tests/test_streaming.py"
  - "/bin/spark-sql*"
  - "/bin/beeline*"
  - "/sbin/*thriftserver*.sh"
  - "*SQL*.R"
  - "DataFrame.R"
  - "WindowSpec.R"
  - "catalog.R"
  - "column.R"
  - "functions.R"
  - "group.R"
  - "schema.R"
  - "types.R"
AVRO:
  - "/external/avro/"
  - "/python/pyspark/sql/avro/"
DSTREAM:
  - "/streaming/"
  - "/data/streaming/"
  - "/external/flume*"
  - "/external/kinesis*"
  - "/external/kafka*"
  - "/python/pyspark/streaming/"
GRAPHX:
  - "/graphx/"
  - "/data/graphx/"
ML:
  - "ml/"
  - "*mllib_*.R"
MLLIB:
  - "spark/mllib/"
  - "/mllib-local/"
  - "/python/pyspark/mllib/"
STRUCTURED STREAMING:
  - "sql/**/streaming/"
  - "/external/kafka-0-10-sql/"
  - "/python/pyspark/sql/streaming.py"
  - "/python/pyspark/sql/tests/test_streaming.py"
  - "*streaming.R"
PYTHON:
  - "/bin/pyspark*"
  - "python/"
R:
  - "r/"
  - "R/"
  - "/bin/sparkR*"
YARN:
  - "/resource-managers/yarn/"
MESOS:
  - "/resource-managers/mesos/"
  - "/sbin/*mesos*.sh"
KUBERNETES:
  - "/resource-managers/kubernetes/"
WINDOWS:
  - "*.cmd"
  - "/R/pkg/tests/fulltests/test_Windows.R"
WEB UI:
  - "ui/"
  - "UI.scala"
DEPLOY:
  - "/sbin/"
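The header comment notes that these patterns follow the .gitignore spec, so git itself is a handy way to experiment with how a rule matches a changed path. A rough scratch test with hypothetical paths (the bot's 'ignore' package may differ from git on edge cases such as re-including files under an excluded directory):

# hypothetical scratch repo just to exercise the CORE patterns
mkdir /tmp/label-scratch && cd /tmp/label-scratch && git init -q
printf '%s\n' '/core/' '!UI.scala' '!ui/' > .gitignore
git check-ignore -v core/src/main/scala/org/apache/spark/SparkContext.scala   # expect a match on /core/
git check-ignore -v -n core/src/main/scala/org/apache/spark/ui/SparkUI.scala  # see whether !ui/ wins here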
20 changes: 10 additions & 10 deletions .github/workflows/master.yml
@@ -103,12 +103,12 @@ jobs:
      - uses: actions/setup-java@v1
        with:
          java-version: '11'
      - name: install R
      - uses: r-lib/actions/setup-r@v1
        with:
          r-version: '3.6.2'
      - name: Install lib
        run: |
          echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' | sudo tee -a /etc/apt/sources.list
          curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
          sudo apt-get update
          sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
          sudo apt-get install -y libcurl4-openssl-dev
      - name: install R packages
        run: |
          sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
@@ -139,12 +139,12 @@ jobs:
      - uses: actions/setup-ruby@v1
        with:
          ruby-version: '2.7'
      - name: Install R
      - uses: r-lib/actions/setup-r@v1
        with:
          r-version: '3.6.2'
      - name: Install lib and pandoc
        run: |
          echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' | sudo tee -a /etc/apt/sources.list
          curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
          sudo apt-get update
          sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev pandoc
          sudo apt-get install -y libcurl4-openssl-dev pandoc
      - name: Install packages
        run: |
          pip install sphinx mkdocs numpy
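The net effect of both hunks: R itself now comes from the version-pinned r-lib/actions/setup-r action instead of the CRAN apt repository, so the apt step shrinks to native build dependencies only. A rough local equivalent of the trimmed steps, assuming an Ubuntu host with R 3.6.2 already on PATH:

# setup-r handles R in CI; locally you would install R yourself first
sudo apt-get update
sudo apt-get install -y libcurl4-openssl-dev pandoc   # pandoc is only needed for the docs job
Rscript -e 'R.version.string'                          # CI pins 3.6.2 via setup-r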
2 changes: 0 additions & 2 deletions .gitignore
@@ -18,8 +18,6 @@
.idea_modules/
.project
.pydevproject
.python-version
.ruby-version
.scala_dependencies
.settings
/lib/
2 changes: 1 addition & 1 deletion R/create-docs.sh
@@ -49,7 +49,7 @@ pushd "$FWDIR" > /dev/null
mkdir -p pkg/html
pushd pkg/html

"$R_SCRIPT_PATH/Rscript" -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
"$R_SCRIPT_PATH/Rscript" -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); knitr::knit_rd("SparkR", links = tools::findHTMLlinks(file.path(libDir, "SparkR")))'

popd

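The rewritten one-liner calls knitr::knit_rd by namespace (so knitr no longer has to be attached with library()) and builds the path with file.path, the idiomatic R path constructor. A scratch comparison of the two path idioms — both should print the same string here:

# quick check that file.path() and the old paste() idiom agree for this case
Rscript -e 'libDir <- "../../lib"; cat(file.path(libDir, "SparkR"), paste(libDir, "SparkR", sep="/"), sep="\n")'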
2 changes: 1 addition & 1 deletion R/create-rd.sh
@@ -34,4 +34,4 @@ pushd "$FWDIR" > /dev/null
. "$FWDIR/find-r.sh"

# Generate Rd files if devtools is installed
"$R_SCRIPT_PATH/Rscript" -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
"$R_SCRIPT_PATH/Rscript" -e ' if(requireNamespace("devtools", quietly=TRUE)) { setwd("'$FWDIR'"); devtools::document(pkg="./pkg", roclets="rd") }'
2 changes: 1 addition & 1 deletion R/pkg/DESCRIPTION
@@ -23,7 +23,7 @@ Suggests:
    testthat,
    e1071,
    survival,
    arrow
    arrow (>= 0.15.1)
Collate:
    'schema.R'
    'generics.R'
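The (>= 0.15.1) floor documents the minimum arrow version SparkR expects; Suggests entries are not enforced at install time, so a quick way to see what a given machine actually has:

# hedged runtime check mirroring the DESCRIPTION constraint
Rscript -e 'if (requireNamespace("arrow", quietly = TRUE)) print(packageVersion("arrow")) else message("arrow not installed")'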
12 changes: 11 additions & 1 deletion R/pkg/NAMESPACE
@@ -28,6 +28,7 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u

# S3 methods exported
export("sparkR.session")
export("sparkR.init")
export("sparkR.session.stop")
export("sparkR.stop")
export("sparkR.conf")
@@ -41,6 +41,9 @@ export("sparkR.callJStatic")

export("install.spark")

export("sparkRSQL.init",
"sparkRHive.init")

# MLlib integration
exportMethods("glm",
"spark.glm",
Expand Down Expand Up @@ -68,7 +72,10 @@ exportMethods("glm",
"spark.freqItemsets",
"spark.associationRules",
"spark.findFrequentSequentialPatterns",
"spark.assignClusters")
"spark.assignClusters",
"spark.fmClassifier",
"spark.lm",
"spark.fmRegressor")

# Job group lifecycle management methods
export("setJobGroup",
@@ -148,6 +155,7 @@ exportMethods("arrange",
              "printSchema",
              "randomSplit",
              "rbind",
              "registerTempTable",
              "rename",
              "repartition",
              "repartitionByRange",
@@ -431,8 +439,10 @@ export("as.DataFrame",
       "cacheTable",
       "clearCache",
       "createDataFrame",
       "createExternalTable",
       "createTable",
       "currentDatabase",
       "dropTempTable",
       "dropTempView",
       "listColumns",
       "listDatabases",
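The restored export()s (sparkR.init, sparkRSQL.init, sparkRHive.init, registerTempTable, createExternalTable, dropTempTable) are long-deprecated SparkR 1.x-era entry points. A minimal sketch of the classic flow they re-enable — hypothetical, assuming SparkR is installed and SPARK_HOME is set:

# 1.x-style bootstrap; each call emits a deprecation warning on modern Spark
Rscript -e 'library(SparkR)
sc <- sparkR.init(master = "local[1]")   # deprecated: prefer sparkR.session()
sqlContext <- sparkRSQL.init(sc)         # likewise deprecated
df <- createDataFrame(faithful)
registerTempTable(df, "faithful")        # old name for createOrReplaceTempView()
sparkR.session.stop()'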
