Merge commit '2c5d489679ba3814973680d65853877664bcd931' into SPARK-24497-recursive-sql
peter-toth committed Apr 14, 2020
2 parents 656995b + 2c5d489 commit 8205e61
Showing 734 changed files with 28,655 additions and 8,917 deletions.
29 changes: 29 additions & 0 deletions .asf.yaml
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# https://cwiki.apache.org/confluence/display/INFRA/.asf.yaml+features+for+git+repositories
---
github:
  description: "Apache Spark - A unified analytics engine for large-scale data processing"
  homepage: https://spark.apache.org/
  labels:
    - python
    - scala
    - r
    - java
    - big-data
    - jdbc
    - sql
    - spark
129 changes: 129 additions & 0 deletions .github/autolabeler.yml
@@ -0,0 +1,129 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Bot page: https://github.com/apps/probot-autolabeler
# The matching patterns follow the .gitignore spec.
# See: https://git-scm.com/docs/gitignore#_pattern_format
# Also, note that the plugin uses the 'ignore' package. See also
# https://github.com/kaelzhang/node-ignore
INFRA:
  - ".github/"
  - "appveyor.yml"
  - "/tools/"
  - "/dev/create-release/"
  - ".asf.yaml"
  - ".gitattributes"
  - ".gitignore"
  - "/dev/github_jira_sync.py"
  - "/dev/merge_spark_pr.py"
  - "/dev/run-tests-jenkins*"
BUILD:
  - "/dev/"
  - "!/dev/github_jira_sync.py"
  - "!/dev/merge_spark_pr.py"
  - "!/dev/run-tests-jenkins*"
  - "/build/"
  - "/project/"
  - "/assembly/"
  - "*pom.xml"
  - "/bin/docker-image-tool.sh"
  - "/bin/find-spark-home*"
  - "scalastyle-config.xml"
DOCS:
  - "docs/"
  - "/README.md"
  - "/CONTRIBUTING.md"
EXAMPLES:
  - "examples/"
  - "/bin/run-example*"
CORE:
  - "/core/"
  - "/common/kvstore/"
  - "/common/network-common/"
  - "/common/network-shuffle/"
  - "/python/pyspark/*.py"
  - "/python/pyspark/tests/*.py"
SPARK SUBMIT:
  - "/bin/spark-submit*"
SPARK SHELL:
  - "/repl/"
  - "/bin/spark-shell*"
SQL:
  - "sql/"
  - "/common/unsafe/"
  - "!/python/pyspark/sql/avro/"
  - "!/python/pyspark/sql/streaming.py"
  - "!/python/pyspark/sql/tests/test_streaming.py"
  - "/bin/spark-sql*"
  - "/bin/beeline*"
  - "/sbin/*thriftserver*.sh"
  - "*SQL*.R"
  - "DataFrame.R"
  - "WindowSpec.R"
  - "catalog.R"
  - "column.R"
  - "functions.R"
  - "group.R"
  - "schema.R"
  - "types.R"
AVRO:
  - "/external/avro/"
  - "/python/pyspark/sql/avro/"
DSTREAM:
  - "/streaming/"
  - "/data/streaming/"
  - "/external/flume*"
  - "/external/kinesis*"
  - "/external/kafka*"
  - "/python/pyspark/streaming/"
GRAPHX:
  - "/graphx/"
  - "/data/graphx/"
ML:
  - "ml/"
  - "*mllib_*.R"
MLLIB:
  - "spark/mllib/"
  - "/mllib-local/"
  - "/python/pyspark/mllib/"
STRUCTURED STREAMING:
  - "sql/**/streaming/"
  - "/external/kafka-0-10-sql/"
  - "/python/pyspark/sql/streaming.py"
  - "/python/pyspark/sql/tests/test_streaming.py"
  - "*streaming.R"
PYTHON:
  - "/bin/pyspark*"
  - "python/"
R:
  - "r/"
  - "R/"
  - "/bin/sparkR*"
YARN:
  - "/resource-managers/yarn/"
MESOS:
  - "/resource-managers/mesos/"
  - "/sbin/*mesos*.sh"
KUBERNETES:
  - "/resource-managers/kubernetes/"
WINDOWS:
  - "*.cmd"
  - "/R/pkg/tests/fulltests/test_Windows.R"
WEB UI:
  - "ui/"
DEPLOY:
  - "/sbin/"
2 changes: 0 additions & 2 deletions .gitignore
@@ -18,8 +18,6 @@
.idea_modules/
.project
.pydevproject
.python-version
.ruby-version
.scala_dependencies
.settings
/lib/
13 changes: 12 additions & 1 deletion R/pkg/NAMESPACE
@@ -28,6 +28,7 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u

# S3 methods exported
export("sparkR.session")
export("sparkR.init")
export("sparkR.session.stop")
export("sparkR.stop")
export("sparkR.conf")
@@ -41,6 +42,9 @@ export("sparkR.callJStatic")

export("install.spark")

export("sparkRSQL.init",
"sparkRHive.init")

# MLlib integration
exportMethods("glm",
"spark.glm",
@@ -68,7 +72,10 @@ exportMethods("glm",
"spark.freqItemsets",
"spark.associationRules",
"spark.findFrequentSequentialPatterns",
"spark.assignClusters")
"spark.assignClusters",
"spark.fmClassifier",
"spark.lm",
"spark.fmRegressor")

# Job group lifecycle management methods
export("setJobGroup",
@@ -148,6 +155,7 @@ exportMethods("arrange",
"printSchema",
"randomSplit",
"rbind",
"registerTempTable",
"rename",
"repartition",
"repartitionByRange",
@@ -345,6 +353,7 @@ exportMethods("%<=>%",
"over",
"overlay",
"percent_rank",
"percentile_approx",
"pmod",
"posexplode",
"posexplode_outer",
@@ -430,8 +439,10 @@ export("as.DataFrame",
"cacheTable",
"clearCache",
"createDataFrame",
"createExternalTable",
"createTable",
"currentDatabase",
"dropTempTable",
"dropTempView",
"listColumns",
"listDatabases",
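
The NAMESPACE additions above expose several new user-facing symbols. A minimal sketch of calling one of them, percentile_approx, with hypothetical data; the exact signature is assumed from the export and may differ in the released API:

library(SparkR)
sparkR.session()
df <- createDataFrame(data.frame(v = c(1, 2, 3, 4, 5)))
# Approximate median of column v via the newly exported percentile_approx.
head(select(df, percentile_approx(df$v, 0.5)))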
26 changes: 26 additions & 0 deletions R/pkg/R/DataFrame.R
@@ -521,6 +521,32 @@ setMethod("createOrReplaceTempView",
            invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
          })

#' (Deprecated) Register Temporary Table
#'
#' Registers a SparkDataFrame as a Temporary Table in the SparkSession
#' @param x A SparkDataFrame
#' @param tableName A character vector containing the name of the table
#'
#' @seealso \link{createOrReplaceTempView}
#' @rdname registerTempTable-deprecated
#' @name registerTempTable
#' @aliases registerTempTable,SparkDataFrame,character-method
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' registerTempTable(df, "json_df")
#' new_df <- sql("SELECT * FROM json_df")
#'}
#' @note registerTempTable since 1.4.0
setMethod("registerTempTable",
signature(x = "SparkDataFrame", tableName = "character"),
function(x, tableName) {
.Deprecated("createOrReplaceTempView")
invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName))
})

#' insertInto
#'
#' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
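
Since registerTempTable now merely warns and delegates to createOrReplaceTempView, migrating is a one-line change. A minimal sketch (hypothetical JSON path):

sparkR.session()
df <- read.json("path/to/file.json")
registerTempTable(df, "json_df")        # deprecated: emits a warning, then delegates
createOrReplaceTempView(df, "json_df")  # preferred, equivalent call
new_df <- sql("SELECT * FROM json_df")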
54 changes: 54 additions & 0 deletions R/pkg/R/catalog.R
@@ -17,6 +17,35 @@

# catalog.R: SparkSession catalog functions

#' (Deprecated) Create an external table
#'
#' Creates an external table based on the dataset in a data source and
#' returns a SparkDataFrame associated with the external table.
#'
#' The data source is specified by \code{source} and a set of options (...).
#' If \code{source} is not specified, the default data source configured by
#' "spark.sql.sources.default" will be used.
#'
#' @param tableName the name of the table.
#' @param path the path of files to load.
#' @param source the name of the external data source.
#' @param schema the schema of the data required for some data sources.
#' @param ... additional argument(s) passed to the method.
#' @return A SparkDataFrame.
#' @rdname createExternalTable-deprecated
#' @seealso \link{createTable}
#' @examples
#'\dontrun{
#' sparkR.session()
#' df <- createExternalTable("myjson", path="path/to/json", source="json", schema)
#' }
#' @name createExternalTable
#' @note createExternalTable since 1.4.0
createExternalTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ...) {
  .Deprecated("createTable", old = "createExternalTable")
  createTable(tableName, path, source, schema, ...)
}

#' Creates a table based on the dataset in a data source
#'
#' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with
@@ -130,6 +159,31 @@ clearCache <- function() {
  invisible(callJMethod(catalog, "clearCache"))
}

#' (Deprecated) Drop Temporary Table
#'
#' Drops the temporary table with the given table name in the catalog.
#' If the table has been cached/persisted before, it's also unpersisted.
#'
#' @param tableName The name of the SparkSQL table to be dropped.
#' @seealso \link{dropTempView}
#' @rdname dropTempTable-deprecated
#' @examples
#' \dontrun{
#' sparkR.session()
#' df <- read.df(path, "parquet")
#' createOrReplaceTempView(df, "table")
#' dropTempTable("table")
#' }
#' @name dropTempTable
#' @note dropTempTable since 1.4.0
dropTempTable <- function(tableName) {
  .Deprecated("dropTempView", old = "dropTempTable")
  if (class(tableName) != "character") {
    stop("tableName must be a string.")
  }
  dropTempView(tableName)
}

#' Drops the temporary view with the given view name in the catalog.
#'
#' Drops the temporary view with the given view name in the catalog.
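
As with registerTempTable, the deprecated catalog helpers forward to their replacements. A minimal sketch (hypothetical parquet path) mirroring the roxygen example above:

sparkR.session()
df <- read.df("path/to/data", "parquet")
createOrReplaceTempView(df, "tbl")
dropTempTable("tbl")    # deprecated: warns, validates the name, then calls dropTempView
# dropTempView("tbl")   # preferred, equivalent call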