Merge branch 'master' into SPARK-25696
httfighter committed Dec 5, 2018
2 parents 4c4674e + 7e3eb3c commit 8cc05a5
Showing 1,362 changed files with 52,893 additions and 43,057 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -77,7 +77,6 @@ target/
unit-tests.log
work/
docs/.jekyll-metadata
*.crc

# For Hive
TempStatsStore/
2 changes: 1 addition & 1 deletion R/WINDOWS.md
@@ -3,7 +3,7 @@
To build SparkR on Windows, the following steps are required

1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
include Rtools and R in `PATH`.
include Rtools and R in `PATH`. Note that support for R prior to version 3.4 is deprecated as of Spark 3.0.0.

2. Install
[JDK8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) and set
2 changes: 1 addition & 1 deletion R/pkg/DESCRIPTION
@@ -15,7 +15,7 @@ URL: http://www.apache.org/ http://spark.apache.org/
BugReports: http://spark.apache.org/contributing.html
SystemRequirements: Java (== 8)
Depends:
R (>= 3.0),
R (>= 3.1),
methods
Suggests:
knitr,
22 changes: 10 additions & 12 deletions R/pkg/NAMESPACE
@@ -28,9 +28,8 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u

# S3 methods exported
export("sparkR.session")
export("sparkR.init")
export("sparkR.stop")
export("sparkR.session.stop")
export("sparkR.stop")
export("sparkR.conf")
export("sparkR.version")
export("sparkR.uiWebUrl")
@@ -42,9 +41,6 @@ export("sparkR.callJStatic")

export("install.spark")

export("sparkRSQL.init",
"sparkRHive.init")

# MLlib integration
exportMethods("glm",
"spark.glm",
@@ -70,7 +66,8 @@ exportMethods("glm",
"spark.svmLinear",
"spark.fpGrowth",
"spark.freqItemsets",
"spark.associationRules")
"spark.associationRules",
"spark.findFrequentSequentialPatterns")

# Job group lifecycle management methods
export("setJobGroup",
@@ -150,15 +147,13 @@ exportMethods("arrange",
"printSchema",
"randomSplit",
"rbind",
"registerTempTable",
"rename",
"repartition",
"repartitionByRange",
"rollup",
"sample",
"sample_frac",
"sampleBy",
"saveAsParquetFile",
"saveAsTable",
"saveDF",
"schema",
@@ -200,6 +195,7 @@ exportMethods("%<=>%",
"acos",
"add_months",
"alias",
"approx_count_distinct",
"approxCountDistinct",
"approxQuantile",
"array_contains",
@@ -258,6 +254,7 @@ exportMethods("%<=>%",
"dayofweek",
"dayofyear",
"decode",
"degrees",
"dense_rank",
"desc",
"element_at",
@@ -274,6 +271,7 @@ exportMethods("%<=>%",
"floor",
"format_number",
"format_string",
"from_csv",
"from_json",
"from_unixtime",
"from_utc_timestamp",
@@ -339,6 +337,7 @@ exportMethods("%<=>%",
"posexplode",
"posexplode_outer",
"quarter",
"radians",
"rand",
"randn",
"rank",
@@ -352,6 +351,8 @@ exportMethods("%<=>%",
"row_number",
"rpad",
"rtrim",
"schema_of_csv",
"schema_of_json",
"second",
"sha1",
"sha2",
@@ -385,6 +386,7 @@ exportMethods("%<=>%",
"tanh",
"toDegrees",
"toRadians",
"to_csv",
"to_date",
"to_json",
"to_timestamp",
@@ -413,18 +415,14 @@ export("as.DataFrame",
"cacheTable",
"clearCache",
"createDataFrame",
"createExternalTable",
"createTable",
"currentDatabase",
"dropTempTable",
"dropTempView",
"jsonFile",
"listColumns",
"listDatabases",
"listFunctions",
"listTables",
"loadDF",
"parquetFile",
"read.df",
"read.jdbc",
"read.json",
104 changes: 55 additions & 49 deletions R/pkg/R/DataFrame.R
@@ -226,7 +226,9 @@ setMethod("showDF",

#' show
#'
#' Print class and type information of a Spark object.
#' If eager evaluation is enabled and the Spark object is a SparkDataFrame, evaluate the
#' SparkDataFrame and print top rows of the SparkDataFrame, otherwise, print the class
#' and type information of the Spark object.
#'
#' @param object a Spark object. Can be a SparkDataFrame, Column, GroupedData, WindowSpec.
#'
@@ -244,11 +246,33 @@ setMethod("showDF",
#' @note show(SparkDataFrame) since 1.4.0
setMethod("show", "SparkDataFrame",
function(object) {
cols <- lapply(dtypes(object), function(l) {
paste(l, collapse = ":")
})
s <- paste(cols, collapse = ", ")
cat(paste(class(object), "[", s, "]\n", sep = ""))
allConf <- sparkR.conf()
prop <- allConf[["spark.sql.repl.eagerEval.enabled"]]
if (!is.null(prop) && identical(prop, "true")) {
argsList <- list()
argsList$x <- object
prop <- allConf[["spark.sql.repl.eagerEval.maxNumRows"]]
if (!is.null(prop)) {
numRows <- as.integer(prop)
if (numRows > 0) {
argsList$numRows <- numRows
}
}
prop <- allConf[["spark.sql.repl.eagerEval.truncate"]]
if (!is.null(prop)) {
truncate <- as.integer(prop)
if (truncate > 0) {
argsList$truncate <- truncate
}
}
do.call(showDF, argsList)
} else {
cols <- lapply(dtypes(object), function(l) {
paste(l, collapse = ":")
})
s <- paste(cols, collapse = ", ")
cat(paste(class(object), "[", s, "]\n", sep = ""))
}
})
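Side note, not part of the diff: a minimal usage sketch of the eager-evaluation branch added to show() above, as seen from a SparkR REPL. The spark.sql.repl.eagerEval.* config names are the ones read by the new code; the sparkConfig argument of sparkR.session and the built-in faithful dataset are assumed here purely for illustration.

library(SparkR)

# Enable eager evaluation so that printing a SparkDataFrame shows its top rows
# instead of only its class and schema.
sparkR.session(sparkConfig = list(
  spark.sql.repl.eagerEval.enabled = "true",
  spark.sql.repl.eagerEval.maxNumRows = "10",  # passed to showDF as numRows
  spark.sql.repl.eagerEval.truncate = "20"     # passed to showDF as truncate
))

df <- createDataFrame(faithful)
df   # eager evaluation on: prints up to 10 rows, values truncated to 20 characters
     # eager evaluation off: prints "SparkDataFrame[eruptions:double, waiting:double]"

sparkR.session.stop()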

#' DataTypes
@@ -497,32 +521,6 @@ setMethod("createOrReplaceTempView",
invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
})

#' (Deprecated) Register Temporary Table
#'
#' Registers a SparkDataFrame as a Temporary Table in the SparkSession
#' @param x A SparkDataFrame
#' @param tableName A character vector containing the name of the table
#'
#' @seealso \link{createOrReplaceTempView}
#' @rdname registerTempTable-deprecated
#' @name registerTempTable
#' @aliases registerTempTable,SparkDataFrame,character-method
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' registerTempTable(df, "json_df")
#' new_df <- sql("SELECT * FROM json_df")
#'}
#' @note registerTempTable since 1.4.0
setMethod("registerTempTable",
signature(x = "SparkDataFrame", tableName = "character"),
function(x, tableName) {
.Deprecated("createOrReplaceTempView")
invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName))
})

#' insertInto
#'
#' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
@@ -768,6 +766,13 @@ setMethod("repartition",
#' \item{2.} {Return a new SparkDataFrame range partitioned by the given column(s),
#' using \code{spark.sql.shuffle.partitions} as number of partitions.}
#'}
#' At least one partition-by expression must be specified.
#' When no explicit sort order is specified, "ascending nulls first" is assumed.
#'
#' Note that due to performance reasons this method uses sampling to estimate the ranges.
#' Hence, the output may not be consistent, since sampling can return different values.
#' The sample size can be controlled by the config
#' \code{spark.sql.execution.rangeExchange.sampleSizePerPartition}.
#'
#' @param x a SparkDataFrame.
#' @param numPartitions the number of partitions to use.
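
Side note, not part of the diff: a usage sketch for repartitionByRange as documented above. The column-based call form (col = df$...) and getNumPartitions are assumed from the surrounding SparkR API; exact row placement can differ between runs because the range boundaries are estimated by sampling.

library(SparkR)
sparkR.session()

df <- createDataFrame(mtcars)

# Range partition by one column: rows with nearby mpg values land in the
# same partition, ordered "ascending nulls first" by default.
byRange <- repartitionByRange(df, col = df$mpg)

# Or request an explicit number of partitions.
byRange4 <- repartitionByRange(df, 4L, col = df$mpg)
getNumPartitions(byRange4)   # 4

sparkR.session.stop()
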
@@ -822,7 +827,6 @@ setMethod("repartitionByRange",
#' toJSON
#'
#' Converts a SparkDataFrame into a SparkDataFrame of JSON string.
#'
#' Each row is turned into a JSON document with columns as different fields.
#' The returned SparkDataFrame has a single character column with the name \code{value}
#'
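
Side note, not part of the diff: a short sketch of the toJSON behaviour described above, assuming toJSON is exported for SparkDataFrame in this version.

library(SparkR)
sparkR.session()

df <- createDataFrame(data.frame(name = c("Andy", "Justin"), age = c(30L, 19L)))

# Each row becomes one JSON document; the result is a SparkDataFrame with a
# single string column named "value", e.g. {"name":"Andy","age":30}.
jsonDF <- toJSON(df)
showDF(jsonDF, truncate = FALSE)

sparkR.session.stop()
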
@@ -932,7 +936,6 @@ setMethod("write.orc",
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' write.parquet(df, "/tmp/sparkr-tmp1/")
#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/")
#'}
#' @note write.parquet since 1.6.0
setMethod("write.parquet",
@@ -943,17 +946,6 @@ setMethod("write.parquet",
invisible(handledCallJMethod(write, "parquet", path))
})

#' @rdname write.parquet
#' @name saveAsParquetFile
#' @aliases saveAsParquetFile,SparkDataFrame,character-method
#' @note saveAsParquetFile since 1.4.0
setMethod("saveAsParquetFile",
signature(x = "SparkDataFrame", path = "character"),
function(x, path) {
.Deprecated("write.parquet")
write.parquet(x, path)
})

#' Save the content of SparkDataFrame in a text file at the specified path.
#'
#' Save the content of the SparkDataFrame in a text file at the specified path.
@@ -2738,15 +2730,29 @@ setMethod("union",
dataFrame(unioned)
})

#' unionAll is deprecated - use union instead
#' @rdname union
#' @name unionAll
#' Return a new SparkDataFrame containing the union of rows.
#'
#' This is an alias for \code{union}.
#'
#' @param x a SparkDataFrame.
#' @param y a SparkDataFrame.
#' @return A SparkDataFrame containing the result of the unionAll operation.
#' @family SparkDataFrame functions
#' @aliases unionAll,SparkDataFrame,SparkDataFrame-method
#' @rdname unionAll
#' @name unionAll
#' @seealso \link{union}
#' @examples
#'\dontrun{
#' sparkR.session()
#' df1 <- read.json(path)
#' df2 <- read.json(path2)
#' unionAllDF <- unionAll(df1, df2)
#' }
#' @note unionAll since 1.4.0
setMethod("unionAll",
signature(x = "SparkDataFrame", y = "SparkDataFrame"),
function(x, y) {
.Deprecated("union")
union(x, y)
})

