Commit c2b71a0

merged

bogdanrdc committed Jul 6, 2017
2 parents: 58c3022 + 565e7a8
Showing 248 changed files with 6,454 additions and 3,530 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -25,6 +25,7 @@ R-unit-tests.log
 R/unit-tests.out
 R/cran-check.out
 R/pkg/vignettes/sparkr-vignettes.html
+R/pkg/tests/fulltests/Rplots.pdf
 build/*.jar
 build/apache-maven*
 build/scala*
2 changes: 1 addition & 1 deletion LICENSE
@@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
 (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
 (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
-(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.4 - http://py4j.sourceforge.net/)
+(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.6 - http://py4j.sourceforge.net/)
 (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
 (BSD licence) sbt and sbt-launch-lib.bash
 (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)
38 changes: 26 additions & 12 deletions R/pkg/R/SQLContext.R
@@ -584,7 +584,7 @@ tableToDF <- function(tableName) {
 #'
 #' @param path The path of files to load
 #' @param source The name of external data source
-#' @param schema The data schema defined in structType
+#' @param schema The data schema defined in structType or a DDL-formatted string.
 #' @param na.strings Default string value for NA when source is "csv"
 #' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
@@ -600,6 +600,8 @@ tableToDF <- function(tableName) {
 #'                      structField("info", "map<string,double>"))
 #' df2 <- read.df(mapTypeJsonPath, "json", schema, multiLine = TRUE)
 #' df3 <- loadDF("data/test_table", "parquet", mergeSchema = "true")
+#' stringSchema <- "name STRING, info MAP<STRING, DOUBLE>"
+#' df4 <- read.df(mapTypeJsonPath, "json", stringSchema, multiLine = TRUE)
 #' }
 #' @name read.df
 #' @method read.df default
@@ -623,14 +625,19 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
   if (source == "csv" && is.null(options[["nullValue"]])) {
     options[["nullValue"]] <- na.strings
   }
+  read <- callJMethod(sparkSession, "read")
+  read <- callJMethod(read, "format", source)
   if (!is.null(schema)) {
-    stopifnot(class(schema) == "structType")
-    sdf <- handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
-                              source, schema$jobj, options)
-  } else {
-    sdf <- handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
-                              source, options)
+    if (class(schema) == "structType") {
+      read <- callJMethod(read, "schema", schema$jobj)
+    } else if (is.character(schema)) {
+      read <- callJMethod(read, "schema", schema)
+    } else {
+      stop("schema should be structType or character.")
+    }
   }
+  read <- callJMethod(read, "options", options)
+  sdf <- handledCallJMethod(read, "load")
   dataFrame(sdf)
 }
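For context on the read.df change above, here is a minimal sketch of the two schema forms it now accepts; the people.json path and its columns are illustrative, not part of this commit:

library(SparkR)
sparkR.session()

# Equivalent schemas: one built with structType(), one as a DDL-formatted string.
structSchema <- structType(structField("name", "string"),
                           structField("age", "double"))
stringSchema <- "name STRING, age DOUBLE"

# Both calls should yield the same SparkDataFrame (illustrative path).
df1 <- read.df("examples/src/main/resources/people.json", "json", structSchema)
df2 <- read.df("examples/src/main/resources/people.json", "json", stringSchema)
printSchema(df2)  # same schema either way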

@@ -717,8 +724,8 @@ read.jdbc <- function(url, tableName,
 #' "spark.sql.sources.default" will be used.
 #'
 #' @param source The name of external data source
-#' @param schema The data schema defined in structType, this is required for file-based streaming
-#' data source
+#' @param schema The data schema defined in structType or a DDL-formatted string, this is
+#' required for file-based streaming data source
 #' @param ... additional external data source specific named options, for instance \code{path} for
 #' file-based streaming data source
 #' @return SparkDataFrame
@@ -733,6 +740,8 @@ read.jdbc <- function(url, tableName,
 #' q <- write.stream(df, "text", path = "/home/user/out", checkpointLocation = "/home/user/cp")
 #'
 #' df <- read.stream("json", path = jsonDir, schema = schema, maxFilesPerTrigger = 1)
+#' stringSchema <- "name STRING, info MAP<STRING, DOUBLE>"
+#' df1 <- read.stream("json", path = jsonDir, schema = stringSchema, maxFilesPerTrigger = 1)
 #' }
 #' @name read.stream
 #' @note read.stream since 2.2.0
@@ -750,10 +759,15 @@ read.stream <- function(source = NULL, schema = NULL, ...) {
   read <- callJMethod(sparkSession, "readStream")
   read <- callJMethod(read, "format", source)
   if (!is.null(schema)) {
-    stopifnot(class(schema) == "structType")
-    read <- callJMethod(read, "schema", schema$jobj)
+    if (class(schema) == "structType") {
+      read <- callJMethod(read, "schema", schema$jobj)
+    } else if (is.character(schema)) {
+      read <- callJMethod(read, "schema", schema)
+    } else {
+      stop("schema should be structType or character.")
+    }
   }
   read <- callJMethod(read, "options", options)
   sdf <- handledCallJMethod(read, "load")
-  dataFrame(callJMethod(sdf, "toDF"))
+  dataFrame(sdf)
 }
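Similarly for the streaming path: a usage sketch mirroring the roxygen example added above, assuming jsonDir names a directory that will receive JSON files (hypothetical here):

library(SparkR)
sparkR.session()

jsonDir <- "/tmp/stream-in"  # hypothetical input directory
stringSchema <- "name STRING, info MAP<STRING, DOUBLE>"

# A DDL-formatted string is now accepted where only a structType worked before.
df <- read.stream("json", path = jsonDir, schema = stringSchema, maxFilesPerTrigger = 1)
isStreaming(df)  # TRUE for a streaming SparkDataFrame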
