[SPARK-15490][R][DOC] SparkR 2.0 QA: New R APIs and API docs for non-MLib changes

## What changes were proposed in this pull request?

R docs changes covering typos, formatting, and layout.

## How was this patch tested?

Tested locally.

Author: Kai Jiang <jiangkai@gmail.com>

Closes #13394 from vectorijk/spark-15490.
vectorijk authored and jkbradley committed Jun 17, 2016
1 parent 63470af commit 5fd20b6
Showing 10 changed files with 123 additions and 88 deletions.
91 changes: 51 additions & 40 deletions R/pkg/R/DataFrame.R
@@ -23,9 +23,11 @@ NULL
setOldClass("jobj")
setOldClass("structType")

#' @title S4 class that represents a SparkDataFrame
#' @description DataFrames can be created using functions like \link{createDataFrame},
#' \link{read.json}, \link{table} etc.
#' S4 class that represents a SparkDataFrame
#'
#' DataFrames can be created using functions like \link{createDataFrame},
#' \link{read.json}, \link{table} etc.
#'
#' @family SparkDataFrame functions
#' @rdname SparkDataFrame
#' @docType class
@@ -629,8 +631,6 @@ setMethod("repartition",
#'
#' @param x A SparkDataFrame
#' @return A StringRRDD of JSON objects
#' @family SparkDataFrame functions
#' @rdname tojson
#' @noRd
#' @examples
#'\dontrun{
@@ -648,7 +648,7 @@ setMethod("toJSON",
RDD(jrdd, serializedMode = "string")
})

#' write.json
#' Save the contents of SparkDataFrame as a JSON file
#'
#' Save the contents of a SparkDataFrame as a JSON file (one object per line). Files written out
#' with this method can be read back in as a SparkDataFrame using read.json().
@@ -675,7 +675,7 @@ setMethod("write.json",
invisible(callJMethod(write, "json", path))
})
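For illustration, a minimal sketch of the JSON round trip described above; the data and output path are hypothetical, and an active Spark session is assumed:

df <- createDataFrame(faithful)          # any SparkDataFrame will do
write.json(df, "/tmp/faithful_json")     # one JSON object per line
df2 <- read.json("/tmp/faithful_json")   # read the files back as a SparkDataFrame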

#' write.parquet
#' Save the contents of SparkDataFrame as a Parquet file, preserving the schema.
#'
#' Save the contents of a SparkDataFrame as a Parquet file, preserving the schema. Files written out
#' with this method can be read back in as a SparkDataFrame using read.parquet().
@@ -713,9 +713,9 @@ setMethod("saveAsParquetFile",
write.parquet(x, path)
})
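Similarly, a hedged sketch of the Parquet round trip (df as in the sketch above, or any SparkDataFrame; the path is a placeholder):

write.parquet(df, "/tmp/people_parquet")
df2 <- read.parquet("/tmp/people_parquet")   # schema is preserved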

#' write.text
#' Save the content of SparkDataFrame in a text file at the specified path.
#'
#' Saves the content of the SparkDataFrame in a text file at the specified path.
#' Save the content of the SparkDataFrame in a text file at the specified path.
#' The SparkDataFrame must have only one column of string type with the name "value".
#' Each row becomes a new line in the output file.
#'
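A hedged sketch of the text round trip; the paths are placeholders, and read.text is assumed to yield the required single string column named "value":

textDf <- read.text("/tmp/input.txt")    # one string column named "value"
write.text(textDf, "/tmp/output_text")   # each row becomes one output line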
@@ -820,8 +820,6 @@ setMethod("sample_frac",
sample(x, withReplacement, fraction, seed)
})

#' nrow
#'
#' Returns the number of rows in a SparkDataFrame
#'
#' @param x A SparkDataFrame
@@ -874,6 +872,8 @@ setMethod("ncol",
length(columns(x))
})

#' Returns the dimensions of SparkDataFrame
#'
#' Returns the dimensions (number of rows and columns) of a SparkDataFrame
#' @param x a SparkDataFrame
#'
@@ -2012,8 +2012,9 @@ setMethod("join",
dataFrame(sdf)
})

#' Merges two data frames
#'
#' @name merge
#' @title Merges two data frames
#' @param x the first data frame to be joined
#' @param y the second data frame to be joined
#' @param by a character vector specifying the join columns. If by is not
@@ -2127,7 +2128,6 @@ setMethod("merge",
joinRes
})
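A quick sketch of merge on a shared key; df1, df2 and the "name" column are hypothetical:

joined <- merge(df1, df2, by = "name")   # intersected columns get suffixed aliases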

#'
#' Creates a list of columns by replacing the intersected ones with aliases.
#' The name of the alias column is formed by concatanating the original column name and a suffix.
#'
@@ -2182,8 +2182,9 @@ setMethod("unionAll",
dataFrame(unioned)
})

#' @title Union two or more SparkDataFrames
#' @description Returns a new SparkDataFrame containing rows of all parameters.
#' Union two or more SparkDataFrames
#'
#' Returns a new SparkDataFrame containing rows of all parameters.
#'
#' @rdname rbind
#' @name rbind
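As a sketch, rows of two SparkDataFrames with matching schemas (hypothetical df1/df2) can be stacked with:

combined <- rbind(df1, df2)   # equivalent to unionAll for two inputs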
@@ -2254,20 +2255,22 @@ setMethod("except",
dataFrame(excepted)
})

#' Save the contents of the SparkDataFrame to a data source
#' Save the contents of SparkDataFrame to a data source.
#'
#' The data source is specified by the `source` and a set of options (...).
#' If `source` is not specified, the default data source configured by
#' spark.sql.sources.default will be used.
#'
#' Additionally, mode is used to specify the behavior of the save operation when
#' data already exists in the data source. There are four modes: \cr
#' append: Contents of this SparkDataFrame are expected to be appended to existing data. \cr
#' overwrite: Existing data is expected to be overwritten by the contents of this
#' SparkDataFrame. \cr
#' error: An exception is expected to be thrown. \cr
#' ignore: The save operation is expected to not save the contents of the SparkDataFrame
#' and to not change the existing data. \cr
#' Additionally, mode is used to specify the behavior of the save operation when data already
#' exists in the data source. There are four modes:
#' \itemize{
#' \item append: Contents of this SparkDataFrame are expected to be appended to existing data.
#' \item overwrite: Existing data is expected to be overwritten by the contents of this
#' SparkDataFrame.
#' \item error: An exception is expected to be thrown.
#' \item ignore: The save operation is expected to not save the contents of the SparkDataFrame
#' and to not change the existing data.
#' }
#'
#' @param df A SparkDataFrame
#' @param path A name for the table
@@ -2315,8 +2318,6 @@ setMethod("saveDF",
write.df(df, path, source, mode, ...)
})
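For illustration, a hedged sketch of write.df with an explicit save mode; the source and path are placeholders:

write.df(df, path = "/tmp/people_parquet", source = "parquet", mode = "overwrite")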

#' saveAsTable
#'
#' Save the contents of the SparkDataFrame to a data source as a table
#'
#' The data source is specified by the `source` and a set of options (...).
@@ -2543,11 +2544,12 @@ setMethod("fillna",
dataFrame(sdf)
})

#' Download data from a SparkDataFrame into a data.frame
#'
#' This function downloads the contents of a SparkDataFrame into an R's data.frame.
#' Since data.frames are held in memory, ensure that you have enough memory
#' in your system to accommodate the contents.
#'
#' @title Download data from a SparkDataFrame into a data.frame
#' @param x a SparkDataFrame
#' @return a data.frame
#' @family SparkDataFrame functions
@@ -2563,13 +2565,14 @@ setMethod("as.data.frame",
as.data.frame(collect(x), row.names, optional, ...)
})
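A one-line sketch of pulling a (small) SparkDataFrame to the driver; df is hypothetical:

local_df <- as.data.frame(df)   # same effect as as.data.frame(collect(df))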

#' Attach SparkDataFrame to R search path
#'
#' The specified SparkDataFrame is attached to the R search path. This means that
#' the SparkDataFrame is searched by R when evaluating a variable, so columns in
#' the SparkDataFrame can be accessed by simply giving their names.
#'
#' @family SparkDataFrame functions
#' @rdname attach
#' @title Attach SparkDataFrame to R search path
#' @param what (SparkDataFrame) The SparkDataFrame to attach
#' @param pos (integer) Specify position in search() where to attach.
#' @param name (character) Name to use for the attached SparkDataFrame. Names
@@ -2589,14 +2592,16 @@ setMethod("attach",
attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
})
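A hedged sketch of attach; df and its age column are hypothetical:

attach(df)
head(select(df, age))   # `age` now resolves to df$age via the search path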

#' Evaluate a R expression in an environment constructed from a SparkDataFrame
#'
#' Evaluate a R expression in an environment constructed from a SparkDataFrame
#' with() allows access to columns of a SparkDataFrame by simply referring to
#' their name. It appends every column of a SparkDataFrame into a new
#' environment. Then, the given expression is evaluated in this new
#' environment.
#'
#' @rdname with
#' @title Evaluate a R expression in an environment constructed from a SparkDataFrame
#' @family SparkDataFrame functions
#' @param data (SparkDataFrame) SparkDataFrame to use for constructing an environment.
#' @param expr (expression) Expression to evaluate.
#' @param ... arguments to be passed to future methods.
@@ -2612,10 +2617,12 @@ setMethod("with",
eval(substitute(expr), envir = newEnv, enclos = newEnv)
})
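And a hedged sketch of with(), which scopes the column lookup to a single expression (df and age again hypothetical):

with(df, count(filter(df, age > 21)))   # `age` is found in the environment built from df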

#' Compactly display the structure of a dataset
#'
#' Display the structure of a SparkDataFrame, including column names, column types, as well as a
#' a small sample of rows.
#'
#' @name str
#' @title Compactly display the structure of a dataset
#' @rdname str
#' @family SparkDataFrame functions
#' @param object a SparkDataFrame
@@ -2728,10 +2735,11 @@ setMethod("drop",
base::drop(x)
})

#' Compute histogram statistics for given column
#'
#' This function computes a histogram for a given SparkR Column.
#'
#' @name histogram
#' @title Histogram
#' @param nbins the number of bins (optional). Default value is 10.
#' @param df the SparkDataFrame containing the Column to build the histogram from.
#' @param colname the name of the column to build the histogram from.
@@ -2847,18 +2855,21 @@ setMethod("histogram",
return(histStats)
})
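For illustration, a hedged sketch of histogram; df and its age column are hypothetical:

h <- histogram(df, df$age, nbins = 12)   # returns bin statistics as an R data.frame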

#' Saves the content of the SparkDataFrame to an external database table via JDBC
#' Save the content of SparkDataFrame to an external database table via JDBC.
#'
#' Additional JDBC database connection properties can be set (...)
#' Save the content of the SparkDataFrame to an external database table via JDBC. Additional JDBC
#' database connection properties can be set (...)
#'
#' Also, mode is used to specify the behavior of the save operation when
#' data already exists in the data source. There are four modes: \cr
#' append: Contents of this SparkDataFrame are expected to be appended to existing data. \cr
#' overwrite: Existing data is expected to be overwritten by the contents of this
#' SparkDataFrame. \cr
#' error: An exception is expected to be thrown. \cr
#' ignore: The save operation is expected to not save the contents of the SparkDataFrame
#' and to not change the existing data. \cr
#' data already exists in the data source. There are four modes:
#' \itemize{
#' \item append: Contents of this SparkDataFrame are expected to be appended to existing data.
#' \item overwrite: Existing data is expected to be overwritten by the contents of this
#' SparkDataFrame.
#' \item error: An exception is expected to be thrown.
#' \item ignore: The save operation is expected to not save the contents of the SparkDataFrame
#' and to not change the existing data.
#' }
#'
#' @param x A SparkDataFrame
#' @param url JDBC database url of the form `jdbc:subprotocol:subname`
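A hedged sketch of write.jdbc; the URL, table name, and credentials are placeholders:

write.jdbc(df, "jdbc:postgresql://localhost/test", tableName = "people",
           mode = "append", user = "username", password = "password")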
14 changes: 8 additions & 6 deletions R/pkg/R/RDD.R
@@ -19,9 +19,11 @@

setOldClass("jobj")

#' @title S4 class that represents an RDD
#' @description RDD can be created using functions like
#' S4 class that represents an RDD
#'
#' RDD can be created using functions like
#' \code{parallelize}, \code{textFile} etc.
#'
#' @rdname RDD
#' @seealso parallelize, textFile
#' @slot env An R environment that stores bookkeeping states of the RDD
@@ -497,9 +499,9 @@ setMethod("map",
lapply(X, FUN)
})

#' Flatten results after apply a function to all elements
#' Flatten results after applying a function to all elements
#'
#' This function return a new RDD by first applying a function to all
#' This function returns a new RDD by first applying a function to all
#' elements of this RDD, and then flattening the results.
#'
#' @param X The RDD to apply the transformation.
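As a conceptual sketch of the map-then-flatten behaviour in plain R (not the SparkR API itself):

parts <- lapply(list(1, 2, 3), function(x) list(x, x * 2))   # map step
flattened <- unlist(parts, recursive = FALSE)                # flatten step: 1, 2, 2, 4, 3, 6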
@@ -713,7 +715,7 @@ setMethod("sumRDD",
reduce(x, "+")
})

#' Applies a function to all elements in an RDD, and force evaluation.
#' Applies a function to all elements in an RDD, and forces evaluation.
#'
#' @param x The RDD to apply the function
#' @param func The function to be applied.
@@ -737,7 +739,7 @@ setMethod("foreach",
invisible(collect(mapPartitions(x, partition.func)))
})

#' Applies a function to each partition in an RDD, and force evaluation.
#' Applies a function to each partition in an RDD, and forces evaluation.
#'
#' @examples
#'\dontrun{
7 changes: 4 additions & 3 deletions R/pkg/R/WindowSpec.R
@@ -20,9 +20,10 @@
#' @include generics.R jobj.R column.R
NULL

#' @title S4 class that represents a WindowSpec
#' @description WindowSpec can be created by using window.partitionBy()
#' or window.orderBy()
#' S4 class that represents a WindowSpec
#'
#' WindowSpec can be created by using window.partitionBy() or window.orderBy()
#'
#' @rdname WindowSpec
#' @seealso \link{window.partitionBy}, \link{window.orderBy}
#'
8 changes: 5 additions & 3 deletions R/pkg/R/broadcast.R
@@ -23,9 +23,11 @@
.broadcastValues <- new.env()
.broadcastIdToName <- new.env()

# @title S4 class that represents a Broadcast variable
# @description Broadcast variables can be created using the broadcast
# function from a \code{SparkContext}.
# S4 class that represents a Broadcast variable
#
# Broadcast variables can be created using the broadcast
# function from a \code{SparkContext}.
#
# @rdname broadcast-class
# @seealso broadcast
#
6 changes: 4 additions & 2 deletions R/pkg/R/column.R
@@ -22,8 +22,10 @@ NULL

setOldClass("jobj")

#' @title S4 class that represents a SparkDataFrame column
#' @description The column class supports unary, binary operations on SparkDataFrame columns
#' S4 class that represents a SparkDataFrame column
#'
#' The column class supports unary, binary operations on SparkDataFrame columns
#'
#' @rdname column
#'
#' @slot jc reference to JVM SparkDataFrame column
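For illustration, a hedged sketch of column expressions; df and its columns are hypothetical:

older  <- df$age > 21    # a comparison yields another Column
bumped <- df$age + 1     # so does arithmetic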
