From afe463c6584c23bb12e89315278b45ee456d6641 Mon Sep 17 00:00:00 2001
From: Sean Owen
Date: Wed, 12 Dec 2018 09:03:13 -0600
Subject: [PATCH] [SPARK-19827][R][FOLLOWUP] spark.ml R API for PIC

## What changes were proposed in this pull request?

Follow up style fixes to PIC in R; see #23072

## How was this patch tested?

Existing tests.
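For reference, a minimal sketch of the call style this patch standardizes on
(spaces around `=` in named arguments), mirroring the updated example file;
the `library(SparkR)` / `sparkR.session()` setup is assumed and not part of
this patch:

```r
library(SparkR)
sparkR.session()

# Toy affinity graph given as (src, dst, weight) edge triples.
df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
                           list(1L, 2L, 1.0), list(3L, 4L, 1.0),
                           list(4L, 0L, 0.1)),
                      schema = c("src", "dst", "weight"))

# Power iteration clustering: returns a SparkDataFrame with columns
# id (vertex) and cluster (assignment).
clusters <- spark.assignClusters(df, k = 2L, maxIter = 20L,
                                 initMode = "degree", weightCol = "weight")
showDF(arrange(clusters, clusters$id))
```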
Closes #23292 from srowen/SPARK-19827.2.

Authored-by: Sean Owen
Signed-off-by: Sean Owen
---
 R/pkg/R/mllib_clustering.R                        | 15 ++++++---------
 R/pkg/R/mllib_fpm.R                               |  4 ++--
 examples/src/main/r/ml/powerIterationClustering.R |  3 ++-
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 7d9dcebfe70d3..9b32b71d34fef 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -621,11 +621,10 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"),
 #'
 #' A scalable graph clustering algorithm. Users can call \code{spark.assignClusters} to
 #' return a cluster assignment for each input vertex.
-#'
-# Run the PIC algorithm and returns a cluster assignment for each input vertex.
+#' Run the PIC algorithm and returns a cluster assignment for each input vertex.
 #' @param data a SparkDataFrame.
 #' @param k the number of clusters to create.
-#' @param initMode the initialization algorithm.
+#' @param initMode the initialization algorithm; "random" or "degree"
 #' @param maxIter the maximum number of iterations.
 #' @param sourceCol the name of the input column for source vertex IDs.
 #' @param destinationCol the name of the input column for destination vertex IDs
@@ -633,18 +632,16 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"),
 #'                  we treat all instance weights as 1.0.
 #' @param ... additional argument(s) passed to the method.
 #' @return A dataset that contains columns of vertex id and the corresponding cluster for the id.
-#'         The schema of it will be:
-#'         \code{id: Long}
-#'         \code{cluster: Int}
+#'         The schema of it will be: \code{id: integer}, \code{cluster: integer}
 #' @rdname spark.powerIterationClustering
-#' @aliases assignClusters,PowerIterationClustering-method,SparkDataFrame-method
+#' @aliases spark.assignClusters,SparkDataFrame-method
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
 #'                            list(1L, 2L, 1.0), list(3L, 4L, 1.0),
 #'                            list(4L, 0L, 0.1)),
 #'                       schema = c("src", "dst", "weight"))
-#' clusters <- spark.assignClusters(df, initMode="degree", weightCol="weight")
+#' clusters <- spark.assignClusters(df, initMode = "degree", weightCol = "weight")
 #' showDF(clusters)
 #' }
 #' @note spark.assignClusters(SparkDataFrame) since 3.0.0
@@ -652,7 +649,7 @@ setMethod("spark.assignClusters", signature(data = "SparkDataFrame"),
           function(data, k = 2L, initMode = c("random", "degree"), maxIter = 20L,
                    sourceCol = "src", destinationCol = "dst", weightCol = NULL) {
-            if (!is.numeric(k) || k < 1) {
+            if (!is.integer(k) || k < 1) {
               stop("k should be a number with value >= 1.")
             }
             if (!is.integer(maxIter) || maxIter <= 0) {
               stop("maxIter should be a number with value > 0.")
diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R
index c248e9ec9be94..0cc7a16c302dc 100644
--- a/R/pkg/R/mllib_fpm.R
+++ b/R/pkg/R/mllib_fpm.R
@@ -183,8 +183,8 @@ setMethod("write.ml", signature(object = "FPGrowthModel", path = "character"),
 #' @return A complete set of frequent sequential patterns in the input sequences of itemsets.
 #'         The returned \code{SparkDataFrame} contains columns of sequence and corresponding
 #'         frequency. The schema of it will be:
-#'         \code{sequence: ArrayType(ArrayType(T))} (T is the item type)
-#'         \code{freq: Long}
+#'         \code{sequence: ArrayType(ArrayType(T))}, \code{freq: integer}
+#'         where T is the item type
 #' @rdname spark.prefixSpan
 #' @aliases findFrequentSequentialPatterns,PrefixSpan,SparkDataFrame-method
 #' @examples
diff --git a/examples/src/main/r/ml/powerIterationClustering.R b/examples/src/main/r/ml/powerIterationClustering.R
index ba43037106d14..3530d88e50509 100644
--- a/examples/src/main/r/ml/powerIterationClustering.R
+++ b/examples/src/main/r/ml/powerIterationClustering.R
@@ -30,7 +30,8 @@ df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
                            list(1L, 2L, 1.0), list(3L, 4L, 1.0),
                            list(4L, 0L, 0.1)),
                       schema = c("src", "dst", "weight"))
 # assign clusters
-clusters <- spark.assignClusters(df, k=2L, maxIter=20L, initMode="degree", weightCol="weight")
+clusters <- spark.assignClusters(df, k = 2L, maxIter = 20L,
+                                 initMode = "degree", weightCol = "weight")
 showDF(arrange(clusters, clusters$id))
 # $example off$