From afe463c6584c23bb12e89315278b45ee456d6641 Mon Sep 17 00:00:00 2001
From: Sean Owen
Date: Wed, 12 Dec 2018 09:03:13 -0600
Subject: [PATCH] [SPARK-19827][R][FOLLOWUP] spark.ml R API for PIC

## What changes were proposed in this pull request?

Follow up style fixes to PIC in R; see #23072

## How was this patch tested?

Existing tests.
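For reference, a minimal sketch of the call style this patch standardizes on
(spaces around `=` in named arguments), mirroring the updated example file;
the `library(SparkR)` / `sparkR.session()` setup is assumed and not part of
this patch:

```r
library(SparkR)
sparkR.session()

# Toy affinity graph given as (src, dst, weight) edge triples.
df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
                           list(1L, 2L, 1.0), list(3L, 4L, 1.0),
                           list(4L, 0L, 0.1)),
                      schema = c("src", "dst", "weight"))

# Power iteration clustering: returns a SparkDataFrame with columns
# id (vertex) and cluster (assignment).
clusters <- spark.assignClusters(df, k = 2L, maxIter = 20L,
                                 initMode = "degree", weightCol = "weight")
showDF(arrange(clusters, clusters$id))
```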
Closes #23292 from srowen/SPARK-19827.2.

Authored-by: Sean Owen
Signed-off-by: Sean Owen
---
 R/pkg/R/mllib_clustering.R                        | 15 ++++++---------
 R/pkg/R/mllib_fpm.R                               |  4 ++--
 examples/src/main/r/ml/powerIterationClustering.R |  3 ++-
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 7d9dcebfe70d3..9b32b71d34fef 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -621,11 +621,10 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"),
 #'
 #' A scalable graph clustering algorithm. Users can call \code{spark.assignClusters} to
 #' return a cluster assignment for each input vertex.
-#'
-# Run the PIC algorithm and returns a cluster assignment for each input vertex.
+#' Run the PIC algorithm and returns a cluster assignment for each input vertex.
 #' @param data a SparkDataFrame.
 #' @param k the number of clusters to create.
-#' @param initMode the initialization algorithm.
+#' @param initMode the initialization algorithm; "random" or "degree"
 #' @param maxIter the maximum number of iterations.
 #' @param sourceCol the name of the input column for source vertex IDs.
 #' @param destinationCol the name of the input column for destination vertex IDs
@@ -633,18 +632,16 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"),
 #'                  we treat all instance weights as 1.0.
 #' @param ... additional argument(s) passed to the method.
 #' @return A dataset that contains columns of vertex id and the corresponding cluster for the id.
-#'         The schema of it will be:
-#'         \code{id: Long}
-#'         \code{cluster: Int}
+#'         The schema of it will be: \code{id: integer}, \code{cluster: integer}
 #' @rdname spark.powerIterationClustering
-#' @aliases assignClusters,PowerIterationClustering-method,SparkDataFrame-method
+#' @aliases spark.assignClusters,SparkDataFrame-method
 #' @examples
 #' \dontrun{
 #' df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
 #'                            list(1L, 2L, 1.0), list(3L, 4L, 1.0),
 #'                            list(4L, 0L, 0.1)),
 #'                       schema = c("src", "dst", "weight"))
-#' clusters <- spark.assignClusters(df, initMode="degree", weightCol="weight")
+#' clusters <- spark.assignClusters(df, initMode = "degree", weightCol = "weight")
 #' showDF(clusters)
 #' }
 #' @note spark.assignClusters(SparkDataFrame) since 3.0.0
@@ -652,7 +649,7 @@ setMethod("spark.assignClusters", signature(data = "SparkDataFrame"),
           function(data, k = 2L, initMode = c("random", "degree"), maxIter = 20L,
                    sourceCol = "src", destinationCol = "dst", weightCol = NULL) {
-            if (!is.numeric(k) || k < 1) {
+            if (!is.integer(k) || k < 1) {
               stop("k should be a number with value >= 1.")
             }
             if (!is.integer(maxIter) || maxIter <= 0) {
               stop("maxIter should be a number with value > 0.")
diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R
index c248e9ec9be94..0cc7a16c302dc 100644
--- a/R/pkg/R/mllib_fpm.R
+++ b/R/pkg/R/mllib_fpm.R
@@ -183,8 +183,8 @@ setMethod("write.ml", signature(object = "FPGrowthModel", path = "character"),
 #' @return A complete set of frequent sequential patterns in the input sequences of itemsets.
 #'         The returned \code{SparkDataFrame} contains columns of sequence and corresponding
 #'         frequency. The schema of it will be:
-#'         \code{sequence: ArrayType(ArrayType(T))} (T is the item type)
-#'         \code{freq: Long}
+#'         \code{sequence: ArrayType(ArrayType(T))}, \code{freq: integer}
+#'         where T is the item type
 #' @rdname spark.prefixSpan
 #' @aliases findFrequentSequentialPatterns,PrefixSpan,SparkDataFrame-method
 #' @examples
diff --git a/examples/src/main/r/ml/powerIterationClustering.R b/examples/src/main/r/ml/powerIterationClustering.R
index ba43037106d14..3530d88e50509 100644
--- a/examples/src/main/r/ml/powerIterationClustering.R
+++ b/examples/src/main/r/ml/powerIterationClustering.R
@@ -30,7 +30,8 @@ df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
                            list(1L, 2L, 1.0), list(3L, 4L, 1.0),
                            list(4L, 0L, 0.1)),
                       schema = c("src", "dst", "weight"))
 # assign clusters
-clusters <- spark.assignClusters(df, k=2L, maxIter=20L, initMode="degree", weightCol="weight")
+clusters <- spark.assignClusters(df, k = 2L, maxIter = 20L,
+                                 initMode = "degree", weightCol = "weight")
 showDF(arrange(clusters, clusters$id))
 # $example off$