From ec2e8f723b1bb9cbb603c0c85b97a2b89dc7c066 Mon Sep 17 00:00:00 2001
From: Holden Karau <holden@us.ibm.com>
Date: Sun, 7 Feb 2016 00:07:15 -0800
Subject: [PATCH 1/3] Remove rounding on integer in MFDataGenerator

---
 .../scala/org/apache/spark/mllib/util/MFDataGenerator.scala    | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index 8af6750da4ff3..dfd4115dda703 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -105,8 +105,7 @@ object MFDataGenerator {
 
     // optionally generate testing data
     if (test) {
-      val testSampSize = math.min(
-        math.round(sampSize * testSampFact), math.round(mn - sampSize)).toInt
+      val testSampSize: Int = math.min(math.round(sampSize * testSampFact).toInt, mn - sampSize)
       val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
       val testOrdered = testOmega.sortWith(_ < _).toArray
       val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)

From 5418c18f1574dcd11812f8101cc5bcf477394f54 Mon Sep 17 00:00:00 2001
From: Holden Karau <holden@us.ibm.com>
Date: Sun, 7 Feb 2016 00:08:12 -0800
Subject: [PATCH 2/3] remove type

---
 .../scala/org/apache/spark/mllib/util/MFDataGenerator.scala     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index dfd4115dda703..898a09e51636c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -105,7 +105,7 @@ object MFDataGenerator {
 
     // optionally generate testing data
     if (test) {
-      val testSampSize: Int = math.min(math.round(sampSize * testSampFact).toInt, mn - sampSize)
+      val testSampSize = math.min(math.round(sampSize * testSampFact).toInt, mn - sampSize)
       val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
       val testOrdered = testOmega.sortWith(_ < _).toArray
       val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)

From 4deb2491c2af7710afce4f3ffb1935e214e0f74e Mon Sep 17 00:00:00 2001
From: Holden Karau <holden@us.ibm.com>
Date: Sun, 7 Feb 2016 22:57:55 -0800
Subject: [PATCH 3/3] Add an internal version of setRuns for the KMeans MLLib
 implementation and have it used internally and when called from python. Also
 add a log error message that the number of runs is deprecated since people
 could be setting the number of runs through the train function as well

---
 .../spark/mllib/api/python/PythonMLLibAPI.scala     |  2 +-
 .../org/apache/spark/mllib/clustering/KMeans.scala  | 13 +++++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 088ec6a0c0465..93cf16e6f0c2a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -357,7 +357,7 @@ private[python] class PythonMLLibAPI extends Serializable {
     val kMeansAlg = new KMeans()
       .setK(k)
       .setMaxIterations(maxIterations)
-      .setRuns(runs)
+      .internalSetRuns(runs)
       .setInitializationMode(initializationMode)
       .setInitializationSteps(initializationSteps)
       .setEpsilon(epsilon)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 901164a391170..67de62bc2e848 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -119,9 +119,18 @@ class KMeans private (
   @Since("0.8.0")
   @deprecated("Support for runs is deprecated. This param will have no effect in 2.0.0.", "1.6.0")
   def setRuns(runs: Int): this.type = {
+    internalSetRuns(runs)
+  }
+
+  // Internal version of setRuns for Python API, this should be removed at the same time as setRuns
+  // this is done to avoid deprecation warnings in our build.
+  private[mllib] def internalSetRuns(runs: Int): this.type = {
     if (runs <= 0) {
       throw new IllegalArgumentException("Number of runs must be positive")
     }
+    if (runs != 1) {
+      logWarning("Setting number of runs is deprecated and will have no effect in 2.0.0")
+    }
     this.runs = runs
     this
   }
@@ -502,7 +511,7 @@ object KMeans {
       seed: Long): KMeansModel = {
     new KMeans().setK(k)
       .setMaxIterations(maxIterations)
-      .setRuns(runs)
+      .internalSetRuns(runs)
       .setInitializationMode(initializationMode)
       .setSeed(seed)
       .run(data)
@@ -528,7 +537,7 @@ object KMeans {
       initializationMode: String): KMeansModel = {
     new KMeans().setK(k)
       .setMaxIterations(maxIterations)
-      .setRuns(runs)
+      .internalSetRuns(runs)
       .setInitializationMode(initializationMode)
       .run(data)
   }