From eecb4d73e1404aa5c2bb744d6a80e65cb85d2f6c Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng
Date: Mon, 14 Nov 2016 18:13:58 +0800
Subject: [PATCH 1/4] create pr

---
 .../org/apache/spark/mllib/feature/ChiSqSelector.scala      | 6 ++++--
 .../scala/org/apache/spark/mllib/feature/HashingTF.scala    | 2 ++
 .../src/main/scala/org/apache/spark/mllib/feature/IDF.scala | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
index f9156b642785f..501401e0fe5ee 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
@@ -192,25 +192,27 @@ class ChiSqSelector @Since("2.1.0") () extends Serializable {
   @Since("1.3.0")
   def this(numTopFeatures: Int) {
     this()
+    require(numTopFeatures > 0, s"numTopFeatures must be positive but got $numTopFeatures")
     this.numTopFeatures = numTopFeatures
   }
 
   @Since("1.6.0")
   def setNumTopFeatures(value: Int): this.type = {
+    require(value > 0, s"numTopFeatures must be positive but got $value")
     numTopFeatures = value
     this
   }
 
   @Since("2.1.0")
   def setPercentile(value: Double): this.type = {
-    require(0.0 <= value && value <= 1.0, "Percentile must be in [0,1]")
+    require(0.0 <= value && value <= 1.0, s"Percentile must be in [0,1] but got $value")
     percentile = value
     this
   }
 
   @Since("2.1.0")
   def setFpr(value: Double): this.type = {
-    require(0.0 <= value && value <= 1.0, "FPR must be in [0,1]")
+    require(0.0 <= value && value <= 1.0, s"FPR must be in [0,1] but got $value")
     fpr = value
     this
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala
index bc26655104a9b..3c69fb41c834b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala
@@ -38,6 +38,7 @@ import org.apache.spark.util.Utils
  */
 @Since("1.1.0")
 class HashingTF(val numFeatures: Int) extends Serializable {
+  require(numFeatures > 0, s"numFeatures must be positive but got $numFeatures")
 
   import HashingTF._
 
@@ -65,6 +66,7 @@ class HashingTF(val numFeatures: Int) extends Serializable {
    */
   @Since("2.0.0")
   def setHashAlgorithm(value: String): this.type = {
+    require(Array(Murmur3, Native).contains(value), s"hashAlgorithm: $value was not supported.")
     hashAlgorithm = value
     this
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
index bb4b37ef21a84..15da4cc741e24 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
@@ -38,6 +38,7 @@ import org.apache.spark.rdd.RDD
  */
 @Since("1.1.0")
 class IDF @Since("1.2.0") (@Since("1.2.0") val minDocFreq: Int) {
+  require(minDocFreq > 0, s"minDocFreq must be positive but got $minDocFreq")
 
   @Since("1.1.0")
   def this() = this(0)
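Reviewer sketch, not part of the patch: how the eager require() checks added in PATCH 1/4 surface to callers. Scala's require throws java.lang.IllegalArgumentException with a "requirement failed: " prefix, so the bad value is reported at construction time:

import org.apache.spark.mllib.feature.HashingTF

val tf = new HashingTF(1 << 20)   // fine: numFeatures is positive
try {
  new HashingTF(0)                // the new require(numFeatures > 0, ...) fires here
} catch {
  case e: IllegalArgumentException =>
    println(e.getMessage)         // "requirement failed: numFeatures must be positive but got 0"
}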
From 5ff61e6e53d403937354ca1f5029538b2db65603 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng
Date: Mon, 14 Nov 2016 19:24:36 +0800
Subject: [PATCH 2/4] create pr

---
 .../scala/org/apache/spark/ml/feature/IDF.scala      |  3 ++-
 .../scala/org/apache/spark/ml/feature/PCA.scala      |  3 ++-
 .../org/apache/spark/ml/feature/Word2Vec.scala       | 13 ++++++++-----
 .../spark/ml/regression/IsotonicRegression.scala     |  3 ++-
 .../spark/ml/regression/LinearRegression.scala       |  5 ++++-
 .../scala/org/apache/spark/ml/tree/treeParams.scala  |  4 +++-
 6 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 6386dd8a10801..46a0730f5ddb8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -44,7 +44,8 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol
    * @group param
    */
   final val minDocFreq = new IntParam(
-    this, "minDocFreq", "minimum number of documents in which a term should appear for filtering")
+    this, "minDocFreq", "minimum number of documents in which a term should appear for filtering" +
+    " (>= 0)", ParamValidators.gtEq(0))
 
   setDefault(minDocFreq -> 0)
 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 6b913480fdc28..444006fe1edb6 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -44,7 +44,8 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC
    * The number of principal components.
    * @group param
    */
-  final val k: IntParam = new IntParam(this, "k", "the number of principal components")
+  final val k: IntParam = new IntParam(this, "k", "the number of principal components (> 0)",
+    ParamValidators.gt(0))
 
   /** @group getParam */
   def getK: Int = $(k)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index d53f3df514dff..3ed08c983d561 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -43,7 +43,8 @@ private[feature] trait Word2VecBase extends Params
    * @group param
    */
   final val vectorSize = new IntParam(
-    this, "vectorSize", "the dimension of codes after transforming from words")
+    this, "vectorSize", "the dimension of codes after transforming from words (> 0)",
+    ParamValidators.gt(0))
   setDefault(vectorSize -> 100)
 
   /** @group getParam */
@@ -55,7 +56,8 @@ private[feature] trait Word2VecBase extends Params
    * @group expertParam
    */
   final val windowSize = new IntParam(
-    this, "windowSize", "the window size (context words from [-window, window])")
+    this, "windowSize", "the window size (context words from [-window, window]) (> 0)",
+    ParamValidators.gt(0))
   setDefault(windowSize -> 5)
 
   /** @group expertGetParam */
@@ -67,7 +69,8 @@ private[feature] trait Word2VecBase extends Params
    * @group param
    */
   final val numPartitions = new IntParam(
-    this, "numPartitions", "number of partitions for sentences of words")
+    this, "numPartitions", "number of partitions for sentences of words (> 0)",
+    ParamValidators.gt(0))
   setDefault(numPartitions -> 1)
 
   /** @group getParam */
@@ -80,7 +83,7 @@ private[feature] trait Word2VecBase extends Params
    * @group param
    */
   final val minCount = new IntParam(this, "minCount", "the minimum number of times a token must " +
-    "appear to be included in the word2vec model's vocabulary")
+    "appear to be included in the word2vec model's vocabulary (>= 0)", ParamValidators.gtEq(0))
   setDefault(minCount -> 5)
 
   /** @group getParam */
@@ -95,7 +98,7 @@ private[feature] trait Word2VecBase extends Params
    */
   final val maxSentenceLength = new IntParam(this, "maxSentenceLength", "Maximum length " +
     "(in words) of each sentence in the input data. Any sentence longer than this threshold will " +
-    "be divided into chunks up to the size.")
+    "be divided into chunks up to the size (> 0)", ParamValidators.gt(0))
   setDefault(maxSentenceLength -> 1000)
 
   /** @group getParam */
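Reviewer sketch, not part of the patch: the ml-side params above attach ParamValidators instead of ad-hoc require() calls, so a bad value is rejected as soon as it is set rather than failing later. A self-contained illustration using only the public org.apache.spark.ml.param API; the Demo class is hypothetical:

import org.apache.spark.ml.param.{IntParam, ParamMap, Params, ParamValidators}
import org.apache.spark.ml.util.Identifiable

// Hypothetical params holder, only to show a validator firing on set().
class Demo(override val uid: String) extends Params {
  def this() = this(Identifiable.randomUID("demo"))
  final val k = new IntParam(this, "k", "number of components (> 0)", ParamValidators.gt(0))
  def setK(value: Int): this.type = set(k, value)
  override def copy(extra: ParamMap): Demo = defaultCopy(extra)
}

new Demo().setK(3)   // ok
new Demo().setK(0)   // IllegalArgumentException: ... parameter k given invalid value 0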
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index cd7b4f2a9c56e..4d274f3a5bbf1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -61,7 +61,8 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
    * @group param
    */
   final val featureIndex: IntParam = new IntParam(this, "featureIndex",
-    "The index of the feature if featuresCol is a vector column, no effect otherwise.")
+    "The index of the feature if featuresCol is a vector column, no effect otherwise (>= 0)",
+    ParamValidators.gtEq(0))
 
   /** @group getParam */
   final def getFeatureIndex: Int = $(featureIndex)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 9639b07496c13..65954e3ea2f9b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -171,7 +171,10 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String
    * @group setParam
    */
   @Since("1.6.0")
-  def setSolver(value: String): this.type = set(solver, value)
+  def setSolver(value: String): this.type = {
+    require(Array("auto", "l-bfgs", "normal").contains(value), s"Solver $value was not supported.")
+    set(solver, value)
+  }
   setDefault(solver -> "auto")
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 57c7e44e97607..5a551533be9ca 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -73,11 +73,13 @@ private[ml] trait DecisionTreeParams extends PredictorParams
 
   /**
    * Minimum information gain for a split to be considered at a tree node.
+   * Should be >= 0.0.
    * (default = 0.0)
    * @group param
    */
   final val minInfoGain: DoubleParam = new DoubleParam(this, "minInfoGain",
-    "Minimum information gain for a split to be considered at a tree node.")
+    "Minimum information gain for a split to be considered at a tree node.",
+    ParamValidators.gtEq(0.0))
 
   /**
    * Maximum memory in MB allocated to histogram aggregation. If too small, then 1 node will be
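Reviewer sketch, not part of the patch: the caller-visible effect of the setSolver change above. Construction needs no SparkSession, so the check fires immediately:

import org.apache.spark.ml.regression.LinearRegression

val lr = new LinearRegression()
lr.setSolver("l-bfgs")   // ok: one of auto, l-bfgs, normal
lr.setSolver("lbfgs")    // IllegalArgumentException: requirement failed: Solver lbfgs was not supported.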
From 89c7f1aaee562aad5983556dac27c4507b78697b Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng
Date: Mon, 14 Nov 2016 19:34:54 +0800
Subject: [PATCH 3/4] update

---
 .../org/apache/spark/mllib/feature/ChiSqSelector.scala      | 6 ++----
 .../scala/org/apache/spark/mllib/feature/HashingTF.scala    | 2 --
 .../src/main/scala/org/apache/spark/mllib/feature/IDF.scala | 1 -
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
index 501401e0fe5ee..f9156b642785f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
@@ -192,27 +192,25 @@ class ChiSqSelector @Since("2.1.0") () extends Serializable {
   @Since("1.3.0")
   def this(numTopFeatures: Int) {
     this()
-    require(numTopFeatures > 0, s"numTopFeatures must be positive but got $numTopFeatures")
     this.numTopFeatures = numTopFeatures
   }
 
   @Since("1.6.0")
   def setNumTopFeatures(value: Int): this.type = {
-    require(value > 0, s"numTopFeatures must be positive but got $value")
     numTopFeatures = value
     this
   }
 
   @Since("2.1.0")
   def setPercentile(value: Double): this.type = {
-    require(0.0 <= value && value <= 1.0, s"Percentile must be in [0,1] but got $value")
+    require(0.0 <= value && value <= 1.0, "Percentile must be in [0,1]")
     percentile = value
     this
   }
 
   @Since("2.1.0")
   def setFpr(value: Double): this.type = {
-    require(0.0 <= value && value <= 1.0, s"FPR must be in [0,1] but got $value")
+    require(0.0 <= value && value <= 1.0, "FPR must be in [0,1]")
     fpr = value
     this
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala
index 3c69fb41c834b..bc26655104a9b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala
@@ -38,7 +38,6 @@ import org.apache.spark.util.Utils
  */
 @Since("1.1.0")
 class HashingTF(val numFeatures: Int) extends Serializable {
-  require(numFeatures > 0, s"numFeatures must be positive but got $numFeatures")
 
   import HashingTF._
 
@@ -66,7 +65,6 @@ class HashingTF(val numFeatures: Int) extends Serializable {
    */
   @Since("2.0.0")
   def setHashAlgorithm(value: String): this.type = {
-    require(Array(Murmur3, Native).contains(value), s"hashAlgorithm: $value was not supported.")
     hashAlgorithm = value
     this
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
index 15da4cc741e24..bb4b37ef21a84 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
@@ -38,7 +38,6 @@ import org.apache.spark.rdd.RDD
  */
 @Since("1.1.0")
 class IDF @Since("1.2.0") (@Since("1.2.0") val minDocFreq: Int) {
-  require(minDocFreq > 0, s"minDocFreq must be positive but got $minDocFreq")
 
   @Since("1.1.0")
   def this() = this(0)
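Reviewer sketch, not part of the patch: one concrete conflict the revert above resolves. Under the PATCH 1/4 checks, IDF's own zero-arg constructor could no longer run, since it delegates to this(0) while the new check demanded minDocFreq > 0:

import org.apache.spark.mllib.feature.IDF

new IDF()    // delegates to this(0); under PATCH 1/4 the require(minDocFreq > 0, ...) would throw
new IDF(1)   // fine both before and after the revert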
From 587fb9c8d233ec8d83750d4e1d39996da20b34e4 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng
Date: Tue, 15 Nov 2016 10:37:16 +0800
Subject: [PATCH 4/4] array->set; show supported options

---
 .../org/apache/spark/ml/regression/LinearRegression.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 65954e3ea2f9b..71c542adf6f6f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -172,7 +172,8 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String
    */
   @Since("1.6.0")
   def setSolver(value: String): this.type = {
-    require(Array("auto", "l-bfgs", "normal").contains(value), s"Solver $value was not supported.")
+    require(Set("auto", "l-bfgs", "normal").contains(value),
+      s"Solver $value was not supported. Supported options: auto, l-bfgs, normal")
     set(solver, value)
  }
   setDefault(solver -> "auto")
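Reviewer sketch, not part of the patch: the final behavior after PATCH 4/4, whose rejection message now lists the supported options:

import org.apache.spark.ml.regression.LinearRegression

new LinearRegression().setSolver("normal")   // ok
new LinearRegression().setSolver("sgd")
// java.lang.IllegalArgumentException: requirement failed:
//   Solver sgd was not supported. Supported options: auto, l-bfgs, normal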