From e886e6e837aeb56a410621ed2ffa06bcaebabd35 Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Fri, 27 May 2016 17:52:52 +0800 Subject: [PATCH 1/4] create pr --- .../scala/org/apache/spark/ml/feature/ElementwiseProduct.scala | 1 - mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index 91989c3d2f5d2..9d2e60fa3f1e4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -23,7 +23,6 @@ import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} import org.apache.spark.mllib.feature -import org.apache.spark.mllib.linalg.{Vectors => OldVectors} import org.apache.spark.mllib.linalg.VectorImplicits._ import org.apache.spark.sql.types.DataType diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index 30c403e547bee..4d1039ae69f98 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -40,7 +40,7 @@ class PCA @Since("1.4.0") (@Since("1.4.0") val k: Int) { */ @Since("1.4.0") def fit(sources: RDD[Vector]): PCAModel = { - require(k <= sources.first().size, + require(k < sources.first().size, s"source vector size is ${sources.first().size} must be greater than k=$k") val mat = new RowMatrix(sources) From 826bcc2b3b6f822e829a5868db08e5f07dc9a2ca Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Fri, 27 May 2016 19:18:12 +0800 Subject: [PATCH 2/4] fix one nit --- .../src/main/scala/org/apache/spark/mllib/feature/PCA.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index 4d1039ae69f98..ca1287d1849bb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -40,8 +40,9 @@ class PCA @Since("1.4.0") (@Since("1.4.0") val k: Int) { */ @Since("1.4.0") def fit(sources: RDD[Vector]): PCAModel = { - require(k < sources.first().size, - s"source vector size is ${sources.first().size} must be greater than k=$k") + val numFeatures = sources.first().size + require(k < numFeatures, + s"source vector size is $numFeatures must be greater than k=$k") val mat = new RowMatrix(sources) val (pc, explainedVariance) = mat.computePrincipalComponentsAndExplainedVariance(k) From b25ae306c5c20be388173a789ed6d5b5f1922474 Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Sat, 28 May 2016 08:04:13 +0800 Subject: [PATCH 3/4] update error message --- mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index ca1287d1849bb..8f890e56e71e2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -41,8 +41,8 @@ class PCA @Since("1.4.0") (@Since("1.4.0") val k: Int) { @Since("1.4.0") def fit(sources: RDD[Vector]): PCAModel = { val numFeatures = sources.first().size - require(k < numFeatures, - s"source vector size is $numFeatures must be greater than k=$k") + require(k <= numFeatures, + s"source vector size is $numFeatures must be no less than k=$k") val mat = new RowMatrix(sources) val (pc, explainedVariance) = mat.computePrincipalComponentsAndExplainedVariance(k) From f696138a2347d0c8668de1b7cdd9d927ff7efb9b Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Sat, 28 May 2016 10:11:44 +0800 Subject: [PATCH 4/4] del "is" --- mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index 8f890e56e71e2..15b72205ac17a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -42,7 +42,7 @@ class PCA @Since("1.4.0") (@Since("1.4.0") val k: Int) { def fit(sources: RDD[Vector]): PCAModel = { val numFeatures = sources.first().size require(k <= numFeatures, - s"source vector size is $numFeatures must be no less than k=$k") + s"source vector size $numFeatures must be no less than k=$k") val mat = new RowMatrix(sources) val (pc, explainedVariance) = mat.computePrincipalComponentsAndExplainedVariance(k) @@ -59,7 +59,6 @@ class PCA @Since("1.4.0") (@Since("1.4.0") val k: Int) { case m => throw new IllegalArgumentException("Unsupported matrix format. Expected " + s"SparseMatrix or DenseMatrix. Instead got: ${m.getClass}") - } val denseExplainedVariance = explainedVariance match { case dv: DenseVector =>