From d6e96ef8e3b2bf0fdb4b8ca5ffd01aaf1089e29a Mon Sep 17 00:00:00 2001 From: MechCoder Date: Fri, 19 Jun 2015 19:24:36 +0530 Subject: [PATCH 1/4] [SPARK-8479] Add numNonzeros and numActives to linalg.Matrices --- .../apache/spark/mllib/linalg/Matrices.scala | 35 +++++++++++++++++++ .../spark/mllib/linalg/MatricesSuite.scala | 10 ++++++ 2 files changed, 45 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 85e63b1382b5e..66d0cc801dde3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -114,6 +114,16 @@ sealed trait Matrix extends Serializable { * corresponding value in the matrix with type `Double`. */ private[spark] def foreachActive(f: (Int, Int, Double) => Unit) + + /** + * Find the number of non-zero active values. + */ + def numNonzeros: Int + + /** + * Find the number of values stored explicitly. These values can be zero as well. + */ + def numActives: Int } @DeveloperApi @@ -323,6 +333,18 @@ class DenseMatrix( } } + override def numNonzeros: Int = { + var nnz = 0 + values.foreach { value => + if (value != 0) { + nnz += 1 + } + } + nnz + } + + override def numActives: Int = values.length + /** * Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed * set to false. @@ -592,6 +614,19 @@ class SparseMatrix( def toDense: DenseMatrix = { new DenseMatrix(numRows, numCols, toArray) } + + override def numNonzeros: Int = { + var nnz = 0 + values.foreach { value => + if (value != 0) { + nnz += 1 + } + } + nnz + } + + override def numActives: Int = values.length + } /** diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 8dbb70f5d1c4c..a270ba2562db9 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -455,4 +455,14 @@ class MatricesSuite extends SparkFunSuite { lines = mat.toString(5, 100).lines.toArray assert(lines.size == 5 && lines.forall(_.size <= 100)) } + + test("numNonzeros and numActives") { + val dm1 = Matrices.dense(3, 2, Array(0, 0, -1, 1, 0, 1)) + assert(dm1.numNonzeros === 3) + assert(dm1.numActives === 6) + + val sm1 = Matrices.sparse(3, 2, Array(0, 2, 3), Array(0, 2, 1), Array(0.0, -1.2, 0.0)) + assert(sm1.numNonzeros === 1) + assert(sm1.numActives === 3) + } } From 2f62b2fe1a4f57574b52e62c56cebb922c6cf89b Mon Sep 17 00:00:00 2001 From: MechCoder Date: Fri, 19 Jun 2015 22:30:42 +0530 Subject: [PATCH 2/4] Add to MiMa excludes --- project/MimaExcludes.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 6f86a505b3ae4..43a088d78428d 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -199,6 +199,12 @@ object MimaExcludes { ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.PostgresQuirks"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.NoQuirks"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.MySQLQuirks") + ) ++ Seq( + // SPARK-8479 Add numNonzeros and numActives to Matrix. + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.Matrix.numNonzeros"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.Matrix.numActives") ) case v if v.startsWith("1.3") => From e2390f5b6f1556e25dda389fa88d96aa57f99988 Mon Sep 17 00:00:00 2001 From: MechCoder Date: Wed, 1 Jul 2015 21:06:43 +0530 Subject: [PATCH 3/4] Use count instead of foreach --- .../apache/spark/mllib/linalg/Matrices.scala | 20 ++----------------- project/MimaExcludes.scala | 16 +++++++++------ 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 66d0cc801dde3..a6179e36d1bec 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -333,15 +333,7 @@ class DenseMatrix( } } - override def numNonzeros: Int = { - var nnz = 0 - values.foreach { value => - if (value != 0) { - nnz += 1 - } - } - nnz - } + override def numNonzeros: Int = values.count(_ != 0) override def numActives: Int = values.length @@ -615,15 +607,7 @@ class SparseMatrix( new DenseMatrix(numRows, numCols, toArray) } - override def numNonzeros: Int = { - var nnz = 0 - values.foreach { value => - if (value != 0) { - nnz += 1 - } - } - nnz - } + override def numNonzeros: Int = values.count(_ != 0) override def numActives: Int = values.length diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 43a088d78428d..864f2ed38f0c5 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -75,6 +75,16 @@ object MimaExcludes { "org.apache.spark.sql.parquet.ParquetTypeInfo"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.ParquetTypeInfo$") + ) ++ Seq( + // SPARK-8479 Add numNonzeros and numActives to Matrix. + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.DenseMatrix.numNonzeros"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.DenseMatrix.numActives"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.SparseMatrix.numNonzeros"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.SparseMatrix.numActives") ) case v if v.startsWith("1.4") => Seq( @@ -199,12 +209,6 @@ object MimaExcludes { ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.PostgresQuirks"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.NoQuirks"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.MySQLQuirks") - ) ++ Seq( - // SPARK-8479 Add numNonzeros and numActives to Matrix. - ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Matrix.numNonzeros"), - ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Matrix.numActives") ) case v if v.startsWith("1.3") => From 252c6b72426300fa0859c9d83c0b014b5f94bf6e Mon Sep 17 00:00:00 2001 From: MechCoder Date: Wed, 1 Jul 2015 23:33:22 +0530 Subject: [PATCH 4/4] Add to MiMa excludes --- project/MimaExcludes.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 864f2ed38f0c5..680b699e9e4a1 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -78,13 +78,9 @@ object MimaExcludes { ) ++ Seq( // SPARK-8479 Add numNonzeros and numActives to Matrix. ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.DenseMatrix.numNonzeros"), + "org.apache.spark.mllib.linalg.Matrix.numNonzeros"), ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.DenseMatrix.numActives"), - ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.SparseMatrix.numNonzeros"), - ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.SparseMatrix.numActives") + "org.apache.spark.mllib.linalg.Matrix.numActives") ) case v if v.startsWith("1.4") => Seq(