From 9cdb6c2023171bc01c6bb722545c7be69e7847b7 Mon Sep 17 00:00:00 2001
From: Yanbo Liang
Date: Tue, 29 Sep 2015 17:11:56 +0800
Subject: [PATCH 1/4] Consolidate the Cholesky solvers in WeightedLeastSquares
 and ALS

---
 .../spark/ml/optim/WeightedLeastSquares.scala | 23 +---------
 .../apache/spark/ml/recommendation/ALS.scala  |  8 +---
 .../mllib/linalg/CholeskyDecomposition.scala  | 46 +++++++++++++++++++
 3 files changed, 49 insertions(+), 28 deletions(-)
 create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala

diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
index 4374e99631560..d7eaa5a9268ff 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
@@ -17,12 +17,8 @@
 
 package org.apache.spark.ml.optim
 
-import com.github.fommil.netlib.LAPACK.{getInstance => lapack}
-import org.netlib.util.intW
-
 import org.apache.spark.Logging
 import org.apache.spark.mllib.linalg._
-import org.apache.spark.mllib.linalg.distributed.RowMatrix
 import org.apache.spark.rdd.RDD
 
 /**
@@ -110,7 +106,7 @@ private[ml] class WeightedLeastSquares(
       j += 1
     }
 
-    val x = choleskySolve(aaBar.values, abBar)
+    val x = new DenseVector(CholeskyDecomposition.solve(aaBar.values, abBar.values))
 
     // compute intercept
     val intercept = if (fitIntercept) {
@@ -121,23 +117,6 @@ private[ml] class WeightedLeastSquares(
 
     new WeightedLeastSquaresModel(x, intercept)
   }
-
-  /**
-   * Solves a symmetric positive definite linear system via Cholesky factorization.
-   * The input arguments are modified in-place to store the factorization and the solution.
-   * @param A the upper triangular part of A
-   * @param bx right-hand side
-   * @return the solution vector
-   */
-  // TODO: SPARK-10490 - consolidate this and the Cholesky solver in ALS
-  private def choleskySolve(A: Array[Double], bx: DenseVector): DenseVector = {
-    val k = bx.size
-    val info = new intW(0)
-    lapack.dppsv("U", k, 1, A, bx.values, k, info)
-    val code = info.`val`
-    assert(code == 0, s"lapack.dpotrs returned $code.")
-    bx
-  }
 }
 
 private[ml] object WeightedLeastSquares {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 9a56a75b69d0b..99d462311690d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -26,9 +26,7 @@ import scala.util.Sorting
 import scala.util.hashing.byteswap64
 
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
-import com.github.fommil.netlib.LAPACK.{getInstance => lapack}
 import org.apache.hadoop.fs.{FileSystem, Path}
-import org.netlib.util.intW
 
 import org.apache.spark.{Logging, Partitioner}
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
@@ -36,6 +34,7 @@ import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
 import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
+import org.apache.spark.mllib.linalg.CholeskyDecomposition
 import org.apache.spark.mllib.optimization.NNLS
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.DataFrame
@@ -387,10 +386,7 @@ object ALS extends Logging {
         i += j
         j += 1
       }
-      val info = new intW(0)
-      lapack.dppsv(upper, k, 1, ne.ata, ne.atb, k, info)
-      val code = info.`val`
-      assert(code == 0, s"lapack.dppsv returned $code.")
s"lapack.dppsv returned $code.") + CholeskyDecomposition.solve(ne.ata, ne.atb) val x = new Array[Float](k) i = 0 while (i < k) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala new file mode 100644 index 0000000000000..b5dc2481d9c2c --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.linalg + +import com.github.fommil.netlib.LAPACK.{getInstance => lapack} +import org.apache.spark.annotation.Experimental +import org.netlib.util.intW + +/** + * :: Experimental :: + * Compute Cholesky decomposition. + */ +@Experimental +private[spark] object CholeskyDecomposition { + + /** + * Solves a symmetric positive definite linear system via Cholesky factorization. + * The input arguments are modified in-place to store the factorization and the solution. + * @param A the upper triangular part of A + * @param bx right-hand side + * @return the solution vector + */ + private[spark] def solve(A: Array[Double], bx: Array[Double]): Array[Double] = { + val k = bx.size + val info = new intW(0) + lapack.dppsv("U", k, 1, A, bx, k, info) + val code = info.`val` + assert(code == 0, s"lapack.dpotrs returned $code.") + bx + } +} From 72f8782c5b39afa0d403482908fd877ef53cc463 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Tue, 29 Sep 2015 17:14:13 +0800 Subject: [PATCH 2/4] fix typo --- .../org/apache/spark/mllib/linalg/CholeskyDecomposition.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala index b5dc2481d9c2c..20a9455d41ce2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala @@ -33,7 +33,7 @@ private[spark] object CholeskyDecomposition { * The input arguments are modified in-place to store the factorization and the solution. 
   * @param A the upper triangular part of A
   * @param bx right-hand side
-   * @return the solution vector
+   * @return the solution array
    */
  private[spark] def solve(A: Array[Double], bx: Array[Double]): Array[Double] = {
    val k = bx.size

From 55955e7f1e592140bf6c04cdbfc913242b451faf Mon Sep 17 00:00:00 2001
From: Yanbo Liang
Date: Wed, 30 Sep 2015 11:03:07 +0800
Subject: [PATCH 3/4] remove Experimental tag from private class

---
 .../main/scala/org/apache/spark/ml/recommendation/ALS.scala  | 2 --
 .../apache/spark/mllib/linalg/CholeskyDecomposition.scala    | 4 +---
 .../apache/spark/mllib/linalg/EigenValueDecomposition.scala  | 5 +----
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 99d462311690d..b247eccc7142c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -365,8 +365,6 @@ object ALS extends Logging {
   /** Cholesky solver for least square problems. */
   private[recommendation] class CholeskySolver extends LeastSquaresNESolver {
 
-    private val upper = "U"
-
     /**
      * Solves a least squares problem with L2 regularization:
      *
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
index 20a9455d41ce2..3ec45ccad63cc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
@@ -18,14 +18,12 @@
 package org.apache.spark.mllib.linalg
 
 import com.github.fommil.netlib.LAPACK.{getInstance => lapack}
-import org.apache.spark.annotation.Experimental
 import org.netlib.util.intW
 
 /**
  * :: Experimental ::
  * Compute Cholesky decomposition.
  */
-@Experimental
 private[spark] object CholeskyDecomposition {
 
   /**
@@ -35,7 +33,7 @@ private[spark] object CholeskyDecomposition {
    * @param bx right-hand side
    * @return the solution array
    */
-  private[spark] def solve(A: Array[Double], bx: Array[Double]): Array[Double] = {
+  def solve(A: Array[Double], bx: Array[Double]): Array[Double] = {
     val k = bx.size
     val info = new intW(0)
     lapack.dppsv("U", k, 1, A, bx, k, info)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala
index ae3ba3099c878..2aac776514a98 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala
@@ -21,13 +21,10 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
 import com.github.fommil.netlib.ARPACK
 import org.netlib.util.{intW, doubleW}
 
-import org.apache.spark.annotation.Experimental
-
 /**
  * :: Experimental ::
  * Compute eigen-decomposition.
  */
-@Experimental
 private[mllib] object EigenValueDecomposition {
   /**
    * Compute the leading k eigenvalues and eigenvectors on a symmetric square matrix using ARPACK.
@@ -46,7 +43,7 @@ private[mllib] object EigenValueDecomposition {
    * for more details). The maximum number of Arnoldi update iterations is set to 300 in this
    * function.
    */
-  private[mllib] def symmetricEigs(
+  def symmetricEigs(
       mul: BDV[Double] => BDV[Double],
       n: Int,
       k: Int,

From 82aad641d83f19e679f3995379caafd8dbd80eba Mon Sep 17 00:00:00 2001
From: Yanbo Liang
Date: Wed, 30 Sep 2015 11:13:48 +0800
Subject: [PATCH 4/4] fix typos

---
 .../org/apache/spark/mllib/linalg/CholeskyDecomposition.scala   | 1 -
 .../org/apache/spark/mllib/linalg/EigenValueDecomposition.scala | 1 -
 2 files changed, 2 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
index 3ec45ccad63cc..66eb40b6f4a69 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
@@ -21,7 +21,6 @@ import com.github.fommil.netlib.LAPACK.{getInstance => lapack}
 import org.netlib.util.intW
 
 /**
- * :: Experimental ::
  * Compute Cholesky decomposition.
  */
 private[spark] object CholeskyDecomposition {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala
index 2aac776514a98..863abe86d38d7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala
@@ -22,7 +22,6 @@ import com.github.fommil.netlib.ARPACK
 import org.netlib.util.{intW, doubleW}
 
 /**
- * :: Experimental ::
  * Compute eigen-decomposition.
  */
 private[mllib] object EigenValueDecomposition {
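
Note for reviewers, not part of the patch series: the sketch below shows how the consolidated helper is expected to be called once these commits are applied. The example object and the numbers in it are made up for illustration; only CholeskyDecomposition.solve comes from the patches. Because the object is private[spark], a caller has to live under the org.apache.spark package hierarchy, and because dppsv factors and solves in place, the right-hand-side array is overwritten with the solution.

package org.apache.spark.mllib.linalg

// Hypothetical caller, placed in the same package only because the object is private[spark].
object CholeskySolveExample {
  def main(args: Array[String]): Unit = {
    // A = [[4, 2], [2, 3]] stored as its packed upper triangle in column-major
    // order (a11, a12, a22), which is the layout LAPACK's dppsv expects.
    val ata = Array(4.0, 2.0, 3.0)
    // Right-hand side b = [10, 8]; solve() overwrites it with the solution in place.
    val atb = Array(10.0, 8.0)
    val x = CholeskyDecomposition.solve(ata, atb)
    println(x.mkString(", ")) // prints 1.75, 1.5
  }
}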