From a604816b25988f1200758b65a3ae15efbb684de7 Mon Sep 17 00:00:00 2001 From: bittmannm Date: Fri, 6 Feb 2015 14:12:51 -0500 Subject: [PATCH 1/4] [SPARK-5656] Fail gracefully for large values of k and/or n that will exceed max int. Large values of k and/or n in EigenValueDecomposition.symmetricEigs will result in array initialization to a value larger than Integer.MAX_VALUE in the following: var v = new Array[Double](n * ncv) --- .../apache/spark/mllib/linalg/EigenValueDecomposition.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala index 3515461b52493..9a9b3135f97f5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala @@ -66,6 +66,9 @@ private[mllib] object EigenValueDecomposition { // ncv must be smaller than n val ncv = math.min(2 * k, n) + require(ncv * n.toLong < Integer.MAX_VALUE, "Product of 2*k*n must be smaller than " + + s"Integer.MAX_VALUE. Found required eigenvalues k = $k and matrix dimension n = $n") + // "I" for standard eigenvalue problem, "G" for generalized eigenvalue problem val bmat = "I" // "LM" : compute the NEV largest (in magnitude) eigenvalues From 860836be46f737ea6fe1494b3f006d7ed5240d22 Mon Sep 17 00:00:00 2001 From: mbittmann Date: Fri, 6 Feb 2015 18:30:46 -0500 Subject: [PATCH 2/4] Array size check updates based on code review Move the size check closer to array allocation, set to '<=' and add additional check. 
--- .../spark/mllib/linalg/EigenValueDecomposition.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala index 9a9b3135f97f5..602e296f845d4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala @@ -66,9 +66,6 @@ private[mllib] object EigenValueDecomposition { // ncv must be smaller than n val ncv = math.min(2 * k, n) - require(ncv * n.toLong < Integer.MAX_VALUE, "Product of 2*k*n must be smaller than " + - s"Integer.MAX_VALUE. Found required eigenvalues k = $k and matrix dimension n = $n") - // "I" for standard eigenvalue problem, "G" for generalized eigenvalue problem val bmat = "I" // "LM" : compute the NEV largest (in magnitude) eigenvalues @@ -82,6 +79,10 @@ private[mllib] object EigenValueDecomposition { // Mode 1: A*x = lambda*x, A symmetric iparam(6) = 1 + require(ncv * n.toLong <= Integer.MAX_VALUE, "Product of 2*k*n must be smaller than " + + s"Integer.MAX_VALUE. Found required eigenvalues k = $k and matrix dimension n = $n") + require(ncv * (ncv.toLong + 8) <= Integer.MAX_VALUE, "Product of ncv * (ncv + 8) must be smaller than " + + s"Integer.MAX_VALUE. The value of ncv is computed by: math.min(2 * k, n). 
Found ncv = $ncv and k = $k.") var ido = new intW(0) var info = new intW(0) var resid = new Array[Double](n) From e49cbbb0f9e0aa23c7f74bcf59df5c6967d6f5c0 Mon Sep 17 00:00:00 2001 From: mbittmann Date: Fri, 6 Feb 2015 19:08:56 -0500 Subject: [PATCH 3/4] [SPARK-5656] Simplify error message --- .../spark/mllib/linalg/EigenValueDecomposition.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala index 602e296f845d4..329d5591bad04 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala @@ -79,10 +79,11 @@ private[mllib] object EigenValueDecomposition { // Mode 1: A*x = lambda*x, A symmetric iparam(6) = 1 - require(ncv * n.toLong <= Integer.MAX_VALUE, "Product of 2*k*n must be smaller than " + - s"Integer.MAX_VALUE. Found required eigenvalues k = $k and matrix dimension n = $n") - require(ncv * (ncv.toLong + 8) <= Integer.MAX_VALUE, "Product of ncv * (ncv + 8) must be smaller than " + - s"Integer.MAX_VALUE. The value of ncv is computed by: math.min(2 * k, n). Found ncv = $ncv and k = $k.") + require(n * ncv.toLong <= Integer.MAX_VALUE, "Large n and/or k will exceed " + s"max array size. Found required eigenvalues k = $k and matrix dimension n = $n") + require(ncv * (ncv.toLong + 8) <= Integer.MAX_VALUE, "Large k will exceed " + s"max array size. 
Found k=$k.") + var ido = new intW(0) var info = new intW(0) var resid = new Array[Double](n) From ee56e05644bbb39883828b2eff58af5e0a4df309 Mon Sep 17 00:00:00 2001 From: mbittmann Date: Sat, 7 Feb 2015 09:23:29 -0500 Subject: [PATCH 4/4] [SPARK-5656] Combine checks into simple message --- .../apache/spark/mllib/linalg/EigenValueDecomposition.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala index 329d5591bad04..9d6f97528148e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala @@ -79,10 +79,8 @@ private[mllib] object EigenValueDecomposition { // Mode 1: A*x = lambda*x, A symmetric iparam(6) = 1 - require(n * ncv.toLong <= Integer.MAX_VALUE, "Large n and/or k will exceed " + - s"max array size. Found required eigenvalues k = $k and matrix dimension n = $n") - require(ncv * (ncv.toLong + 8) <= Integer.MAX_VALUE, "Large k will exceed " + - s"max array size. Found k=$k.") + require(n * ncv.toLong <= Integer.MAX_VALUE && ncv * (ncv.toLong + 8) <= Integer.MAX_VALUE, + s"k = $k and/or n = $n are too large to compute an eigendecomposition") var ido = new intW(0) var info = new intW(0)