From 14ba93e5ae8c402778348f5d3793167478c35a3d Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 13 Jun 2016 11:28:05 -0700 Subject: [PATCH 1/2] [SPARK-15922][MLLIB] `toIndexedRowMatrix` should consider the case `cols < colsPerBlock` --- .../apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 2 +- .../spark/mllib/linalg/distributed/BlockMatrixSuite.scala | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 7a24617781ece..50378a9ef8bd8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -288,7 +288,7 @@ class BlockMatrix @Since("1.3.0") ( vectors.foreach { case (blockColIdx: Int, vec: BV[Double]) => val offset = colsPerBlock * blockColIdx - wholeVector(offset until offset + colsPerBlock) := vec + wholeVector(offset until offset + Math.min(cols, colsPerBlock)) := vec } new IndexedRow(rowIdx, Vectors.fromBreeze(wholeVector)) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala index e5a2cbbb588df..61266f3c78dbc 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala @@ -135,6 +135,11 @@ class BlockMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(rowMat.numCols() === n) assert(rowMat.toBreeze() === gridBasedMat.toBreeze()) + // SPARK-15922: BlockMatrix to IndexedRowMatrix throws an error" + val bmat = rowMat.toBlockMatrix + val imat = bmat.toIndexedRowMatrix + imat.rows.collect + val rows = 1 val cols = 10 From 50f52f46665613994e78c921888eb86a0f3dbac8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 13 Jun 2016 12:13:07 -0700 Subject: [PATCH 2/2] Address comments. --- .../org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 50378a9ef8bd8..639295c695255 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -288,7 +288,7 @@ class BlockMatrix @Since("1.3.0") ( vectors.foreach { case (blockColIdx: Int, vec: BV[Double]) => val offset = colsPerBlock * blockColIdx - wholeVector(offset until offset + Math.min(cols, colsPerBlock)) := vec + wholeVector(offset until Math.min(cols, offset + colsPerBlock)) := vec } new IndexedRow(rowIdx, Vectors.fromBreeze(wholeVector)) }