From c408c0539e040883f22d7ce22fc508cd102af05c Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Mon, 19 Jan 2015 15:24:40 -0800 Subject: [PATCH 01/11] [SPARK-5321] Support for transposing local matrices added --- .../org/apache/spark/mllib/linalg/BLAS.scala | 39 +-- .../apache/spark/mllib/linalg/Matrices.scala | 266 +++++++++++++----- .../apache/spark/mllib/linalg/BLASSuite.scala | 38 ++- .../spark/mllib/linalg/MatricesSuite.scala | 60 +++- 4 files changed, 303 insertions(+), 100 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index 3414daccd7ca4..6b50749283718 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -317,19 +317,23 @@ private[spark] object BLAS extends Serializable with Logging { B: DenseMatrix, beta: Double, C: DenseMatrix): Unit = { + val transModA = transA ^ A.isTransposed + val transModB = transB ^ B.isTransposed val mA: Int = if (!transA) A.numRows else A.numCols val nB: Int = if (!transB) B.numCols else B.numRows val kA: Int = if (!transA) A.numCols else A.numRows val kB: Int = if (!transB) B.numRows else B.numCols - val tAstr = if (!transA) "N" else "T" - val tBstr = if (!transB) "N" else "T" + val tAstr = if (!transModA) "N" else "T" + val tBstr = if (!transModB) "N" else "T" + val lda = if (!A.isTransposed) A.numRows else A.numCols + val ldb = if (!B.isTransposed) B.numRows else B.numCols require(kA == kB, s"The columns of A don't match the rows of B. A: $kA, B: $kB") require(mA == C.numRows, s"The rows of C don't match the rows of A. C: ${C.numRows}, A: $mA") require(nB == C.numCols, s"The columns of C don't match the columns of B. C: ${C.numCols}, A: $nB") - nativeBLAS.dgemm(tAstr, tBstr, mA, nB, kA, alpha, A.values, A.numRows, B.values, B.numRows, + nativeBLAS.dgemm(tAstr, tBstr, mA, nB, kA, alpha, A.values, lda, B.values, ldb, beta, C.values, C.numRows) } @@ -345,6 +349,8 @@ private[spark] object BLAS extends Serializable with Logging { B: DenseMatrix, beta: Double, C: DenseMatrix): Unit = { + val transModA = transA ^ A.isTransposed + val transModB = transB ^ B.isTransposed val mA: Int = if (!transA) A.numRows else A.numCols val nB: Int = if (!transB) B.numCols else B.numRows val kA: Int = if (!transA) A.numCols else A.numRows @@ -358,13 +364,13 @@ private[spark] object BLAS extends Serializable with Logging { val Avals = A.values val Bvals = B.values val Cvals = C.values - val Arows = if (!transA) A.rowIndices else A.colPtrs - val Acols = if (!transA) A.colPtrs else A.rowIndices + val Arows = if (!transModA) A.rowIndices else A.colPtrs + val Acols = if (!transModA) A.colPtrs else A.rowIndices // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices - if (transA){ + if (transModA){ var colCounterForB = 0 - if (!transB) { // Expensive to put the check inside the loop + if (!transModB) { // Expensive to put the check inside the loop while (colCounterForB < nB) { var rowCounterForA = 0 val Cstart = colCounterForB * mA @@ -410,7 +416,7 @@ private[spark] object BLAS extends Serializable with Logging { // Perform matrix multiplication and add to C. The rows of A are multiplied by the columns of // B, and added to C. var colCounterForB = 0 // the column to be updated in C - if (!transB) { // Expensive to put the check inside the loop + if (!transModB) { // Expensive to put the check inside the loop while (colCounterForB < nB) { var colCounterForA = 0 // The column of A to multiply with the row of B val Bstart = colCounterForB * kB @@ -463,7 +469,6 @@ private[spark] object BLAS extends Serializable with Logging { x: DenseVector, beta: Double, y: DenseVector): Unit = { - val mA: Int = if (!trans) A.numRows else A.numCols val nx: Int = x.size val nA: Int = if (!trans) A.numCols else A.numRows @@ -514,8 +519,11 @@ private[spark] object BLAS extends Serializable with Logging { x: DenseVector, beta: Double, y: DenseVector): Unit = { - val tStrA = if (!trans) "N" else "T" - nativeBLAS.dgemv(tStrA, A.numRows, A.numCols, alpha, A.values, A.numRows, x.values, 1, beta, + val transMod = trans ^ A.isTransposed + val tStrA = if (!transMod) "N" else "T" + val mA = if (!A.isTransposed) A.numRows else A.numCols + val nA = if (!A.isTransposed) A.numCols else A.numRows + nativeBLAS.dgemv(tStrA, mA, nA, alpha, A.values, mA, x.values, 1, beta, y.values, 1) } @@ -530,18 +538,17 @@ private[spark] object BLAS extends Serializable with Logging { x: DenseVector, beta: Double, y: DenseVector): Unit = { - val xValues = x.values val yValues = y.values - + val transMod = trans ^ A.isTransposed val mA: Int = if (!trans) A.numRows else A.numCols val nA: Int = if (!trans) A.numCols else A.numRows val Avals = A.values - val Arows = if (!trans) A.rowIndices else A.colPtrs - val Acols = if (!trans) A.colPtrs else A.rowIndices + val Arows = if (!transMod) A.rowIndices else A.colPtrs + val Acols = if (!transMod) A.colPtrs else A.rowIndices // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices - if (trans) { + if (transMod) { var rowCounter = 0 while (rowCounter < mA) { var i = Arows(rowCounter) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 5a7281ec6dc3c..df19e68c23198 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.linalg -import java.util.{Arrays, Random} +import java.util.{Arrays => jArrays, Random} import scala.collection.mutable.{ArrayBuilder => MArrayBuilder, HashSet => MHashSet, ArrayBuffer} @@ -34,6 +34,9 @@ sealed trait Matrix extends Serializable { /** Number of columns. */ def numCols: Int + /** Flag that keeps track whether the matrix is transposed or not. False by default. */ + private[linalg] var isTransposed = false + /** Converts to a dense array in column major. */ def toArray: Array[Double] @@ -52,9 +55,12 @@ sealed trait Matrix extends Serializable { /** Get a deep copy of the matrix. */ def copy: Matrix + /** Transpose the Matrix */ + def transpose: Matrix + /** Convenience method for `Matrix`-`DenseMatrix` multiplication. */ def multiply(y: DenseMatrix): DenseMatrix = { - val C: DenseMatrix = Matrices.zeros(numRows, y.numCols).asInstanceOf[DenseMatrix] + val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols) BLAS.gemm(false, false, 1.0, this, y, 0.0, C) C } @@ -62,21 +68,7 @@ sealed trait Matrix extends Serializable { /** Convenience method for `Matrix`-`DenseVector` multiplication. */ def multiply(y: DenseVector): DenseVector = { val output = new DenseVector(new Array[Double](numRows)) - BLAS.gemv(1.0, this, y, 0.0, output) - output - } - - /** Convenience method for `Matrix`^T^-`DenseMatrix` multiplication. */ - private[mllib] def transposeMultiply(y: DenseMatrix): DenseMatrix = { - val C: DenseMatrix = Matrices.zeros(numCols, y.numCols).asInstanceOf[DenseMatrix] - BLAS.gemm(true, false, 1.0, this, y, 0.0, C) - C - } - - /** Convenience method for `Matrix`^T^-`DenseVector` multiplication. */ - private[mllib] def transposeMultiply(y: DenseVector): DenseVector = { - val output = new DenseVector(new Array[Double](numCols)) - BLAS.gemv(true, 1.0, this, y, 0.0, output) + BLAS.gemv(false, 1.0, this, y, 0.0, output) output } @@ -114,21 +106,47 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) require(values.length == numRows * numCols, "The number of values supplied doesn't match the " + s"size of the matrix! values.length: ${values.length}, numRows * numCols: ${numRows * numCols}") - override def toArray: Array[Double] = values + override def toArray: Array[Double] = { + if (!isTransposed) { + values + } else { + val transposedValues = new Array[Double](values.length) + var j = 0 + while (j < numCols) { + var i = 0 + val indStart = j * numRows + while (i < numRows) { + transposedValues(indStart + i) = values(j + i * numCols) + i += 1 + } + j += 1 + } + transposedValues + } + } override def equals(o: Any) = o match { case m: DenseMatrix => - m.numRows == numRows && m.numCols == numCols && Arrays.equals(toArray, m.toArray) + m.numRows == numRows && m.numCols == numCols && jArrays.equals(toArray, m.toArray) case _ => false } - private[mllib] def toBreeze: BM[Double] = new BDM[Double](numRows, numCols, values) + private[mllib] def toBreeze: BM[Double] = { + if (!isTransposed) { + new BDM[Double](numRows, numCols, values) + } else { + val breezeMatrix = new BDM[Double](numCols, numRows, values) + breezeMatrix.t + } + } private[mllib] def apply(i: Int): Double = values(i) private[mllib] def apply(i: Int, j: Int): Double = values(index(i, j)) - private[mllib] def index(i: Int, j: Int): Int = i + numRows * j + private[mllib] def index(i: Int, j: Int): Int = { + if (!isTransposed) i + numRows * j else j + numCols * i + } private[mllib] def update(i: Int, j: Int, v: Double): Unit = { values(index(i, j)) = v @@ -148,6 +166,12 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) this } + override def transpose: Matrix = { + val transposedMatrix = new DenseMatrix(numCols, numRows, values) + transposedMatrix.isTransposed = !isTransposed + transposedMatrix + } + /** Generate a `SparseMatrix` from the given `DenseMatrix`. */ def toSparse(): SparseMatrix = { val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble @@ -159,7 +183,7 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) var i = 0 val indStart = j * numRows while (i < numRows) { - val v = values(indStart + i) + val v = values(index(i, j)) if (v != 0.0) { rowIndices += i spVals += v @@ -281,31 +305,53 @@ class SparseMatrix( require(values.length == rowIndices.length, "The number of row indices and values don't match! " + s"values.length: ${values.length}, rowIndices.length: ${rowIndices.length}") - require(colPtrs.length == numCols + 1, "The length of the column indices should be the " + - s"number of columns + 1. Currently, colPointers.length: ${colPtrs.length}, " + - s"numCols: $numCols") + // The Or statement is for the case when the matrix is transposed + require(colPtrs.length == numCols + 1 || colPtrs.length == numRows + 1, "The length of the " + + "column indices should be the number of columns + 1. Currently, colPointers.length: " + + s"${colPtrs.length}, numCols: $numCols") require(values.length == colPtrs.last, "The last value of colPtrs must equal the number of " + s"elements. values.length: ${values.length}, colPtrs.last: ${colPtrs.last}") override def toArray: Array[Double] = { val arr = new Array[Double](numRows * numCols) - var j = 0 - while (j < numCols) { - var i = colPtrs(j) - val indEnd = colPtrs(j + 1) - val offset = j * numRows - while (i < indEnd) { - val rowIndex = rowIndices(i) - arr(offset + rowIndex) = values(i) + // if statement inside the loop would be expensive + if (!isTransposed) { + var j = 0 + while (j < numCols) { + var i = colPtrs(j) + val indEnd = colPtrs(j + 1) + val offset = j * numRows + while (i < indEnd) { + val rowIndex = rowIndices(i) + arr(offset + rowIndex) = values(i) + i += 1 + } + j += 1 + } + } else { + var i = 0 + while (i < numRows) { + var j = colPtrs(i) + val jEnd = colPtrs(i + 1) + while (j < jEnd) { + val colIndex = rowIndices(j) + arr(colIndex * numRows + i) = values(j) + j += 1 + } i += 1 } - j += 1 } arr } - private[mllib] def toBreeze: BM[Double] = - new BSM[Double](values, numRows, numCols, colPtrs, rowIndices) + private[mllib] def toBreeze: BM[Double] = { + if (!isTransposed) { + new BSM[Double](values, numRows, numCols, colPtrs, rowIndices) + } else { + val breezeMatrix = new BSM[Double](values, numCols, numRows, colPtrs, rowIndices) + breezeMatrix.t + } + } private[mllib] def apply(i: Int, j: Int): Double = { val ind = index(i, j) @@ -313,7 +359,11 @@ class SparseMatrix( } private[mllib] def index(i: Int, j: Int): Int = { - Arrays.binarySearch(rowIndices, colPtrs(j), colPtrs(j + 1), i) + if (!isTransposed) { + jArrays.binarySearch(rowIndices, colPtrs(j), colPtrs(j + 1), i) + } else { + jArrays.binarySearch(rowIndices, colPtrs(i), colPtrs(i + 1), j) + } } private[mllib] def update(i: Int, j: Int, v: Double): Unit = { @@ -322,7 +372,7 @@ class SparseMatrix( throw new NoSuchElementException("The given row and column indices correspond to a zero " + "value. Only non-zero elements in Sparse Matrices can be updated.") } else { - values(index(i, j)) = v + values(ind) = v } } @@ -341,6 +391,12 @@ class SparseMatrix( this } + override def transpose: Matrix = { + val transposedMatrix = new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values) + transposedMatrix.isTransposed = !isTransposed + transposedMatrix + } + /** Generate a `DenseMatrix` from the given `SparseMatrix`. */ def toDense(): DenseMatrix = { new DenseMatrix(numRows, numCols, toArray) @@ -681,41 +737,72 @@ object Matrices { var startCol = 0 val entries: Array[(Int, Int, Double)] = matrices.flatMap { case spMat: SparseMatrix => - var j = 0 val colPtrs = spMat.colPtrs val rowIndices = spMat.rowIndices val values = spMat.values val data = new Array[(Int, Int, Double)](values.length) val nCols = spMat.numCols - while (j < nCols) { - var idx = colPtrs(j) - while (idx < colPtrs(j + 1)) { - val i = rowIndices(idx) - val v = values(idx) - data(idx) = (i, j + startCol, v) - idx += 1 + if (!spMat.isTransposed) { + var j = 0 + while (j < nCols) { + var idx = colPtrs(j) + while (idx < colPtrs(j + 1)) { + val i = rowIndices(idx) + val v = values(idx) + data(idx) = (i, j + startCol, v) + idx += 1 + } + j += 1 + } + } else { + var i = 0 + val nRows = spMat.numRows + while (i < nRows) { + var idx = colPtrs(i) + while (idx < colPtrs(i + 1)) { + val j = rowIndices(idx) + val v = values(idx) + data(idx) = (i, j + startCol, v) + idx += 1 + } + i += 1 } - j += 1 } startCol += nCols data case dnMat: DenseMatrix => val data = new ArrayBuffer[(Int, Int, Double)]() - var j = 0 val nCols = dnMat.numCols val nRows = dnMat.numRows val values = dnMat.values - while (j < nCols) { + if (!dnMat.isTransposed) { + var j = 0 + while (j < nCols) { + var i = 0 + val indStart = j * nRows + while (i < nRows) { + val v = values(indStart + i) + if (v != 0.0) { + data.append((i, j + startCol, v)) + } + i += 1 + } + j += 1 + } + } else { var i = 0 - val indStart = j * nRows while (i < nRows) { - val v = values(indStart + i) - if (v != 0.0) { - data.append((i, j + startCol, v)) + var j = 0 + val indStart = i * nCols + while (j < nCols) { + val v = values(indStart + j) + if (v != 0.0) { + data.append((i, j + startCol, v)) + } + j += 1 } i += 1 } - j += 1 } startCol += nCols data @@ -744,14 +831,12 @@ object Matrices { require(numCols == mat.numCols, "The number of rows of the matrices in this sequence, " + "don't match!") mat match { - case sparse: SparseMatrix => - hasSparse = true - case dense: DenseMatrix => + case sparse: SparseMatrix => hasSparse = true + case dense: DenseMatrix => // empty on purpose case _ => throw new IllegalArgumentException("Unsupported matrix format. Expected " + s"SparseMatrix or DenseMatrix. Instead got: ${mat.getClass}") } numRows += mat.numRows - } if (!hasSparse) { val allValues = new Array[Double](numRows * numCols) @@ -777,40 +862,71 @@ object Matrices { var startRow = 0 val entries: Array[(Int, Int, Double)] = matrices.flatMap { case spMat: SparseMatrix => - var j = 0 val colPtrs = spMat.colPtrs val rowIndices = spMat.rowIndices val values = spMat.values val data = new Array[(Int, Int, Double)](values.length) - while (j < numCols) { - var idx = colPtrs(j) - while (idx < colPtrs(j + 1)) { - val i = rowIndices(idx) - val v = values(idx) - data(idx) = (i + startRow, j, v) - idx += 1 + val nRows = spMat.numRows + if (!spMat.isTransposed) { + var j = 0 + while (j < numCols) { + var idx = colPtrs(j) + while (idx < colPtrs(j + 1)) { + val i = rowIndices(idx) + val v = values(idx) + data(idx) = (i + startRow, j, v) + idx += 1 + } + j += 1 + } + } else { + var i = 0 + while (i < nRows) { + var idx = colPtrs(i) + while (idx < colPtrs(i + 1)) { + val j = rowIndices(idx) + val v = values(idx) + data(idx) = (i + startRow, j, v) + idx += 1 + } + i += 1 } - j += 1 } - startRow += spMat.numRows + startRow += nRows data case dnMat: DenseMatrix => val data = new ArrayBuffer[(Int, Int, Double)]() - var j = 0 val nCols = dnMat.numCols val nRows = dnMat.numRows val values = dnMat.values - while (j < nCols) { + if (!dnMat.isTransposed) { + var j = 0 + while (j < nCols) { + var i = 0 + val indStart = j * nRows + while (i < nRows) { + val v = values(indStart + i) + if (v != 0.0) { + data.append((i + startRow, j, v)) + } + i += 1 + } + j += 1 + } + } else { var i = 0 - val indStart = j * nRows while (i < nRows) { - val v = values(indStart + i) - if (v != 0.0) { - data.append((i + startRow, j, v)) + var j = 0 + val indStart = i * nCols + while (j < nCols) { + val v = values(indStart + j) + if (v != 0.0) { + data.append((i + startRow, j, v)) + } + j += 1 } i += 1 } - j += 1 } startRow += nRows data diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala index 771878e925ea7..48d8e543d4902 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala @@ -176,9 +176,10 @@ class BLASSuite extends FunSuite { val B = new DenseMatrix(3, 2, Array(1.0, 0.0, 0.0, 0.0, 2.0, 1.0)) val expected = new DenseMatrix(4, 2, Array(0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 3.0)) + val BT = new DenseMatrix(2, 3, Array(1.0, 0.0, 0.0, 2.0, 0.0, 1.0)) - assert(dA multiply B ~== expected absTol 1e-15) - assert(sA multiply B ~== expected absTol 1e-15) + assert(dA.multiply(B) ~== expected absTol 1e-15) + assert(sA.multiply(B) ~== expected absTol 1e-15) val C1 = new DenseMatrix(4, 2, Array(1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0)) val C2 = C1.copy @@ -188,6 +189,10 @@ class BLASSuite extends FunSuite { val C6 = C1.copy val C7 = C1.copy val C8 = C1.copy + val C9 = C1.copy + val C10 = C1.copy + val C11 = C1.copy + val C12 = C1.copy val expected2 = new DenseMatrix(4, 2, Array(2.0, 1.0, 4.0, 2.0, 4.0, 0.0, 4.0, 3.0)) val expected3 = new DenseMatrix(4, 2, Array(2.0, 2.0, 4.0, 2.0, 8.0, 0.0, 6.0, 6.0)) @@ -211,17 +216,31 @@ class BLASSuite extends FunSuite { val sAT = new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) - assert(dAT transposeMultiply B ~== expected absTol 1e-15) - assert(sAT transposeMultiply B ~== expected absTol 1e-15) + val dATT = dAT.transpose + val sATT = sAT.transpose + val BTT = BT.transpose.asInstanceOf[DenseMatrix] + + assert(dATT.multiply(B) ~== expected absTol 1e-15) + assert(sATT.multiply(B) ~== expected absTol 1e-15) + assert(dATT.multiply(BTT) ~== expected absTol 1e-15) + assert(sATT.multiply(BTT) ~== expected absTol 1e-15) gemm(true, false, 1.0, dAT, B, 2.0, C5) gemm(true, false, 1.0, sAT, B, 2.0, C6) gemm(true, false, 2.0, dAT, B, 2.0, C7) gemm(true, false, 2.0, sAT, B, 2.0, C8) + gemm(true, true, 1.0, dAT, BT, 2.0, C9) + gemm(true, true, 1.0, sAT, BT, 2.0, C10) + gemm(true, true, 2.0, dAT, BT, 2.0, C11) + gemm(true, true, 2.0, sAT, BT, 2.0, C12) assert(C5 ~== expected2 absTol 1e-15) assert(C6 ~== expected2 absTol 1e-15) assert(C7 ~== expected3 absTol 1e-15) assert(C8 ~== expected3 absTol 1e-15) + assert(C9 ~== expected2 absTol 1e-15) + assert(C10 ~== expected2 absTol 1e-15) + assert(C11 ~== expected3 absTol 1e-15) + assert(C12 ~== expected3 absTol 1e-15) } test("gemv") { @@ -233,8 +252,8 @@ class BLASSuite extends FunSuite { val x = new DenseVector(Array(1.0, 2.0, 3.0)) val expected = new DenseVector(Array(4.0, 1.0, 2.0, 9.0)) - assert(dA multiply x ~== expected absTol 1e-15) - assert(sA multiply x ~== expected absTol 1e-15) + assert(dA.multiply(x) ~== expected absTol 1e-15) + assert(sA.multiply(x) ~== expected absTol 1e-15) val y1 = new DenseVector(Array(1.0, 3.0, 1.0, 0.0)) val y2 = y1.copy @@ -266,8 +285,11 @@ class BLASSuite extends FunSuite { val sAT = new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) - assert(dAT transposeMultiply x ~== expected absTol 1e-15) - assert(sAT transposeMultiply x ~== expected absTol 1e-15) + val dATT = dAT.transpose + val sATT = sAT.transpose + + assert(dATT.multiply(x) ~== expected absTol 1e-15) + assert(sATT.multiply(x) ~== expected absTol 1e-15) gemv(true, 1.0, dAT, x, 2.0, y5) gemv(true, 1.0, sAT, x, 2.0, y6) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index a35d0fe389fdd..a337413f6bf70 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -161,6 +161,32 @@ class MatricesSuite extends FunSuite { assert(deMat1.toArray === deMat2.toArray) } + test("transpose") { + val dA = + new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0)) + val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0)) + + val dAT = dA.transpose.asInstanceOf[DenseMatrix] + val sAT = sA.transpose.asInstanceOf[SparseMatrix] + val dATexpected = + new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0)) + val sATexpected = + new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) + + assert(dAT.toBreeze === dATexpected.toBreeze) + assert(sAT.toBreeze === sATexpected.toBreeze) + assert(dA(1, 0) === dAT(0, 1)) + assert(dA(2, 1) === dAT(1, 2)) + assert(sA(1, 0) === sAT(0, 1)) + assert(sA(2, 1) === sAT(1, 2)) + + assert(!dA.toArray.eq(dAT.toArray), "has to have a new array") + assert(dA.toArray.eq(dAT.transpose.toArray), "should not copy array") + + assert(dAT.toSparse().toBreeze === sATexpected.toBreeze) + assert(sAT.toDense().toBreeze === dATexpected.toBreeze) + } + test("horzcat, vertcat, eye, speye") { val m = 3 val n = 2 @@ -168,9 +194,20 @@ class MatricesSuite extends FunSuite { val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0) val colPtrs = Array(0, 2, 4) val rowIndices = Array(0, 1, 1, 2) + // transposed versions + val allValuesT = Array(1.0, 0.0, 2.0, 4.0, 0.0, 5.0) + val colPtrsT = Array(0, 1, 3, 4) + val rowIndicesT = Array(0, 0, 1, 1) val spMat1 = new SparseMatrix(m, n, colPtrs, rowIndices, values) val deMat1 = new DenseMatrix(m, n, allValues) + val spMat1T = new SparseMatrix(n, m, colPtrsT, rowIndicesT, values) + val deMat1T = new DenseMatrix(n, m, allValuesT) + + // should equal spMat1 & deMat1 respectively + val spMat1TT = spMat1T.transpose + val deMat1TT = deMat1T.transpose + val deMat2 = Matrices.eye(3) val spMat2 = Matrices.speye(3) val deMat3 = Matrices.eye(2) @@ -180,7 +217,6 @@ class MatricesSuite extends FunSuite { val spHorz2 = Matrices.horzcat(Array(spMat1, deMat2)) val spHorz3 = Matrices.horzcat(Array(deMat1, spMat2)) val deHorz1 = Matrices.horzcat(Array(deMat1, deMat2)) - val deHorz2 = Matrices.horzcat(Array[Matrix]()) assert(deHorz1.numRows === 3) @@ -212,6 +248,17 @@ class MatricesSuite extends FunSuite { assert(deHorz1(2, 4) === 1.0) assert(deHorz1(1, 4) === 0.0) + // containing transposed matrices + val spHorzT = Matrices.horzcat(Array(spMat1TT, spMat2)) + val spHorz2T = Matrices.horzcat(Array(spMat1TT, deMat2)) + val spHorz3T = Matrices.horzcat(Array(deMat1TT, spMat2)) + val deHorz1T = Matrices.horzcat(Array(deMat1TT, deMat2)) + + assert(deHorz1T.toBreeze === deHorz1.toBreeze) + assert(spHorzT.toBreeze === spHorz.toBreeze) + assert(spHorz2T.toBreeze === spHorz2.toBreeze) + assert(spHorz3T.toBreeze === spHorz3.toBreeze) + intercept[IllegalArgumentException] { Matrices.horzcat(Array(spMat1, spMat3)) } @@ -251,6 +298,17 @@ class MatricesSuite extends FunSuite { assert(deVert1(3, 1) === 0.0) assert(deVert1(4, 1) === 1.0) + // containing transposed matrices + val spVertT = Matrices.vertcat(Array(spMat1TT, spMat3)) + val deVert1T = Matrices.vertcat(Array(deMat1TT, deMat3)) + val spVert2T = Matrices.vertcat(Array(spMat1TT, deMat3)) + val spVert3T = Matrices.vertcat(Array(deMat1TT, spMat3)) + + assert(deVert1T.toBreeze === deVert1.toBreeze) + assert(spVertT.toBreeze === spVert.toBreeze) + assert(spVert2T.toBreeze === spVert2.toBreeze) + assert(spVert3T.toBreeze === spVert3.toBreeze) + intercept[IllegalArgumentException] { Matrices.vertcat(Array(spMat1, spMat2)) } From c55f29a30e4d04c13e203daccc44a16bee0a8cd1 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Mon, 19 Jan 2015 15:33:02 -0800 Subject: [PATCH 02/11] [SPARK-5321] Support for transposing local matrices cleaned up --- .../apache/spark/mllib/linalg/Matrices.scala | 231 +++++++++--------- 1 file changed, 116 insertions(+), 115 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index df19e68c23198..39c5b791a9211 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -735,77 +735,77 @@ object Matrices { new DenseMatrix(numRows, numCols, matrices.flatMap(_.toArray)) } else { var startCol = 0 - val entries: Array[(Int, Int, Double)] = matrices.flatMap { - case spMat: SparseMatrix => - val colPtrs = spMat.colPtrs - val rowIndices = spMat.rowIndices - val values = spMat.values - val data = new Array[(Int, Int, Double)](values.length) - val nCols = spMat.numCols - if (!spMat.isTransposed) { - var j = 0 - while (j < nCols) { - var idx = colPtrs(j) - while (idx < colPtrs(j + 1)) { - val i = rowIndices(idx) - val v = values(idx) - data(idx) = (i, j + startCol, v) - idx += 1 - } - j += 1 - } - } else { - var i = 0 - val nRows = spMat.numRows - while (i < nRows) { - var idx = colPtrs(i) - while (idx < colPtrs(i + 1)) { - val j = rowIndices(idx) - val v = values(idx) - data(idx) = (i, j + startCol, v) - idx += 1 + val entries: Array[(Int, Int, Double)] = matrices.flatMap { mat => + val nRows = mat.numRows + val nCols = mat.numCols + mat match { + case spMat: SparseMatrix => + val colPtrs = spMat.colPtrs + val rowIndices = spMat.rowIndices + val values = spMat.values + val data = new Array[(Int, Int, Double)](values.length) + if (!spMat.isTransposed) { + var j = 0 + while (j < nCols) { + var idx = colPtrs(j) + while (idx < colPtrs(j + 1)) { + val i = rowIndices(idx) + val v = values(idx) + data(idx) = (i, j + startCol, v) + idx += 1 + } + j += 1 } - i += 1 - } - } - startCol += nCols - data - case dnMat: DenseMatrix => - val data = new ArrayBuffer[(Int, Int, Double)]() - val nCols = dnMat.numCols - val nRows = dnMat.numRows - val values = dnMat.values - if (!dnMat.isTransposed) { - var j = 0 - while (j < nCols) { + } else { var i = 0 - val indStart = j * nRows while (i < nRows) { - val v = values(indStart + i) - if (v != 0.0) { - data.append((i, j + startCol, v)) + var idx = colPtrs(i) + while (idx < colPtrs(i + 1)) { + val j = rowIndices(idx) + val v = values(idx) + data(idx) = (i, j + startCol, v) + idx += 1 } i += 1 } - j += 1 } - } else { - var i = 0 - while (i < nRows) { + startCol += nCols + data + case dnMat: DenseMatrix => + val data = new ArrayBuffer[(Int, Int, Double)]() + val values = dnMat.values + if (!dnMat.isTransposed) { var j = 0 - val indStart = i * nCols while (j < nCols) { - val v = values(indStart + j) - if (v != 0.0) { - data.append((i, j + startCol, v)) + var i = 0 + val indStart = j * nRows + while (i < nRows) { + val v = values(indStart + i) + if (v != 0.0) { + data.append((i, j + startCol, v)) + } + i += 1 } j += 1 } - i += 1 + } else { + var i = 0 + while (i < nRows) { + var j = 0 + val indStart = i * nCols + while (j < nCols) { + val v = values(indStart + j) + if (v != 0.0) { + data.append((i, j + startCol, v)) + } + j += 1 + } + i += 1 + } } - } - startCol += nCols - data + startCol += nCols + data + } } SparseMatrix.fromCOO(numRows, numCols, entries) } @@ -860,76 +860,77 @@ object Matrices { new DenseMatrix(numRows, numCols, allValues) } else { var startRow = 0 - val entries: Array[(Int, Int, Double)] = matrices.flatMap { - case spMat: SparseMatrix => - val colPtrs = spMat.colPtrs - val rowIndices = spMat.rowIndices - val values = spMat.values - val data = new Array[(Int, Int, Double)](values.length) - val nRows = spMat.numRows - if (!spMat.isTransposed) { - var j = 0 - while (j < numCols) { - var idx = colPtrs(j) - while (idx < colPtrs(j + 1)) { - val i = rowIndices(idx) - val v = values(idx) - data(idx) = (i + startRow, j, v) - idx += 1 - } - j += 1 - } - } else { - var i = 0 - while (i < nRows) { - var idx = colPtrs(i) - while (idx < colPtrs(i + 1)) { - val j = rowIndices(idx) - val v = values(idx) - data(idx) = (i + startRow, j, v) - idx += 1 + val entries: Array[(Int, Int, Double)] = matrices.flatMap { mat => + val nRows = mat.numRows + val nCols = mat.numCols + mat match { + case spMat: SparseMatrix => + val colPtrs = spMat.colPtrs + val rowIndices = spMat.rowIndices + val values = spMat.values + val data = new Array[(Int, Int, Double)](values.length) + if (!spMat.isTransposed) { + var j = 0 + while (j < nCols) { + var idx = colPtrs(j) + while (idx < colPtrs(j + 1)) { + val i = rowIndices(idx) + val v = values(idx) + data(idx) = (i + startRow, j, v) + idx += 1 + } + j += 1 } - i += 1 - } - } - startRow += nRows - data - case dnMat: DenseMatrix => - val data = new ArrayBuffer[(Int, Int, Double)]() - val nCols = dnMat.numCols - val nRows = dnMat.numRows - val values = dnMat.values - if (!dnMat.isTransposed) { - var j = 0 - while (j < nCols) { + } else { var i = 0 - val indStart = j * nRows while (i < nRows) { - val v = values(indStart + i) - if (v != 0.0) { - data.append((i + startRow, j, v)) + var idx = colPtrs(i) + while (idx < colPtrs(i + 1)) { + val j = rowIndices(idx) + val v = values(idx) + data(idx) = (i + startRow, j, v) + idx += 1 } i += 1 } - j += 1 } - } else { - var i = 0 - while (i < nRows) { + startRow += nRows + data + case dnMat: DenseMatrix => + val data = new ArrayBuffer[(Int, Int, Double)]() + val values = dnMat.values + if (!dnMat.isTransposed) { var j = 0 - val indStart = i * nCols while (j < nCols) { - val v = values(indStart + j) - if (v != 0.0) { - data.append((i + startRow, j, v)) + var i = 0 + val indStart = j * nRows + while (i < nRows) { + val v = values(indStart + i) + if (v != 0.0) { + data.append((i + startRow, j, v)) + } + i += 1 } j += 1 } - i += 1 + } else { + var i = 0 + while (i < nRows) { + var j = 0 + val indStart = i * nCols + while (j < nCols) { + val v = values(indStart + j) + if (v != 0.0) { + data.append((i + startRow, j, v)) + } + j += 1 + } + i += 1 + } } - } - startRow += nRows - data + startRow += nRows + data + } } SparseMatrix.fromCOO(numRows, numCols, entries) } From 2a63593bc8fb375af1676c2ba4c2fa6aa6680e32 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Mon, 19 Jan 2015 19:08:25 -0800 Subject: [PATCH 03/11] [SPARK-5321] fixed bug causing failed gemm test --- .../org/apache/spark/mllib/linalg/BLAS.scala | 44 +++++++++++-------- .../apache/spark/mllib/linalg/Matrices.scala | 1 - .../apache/spark/mllib/linalg/BLASSuite.scala | 8 ++++ 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index 6b50749283718..53836e981e019 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -364,8 +364,8 @@ private[spark] object BLAS extends Serializable with Logging { val Avals = A.values val Bvals = B.values val Cvals = C.values - val Arows = if (!transModA) A.rowIndices else A.colPtrs - val Acols = if (!transModA) A.colPtrs else A.rowIndices + val ArowIndices = A.rowIndices + val AcolPtrs = A.colPtrs // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices if (transModA){ @@ -376,11 +376,11 @@ private[spark] object BLAS extends Serializable with Logging { val Cstart = colCounterForB * mA val Bstart = colCounterForB * kA while (rowCounterForA < mA) { - var i = Arows(rowCounterForA) - val indEnd = Arows(rowCounterForA + 1) + var i = AcolPtrs(rowCounterForA) + val indEnd = AcolPtrs(rowCounterForA + 1) var sum = 0.0 while (i < indEnd) { - sum += Avals(i) * Bvals(Bstart + Acols(i)) + sum += Avals(i) * Bvals(Bstart + ArowIndices(i)) i += 1 } val Cindex = Cstart + rowCounterForA @@ -390,20 +390,23 @@ private[spark] object BLAS extends Serializable with Logging { colCounterForB += 1 } } else { + // this is a hack, because the indexing is different when B.isTranspose is true, but transB + // is false and when transB is true, but B.isTranspose is false + val tempB = if (transB) B else new DenseMatrix(B.numCols, B.numRows, B.values) while (colCounterForB < nB) { - var rowCounter = 0 + var rowCounterForA = 0 val Cstart = colCounterForB * mA - while (rowCounter < mA) { - var i = Arows(rowCounter) - val indEnd = Arows(rowCounter + 1) + while (rowCounterForA < mA) { + var i = AcolPtrs(rowCounterForA) + val indEnd = AcolPtrs(rowCounterForA + 1) var sum = 0.0 while (i < indEnd) { - sum += Avals(i) * B(colCounterForB, Acols(i)) + sum += Avals(i) * tempB(colCounterForB, ArowIndices(i)) i += 1 } - val Cindex = Cstart + rowCounter + val Cindex = Cstart + rowCounterForA Cvals(Cindex) = beta * Cvals(Cindex) + sum * alpha - rowCounter += 1 + rowCounterForA += 1 } colCounterForB += 1 } @@ -422,11 +425,11 @@ private[spark] object BLAS extends Serializable with Logging { val Bstart = colCounterForB * kB val Cstart = colCounterForB * mA while (colCounterForA < kA) { - var i = Acols(colCounterForA) - val indEnd = Acols(colCounterForA + 1) + var i = AcolPtrs(colCounterForA) + val indEnd = AcolPtrs(colCounterForA + 1) val Bval = Bvals(Bstart + colCounterForA) * alpha while (i < indEnd) { - Cvals(Cstart + Arows(i)) += Avals(i) * Bval + Cvals(Cstart + ArowIndices(i)) += Avals(i) * Bval i += 1 } colCounterForA += 1 @@ -434,15 +437,18 @@ private[spark] object BLAS extends Serializable with Logging { colCounterForB += 1 } } else { + // this is a hack, because the indexing is different when B.isTranspose is true, but transB + // is false and when transB is true, but B.isTranspose is false + val tempB = if (transB) B else new DenseMatrix(B.numCols, B.numRows, B.values) while (colCounterForB < nB) { var colCounterForA = 0 // The column of A to multiply with the row of B val Cstart = colCounterForB * mA while (colCounterForA < kA) { - var i = Acols(colCounterForA) - val indEnd = Acols(colCounterForA + 1) - val Bval = B(colCounterForB, colCounterForA) * alpha + var i = AcolPtrs(colCounterForA) + val indEnd = AcolPtrs(colCounterForA + 1) + val Bval = tempB(colCounterForB, colCounterForA) * alpha while (i < indEnd) { - Cvals(Cstart + Arows(i)) += Avals(i) * Bval + Cvals(Cstart + ArowIndices(i)) += Avals(i) * Bval i += 1 } colCounterForA += 1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 39c5b791a9211..cfc244b0c0b21 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -181,7 +181,6 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) var j = 0 while (j < numCols) { var i = 0 - val indStart = j * numRows while (i < numRows) { val v = values(index(i, j)) if (v != 0.0) { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala index 48d8e543d4902..5f585ee918589 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala @@ -193,6 +193,10 @@ class BLASSuite extends FunSuite { val C10 = C1.copy val C11 = C1.copy val C12 = C1.copy + val C13 = C1.copy + val C14 = C1.copy + val C15 = C1.copy + val C16 = C1.copy val expected2 = new DenseMatrix(4, 2, Array(2.0, 1.0, 4.0, 2.0, 4.0, 0.0, 4.0, 3.0)) val expected3 = new DenseMatrix(4, 2, Array(2.0, 2.0, 4.0, 2.0, 8.0, 0.0, 6.0, 6.0)) @@ -233,6 +237,10 @@ class BLASSuite extends FunSuite { gemm(true, true, 1.0, sAT, BT, 2.0, C10) gemm(true, true, 2.0, dAT, BT, 2.0, C11) gemm(true, true, 2.0, sAT, BT, 2.0, C12) + gemm(false, true, 1.0, dA, BT, 2.0, C13) + gemm(false, true, 1.0, sA, BT, 2.0, C14) + gemm(false, true, 2.0, dA, BT, 2.0, C15) + gemm(false, true, 2.0, sA, BT, 2.0, C16) assert(C5 ~== expected2 absTol 1e-15) assert(C6 ~== expected2 absTol 1e-15) assert(C7 ~== expected3 absTol 1e-15) From a01bd5f75bcdea21d98541299afcf04bc4f26d14 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Mon, 19 Jan 2015 22:55:29 -0800 Subject: [PATCH 04/11] [SPARK-5321] Fixed MiMa issues --- project/MimaExcludes.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 0ccbfcb0c43ff..b879cfcab9868 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -52,6 +52,16 @@ object MimaExcludes { "org.apache.spark.mllib.linalg.Matrices.randn"), ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.Matrices.rand") + ) ++ Seq( + // SPARK-5321 + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.SparseMatrix.transposeMultiply"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.Matrix.transpose"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.DenseMatrix.transposeMultiply"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.Matrix.isTransposed_=") ) ++ Seq( // SPARK-3325 ProblemFilters.exclude[MissingMethodProblem]( From dd45c881458eddca9127695584d9f0b654e9db90 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Tue, 20 Jan 2015 21:40:41 -0800 Subject: [PATCH 05/11] addressed code review --- .../org/apache/spark/mllib/linalg/BLAS.scala | 131 +++-------- .../apache/spark/mllib/linalg/Matrices.scala | 220 ++++++++---------- .../apache/spark/mllib/linalg/BLASSuite.scala | 56 ++--- 3 files changed, 141 insertions(+), 266 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index 53836e981e019..5c5082e2d21d2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -257,8 +257,6 @@ private[spark] object BLAS extends Serializable with Logging { /** * C := alpha * A * B + beta * C - * @param transA whether to use the transpose of matrix A (true), or A itself (false). - * @param transB whether to use the transpose of matrix B (true), or B itself (false). * @param alpha a scalar to scale the multiplication A * B. * @param A the matrix A that will be left multiplied to B. Size of m x k. * @param B the matrix B that will be left multiplied by A. Size of k x n. @@ -266,8 +264,6 @@ private[spark] object BLAS extends Serializable with Logging { * @param C the resulting matrix C. Size of m x n. */ def gemm( - transA: Boolean, - transB: Boolean, alpha: Double, A: Matrix, B: DenseMatrix, @@ -278,63 +274,39 @@ private[spark] object BLAS extends Serializable with Logging { } else { A match { case sparse: SparseMatrix => - gemm(transA, transB, alpha, sparse, B, beta, C) + gemm(alpha, sparse, B, beta, C) case dense: DenseMatrix => - gemm(transA, transB, alpha, dense, B, beta, C) + gemm(alpha, dense, B, beta, C) case _ => throw new IllegalArgumentException(s"gemm doesn't support matrix type ${A.getClass}.") } } } - /** - * C := alpha * A * B + beta * C - * - * @param alpha a scalar to scale the multiplication A * B. - * @param A the matrix A that will be left multiplied to B. Size of m x k. - * @param B the matrix B that will be left multiplied by A. Size of k x n. - * @param beta a scalar that can be used to scale matrix C. - * @param C the resulting matrix C. Size of m x n. - */ - def gemm( - alpha: Double, - A: Matrix, - B: DenseMatrix, - beta: Double, - C: DenseMatrix): Unit = { - gemm(false, false, alpha, A, B, beta, C) - } - /** * C := alpha * A * B + beta * C * For `DenseMatrix` A. */ private def gemm( - transA: Boolean, - transB: Boolean, alpha: Double, A: DenseMatrix, B: DenseMatrix, beta: Double, C: DenseMatrix): Unit = { - val transModA = transA ^ A.isTransposed - val transModB = transB ^ B.isTransposed - val mA: Int = if (!transA) A.numRows else A.numCols - val nB: Int = if (!transB) B.numCols else B.numRows - val kA: Int = if (!transA) A.numCols else A.numRows - val kB: Int = if (!transB) B.numRows else B.numCols - val tAstr = if (!transModA) "N" else "T" - val tBstr = if (!transModB) "N" else "T" + val tAstr = if (!A.isTransposed) "N" else "T" + val tBstr = if (!B.isTransposed) "N" else "T" val lda = if (!A.isTransposed) A.numRows else A.numCols val ldb = if (!B.isTransposed) B.numRows else B.numCols - require(kA == kB, s"The columns of A don't match the rows of B. A: $kA, B: $kB") - require(mA == C.numRows, s"The rows of C don't match the rows of A. C: ${C.numRows}, A: $mA") - require(nB == C.numCols, - s"The columns of C don't match the columns of B. C: ${C.numCols}, A: $nB") + require(A.numCols == B.numRows, + s"The columns of A don't match the rows of B. A: ${A.numCols}, B: ${B.numRows}") + require(A.numRows == C.numRows, + s"The rows of C don't match the rows of A. C: ${C.numRows}, A: ${A.numRows}") + require(B.numCols == C.numCols, + s"The columns of C don't match the columns of B. C: ${C.numCols}, A: ${B.numCols}") - nativeBLAS.dgemm(tAstr, tBstr, mA, nB, kA, alpha, A.values, lda, B.values, ldb, - beta, C.values, C.numRows) + nativeBLAS.dgemm(tAstr, tBstr, A.numRows, B.numCols, A.numCols, alpha, A.values, lda, + B.values, ldb, beta, C.values, C.numRows) } /** @@ -342,19 +314,15 @@ private[spark] object BLAS extends Serializable with Logging { * For `SparseMatrix` A. */ private def gemm( - transA: Boolean, - transB: Boolean, alpha: Double, A: SparseMatrix, B: DenseMatrix, beta: Double, C: DenseMatrix): Unit = { - val transModA = transA ^ A.isTransposed - val transModB = transB ^ B.isTransposed - val mA: Int = if (!transA) A.numRows else A.numCols - val nB: Int = if (!transB) B.numCols else B.numRows - val kA: Int = if (!transA) A.numCols else A.numRows - val kB: Int = if (!transB) B.numRows else B.numCols + val mA: Int = A.numRows + val nB: Int = B.numCols + val kA: Int = A.numCols + val kB: Int = B.numRows require(kA == kB, s"The columns of A don't match the rows of B. A: $kA, B: $kB") require(mA == C.numRows, s"The rows of C don't match the rows of A. C: ${C.numRows}, A: $mA") @@ -368,9 +336,9 @@ private[spark] object BLAS extends Serializable with Logging { val AcolPtrs = A.colPtrs // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices - if (transModA){ + if (A.isTransposed){ var colCounterForB = 0 - if (!transModB) { // Expensive to put the check inside the loop + if (!B.isTransposed) { // Expensive to put the check inside the loop while (colCounterForB < nB) { var rowCounterForA = 0 val Cstart = colCounterForB * mA @@ -390,9 +358,6 @@ private[spark] object BLAS extends Serializable with Logging { colCounterForB += 1 } } else { - // this is a hack, because the indexing is different when B.isTranspose is true, but transB - // is false and when transB is true, but B.isTranspose is false - val tempB = if (transB) B else new DenseMatrix(B.numCols, B.numRows, B.values) while (colCounterForB < nB) { var rowCounterForA = 0 val Cstart = colCounterForB * mA @@ -401,7 +366,7 @@ private[spark] object BLAS extends Serializable with Logging { val indEnd = AcolPtrs(rowCounterForA + 1) var sum = 0.0 while (i < indEnd) { - sum += Avals(i) * tempB(colCounterForB, ArowIndices(i)) + sum += Avals(i) * B(ArowIndices(i), colCounterForB) i += 1 } val Cindex = Cstart + rowCounterForA @@ -419,7 +384,7 @@ private[spark] object BLAS extends Serializable with Logging { // Perform matrix multiplication and add to C. The rows of A are multiplied by the columns of // B, and added to C. var colCounterForB = 0 // the column to be updated in C - if (!transModB) { // Expensive to put the check inside the loop + if (!B.isTransposed) { // Expensive to put the check inside the loop while (colCounterForB < nB) { var colCounterForA = 0 // The column of A to multiply with the row of B val Bstart = colCounterForB * kB @@ -437,16 +402,13 @@ private[spark] object BLAS extends Serializable with Logging { colCounterForB += 1 } } else { - // this is a hack, because the indexing is different when B.isTranspose is true, but transB - // is false and when transB is true, but B.isTranspose is false - val tempB = if (transB) B else new DenseMatrix(B.numCols, B.numRows, B.values) while (colCounterForB < nB) { var colCounterForA = 0 // The column of A to multiply with the row of B val Cstart = colCounterForB * mA while (colCounterForA < kA) { var i = AcolPtrs(colCounterForA) val indEnd = AcolPtrs(colCounterForA + 1) - val Bval = tempB(colCounterForB, colCounterForA) * alpha + val Bval = B(colCounterForA, colCounterForB) * alpha while (i < indEnd) { Cvals(Cstart + ArowIndices(i)) += Avals(i) * Bval i += 1 @@ -461,7 +423,6 @@ private[spark] object BLAS extends Serializable with Logging { /** * y := alpha * A * x + beta * y - * @param trans whether to use the transpose of matrix A (true), or A itself (false). * @param alpha a scalar to scale the multiplication A * x. * @param A the matrix A that will be left multiplied to x. Size of m x n. * @param x the vector x that will be left multiplied by A. Size of n x 1. @@ -469,64 +430,40 @@ private[spark] object BLAS extends Serializable with Logging { * @param y the resulting vector y. Size of m x 1. */ def gemv( - trans: Boolean, alpha: Double, A: Matrix, x: DenseVector, beta: Double, y: DenseVector): Unit = { - val mA: Int = if (!trans) A.numRows else A.numCols - val nx: Int = x.size - val nA: Int = if (!trans) A.numCols else A.numRows - - require(nA == nx, s"The columns of A don't match the number of elements of x. A: $nA, x: $nx") - require(mA == y.size, - s"The rows of A don't match the number of elements of y. A: $mA, y:${y.size}}") + require(A.numCols == x.size, + s"The columns of A don't match the number of elements of x. A: ${A.numCols}, x: ${x.size}") + require(A.numRows == y.size, + s"The rows of A don't match the number of elements of y. A: ${A.numRows}, y:${y.size}}") if (alpha == 0.0) { logDebug("gemv: alpha is equal to 0. Returning y.") } else { A match { case sparse: SparseMatrix => - gemv(trans, alpha, sparse, x, beta, y) + gemv(alpha, sparse, x, beta, y) case dense: DenseMatrix => - gemv(trans, alpha, dense, x, beta, y) + gemv(alpha, dense, x, beta, y) case _ => throw new IllegalArgumentException(s"gemv doesn't support matrix type ${A.getClass}.") } } } - /** - * y := alpha * A * x + beta * y - * - * @param alpha a scalar to scale the multiplication A * x. - * @param A the matrix A that will be left multiplied to x. Size of m x n. - * @param x the vector x that will be left multiplied by A. Size of n x 1. - * @param beta a scalar that can be used to scale vector y. - * @param y the resulting vector y. Size of m x 1. - */ - def gemv( - alpha: Double, - A: Matrix, - x: DenseVector, - beta: Double, - y: DenseVector): Unit = { - gemv(false, alpha, A, x, beta, y) - } - /** * y := alpha * A * x + beta * y * For `DenseMatrix` A. */ private def gemv( - trans: Boolean, alpha: Double, A: DenseMatrix, x: DenseVector, beta: Double, y: DenseVector): Unit = { - val transMod = trans ^ A.isTransposed - val tStrA = if (!transMod) "N" else "T" + val tStrA = if (!A.isTransposed) "N" else "T" val mA = if (!A.isTransposed) A.numRows else A.numCols val nA = if (!A.isTransposed) A.numCols else A.numRows nativeBLAS.dgemv(tStrA, mA, nA, alpha, A.values, mA, x.values, 1, beta, @@ -538,7 +475,6 @@ private[spark] object BLAS extends Serializable with Logging { * For `SparseMatrix` A. */ private def gemv( - trans: Boolean, alpha: Double, A: SparseMatrix, x: DenseVector, @@ -546,15 +482,14 @@ private[spark] object BLAS extends Serializable with Logging { y: DenseVector): Unit = { val xValues = x.values val yValues = y.values - val transMod = trans ^ A.isTransposed - val mA: Int = if (!trans) A.numRows else A.numCols - val nA: Int = if (!trans) A.numCols else A.numRows + val mA: Int = A.numRows + val nA: Int = A.numCols val Avals = A.values - val Arows = if (!transMod) A.rowIndices else A.colPtrs - val Acols = if (!transMod) A.colPtrs else A.rowIndices + val Arows = if (!A.isTransposed) A.rowIndices else A.colPtrs + val Acols = if (!A.isTransposed) A.colPtrs else A.rowIndices // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices - if (transMod) { + if (A.isTransposed) { var rowCounter = 0 while (rowCounter < mA) { var i = Arows(rowCounter) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index cfc244b0c0b21..b3373bb44be5a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.linalg -import java.util.{Arrays => jArrays, Random} +import java.util.{Arrays, Random} import scala.collection.mutable.{ArrayBuilder => MArrayBuilder, HashSet => MHashSet, ArrayBuffer} @@ -61,14 +61,14 @@ sealed trait Matrix extends Serializable { /** Convenience method for `Matrix`-`DenseMatrix` multiplication. */ def multiply(y: DenseMatrix): DenseMatrix = { val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols) - BLAS.gemm(false, false, 1.0, this, y, 0.0, C) + BLAS.gemm(1.0, this, y, 0.0, C) C } /** Convenience method for `Matrix`-`DenseVector` multiplication. */ def multiply(y: DenseVector): DenseVector = { val output = new DenseVector(new Array[Double](numRows)) - BLAS.gemv(false, 1.0, this, y, 0.0, output) + BLAS.gemv(1.0, this, y, 0.0, output) output } @@ -84,6 +84,16 @@ sealed trait Matrix extends Serializable { * backing array. For example, an operation such as addition or subtraction will only be * performed on the non-zero values in a `SparseMatrix`. */ private[mllib] def update(f: Double => Double): Matrix + + /** + * Applies a function `f` to all the active elements of dense and sparse matrix. + * + * @param f the function takes three parameters where the first two parameters are the row + * and column indices respectively with the type `Int`, the third parameter is + * the corresponding array index, and the final parameter is the corresponding + * value in the matrix with type `Double`. + */ + private[spark] def foreachActive(f: (Int, Int, Int, Double) => Unit) } /** @@ -127,7 +137,7 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) override def equals(o: Any) = o match { case m: DenseMatrix => - m.numRows == numRows && m.numCols == numCols && jArrays.equals(toArray, m.toArray) + m.numRows == numRows && m.numCols == numCols && Arrays.equals(toArray, m.toArray) case _ => false } @@ -172,6 +182,34 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) transposedMatrix } + private[spark] override def foreachActive(f: (Int, Int, Int, Double) => Unit): Unit = { + if (!isTransposed) { + // outer loop over columns + var j = 0 + while (j < numCols) { + var i = 0 + while (i < numRows) { + val ind = index(i, j) + f(i, j, ind, values(ind)) + i += 1 + } + j += 1 + } + } else { + // outer loop over rows + var i = 0 + while (i < numRows) { + var j = 0 + while (j < numCols) { + val ind = index(i, j) + f(i, j, ind, values(ind)) + j += 1 + } + i += 1 + } + } + } + /** Generate a `SparseMatrix` from the given `DenseMatrix`. */ def toSparse(): SparseMatrix = { val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble @@ -314,6 +352,9 @@ class SparseMatrix( override def toArray: Array[Double] = { val arr = new Array[Double](numRows * numCols) // if statement inside the loop would be expensive + def fillMatrixArray(i: Int, j: Int, ind: Int, v: Int): Unit = { + + } if (!isTransposed) { var j = 0 while (j < numCols) { @@ -359,9 +400,9 @@ class SparseMatrix( private[mllib] def index(i: Int, j: Int): Int = { if (!isTransposed) { - jArrays.binarySearch(rowIndices, colPtrs(j), colPtrs(j + 1), i) + Arrays.binarySearch(rowIndices, colPtrs(j), colPtrs(j + 1), i) } else { - jArrays.binarySearch(rowIndices, colPtrs(i), colPtrs(i + 1), j) + Arrays.binarySearch(rowIndices, colPtrs(i), colPtrs(i + 1), j) } } @@ -396,6 +437,31 @@ class SparseMatrix( transposedMatrix } + private[spark] override def foreachActive(f: (Int, Int, Int, Double) => Unit): Unit = { + if (!isTransposed) { + var j = 0 + while (j < numCols) { + var idx = colPtrs(j) + while (idx < colPtrs(j + 1)) { + f(rowIndices(idx), j, idx, values(idx)) + idx += 1 + } + j += 1 + } + } else { + var i = 0 + while (i < numRows) { + var idx = colPtrs(i) + while (idx < colPtrs(i + 1)) { + val j = rowIndices(idx) + f(i, j, idx, values(idx)) + idx += 1 + } + i += 1 + } + } + } + /** Generate a `DenseMatrix` from the given `SparseMatrix`. */ def toDense(): DenseMatrix = { new DenseMatrix(numRows, numCols, toArray) @@ -735,73 +801,24 @@ object Matrices { } else { var startCol = 0 val entries: Array[(Int, Int, Double)] = matrices.flatMap { mat => - val nRows = mat.numRows val nCols = mat.numCols mat match { case spMat: SparseMatrix => - val colPtrs = spMat.colPtrs - val rowIndices = spMat.rowIndices - val values = spMat.values - val data = new Array[(Int, Int, Double)](values.length) - if (!spMat.isTransposed) { - var j = 0 - while (j < nCols) { - var idx = colPtrs(j) - while (idx < colPtrs(j + 1)) { - val i = rowIndices(idx) - val v = values(idx) - data(idx) = (i, j + startCol, v) - idx += 1 - } - j += 1 - } - } else { - var i = 0 - while (i < nRows) { - var idx = colPtrs(i) - while (idx < colPtrs(i + 1)) { - val j = rowIndices(idx) - val v = values(idx) - data(idx) = (i, j + startCol, v) - idx += 1 - } - i += 1 - } + val data = new Array[(Int, Int, Double)](spMat.values.length) + def generateEntries(i: Int, j: Int, index: Int, v: Double): Unit = { + data(index) = (i, j + startCol, v) } + spMat.foreachActive(generateEntries) startCol += nCols data case dnMat: DenseMatrix => val data = new ArrayBuffer[(Int, Int, Double)]() - val values = dnMat.values - if (!dnMat.isTransposed) { - var j = 0 - while (j < nCols) { - var i = 0 - val indStart = j * nRows - while (i < nRows) { - val v = values(indStart + i) - if (v != 0.0) { - data.append((i, j + startCol, v)) - } - i += 1 - } - j += 1 - } - } else { - var i = 0 - while (i < nRows) { - var j = 0 - val indStart = i * nCols - while (j < nCols) { - val v = values(indStart + j) - if (v != 0.0) { - data.append((i, j + startCol, v)) - } - j += 1 - } - i += 1 + def generateEntries(i: Int, j: Int, index: Int, v: Double): Unit = { + if (v != 0.0) { + data.append((i, j + startCol, v)) } } + dnMat.foreachActive(generateEntries) startCol += nCols data } @@ -843,17 +860,11 @@ object Matrices { matrices.foreach { mat => var j = 0 val nRows = mat.numRows - val values = mat.toArray - while (j < numCols) { - var i = 0 + def fillEntries(i: Int, j: Int, index: Int, v: Double): Unit = { val indStart = j * numRows + startRow - val subMatStart = j * nRows - while (i < nRows) { - allValues(indStart + i) = values(subMatStart + i) - i += 1 - } - j += 1 + allValues(indStart + i) = v } + mat.foreachActive(fillEntries) startRow += nRows } new DenseMatrix(numRows, numCols, allValues) @@ -861,72 +872,23 @@ object Matrices { var startRow = 0 val entries: Array[(Int, Int, Double)] = matrices.flatMap { mat => val nRows = mat.numRows - val nCols = mat.numCols mat match { case spMat: SparseMatrix => - val colPtrs = spMat.colPtrs - val rowIndices = spMat.rowIndices - val values = spMat.values - val data = new Array[(Int, Int, Double)](values.length) - if (!spMat.isTransposed) { - var j = 0 - while (j < nCols) { - var idx = colPtrs(j) - while (idx < colPtrs(j + 1)) { - val i = rowIndices(idx) - val v = values(idx) - data(idx) = (i + startRow, j, v) - idx += 1 - } - j += 1 - } - } else { - var i = 0 - while (i < nRows) { - var idx = colPtrs(i) - while (idx < colPtrs(i + 1)) { - val j = rowIndices(idx) - val v = values(idx) - data(idx) = (i + startRow, j, v) - idx += 1 - } - i += 1 - } + val data = new Array[(Int, Int, Double)](spMat.values.length) + def generateEntries(i: Int, j: Int, index: Int, v: Double): Unit = { + data(index) = (i + startRow, j, v) } + spMat.foreachActive(generateEntries) startRow += nRows data case dnMat: DenseMatrix => val data = new ArrayBuffer[(Int, Int, Double)]() - val values = dnMat.values - if (!dnMat.isTransposed) { - var j = 0 - while (j < nCols) { - var i = 0 - val indStart = j * nRows - while (i < nRows) { - val v = values(indStart + i) - if (v != 0.0) { - data.append((i + startRow, j, v)) - } - i += 1 - } - j += 1 - } - } else { - var i = 0 - while (i < nRows) { - var j = 0 - val indStart = i * nCols - while (j < nCols) { - val v = values(indStart + j) - if (v != 0.0) { - data.append((i + startRow, j, v)) - } - j += 1 - } - i += 1 + def generateEntries(i: Int, j: Int, index: Int, v: Double): Unit = { + if (v != 0.0) { + data.append((i + startRow, j, v)) } } + dnMat.foreachActive(generateEntries) startRow += nRows data } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala index 5f585ee918589..2e657885e5a67 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala @@ -169,14 +169,14 @@ class BLASSuite extends FunSuite { } test("gemm") { - val dA = new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0)) val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0)) val B = new DenseMatrix(3, 2, Array(1.0, 0.0, 0.0, 0.0, 2.0, 1.0)) val expected = new DenseMatrix(4, 2, Array(0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 3.0)) - val BT = new DenseMatrix(2, 3, Array(1.0, 0.0, 0.0, 2.0, 0.0, 1.0)) + val BTman = new DenseMatrix(2, 3, Array(1.0, 0.0, 0.0, 2.0, 0.0, 1.0)) + val BT = B.transpose assert(dA.multiply(B) ~== expected absTol 1e-15) assert(sA.multiply(B) ~== expected absTol 1e-15) @@ -193,10 +193,6 @@ class BLASSuite extends FunSuite { val C10 = C1.copy val C11 = C1.copy val C12 = C1.copy - val C13 = C1.copy - val C14 = C1.copy - val C15 = C1.copy - val C16 = C1.copy val expected2 = new DenseMatrix(4, 2, Array(2.0, 1.0, 4.0, 2.0, 4.0, 0.0, 4.0, 3.0)) val expected3 = new DenseMatrix(4, 2, Array(2.0, 2.0, 4.0, 2.0, 8.0, 0.0, 6.0, 6.0)) @@ -211,36 +207,32 @@ class BLASSuite extends FunSuite { withClue("columns of A don't match the rows of B") { intercept[Exception] { - gemm(true, false, 1.0, dA, B, 2.0, C1) + gemm(1.0, dA, B, 2.0, C1) } } - val dAT = + val dATman = new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0)) - val sAT = + val sATman = new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) - val dATT = dAT.transpose - val sATT = sAT.transpose - val BTT = BT.transpose.asInstanceOf[DenseMatrix] + val dATT = dATman.transpose + val sATT = sATman.transpose + val BTT = BTman.transpose.asInstanceOf[DenseMatrix] assert(dATT.multiply(B) ~== expected absTol 1e-15) assert(sATT.multiply(B) ~== expected absTol 1e-15) assert(dATT.multiply(BTT) ~== expected absTol 1e-15) assert(sATT.multiply(BTT) ~== expected absTol 1e-15) - gemm(true, false, 1.0, dAT, B, 2.0, C5) - gemm(true, false, 1.0, sAT, B, 2.0, C6) - gemm(true, false, 2.0, dAT, B, 2.0, C7) - gemm(true, false, 2.0, sAT, B, 2.0, C8) - gemm(true, true, 1.0, dAT, BT, 2.0, C9) - gemm(true, true, 1.0, sAT, BT, 2.0, C10) - gemm(true, true, 2.0, dAT, BT, 2.0, C11) - gemm(true, true, 2.0, sAT, BT, 2.0, C12) - gemm(false, true, 1.0, dA, BT, 2.0, C13) - gemm(false, true, 1.0, sA, BT, 2.0, C14) - gemm(false, true, 2.0, dA, BT, 2.0, C15) - gemm(false, true, 2.0, sA, BT, 2.0, C16) + gemm(1.0, dATT, BTT, 2.0, C5) + gemm(1.0, sATT, BTT, 2.0, C6) + gemm(2.0, dATT, BTT, 2.0, C7) + gemm(2.0, sATT, BTT, 2.0, C8) + gemm(1.0, dA, BTT, 2.0, C9) + gemm(1.0, sA, BTT, 2.0, C10) + gemm(2.0, dA, BTT, 2.0, C11) + gemm(2.0, sA, BTT, 2.0, C12) assert(C5 ~== expected2 absTol 1e-15) assert(C6 ~== expected2 absTol 1e-15) assert(C7 ~== expected3 absTol 1e-15) @@ -267,10 +259,6 @@ class BLASSuite extends FunSuite { val y2 = y1.copy val y3 = y1.copy val y4 = y1.copy - val y5 = y1.copy - val y6 = y1.copy - val y7 = y1.copy - val y8 = y1.copy val expected2 = new DenseVector(Array(6.0, 7.0, 4.0, 9.0)) val expected3 = new DenseVector(Array(10.0, 8.0, 6.0, 18.0)) @@ -284,10 +272,9 @@ class BLASSuite extends FunSuite { assert(y4 ~== expected3 absTol 1e-15) withClue("columns of A don't match the rows of B") { intercept[Exception] { - gemv(true, 1.0, dA, x, 2.0, y1) + gemv(1.0, dA, x, 2.0, y1) } } - val dAT = new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0)) val sAT = @@ -298,14 +285,5 @@ class BLASSuite extends FunSuite { assert(dATT.multiply(x) ~== expected absTol 1e-15) assert(sATT.multiply(x) ~== expected absTol 1e-15) - - gemv(true, 1.0, dAT, x, 2.0, y5) - gemv(true, 1.0, sAT, x, 2.0, y6) - gemv(true, 2.0, dAT, x, 2.0, y7) - gemv(true, 2.0, sAT, x, 2.0, y8) - assert(y5 ~== expected2 absTol 1e-15) - assert(y6 ~== expected2 absTol 1e-15) - assert(y7 ~== expected3 absTol 1e-15) - assert(y8 ~== expected3 absTol 1e-15) } } From ccccdecd2b58ed7980f2fd0b6d361bf4293eb036 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Tue, 20 Jan 2015 22:46:12 -0800 Subject: [PATCH 06/11] fixed failed test --- .../test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala index 2e657885e5a67..b0b78acd6df16 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala @@ -207,7 +207,7 @@ class BLASSuite extends FunSuite { withClue("columns of A don't match the rows of B") { intercept[Exception] { - gemm(1.0, dA, B, 2.0, C1) + gemm(1.0, dA.transpose, B, 2.0, C1) } } @@ -272,7 +272,7 @@ class BLASSuite extends FunSuite { assert(y4 ~== expected3 absTol 1e-15) withClue("columns of A don't match the rows of B") { intercept[Exception] { - gemv(1.0, dA, x, 2.0, y1) + gemv(1.0, dA.transpose, x, 2.0, y1) } } val dAT = From f1c1742eef9af456ee6f08f5d6d7c5053bcbd300 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Wed, 21 Jan 2015 11:27:35 -0800 Subject: [PATCH 07/11] small refactoring --- .../apache/spark/mllib/linalg/Matrices.scala | 25 +++++--------- .../linalg/BreezeMatrixConversionSuite.scala | 9 +++++ .../spark/mllib/linalg/MatricesSuite.scala | 34 +++++++++++++++++++ 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index b3373bb44be5a..bc8178281db6d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -352,9 +352,6 @@ class SparseMatrix( override def toArray: Array[Double] = { val arr = new Array[Double](numRows * numCols) // if statement inside the loop would be expensive - def fillMatrixArray(i: Int, j: Int, ind: Int, v: Int): Unit = { - - } if (!isTransposed) { var j = 0 while (j < numCols) { @@ -678,10 +675,11 @@ object Matrices { private[mllib] def fromBreeze(breeze: BM[Double]): Matrix = { breeze match { case dm: BDM[Double] => - require(dm.majorStride == dm.rows, - "Do not support stride size different from the number of rows.") - new DenseMatrix(dm.rows, dm.cols, dm.data) + val mat = new DenseMatrix(dm.rows, dm.cols, dm.data) + mat.isTransposed = dm.isTranspose + mat case sm: BSM[Double] => + // There is no isTranspose flag for sparse matrices in Breeze new SparseMatrix(sm.rows, sm.cols, sm.colPtrs, sm.rowIndices, sm.data) case _ => throw new UnsupportedOperationException( @@ -805,20 +803,18 @@ object Matrices { mat match { case spMat: SparseMatrix => val data = new Array[(Int, Int, Double)](spMat.values.length) - def generateEntries(i: Int, j: Int, index: Int, v: Double): Unit = { + spMat.foreachActive { (i, j, index, v) => data(index) = (i, j + startCol, v) } - spMat.foreachActive(generateEntries) startCol += nCols data case dnMat: DenseMatrix => val data = new ArrayBuffer[(Int, Int, Double)]() - def generateEntries(i: Int, j: Int, index: Int, v: Double): Unit = { + dnMat.foreachActive { (i, j, index, v) => if (v != 0.0) { data.append((i, j + startCol, v)) } } - dnMat.foreachActive(generateEntries) startCol += nCols data } @@ -860,11 +856,10 @@ object Matrices { matrices.foreach { mat => var j = 0 val nRows = mat.numRows - def fillEntries(i: Int, j: Int, index: Int, v: Double): Unit = { + mat.foreachActive { (i, j, index, v) => val indStart = j * numRows + startRow allValues(indStart + i) = v } - mat.foreachActive(fillEntries) startRow += nRows } new DenseMatrix(numRows, numCols, allValues) @@ -875,20 +870,18 @@ object Matrices { mat match { case spMat: SparseMatrix => val data = new Array[(Int, Int, Double)](spMat.values.length) - def generateEntries(i: Int, j: Int, index: Int, v: Double): Unit = { + spMat.foreachActive { (i, j, index, v) => data(index) = (i + startRow, j, v) } - spMat.foreachActive(generateEntries) startRow += nRows data case dnMat: DenseMatrix => val data = new ArrayBuffer[(Int, Int, Double)]() - def generateEntries(i: Int, j: Int, index: Int, v: Double): Unit = { + dnMat.foreachActive { (i, j, index, v) => if (v != 0.0) { data.append((i + startRow, j, v)) } } - dnMat.foreachActive(generateEntries) startRow += nRows data } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala index 73a6d3a27d868..2031032373971 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala @@ -36,6 +36,11 @@ class BreezeMatrixConversionSuite extends FunSuite { assert(mat.numRows === breeze.rows) assert(mat.numCols === breeze.cols) assert(mat.values.eq(breeze.data), "should not copy data") + // transposed matrix + val matTransposed = Matrices.fromBreeze(breeze.t).asInstanceOf[DenseMatrix] + assert(matTransposed.numRows === breeze.cols) + assert(matTransposed.numCols === breeze.rows) + assert(matTransposed.values.eq(breeze.data), "should not copy data") } test("sparse matrix to breeze") { @@ -58,5 +63,9 @@ class BreezeMatrixConversionSuite extends FunSuite { assert(mat.numRows === breeze.rows) assert(mat.numCols === breeze.cols) assert(mat.values.eq(breeze.data), "should not copy data") + val matTransposed = Matrices.fromBreeze(breeze.t).asInstanceOf[SparseMatrix] + assert(matTransposed.numRows === breeze.cols) + assert(matTransposed.numCols === breeze.rows) + assert(!matTransposed.values.eq(breeze.data), "has to copy data") } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index a337413f6bf70..ab029015104cf 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -187,6 +187,40 @@ class MatricesSuite extends FunSuite { assert(sAT.toDense().toBreeze === dATexpected.toBreeze) } + test("foreachActive") { + val m = 3 + val n = 2 + val values = Array(1.0, 2.0, 4.0, 5.0) + val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0) + val colPtrs = Array(0, 2, 4) + val rowIndices = Array(0, 1, 1, 2) + + val sp = new SparseMatrix(m, n, colPtrs, rowIndices, values) + val dn = new DenseMatrix(m, n, allValues) + + val dnMap = scala.collection.mutable.Map[(Int, Int), Double]() + dn.foreachActive { (i, j, index, value) => + dnMap.put((i, j), value) + } + assert(dnMap.size === 6) + assert(dnMap.get(0, 0) === Some(1.0)) + assert(dnMap.get(1, 0) === Some(2.0)) + assert(dnMap.get(2, 0) === Some(0.0)) + assert(dnMap.get(0, 1) === Some(0.0)) + assert(dnMap.get(1, 1) === Some(4.0)) + assert(dnMap.get(2, 1) === Some(5.0)) + + val spMap = scala.collection.mutable.Map[Int, Double]() + sp.foreachActive { (i, j, index, value) => + spMap.put(index, value) + } + assert(spMap.size === 4) + assert(spMap.get(0) === Some(1.0)) + assert(spMap.get(1) === Some(2.0)) + assert(spMap.get(2) === Some(4.0)) + assert(spMap.get(3) === Some(5.0)) + } + test("horzcat, vertcat, eye, speye") { val m = 3 val n = 2 From 77481e8f68cb442f923bf7a814c308421540b212 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Wed, 21 Jan 2015 12:17:39 -0800 Subject: [PATCH 08/11] fixed MiMa --- project/MimaExcludes.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index b879cfcab9868..9b1c5b31387cf 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -61,7 +61,9 @@ object MimaExcludes { ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.DenseMatrix.transposeMultiply"), ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Matrix.isTransposed_=") + "org.apache.spark.mllib.linalg.Matrix.isTransposed_="), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.Matrix.foreachActive") ) ++ Seq( // SPARK-3325 ProblemFilters.exclude[MissingMethodProblem]( From c524770af3b9964ff5faee33cc2bd74cd50570c7 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Mon, 26 Jan 2015 19:02:05 -0800 Subject: [PATCH 09/11] address code review comments 2 --- .../org/apache/spark/mllib/linalg/BLAS.scala | 12 +++-- .../apache/spark/mllib/linalg/Matrices.scala | 48 +++++++++++-------- .../spark/mllib/linalg/MatricesSuite.scala | 8 ++-- project/MimaExcludes.scala | 4 +- 4 files changed, 43 insertions(+), 29 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index 5c5082e2d21d2..43634a2092bfe 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -261,7 +261,9 @@ private[spark] object BLAS extends Serializable with Logging { * @param A the matrix A that will be left multiplied to B. Size of m x k. * @param B the matrix B that will be left multiplied by A. Size of k x n. * @param beta a scalar that can be used to scale matrix C. - * @param C the resulting matrix C. Size of m x n. + * @param C the resulting matrix C. Size of m x n. C.isTransposed must be false. In other words, + * C cannot be the product of a `transpose()` call, or be converted from a transposed + * Breeze Matrix using `Matrices.fromBreeze()`. */ def gemm( alpha: Double, @@ -269,6 +271,8 @@ private[spark] object BLAS extends Serializable with Logging { B: DenseMatrix, beta: Double, C: DenseMatrix): Unit = { + require(!C.isTransposed, + "The matrix C cannot be the product of a transpose() call. C.isTransposed must be false.") if (alpha == 0.0) { logDebug("gemm: alpha is equal to 0. Returning C.") } else { @@ -293,8 +297,8 @@ private[spark] object BLAS extends Serializable with Logging { B: DenseMatrix, beta: Double, C: DenseMatrix): Unit = { - val tAstr = if (!A.isTransposed) "N" else "T" - val tBstr = if (!B.isTransposed) "N" else "T" + val tAstr = if (A.isTransposed) "T" else "N" + val tBstr = if (B.isTransposed) "T" else "N" val lda = if (!A.isTransposed) A.numRows else A.numCols val ldb = if (!B.isTransposed) B.numRows else B.numCols @@ -463,7 +467,7 @@ private[spark] object BLAS extends Serializable with Logging { x: DenseVector, beta: Double, y: DenseVector): Unit = { - val tStrA = if (!A.isTransposed) "N" else "T" + val tStrA = if (A.isTransposed) "T" else "N" val mA = if (!A.isTransposed) A.numRows else A.numCols val nA = if (!A.isTransposed) A.numCols else A.numRows nativeBLAS.dgemv(tStrA, mA, nA, alpha, A.values, mA, x.values, 1, beta, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index bc8178281db6d..03d3344ec2d3b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -35,7 +35,10 @@ sealed trait Matrix extends Serializable { def numCols: Int /** Flag that keeps track whether the matrix is transposed or not. False by default. */ - private[linalg] var isTransposed = false + private[mllib] var isTrans = false + + /** Returns whether the matrix is transposed or not. */ + def isTransposed: Boolean = isTrans /** Converts to a dense array in column major. */ def toArray: Array[Double] @@ -89,11 +92,10 @@ sealed trait Matrix extends Serializable { * Applies a function `f` to all the active elements of dense and sparse matrix. * * @param f the function takes three parameters where the first two parameters are the row - * and column indices respectively with the type `Int`, the third parameter is - * the corresponding array index, and the final parameter is the corresponding - * value in the matrix with type `Double`. + * and column indices respectively with the type `Int`, and the final parameter is the + * corresponding value in the matrix with type `Double`. */ - private[spark] def foreachActive(f: (Int, Int, Int, Double) => Unit) + private[spark] def foreachActive(f: (Int, Int, Double) => Unit) } /** @@ -178,11 +180,11 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) override def transpose: Matrix = { val transposedMatrix = new DenseMatrix(numCols, numRows, values) - transposedMatrix.isTransposed = !isTransposed + transposedMatrix.isTrans = !isTransposed transposedMatrix } - private[spark] override def foreachActive(f: (Int, Int, Int, Double) => Unit): Unit = { + private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = { if (!isTransposed) { // outer loop over columns var j = 0 @@ -190,7 +192,7 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) var i = 0 while (i < numRows) { val ind = index(i, j) - f(i, j, ind, values(ind)) + f(i, j, values(ind)) i += 1 } j += 1 @@ -202,7 +204,7 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) var j = 0 while (j < numCols) { val ind = index(i, j) - f(i, j, ind, values(ind)) + f(i, j, values(ind)) j += 1 } i += 1 @@ -430,17 +432,17 @@ class SparseMatrix( override def transpose: Matrix = { val transposedMatrix = new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values) - transposedMatrix.isTransposed = !isTransposed + transposedMatrix.isTrans = !isTransposed transposedMatrix } - private[spark] override def foreachActive(f: (Int, Int, Int, Double) => Unit): Unit = { + private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = { if (!isTransposed) { var j = 0 while (j < numCols) { var idx = colPtrs(j) while (idx < colPtrs(j + 1)) { - f(rowIndices(idx), j, idx, values(idx)) + f(rowIndices(idx), j, values(idx)) idx += 1 } j += 1 @@ -451,7 +453,7 @@ class SparseMatrix( var idx = colPtrs(i) while (idx < colPtrs(i + 1)) { val j = rowIndices(idx) - f(i, j, idx, values(idx)) + f(i, j, values(idx)) idx += 1 } i += 1 @@ -676,7 +678,7 @@ object Matrices { breeze match { case dm: BDM[Double] => val mat = new DenseMatrix(dm.rows, dm.cols, dm.data) - mat.isTransposed = dm.isTranspose + mat.isTrans = dm.isTranspose mat case sm: BSM[Double] => // There is no isTranspose flag for sparse matrices in Breeze @@ -803,14 +805,16 @@ object Matrices { mat match { case spMat: SparseMatrix => val data = new Array[(Int, Int, Double)](spMat.values.length) - spMat.foreachActive { (i, j, index, v) => - data(index) = (i, j + startCol, v) + var cnt = 0 + spMat.foreachActive { (i, j, v) => + data(cnt) = (i, j + startCol, v) + cnt += 1 } startCol += nCols data case dnMat: DenseMatrix => val data = new ArrayBuffer[(Int, Int, Double)]() - dnMat.foreachActive { (i, j, index, v) => + dnMat.foreachActive { (i, j, v) => if (v != 0.0) { data.append((i, j + startCol, v)) } @@ -856,7 +860,7 @@ object Matrices { matrices.foreach { mat => var j = 0 val nRows = mat.numRows - mat.foreachActive { (i, j, index, v) => + mat.foreachActive { (i, j, v) => val indStart = j * numRows + startRow allValues(indStart + i) = v } @@ -870,14 +874,16 @@ object Matrices { mat match { case spMat: SparseMatrix => val data = new Array[(Int, Int, Double)](spMat.values.length) - spMat.foreachActive { (i, j, index, v) => - data(index) = (i + startRow, j, v) + var cnt = 0 + spMat.foreachActive { (i, j, v) => + data(cnt) = (i + startRow, j, v) + cnt += 1 } startRow += nRows data case dnMat: DenseMatrix => val data = new ArrayBuffer[(Int, Int, Double)]() - dnMat.foreachActive { (i, j, index, v) => + dnMat.foreachActive { (i, j, v) => if (v != 0.0) { data.append((i + startRow, j, v)) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index ab029015104cf..d028a4187e9d4 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -199,7 +199,7 @@ class MatricesSuite extends FunSuite { val dn = new DenseMatrix(m, n, allValues) val dnMap = scala.collection.mutable.Map[(Int, Int), Double]() - dn.foreachActive { (i, j, index, value) => + dn.foreachActive { (i, j, value) => dnMap.put((i, j), value) } assert(dnMap.size === 6) @@ -211,8 +211,10 @@ class MatricesSuite extends FunSuite { assert(dnMap.get(2, 1) === Some(5.0)) val spMap = scala.collection.mutable.Map[Int, Double]() - sp.foreachActive { (i, j, index, value) => - spMap.put(index, value) + var cnt = 0 + sp.foreachActive { (i, j, value) => + spMap.put(cnt, value) + cnt += 1 } assert(spMap.size === 4) assert(spMap.get(0) === Some(1.0)) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 9b1c5b31387cf..8e56639117504 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -61,7 +61,9 @@ object MimaExcludes { ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.DenseMatrix.transposeMultiply"), ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Matrix.isTransposed_="), + "org.apache.spark.mllib.linalg.Matrix.isTrans_="), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.Matrix.isTransposed"), ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.Matrix.foreachActive") ) ++ Seq( From caf44387c2d3af5df771b9ce74aa8a9bac3f0827 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Tue, 27 Jan 2015 00:02:06 -0800 Subject: [PATCH 10/11] addressed code review v3 --- .../org/apache/spark/mllib/linalg/BLAS.scala | 4 +- .../apache/spark/mllib/linalg/Matrices.scala | 160 +++++++++--------- .../spark/mllib/linalg/MatricesSuite.scala | 60 +++---- project/MimaExcludes.scala | 4 +- 4 files changed, 114 insertions(+), 114 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index 43634a2092bfe..34e0392f1b21a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -261,9 +261,7 @@ private[spark] object BLAS extends Serializable with Logging { * @param A the matrix A that will be left multiplied to B. Size of m x k. * @param B the matrix B that will be left multiplied by A. Size of k x n. * @param beta a scalar that can be used to scale matrix C. - * @param C the resulting matrix C. Size of m x n. C.isTransposed must be false. In other words, - * C cannot be the product of a `transpose()` call, or be converted from a transposed - * Breeze Matrix using `Matrices.fromBreeze()`. + * @param C the resulting matrix C. Size of m x n. C.isTransposed must be false. */ def gemm( alpha: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 03d3344ec2d3b..cfa1826dfbac6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -35,13 +35,16 @@ sealed trait Matrix extends Serializable { def numCols: Int /** Flag that keeps track whether the matrix is transposed or not. False by default. */ - private[mllib] var isTrans = false - - /** Returns whether the matrix is transposed or not. */ - def isTransposed: Boolean = isTrans + val isTransposed: Boolean = false /** Converts to a dense array in column major. */ - def toArray: Array[Double] + def toArray: Array[Double] = { + val newArray = new Array[Double](numRows * numCols) + foreachActive { (i, j, v) => + newArray(j * numRows + i) = v + } + newArray + } /** Converts to a breeze matrix. */ private[mllib] def toBreeze: BM[Double] @@ -58,7 +61,7 @@ sealed trait Matrix extends Serializable { /** Get a deep copy of the matrix. */ def copy: Matrix - /** Transpose the Matrix */ + /** Transpose the Matrix. Returns a new `Matrix` instance sharing the same underlying data. */ def transpose: Matrix /** Convenience method for `Matrix`-`DenseMatrix` multiplication. */ @@ -89,7 +92,8 @@ sealed trait Matrix extends Serializable { private[mllib] def update(f: Double => Double): Matrix /** - * Applies a function `f` to all the active elements of dense and sparse matrix. + * Applies a function `f` to all the active elements of dense and sparse matrix. The ordering + * of the elements are not defined. * * @param f the function takes three parameters where the first two parameters are the row * and column indices respectively with the type `Int`, and the final parameter is the @@ -112,30 +116,35 @@ sealed trait Matrix extends Serializable { * @param numRows number of rows * @param numCols number of columns * @param values matrix entries in column major + * @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in + * row major. */ -class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) extends Matrix { +class DenseMatrix( + val numRows: Int, + val numCols: Int, + val values: Array[Double], + override val isTransposed: Boolean) extends Matrix { require(values.length == numRows * numCols, "The number of values supplied doesn't match the " + s"size of the matrix! values.length: ${values.length}, numRows * numCols: ${numRows * numCols}") - override def toArray: Array[Double] = { - if (!isTransposed) { - values - } else { - val transposedValues = new Array[Double](values.length) - var j = 0 - while (j < numCols) { - var i = 0 - val indStart = j * numRows - while (i < numRows) { - transposedValues(indStart + i) = values(j + i * numCols) - i += 1 - } - j += 1 - } - transposedValues - } - } + /** + * Column-major dense matrix. + * The entry values are stored in a single array of doubles with columns listed in sequence. + * For example, the following matrix + * {{{ + * 1.0 2.0 + * 3.0 4.0 + * 5.0 6.0 + * }}} + * is stored as `[1.0, 3.0, 5.0, 2.0, 4.0, 6.0]`. + * + * @param numRows number of rows + * @param numCols number of columns + * @param values matrix entries in column major + */ + def this(numRows: Int, numCols: Int, values: Array[Double]) = + this(numRows, numCols, values, false) override def equals(o: Any) = o match { case m: DenseMatrix => @@ -178,11 +187,7 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) this } - override def transpose: Matrix = { - val transposedMatrix = new DenseMatrix(numCols, numRows, values) - transposedMatrix.isTrans = !isTransposed - transposedMatrix - } + override def transpose: Matrix = new DenseMatrix(numCols, numRows, values, !isTransposed) private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = { if (!isTransposed) { @@ -190,9 +195,9 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) var j = 0 while (j < numCols) { var i = 0 + val indStart = j * numRows while (i < numRows) { - val ind = index(i, j) - f(i, j, values(ind)) + f(i, j, values(indStart + i)) i += 1 } j += 1 @@ -202,9 +207,9 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) var i = 0 while (i < numRows) { var j = 0 + val indStart = i * numCols while (j < numCols) { - val ind = index(i, j) - f(i, j, values(ind)) + f(i, j, values(indStart + j)) j += 1 } i += 1 @@ -212,7 +217,8 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) } } - /** Generate a `SparseMatrix` from the given `DenseMatrix`. */ + /** Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed + * set to false. */ def toSparse(): SparseMatrix = { val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble val colPtrs: Array[Int] = new Array[Int](numCols + 1) @@ -334,13 +340,17 @@ object DenseMatrix { * @param rowIndices the row index of the entry. They must be in strictly increasing order for each * column * @param values non-zero matrix entries in column major + * @param isTransposed whether the matrix is transposed. If true, the matrix can be considered + * Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs, + * and `rowIndices` behave as colIndices, and `values` are stored in row major. */ class SparseMatrix( val numRows: Int, val numCols: Int, val colPtrs: Array[Int], val rowIndices: Array[Int], - val values: Array[Double]) extends Matrix { + val values: Array[Double], + override val isTransposed: Boolean) extends Matrix { require(values.length == rowIndices.length, "The number of row indices and values don't match! " + s"values.length: ${values.length}, rowIndices.length: ${rowIndices.length}") @@ -351,37 +361,31 @@ class SparseMatrix( require(values.length == colPtrs.last, "The last value of colPtrs must equal the number of " + s"elements. values.length: ${values.length}, colPtrs.last: ${colPtrs.last}") - override def toArray: Array[Double] = { - val arr = new Array[Double](numRows * numCols) - // if statement inside the loop would be expensive - if (!isTransposed) { - var j = 0 - while (j < numCols) { - var i = colPtrs(j) - val indEnd = colPtrs(j + 1) - val offset = j * numRows - while (i < indEnd) { - val rowIndex = rowIndices(i) - arr(offset + rowIndex) = values(i) - i += 1 - } - j += 1 - } - } else { - var i = 0 - while (i < numRows) { - var j = colPtrs(i) - val jEnd = colPtrs(i + 1) - while (j < jEnd) { - val colIndex = rowIndices(j) - arr(colIndex * numRows + i) = values(j) - j += 1 - } - i += 1 - } - } - arr - } + /** + * Column-major sparse matrix. + * The entry values are stored in Compressed Sparse Column (CSC) format. + * For example, the following matrix + * {{{ + * 1.0 0.0 4.0 + * 0.0 3.0 5.0 + * 2.0 0.0 6.0 + * }}} + * is stored as `values: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]`, + * `rowIndices=[0, 2, 1, 0, 1, 2]`, `colPointers=[0, 2, 3, 6]`. + * + * @param numRows number of rows + * @param numCols number of columns + * @param colPtrs the index corresponding to the start of a new column + * @param rowIndices the row index of the entry. They must be in strictly increasing order for each + * column + * @param values non-zero matrix entries in column major + */ + def this( + numRows: Int, + numCols: Int, + colPtrs: Array[Int], + rowIndices: Array[Int], + values: Array[Double]) = this(numRows, numCols, colPtrs, rowIndices, values, false) private[mllib] def toBreeze: BM[Double] = { if (!isTransposed) { @@ -430,18 +434,16 @@ class SparseMatrix( this } - override def transpose: Matrix = { - val transposedMatrix = new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values) - transposedMatrix.isTrans = !isTransposed - transposedMatrix - } + override def transpose: Matrix = + new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, !isTransposed) private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = { if (!isTransposed) { var j = 0 while (j < numCols) { var idx = colPtrs(j) - while (idx < colPtrs(j + 1)) { + val idxEnd = colPtrs(j + 1) + while (idx < idxEnd) { f(rowIndices(idx), j, values(idx)) idx += 1 } @@ -451,7 +453,8 @@ class SparseMatrix( var i = 0 while (i < numRows) { var idx = colPtrs(i) - while (idx < colPtrs(i + 1)) { + val idxEnd = colPtrs(i + 1) + while (idx < idxEnd) { val j = rowIndices(idx) f(i, j, values(idx)) idx += 1 @@ -461,7 +464,8 @@ class SparseMatrix( } } - /** Generate a `DenseMatrix` from the given `SparseMatrix`. */ + /** Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed + * set to false. */ def toDense(): DenseMatrix = { new DenseMatrix(numRows, numCols, toArray) } @@ -677,9 +681,7 @@ object Matrices { private[mllib] def fromBreeze(breeze: BM[Double]): Matrix = { breeze match { case dm: BDM[Double] => - val mat = new DenseMatrix(dm.rows, dm.cols, dm.data) - mat.isTrans = dm.isTranspose - mat + new DenseMatrix(dm.rows, dm.cols, dm.data, dm.isTranspose) case sm: BSM[Double] => // There is no isTranspose flag for sparse matrices in Breeze new SparseMatrix(sm.rows, sm.cols, sm.colPtrs, sm.rowIndices, sm.data) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index d028a4187e9d4..b1ebfde0e5e57 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -22,6 +22,9 @@ import java.util.Random import org.mockito.Mockito.when import org.scalatest.FunSuite import org.scalatest.mock.MockitoSugar._ +import scala.collection.mutable.{Map => MutableMap} + +import org.apache.spark.mllib.util.TestingUtils._ class MatricesSuite extends FunSuite { test("dense matrix construction") { @@ -32,7 +35,6 @@ class MatricesSuite extends FunSuite { assert(mat.numRows === m) assert(mat.numCols === n) assert(mat.values.eq(values), "should not copy data") - assert(mat.toArray.eq(values), "toArray should not copy data") } test("dense matrix construction with wrong dimension") { @@ -181,7 +183,7 @@ class MatricesSuite extends FunSuite { assert(sA(2, 1) === sAT(1, 2)) assert(!dA.toArray.eq(dAT.toArray), "has to have a new array") - assert(dA.toArray.eq(dAT.transpose.toArray), "should not copy array") + assert(dA.values.eq(dAT.transpose.asInstanceOf[DenseMatrix].values), "should not copy array") assert(dAT.toSparse().toBreeze === sATexpected.toBreeze) assert(sAT.toDense().toBreeze === dATexpected.toBreeze) @@ -198,29 +200,27 @@ class MatricesSuite extends FunSuite { val sp = new SparseMatrix(m, n, colPtrs, rowIndices, values) val dn = new DenseMatrix(m, n, allValues) - val dnMap = scala.collection.mutable.Map[(Int, Int), Double]() + val dnMap = MutableMap[(Int, Int), Double]() dn.foreachActive { (i, j, value) => dnMap.put((i, j), value) } assert(dnMap.size === 6) - assert(dnMap.get(0, 0) === Some(1.0)) - assert(dnMap.get(1, 0) === Some(2.0)) - assert(dnMap.get(2, 0) === Some(0.0)) - assert(dnMap.get(0, 1) === Some(0.0)) - assert(dnMap.get(1, 1) === Some(4.0)) - assert(dnMap.get(2, 1) === Some(5.0)) - - val spMap = scala.collection.mutable.Map[Int, Double]() - var cnt = 0 + assert(dnMap(0, 0) === 1.0) + assert(dnMap(1, 0) === 2.0) + assert(dnMap(2, 0) === 0.0) + assert(dnMap(0, 1) === 0.0) + assert(dnMap(1, 1) === 4.0) + assert(dnMap(2, 1) === 5.0) + + val spMap = MutableMap[(Int, Int), Double]() sp.foreachActive { (i, j, value) => - spMap.put(cnt, value) - cnt += 1 + spMap.put((i, j), value) } assert(spMap.size === 4) - assert(spMap.get(0) === Some(1.0)) - assert(spMap.get(1) === Some(2.0)) - assert(spMap.get(2) === Some(4.0)) - assert(spMap.get(3) === Some(5.0)) + assert(spMap(0, 0) === 1.0) + assert(spMap(1, 0) === 2.0) + assert(spMap(1, 1) === 4.0) + assert(spMap(2, 1) === 5.0) } test("horzcat, vertcat, eye, speye") { @@ -267,8 +267,8 @@ class MatricesSuite extends FunSuite { assert(deHorz2.numCols === 0) assert(deHorz2.toArray.length === 0) - assert(deHorz1.toBreeze.toDenseMatrix === spHorz2.toBreeze.toDenseMatrix) - assert(spHorz2.toBreeze === spHorz3.toBreeze) + assert(deHorz1 ~== spHorz2.asInstanceOf[SparseMatrix].toDense absTol 1e-15) + assert(spHorz2 ~== spHorz3 absTol 1e-15) assert(spHorz(0, 0) === 1.0) assert(spHorz(2, 1) === 5.0) assert(spHorz(0, 2) === 1.0) @@ -290,10 +290,10 @@ class MatricesSuite extends FunSuite { val spHorz3T = Matrices.horzcat(Array(deMat1TT, spMat2)) val deHorz1T = Matrices.horzcat(Array(deMat1TT, deMat2)) - assert(deHorz1T.toBreeze === deHorz1.toBreeze) - assert(spHorzT.toBreeze === spHorz.toBreeze) - assert(spHorz2T.toBreeze === spHorz2.toBreeze) - assert(spHorz3T.toBreeze === spHorz3.toBreeze) + assert(deHorz1T ~== deHorz1 absTol 1e-15) + assert(spHorzT ~== spHorz absTol 1e-15) + assert(spHorz2T ~== spHorz2 absTol 1e-15) + assert(spHorz3T ~== spHorz3 absTol 1e-15) intercept[IllegalArgumentException] { Matrices.horzcat(Array(spMat1, spMat3)) @@ -321,8 +321,8 @@ class MatricesSuite extends FunSuite { assert(deVert2.numCols === 0) assert(deVert2.toArray.length === 0) - assert(deVert1.toBreeze.toDenseMatrix === spVert2.toBreeze.toDenseMatrix) - assert(spVert2.toBreeze === spVert3.toBreeze) + assert(deVert1 ~== spVert2.asInstanceOf[SparseMatrix].toDense absTol 1e-15) + assert(spVert2 ~== spVert3 absTol 1e-15) assert(spVert(0, 0) === 1.0) assert(spVert(2, 1) === 5.0) assert(spVert(3, 0) === 1.0) @@ -340,10 +340,10 @@ class MatricesSuite extends FunSuite { val spVert2T = Matrices.vertcat(Array(spMat1TT, deMat3)) val spVert3T = Matrices.vertcat(Array(deMat1TT, spMat3)) - assert(deVert1T.toBreeze === deVert1.toBreeze) - assert(spVertT.toBreeze === spVert.toBreeze) - assert(spVert2T.toBreeze === spVert2.toBreeze) - assert(spVert3T.toBreeze === spVert3.toBreeze) + assert(deVert1T ~== deVert1 absTol 1e-15) + assert(spVertT ~== spVert absTol 1e-15) + assert(spVert2T ~== spVert2 absTol 1e-15) + assert(spVert3T ~== spVert3 absTol 1e-15) intercept[IllegalArgumentException] { Matrices.vertcat(Array(spMat1, spMat2)) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 8e56639117504..c36b64e8c674c 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -60,8 +60,8 @@ object MimaExcludes { "org.apache.spark.mllib.linalg.Matrix.transpose"), ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.DenseMatrix.transposeMultiply"), - ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Matrix.isTrans_="), + ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix." + + "org$apache$spark$mllib$linalg$Matrix$_setter_$isTransposed_="), ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.Matrix.isTransposed"), ProblemFilters.exclude[MissingMethodProblem]( From 87ab83cb07a3b3451a4e3ddd158527400b4284ea Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Tue, 27 Jan 2015 00:10:58 -0800 Subject: [PATCH 11/11] fixed scalastyle --- .../main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index cfa1826dfbac6..ad7e86827b368 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -376,8 +376,8 @@ class SparseMatrix( * @param numRows number of rows * @param numCols number of columns * @param colPtrs the index corresponding to the start of a new column - * @param rowIndices the row index of the entry. They must be in strictly increasing order for each - * column + * @param rowIndices the row index of the entry. They must be in strictly increasing + * order for each column * @param values non-zero matrix entries in column major */ def this(