Skip to content

Commit

Permalink
update linalg docs and some new method signatures
Browse files Browse the repository at this point in the history
  • Loading branch information
mengxr committed Feb 5, 2015
1 parent 371721b commit 27f5bdd
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 28 deletions.
48 changes: 26 additions & 22 deletions mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ sealed trait Matrix extends Serializable {
*
* @param numRows number of rows
* @param numCols number of columns
* @param values matrix entries in column major
* @param values matrix entries in column major if not transposed or in row major otherwise
* @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in
* row major.
*/
Expand Down Expand Up @@ -187,7 +187,7 @@ class DenseMatrix(
this
}

override def transpose: Matrix = new DenseMatrix(numCols, numRows, values, !isTransposed)
override def transpose: DenseMatrix = new DenseMatrix(numCols, numRows, values, !isTransposed)

private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
if (!isTransposed) {
Expand Down Expand Up @@ -217,9 +217,11 @@ class DenseMatrix(
}
}

/** Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
* set to false. */
def toSparse(): SparseMatrix = {
/**
* Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
* set to false.
*/
def toSparse: SparseMatrix = {
val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble
val colPtrs: Array[Int] = new Array[Int](numCols + 1)
val rowIndices: MArrayBuilder[Int] = new MArrayBuilder.ofInt
Expand Down Expand Up @@ -282,7 +284,7 @@ object DenseMatrix {
}

/**
* Generate a `DenseMatrix` consisting of i.i.d. uniform random numbers.
* Generate a `DenseMatrix` consisting of `i.i.d.` uniform random numbers.
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @param rng a random number generator
Expand All @@ -293,7 +295,7 @@ object DenseMatrix {
}

/**
* Generate a `DenseMatrix` consisting of i.i.d. gaussian random numbers.
* Generate a `DenseMatrix` consisting of `i.i.d.` gaussian random numbers.
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @param rng a random number generator
Expand Down Expand Up @@ -336,10 +338,10 @@ object DenseMatrix {
*
* @param numRows number of rows
* @param numCols number of columns
* @param colPtrs the index corresponding to the start of a new column
* @param rowIndices the row index of the entry. They must be in strictly increasing order for each
* column
* @param values non-zero matrix entries in column major
* @param colPtrs the index corresponding to the start of a new column (if not transposed)
* @param rowIndices the row index of the entry (if not transposed). They must be in strictly
* increasing order for each column
* @param values nonzero matrix entries in column major (if not transposed)
* @param isTransposed whether the matrix is transposed. If true, the matrix can be considered
* Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs,
* and `rowIndices` behave as colIndices, and `values` are stored in row major.
Expand Down Expand Up @@ -434,7 +436,7 @@ class SparseMatrix(
this
}

override def transpose: Matrix =
override def transpose: SparseMatrix =
new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, !isTransposed)

private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
Expand Down Expand Up @@ -464,9 +466,11 @@ class SparseMatrix(
}
}

/** Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed
* set to false. */
def toDense(): DenseMatrix = {
/**
* Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed
* set to false.
*/
def toDense: DenseMatrix = {
new DenseMatrix(numRows, numCols, toArray)
}
}
Expand Down Expand Up @@ -593,7 +597,7 @@ object SparseMatrix {
}

/**
* Generate a `SparseMatrix` consisting of i.i.d. uniform random numbers. The number of non-zero
* Generate a `SparseMatrix` consisting of `i.i.d.` uniform random numbers. The number of non-zero
* elements equals the ceiling of `numRows` x `numCols` x `density`
*
* @param numRows number of rows of the matrix
Expand All @@ -608,7 +612,7 @@ object SparseMatrix {
}

/**
* Generate a `SparseMatrix` consisting of i.i.d. gaussian random numbers.
* Generate a `SparseMatrix` consisting of `i.i.d.` gaussian random numbers.
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @param density the desired density for the matrix
Expand All @@ -626,7 +630,7 @@ object SparseMatrix {
* @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero
* `values` on the diagonal
*/
def diag(vector: Vector): SparseMatrix = {
def spdiag(vector: Vector): SparseMatrix = {
val n = vector.size
vector match {
case sVec: SparseVector =>
Expand Down Expand Up @@ -722,7 +726,7 @@ object Matrices {
def speye(n: Int): Matrix = SparseMatrix.speye(n)

/**
* Generate a `DenseMatrix` consisting of i.i.d. uniform random numbers.
* Generate a `DenseMatrix` consisting of `i.i.d.` uniform random numbers.
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @param rng a random number generator
Expand All @@ -732,7 +736,7 @@ object Matrices {
DenseMatrix.rand(numRows, numCols, rng)

/**
* Generate a `SparseMatrix` consisting of i.i.d. gaussian random numbers.
* Generate a `SparseMatrix` consisting of `i.i.d.` uniform random numbers.
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @param density the desired density for the matrix
Expand All @@ -743,7 +747,7 @@ object Matrices {
SparseMatrix.sprand(numRows, numCols, density, rng)

/**
* Generate a `DenseMatrix` consisting of i.i.d. gaussian random numbers.
* Generate a `DenseMatrix` consisting of `i.i.d.` gaussian random numbers.
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @param rng a random number generator
Expand All @@ -753,7 +757,7 @@ object Matrices {
DenseMatrix.randn(numRows, numCols, rng)

/**
* Generate a `SparseMatrix` consisting of i.i.d. gaussian random numbers.
* Generate a `SparseMatrix` consisting of `i.i.d.` gaussian random numbers.
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @param density the desired density for the matrix
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,7 @@ object Vectors {
}

/**
* Parses a string resulted from `Vector#toString` into
* an [[org.apache.spark.mllib.linalg.Vector]].
* Parses a string resulting from [[Vector.toString]] into a [[Vector]].
*/
def parse(s: String): Vector = {
parseNumeric(NumericParser.parse(s))
Expand Down Expand Up @@ -483,6 +482,7 @@ class DenseVector(val values: Array[Double]) extends Vector {
}

object DenseVector {
/**
 * Extracts the value array from a dense vector, enabling patterns such as
 * `case DenseVector(values) => ...`.
 *
 * @param dv the dense vector to deconstruct
 * @return always `Some`, wrapping `dv.values` itself (the array is not copied)
 */
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ import scala.collection.mutable.ArrayBuffer

import breeze.linalg.{DenseMatrix => BDM}

import org.apache.spark.{SparkException, Logging, Partitioner}
import org.apache.spark.{Logging, Partitioner, SparkException}
import org.apache.spark.annotation.Experimental
import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, Matrix, SparseMatrix}
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
Expand Down Expand Up @@ -104,6 +105,8 @@ private[mllib] object GridPartitioner {
}

/**
* :: Experimental ::
*
* Represents a distributed matrix in blocks of local matrices.
*
* @param blocks The RDD of sub-matrix blocks ((blockRowIndex, blockColIndex), sub-matrix) that
Expand All @@ -118,6 +121,7 @@ private[mllib] object GridPartitioner {
* @param nCols Number of columns of this matrix. If the supplied value is less than or equal to
* zero, the number of columns will be calculated when `numCols` is invoked.
*/
@Experimental
class BlockMatrix(
val blocks: RDD[((Int, Int), Matrix)],
val rowsPerBlock: Int,
Expand Down Expand Up @@ -177,6 +181,10 @@ class BlockMatrix(
assert(cols <= nCols, s"The number of columns $cols is more than claimed $nCols.")
}

/**
* Validates the block matrix info against the matrix data (`blocks`) and throws an exception if
* any error is found.
*/
def validate(): Unit = {
logDebug("Validating BlockMatrix...")
// check if the matrix is larger than the claimed dimensions
Expand Down Expand Up @@ -351,7 +359,7 @@ class BlockMatrix(
if (a.nonEmpty && b.nonEmpty) {
val C = b.head match {
case dense: DenseMatrix => a.head.multiply(dense)
case sparse: SparseMatrix => a.head.multiply(sparse.toDense())
case sparse: SparseMatrix => a.head.multiply(sparse.toDense)
case _ => throw new SparkException(s"Unrecognized matrix type ${b.head.getClass}.")
}
Iterator(((blockRowIndex, blockColIndex), C.toBreeze))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ public void diagonalMatrixConstruction() {
Matrix sm = Matrices.diag(sv);
DenseMatrix d = DenseMatrix.diag(v);
DenseMatrix sd = DenseMatrix.diag(sv);
SparseMatrix s = SparseMatrix.diag(v);
SparseMatrix ss = SparseMatrix.diag(sv);
SparseMatrix s = SparseMatrix.spdiag(v);
SparseMatrix ss = SparseMatrix.spdiag(sv);

assertArrayEquals(m.toArray(), sm.toArray(), 0.0);
assertArrayEquals(d.toArray(), sm.toArray(), 0.0);
Expand Down

0 comments on commit 27f5bdd

Please sign in to comment.