update since versions in mllib.linalg
mengxr committed Aug 26, 2015
1 parent 125205c commit b38437e
Showing 8 changed files with 64 additions and 31 deletions.
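For orientation, the sketch below (a hypothetical ExampleMatrix, not part of this commit) illustrates the annotation pattern the commit applies throughout mllib.linalg: the class, its primary constructor, every public constructor val, and every public member are tagged with the release in which they first appeared. It is written as if it lived inside the Spark source tree, since @Since is Spark's own internal annotation.

package org.apache.spark.example

import org.apache.spark.annotation.Since

// Hypothetical class, used only to illustrate the @Since pattern applied in
// this commit: the class, the primary constructor, each public val, and each
// public def record the release in which they were first exposed.
@Since("1.0.0")
class ExampleMatrix @Since("1.3.0") (
    @Since("1.0.0") val numRows: Int,
    @Since("1.0.0") val numCols: Int,
    @Since("1.3.0") val isTransposed: Boolean) {

  // Auxiliary constructors keep the version at which they were introduced.
  @Since("1.0.0")
  def this(numRows: Int, numCols: Int) = this(numRows, numCols, false)

  @Since("1.5.0")
  def numActives: Int = numRows * numCols
}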
44 changes: 28 additions & 16 deletions mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -32,18 +32,23 @@ import org.apache.spark.sql.types._
* Trait for a local matrix.
*/
@SQLUserDefinedType(udt = classOf[MatrixUDT])
@Since("1.0.0")
sealed trait Matrix extends Serializable {

/** Number of rows. */
@Since("1.0.0")
def numRows: Int

/** Number of columns. */
@Since("1.0.0")
def numCols: Int

/** Flag that keeps track of whether the matrix is transposed. False by default. */
@Since("1.3.0")
val isTransposed: Boolean = false

/** Converts to a dense array in column major. */
@Since("1.0.0")
def toArray: Array[Double] = {
val newArray = new Array[Double](numRows * numCols)
foreachActive { (i, j, v) =>
@@ -56,6 +61,7 @@ sealed trait Matrix extends Serializable {
private[mllib] def toBreeze: BM[Double]

/** Gets the (i, j)-th element. */
@Since("1.3.0")
def apply(i: Int, j: Int): Double

/** Return the index for the (i, j)-th element in the backing array. */
@@ -65,24 +71,29 @@ sealed trait Matrix extends Serializable {
private[mllib] def update(i: Int, j: Int, v: Double): Unit

/** Get a deep copy of the matrix. */
@Since("1.2.0")
def copy: Matrix

/** Transpose the Matrix. Returns a new `Matrix` instance sharing the same underlying data. */
@Since("1.3.0")
def transpose: Matrix

/** Convenience method for `Matrix`-`DenseMatrix` multiplication. */
@Since("1.2.0")
def multiply(y: DenseMatrix): DenseMatrix = {
val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols)
BLAS.gemm(1.0, this, y, 0.0, C)
C
}

/** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */
@Since("1.2.0")
def multiply(y: DenseVector): DenseVector = {
multiply(y.asInstanceOf[Vector])
}

/** Convenience method for `Matrix`-`Vector` multiplication. */
@Since("1.4.0")
def multiply(y: Vector): DenseVector = {
val output = new DenseVector(new Array[Double](numRows))
BLAS.gemv(1.0, this, y, 0.0, output)
@@ -93,6 +104,7 @@ sealed trait Matrix extends Serializable {
override def toString: String = toBreeze.toString()

/** A human readable representation of the matrix with maximum lines and width */
@Since("1.4.0")
def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth)

/** Map the values of this matrix using a function. Generates a new matrix. Performs the
@@ -118,11 +130,13 @@ sealed trait Matrix extends Serializable {
/**
* Find the number of non-zero active values.
*/
@Since("1.5.0")
def numNonzeros: Int

/**
* Find the number of values stored explicitly. These values can be zero as well.
*/
@Since("1.5.0")
def numActives: Int
}

@@ -230,11 +244,11 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
*/
@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class DenseMatrix(
val numRows: Int,
val numCols: Int,
val values: Array[Double],
override val isTransposed: Boolean) extends Matrix {
class DenseMatrix @Since("1.3.0") (
@Since("1.0.0") val numRows: Int,
@Since("1.0.0") val numCols: Int,
@Since("1.0.0") val values: Array[Double],
@Since("1.3.0") override val isTransposed: Boolean) extends Matrix {

require(values.length == numRows * numCols, "The number of values supplied doesn't match the " +
s"size of the matrix! values.length: ${values.length}, numRows * numCols: ${numRows * numCols}")
@@ -254,7 +268,7 @@ class DenseMatrix(
* @param numCols number of columns
* @param values matrix entries in column major
*/
@Since("1.3.0")
@Since("1.0.0")
def this(numRows: Int, numCols: Int, values: Array[Double]) =
this(numRows, numCols, values, false)

@@ -491,13 +505,13 @@ object DenseMatrix {
*/
@Since("1.2.0")
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class SparseMatrix(
val numRows: Int,
val numCols: Int,
val colPtrs: Array[Int],
val rowIndices: Array[Int],
val values: Array[Double],
override val isTransposed: Boolean) extends Matrix {
class SparseMatrix @Since("1.3.0") (
@Since("1.2.0") val numRows: Int,
@Since("1.2.0") val numCols: Int,
@Since("1.2.0") val colPtrs: Array[Int],
@Since("1.2.0") val rowIndices: Array[Int],
@Since("1.2.0") val values: Array[Double],
@Since("1.3.0") override val isTransposed: Boolean) extends Matrix {

require(values.length == rowIndices.length, "The number of row indices and values don't match! " +
s"values.length: ${values.length}, rowIndices.length: ${rowIndices.length}")
@@ -527,7 +541,7 @@ class SparseMatrix(
* order for each column
* @param values non-zero matrix entries in column major
*/
@Since("1.3.0")
@Since("1.2.0")
def this(
numRows: Int,
numCols: Int,
@@ -549,8 +563,6 @@ class SparseMatrix(
}
}

/**
*/
@Since("1.3.0")
override def apply(i: Int, j: Int): Double = {
val ind = index(i, j)
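As a hedged usage sketch of the Matrix members annotated above (the values are made up for illustration only):

import org.apache.spark.mllib.linalg.{DenseVector, Matrices}

// 2x2 matrix given in column-major order: rows are (1.0, 2.0) and (0.0, 3.0).
val m = Matrices.dense(2, 2, Array(1.0, 0.0, 2.0, 3.0))
val v = new DenseVector(Array(1.0, 1.0))

val mv = m.multiply(v)   // Matrix-DenseVector multiplication, @Since("1.2.0")
val mt = m.transpose     // shares the same underlying data, @Since("1.3.0")

println(mv)              // [3.0,3.0]
println(m.numNonzeros)   // 3 non-zero values
println(mt.numActives)   // 4 explicitly stored entries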
@@ -31,6 +31,7 @@ case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VTyp
* :: Experimental ::
* Represents QR factors.
*/
@Since("1.5.0")
@Experimental
case class QRDecomposition[QType, RType](Q: QType, R: RType)

25 changes: 20 additions & 5 deletions mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -38,16 +38,19 @@ import org.apache.spark.sql.types._
* Note: Users should not implement this interface.
*/
@SQLUserDefinedType(udt = classOf[VectorUDT])
@Since("1.0.0")
sealed trait Vector extends Serializable {

/**
* Size of the vector.
*/
@Since("1.0.0")
def size: Int

/**
* Converts the instance to a double array.
*/
@Since("1.0.0")
def toArray: Array[Double]

override def equals(other: Any): Boolean = {
@@ -99,11 +102,13 @@ sealed trait Vector extends Serializable {
* Gets the value of the ith element.
* @param i index
*/
@Since("1.1.0")
def apply(i: Int): Double = toBreeze(i)

/**
* Makes a deep copy of this vector.
*/
@Since("1.1.0")
def copy: Vector = {
throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.")
}
@@ -121,26 +126,31 @@ sealed trait Vector extends Serializable {
* Number of active entries. An "active entry" is an element which is explicitly stored,
* regardless of its value. Note that inactive entries have value 0.
*/
@Since("1.4.0")
def numActives: Int

/**
* Number of nonzero elements. This scans all active values and counts nonzeros.
*/
@Since("1.4.0")
def numNonzeros: Int

/**
* Converts this vector to a sparse vector with all explicit zeros removed.
*/
@Since("1.4.0")
def toSparse: SparseVector

/**
* Converts this vector to a dense vector.
*/
@Since("1.4.0")
def toDense: DenseVector = new DenseVector(this.toArray)

/**
* Returns a vector in either dense or sparse format, whichever uses less storage.
*/
@Since("1.4.0")
def compressed: Vector = {
val nnz = numNonzeros
// A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
@@ -155,6 +165,7 @@ sealed trait Vector extends Serializable {
* Find the index of a maximal element. Returns the index of the first maximal element in case of a tie.
* Returns -1 if the vector has length 0.
*/
@Since("1.5.0")
def argmax: Int
}

@@ -532,7 +543,8 @@ object Vectors {
*/
@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[VectorUDT])
class DenseVector(val values: Array[Double]) extends Vector {
class DenseVector @Since("1.0.0") (
@Since("1.0.0") val values: Array[Double]) extends Vector {

@Since("1.0.0")
override def size: Int = values.length
@@ -632,7 +644,9 @@ class DenseVector(val values: Array[Double]) extends Vector {

@Since("1.3.0")
object DenseVector {

/** Extracts the value array from a dense vector. */
@Since("1.3.0")
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
}

@@ -645,10 +659,10 @@ object DenseVector {
*/
@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[VectorUDT])
class SparseVector(
override val size: Int,
val indices: Array[Int],
val values: Array[Double]) extends Vector {
class SparseVector @Since("1.0.0") (
@Since("1.0.0") override val size: Int,
@Since("1.0.0") val indices: Array[Int],
@Since("1.0.0") val values: Array[Double]) extends Vector {

require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
s" indices match the dimension of the values. You provided ${indices.length} indices and " +
@@ -819,6 +833,7 @@ class SparseVector(

@Since("1.3.0")
object SparseVector {
@Since("1.3.0")
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
Some((sv.size, sv.indices, sv.values))
}
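A short, hedged usage sketch of the extractors annotated above: because the DenseVector and SparseVector companion objects expose unapply, callers can pattern-match on the concrete representation, and compressed (also annotated above) returns whichever representation needs less storage.

import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors}

// Pattern matching via the unapply extractors shown in the diff above.
def describe(v: Vector): String = v match {
  case SparseVector(size, indices, _) =>
    s"sparse, size $size, ${indices.length} active entries"
  case DenseVector(values) =>
    s"dense, ${values.length} values"
}

val v = Vectors.dense(1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0)
println(describe(v))             // dense, 8 values
// Only 2 of 8 entries are non-zero, so the sparse form is smaller and
// compressed returns a SparseVector.
println(describe(v.compressed))  // sparse, size 8, 2 active entries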
@@ -131,10 +131,10 @@ private[mllib] object GridPartitioner {
*/
@Since("1.3.0")
@Experimental
class BlockMatrix(
val blocks: RDD[((Int, Int), Matrix)],
val rowsPerBlock: Int,
val colsPerBlock: Int,
class BlockMatrix @Since("1.3.0") (
@Since("1.3.0") val blocks: RDD[((Int, Int), Matrix)],
@Since("1.3.0") val rowsPerBlock: Int,
@Since("1.3.0") val colsPerBlock: Int,
private var nRows: Long,
private var nCols: Long) extends DistributedMatrix with Logging {

@@ -171,7 +171,9 @@ class BlockMatrix(
nCols
}

@Since("1.3.0")
val numRowBlocks = math.ceil(numRows() * 1.0 / rowsPerBlock).toInt
@Since("1.3.0")
val numColBlocks = math.ceil(numCols() * 1.0 / colsPerBlock).toInt

private[mllib] def createPartitioner(): GridPartitioner =
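A hedged construction sketch for the BlockMatrix members annotated above; it assumes an existing SparkContext named sc.

import org.apache.spark.mllib.linalg.{DenseMatrix, Matrix}
import org.apache.spark.mllib.linalg.distributed.BlockMatrix

// A 4x4 identity matrix stored as two explicit 2x2 blocks, keyed by
// (blockRowIndex, blockColIndex); missing blocks are treated as zero.
val blocks = sc.parallelize(Seq(
  ((0, 0), DenseMatrix.eye(2): Matrix),
  ((1, 1), DenseMatrix.eye(2): Matrix)))
val mat = new BlockMatrix(blocks, 2, 2) // rowsPerBlock = colsPerBlock = 2

// numRowBlocks and numColBlocks (both @Since("1.3.0") above) follow from the
// overall dimensions and the per-block sizes.
println(mat.numRows())    // 4
println(mat.numColBlocks) // 2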
@@ -46,8 +46,8 @@ case class MatrixEntry(i: Long, j: Long, value: Double)
*/
@Since("1.0.0")
@Experimental
class CoordinateMatrix(
val entries: RDD[MatrixEntry],
class CoordinateMatrix @Since("1.0.0") (
@Since("1.0.0") val entries: RDD[MatrixEntry],
private var nRows: Long,
private var nCols: Long) extends DistributedMatrix {

@@ -28,9 +28,11 @@ import org.apache.spark.annotation.Since
trait DistributedMatrix extends Serializable {

/** Gets or computes the number of rows. */
@Since("1.0.0")
def numRows(): Long

/** Gets or computes the number of columns. */
@Since("1.0.0")
def numCols(): Long

/** Collects data and assembles a local dense breeze matrix (for test only). */
@@ -45,8 +45,8 @@ case class IndexedRow(index: Long, vector: Vector)
*/
@Since("1.0.0")
@Experimental
class IndexedRowMatrix(
val rows: RDD[IndexedRow],
class IndexedRowMatrix @Since("1.0.0") (
@Since("1.0.0") val rows: RDD[IndexedRow],
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix {

@@ -47,8 +47,8 @@ import org.apache.spark.storage.StorageLevel
*/
@Since("1.0.0")
@Experimental
class RowMatrix(
val rows: RDD[Vector],
class RowMatrix @Since("1.0.0") (
@Since("1.0.0") val rows: RDD[Vector],
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix with Logging {

@@ -519,6 +519,7 @@ class RowMatrix(
* @param computeQ whether to compute Q
* @return QRDecomposition(Q, R), Q = null if computeQ = false.
*/
@Since("1.5.0")
def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = {
val col = numCols().toInt
// split rows horizontally into smaller matrices, and compute QR for each of them
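Finally, a hedged sketch of calling the newly annotated tallSkinnyQR, again assuming an existing SparkContext named sc.

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix

// A tall-and-skinny 3x2 matrix, one distributed Vector per row.
val rows = sc.parallelize(Seq(
  Vectors.dense(1.0, 2.0),
  Vectors.dense(3.0, 4.0),
  Vectors.dense(5.0, 6.0)))
val mat = new RowMatrix(rows)

// QRDecomposition is the case class annotated @Since("1.5.0") earlier in this
// commit: Q is a RowMatrix, R is a small local upper-triangular Matrix.
val qr = mat.tallSkinnyQR(computeQ = true)
println(qr.R)
// With computeQ = false (the default), qr.Q is null, as the doc comment notes.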
