[SPARK-10238] [MLLIB] update since versions in mllib.linalg #8440

Closed · wants to merge 1 commit
44 changes: 28 additions & 16 deletions mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -32,18 +32,23 @@ import org.apache.spark.sql.types._
* Trait for a local matrix.
*/
@SQLUserDefinedType(udt = classOf[MatrixUDT])
@Since("1.0.0")
sealed trait Matrix extends Serializable {

/** Number of rows. */
@Since("1.0.0")
def numRows: Int

/** Number of columns. */
@Since("1.0.0")
def numCols: Int

/** Flag that keeps track of whether the matrix is transposed. False by default. */
@Since("1.3.0")
val isTransposed: Boolean = false

/** Converts to a dense array in column major. */
@Since("1.0.0")
def toArray: Array[Double] = {
val newArray = new Array[Double](numRows * numCols)
foreachActive { (i, j, v) =>
@@ -56,6 +61,7 @@ sealed trait Matrix extends Serializable {
private[mllib] def toBreeze: BM[Double]

/** Gets the (i, j)-th element. */
@Since("1.3.0")
def apply(i: Int, j: Int): Double

/** Return the index for the (i, j)-th element in the backing array. */
@@ -65,24 +71,29 @@ sealed trait Matrix extends Serializable {
private[mllib] def update(i: Int, j: Int, v: Double): Unit

/** Get a deep copy of the matrix. */
@Since("1.2.0")
def copy: Matrix

/** Transpose the Matrix. Returns a new `Matrix` instance sharing the same underlying data. */
@Since("1.3.0")
def transpose: Matrix

/** Convenience method for `Matrix`-`DenseMatrix` multiplication. */
@Since("1.2.0")
def multiply(y: DenseMatrix): DenseMatrix = {
val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols)
BLAS.gemm(1.0, this, y, 0.0, C)
C
}

/** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */
@Since("1.2.0")
def multiply(y: DenseVector): DenseVector = {
multiply(y.asInstanceOf[Vector])
}

/** Convenience method for `Matrix`-`Vector` multiplication. */
@Since("1.4.0")
def multiply(y: Vector): DenseVector = {
val output = new DenseVector(new Array[Double](numRows))
BLAS.gemv(1.0, this, y, 0.0, output)
@@ -93,6 +104,7 @@ sealed trait Matrix extends Serializable {
override def toString: String = toBreeze.toString()

/** A human readable representation of the matrix with maximum lines and width */
@Since("1.4.0")
def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth)

/** Map the values of this matrix using a function. Generates a new matrix. Performs the
@@ -118,11 +130,13 @@ sealed trait Matrix extends Serializable {
/**
* Find the number of non-zero active values.
*/
@Since("1.5.0")
def numNonzeros: Int

/**
* Find the number of values stored explicitly. These values can be zero as well.
*/
@Since("1.5.0")
def numActives: Int
}
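A quick usage sketch of the multiply overloads annotated above (illustrative values, assuming a Spark build that ships these APIs):

import org.apache.spark.mllib.linalg.{Matrices, Vectors}

// 2x2 column-major matrix [[1.0, 2.0], [3.0, 4.0]]
val m = Matrices.dense(2, 2, Array(1.0, 3.0, 2.0, 4.0))
val v = Vectors.dense(1.0, 1.0)
m.multiply(v) // DenseVector(3.0, 7.0), via the Since-1.4.0 Vector overload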

@@ -230,11 +244,11 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
*/
@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class DenseMatrix(
val numRows: Int,
val numCols: Int,
val values: Array[Double],
override val isTransposed: Boolean) extends Matrix {
class DenseMatrix @Since("1.3.0") (
@Since("1.0.0") val numRows: Int,
@Since("1.0.0") val numCols: Int,
@Since("1.0.0") val values: Array[Double],
@Since("1.3.0") override val isTransposed: Boolean) extends Matrix {

require(values.length == numRows * numCols, "The number of values supplied doesn't match the " +
s"size of the matrix! values.length: ${values.length}, numRows * numCols: ${numRows * numCols}")
@@ -254,7 +268,7 @@ class DenseMatrix(
* @param numCols number of columns
* @param values matrix entries in column major
*/
@Since("1.3.0")
@Since("1.0.0")
def this(numRows: Int, numCols: Int, values: Array[Double]) =
this(numRows, numCols, values, false)
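A minimal sketch of the column-major layout this three-argument constructor expects (illustrative values):

import org.apache.spark.mllib.linalg.DenseMatrix

// 2x3 matrix whose columns (1,4), (2,5), (3,6) are stored consecutively
val dm = new DenseMatrix(2, 3, Array(1.0, 4.0, 2.0, 5.0, 3.0, 6.0))
dm(1, 0) // 4.0: row 1 of column 0
dm.transpose // 3x2 matrix sharing the same underlying values array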

@@ -491,13 +505,13 @@ object DenseMatrix {
*/
@Since("1.2.0")
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class SparseMatrix(
val numRows: Int,
val numCols: Int,
val colPtrs: Array[Int],
val rowIndices: Array[Int],
val values: Array[Double],
override val isTransposed: Boolean) extends Matrix {
class SparseMatrix @Since("1.3.0") (
@Since("1.2.0") val numRows: Int,
@Since("1.2.0") val numCols: Int,
@Since("1.2.0") val colPtrs: Array[Int],
@Since("1.2.0") val rowIndices: Array[Int],
@Since("1.2.0") val values: Array[Double],
@Since("1.3.0") override val isTransposed: Boolean) extends Matrix {

require(values.length == rowIndices.length, "The number of row indices and values don't match! " +
s"values.length: ${values.length}, rowIndices.length: ${rowIndices.length}")
@@ -527,7 +541,7 @@ class SparseMatrix(
* order for each column
* @param values non-zero matrix entries in column major
*/
@Since("1.3.0")
@Since("1.2.0")
def this(
numRows: Int,
numCols: Int,
@@ -549,8 +563,6 @@
}
}

/**
*/
@Since("1.3.0")
override def apply(i: Int, j: Int): Double = {
val ind = index(i, j)
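For context, a minimal sketch of the compressed sparse column (CSC) layout that SparseMatrix stores (illustrative 3x3 matrix):

import org.apache.spark.mllib.linalg.SparseMatrix

// [ 1.0  0.0  4.0 ]
// [ 0.0  3.0  0.0 ]
// [ 2.0  0.0  5.0 ]
// column j's entries sit in values(colPtrs(j) until colPtrs(j + 1))
val sm = new SparseMatrix(3, 3, Array(0, 2, 3, 5), Array(0, 2, 1, 0, 2),
  Array(1.0, 2.0, 3.0, 4.0, 5.0))
sm(2, 0) // 2.0, explicitly stored
sm(0, 1) // 0.0, not stored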
mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
@@ -31,6 +31,7 @@ case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType)
* :: Experimental ::
* Represents QR factors.
*/
@Since("1.5.0")
@Experimental
case class QRDecomposition[QType, RType](Q: QType, R: RType)

25 changes: 20 additions & 5 deletions mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -38,16 +38,19 @@ import org.apache.spark.sql.types._
* Note: Users should not implement this interface.
*/
@SQLUserDefinedType(udt = classOf[VectorUDT])
@Since("1.0.0")
sealed trait Vector extends Serializable {

/**
* Size of the vector.
*/
@Since("1.0.0")
def size: Int

/**
* Converts the instance to a double array.
*/
@Since("1.0.0")
def toArray: Array[Double]

override def equals(other: Any): Boolean = {
@@ -99,11 +102,13 @@ sealed trait Vector extends Serializable {
* Gets the value of the ith element.
* @param i index
*/
@Since("1.1.0")
def apply(i: Int): Double = toBreeze(i)

/**
* Makes a deep copy of this vector.
*/
@Since("1.1.0")
def copy: Vector = {
throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.")
}
@@ -121,26 +126,31 @@ sealed trait Vector extends Serializable {
* Number of active entries. An "active entry" is an element which is explicitly stored,
* regardless of its value. Note that inactive entries have value 0.
*/
@Since("1.4.0")
def numActives: Int

/**
* Number of nonzero elements. This scans all active values and counts nonzeros.
*/
@Since("1.4.0")
def numNonzeros: Int

/**
* Converts this vector to a sparse vector with all explicit zeros removed.
*/
@Since("1.4.0")
def toSparse: SparseVector

/**
* Converts this vector to a dense vector.
*/
@Since("1.4.0")
def toDense: DenseVector = new DenseVector(this.toArray)

/**
* Returns a vector in either dense or sparse format, whichever uses less storage.
*/
@Since("1.4.0")
def compressed: Vector = {
val nnz = numNonzeros
// A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
@@ -155,6 +165,7 @@ sealed trait Vector extends Serializable {
* Find the index of a maximal element. Returns the first maximal element in case of a tie.
* Returns -1 if the vector has length 0.
*/
@Since("1.5.0")
def argmax: Int
}
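A sketch of the arithmetic behind compressed, using the byte costs quoted in the comment above (illustrative vector):

import org.apache.spark.mllib.linalg.Vectors

val v = Vectors.dense(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0)
v.numActives  // 8: a dense vector stores every slot
v.numNonzeros // 1
// dense: 8 * 8 + 8 = 72 bytes; sparse: 12 * 1 + 20 = 32 bytes, so sparse wins
v.compressed  // SparseVector(8, [7], [9.0])
v.argmax      // 7, the index of the first maximal element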

@@ -532,7 +543,8 @@ object Vectors {
*/
@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[VectorUDT])
class DenseVector(val values: Array[Double]) extends Vector {
class DenseVector @Since("1.0.0") (
@Since("1.0.0") val values: Array[Double]) extends Vector {

@Since("1.0.0")
override def size: Int = values.length
@@ -632,7 +644,9 @@ class DenseVector(val values: Array[Double]) extends Vector {

@Since("1.3.0")
object DenseVector {

/** Extracts the value array from a dense vector. */
@Since("1.3.0")
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
}

@@ -645,10 +659,10 @@ object DenseVector {
*/
@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[VectorUDT])
class SparseVector(
override val size: Int,
val indices: Array[Int],
val values: Array[Double]) extends Vector {
class SparseVector @Since("1.0.0") (
@Since("1.0.0") override val size: Int,
@Since("1.0.0") val indices: Array[Int],
@Since("1.0.0") val values: Array[Double]) extends Vector {

require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
s" indices match the dimension of the values. You provided ${indices.length} indices and " +
@@ -819,6 +833,7 @@ class SparseVector(

@Since("1.3.0")
object SparseVector {
@Since("1.3.0")
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
Some((sv.size, sv.indices, sv.values))
}
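The unapply methods annotated above support pattern matching on the concrete vector types; a small sketch (the describe helper is hypothetical):

import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors}

def describe(v: Vector): String = v match {
  case DenseVector(values) => s"dense, ${values.length} values"
  case SparseVector(size, indices, _) => s"sparse, ${indices.length} of $size stored"
}

describe(Vectors.dense(1.0, 0.0))                 // "dense, 2 values"
describe(Vectors.sparse(5, Array(1), Array(2.0))) // "sparse, 1 of 5 stored"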
mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -131,10 +131,10 @@ private[mllib] object GridPartitioner {
*/
@Since("1.3.0")
@Experimental
class BlockMatrix(
val blocks: RDD[((Int, Int), Matrix)],
val rowsPerBlock: Int,
val colsPerBlock: Int,
class BlockMatrix @Since("1.3.0") (
@Since("1.3.0") val blocks: RDD[((Int, Int), Matrix)],
@Since("1.3.0") val rowsPerBlock: Int,
@Since("1.3.0") val colsPerBlock: Int,
private var nRows: Long,
private var nCols: Long) extends DistributedMatrix with Logging {

@@ -171,7 +171,9 @@ class BlockMatrix(
nCols
}

@Since("1.3.0")
val numRowBlocks = math.ceil(numRows() * 1.0 / rowsPerBlock).toInt
@Since("1.3.0")
val numColBlocks = math.ceil(numCols() * 1.0 / colsPerBlock).toInt

private[mllib] def createPartitioner(): GridPartitioner =
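A small sketch of how the block counts above fall out of the block sizes (assumes sc is an existing SparkContext; values are illustrative):

import org.apache.spark.mllib.linalg.Matrices
import org.apache.spark.mllib.linalg.distributed.BlockMatrix

// two 2x2 blocks side by side form one 2x4 matrix
val blocks = sc.parallelize(Seq(
  ((0, 0), Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))),
  ((0, 1), Matrices.dense(2, 2, Array(5.0, 6.0, 7.0, 8.0)))))
val bm = new BlockMatrix(blocks, 2, 2) // rowsPerBlock = 2, colsPerBlock = 2
bm.numRowBlocks // ceil(2 / 2.0) = 1
bm.numColBlocks // ceil(4 / 2.0) = 2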
mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -46,8 +46,8 @@ case class MatrixEntry(i: Long, j: Long, value: Double)
*/
@Since("1.0.0")
@Experimental
class CoordinateMatrix(
val entries: RDD[MatrixEntry],
class CoordinateMatrix @Since("1.0.0") (
@Since("1.0.0") val entries: RDD[MatrixEntry],
private var nRows: Long,
private var nCols: Long) extends DistributedMatrix {

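A usage sketch for the entries-only constructor (assumes sc is an existing SparkContext; values are illustrative):

import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}

// entries are (i, j, value) triples
val entries = sc.parallelize(Seq(
  MatrixEntry(0, 0, 1.0), MatrixEntry(1, 2, 2.0), MatrixEntry(2, 1, 3.0)))
val coo = new CoordinateMatrix(entries)
coo.numRows() // 3, computed as the largest row index + 1
coo.numCols() // 3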
mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
@@ -28,9 +28,11 @@ import org.apache.spark.annotation.Since
trait DistributedMatrix extends Serializable {

/** Gets or computes the number of rows. */
@Since("1.0.0")
def numRows(): Long

/** Gets or computes the number of columns. */
@Since("1.0.0")
def numCols(): Long

/** Collects data and assembles a local dense breeze matrix (for test only). */
mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -45,8 +45,8 @@ case class IndexedRow(index: Long, vector: Vector)
*/
@Since("1.0.0")
@Experimental
class IndexedRowMatrix(
val rows: RDD[IndexedRow],
class IndexedRowMatrix @Since("1.0.0") (
@Since("1.0.0") val rows: RDD[IndexedRow],
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix {

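A usage sketch for the rows-only constructor (assumes sc is an existing SparkContext; values are illustrative):

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix}

// each row carries an explicit long index; row 1 is implicitly zero
val rows = sc.parallelize(Seq(
  IndexedRow(0L, Vectors.dense(1.0, 2.0)),
  IndexedRow(2L, Vectors.dense(3.0, 4.0))))
val irm = new IndexedRowMatrix(rows)
irm.numRows() // 3: max index + 1
irm.numCols() // 2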
mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -47,8 +47,8 @@ import org.apache.spark.storage.StorageLevel
*/
@Since("1.0.0")
@Experimental
class RowMatrix(
val rows: RDD[Vector],
class RowMatrix @Since("1.0.0") (
@Since("1.0.0") val rows: RDD[Vector],
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix with Logging {

@@ -519,6 +519,7 @@ class RowMatrix(
* @param computeQ whether to compute Q
* @return QRDecomposition(Q, R), Q = null if computeQ = false.
*/
@Since("1.5.0")
def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = {
val col = numCols().toInt
// split rows horizontally into smaller matrices, and compute QR for each of them
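A usage sketch tying tallSkinnyQR to the QRDecomposition case class annotated earlier (assumes sc is an existing SparkContext; values are illustrative):

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix

// a tall-and-skinny 4x2 matrix
val mat = new RowMatrix(sc.parallelize(Seq(
  Vectors.dense(1.0, 2.0), Vectors.dense(3.0, 4.0),
  Vectors.dense(5.0, 6.0), Vectors.dense(7.0, 8.0))))
val qr = mat.tallSkinnyQR(computeQ = true)
// qr.Q: RowMatrix with orthonormal columns; qr.R: 2x2 upper-triangular local Matrix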