From 627a0e9776a3c39e985b30b508521e4869309767 Mon Sep 17 00:00:00 2001 From: daniel-pape Date: Tue, 18 Aug 2015 20:29:06 +0200 Subject: [PATCH 01/16] Work in progress: Test cases and implementation for outer product of vectors. --- .../apache/flink/ml/math/DenseVector.scala | 34 ++++++++++ .../apache/flink/ml/math/SparseVector.scala | 35 +++++++++++ .../org/apache/flink/ml/math/Vector.scala | 8 +++ .../flink/ml/math/DenseVectorSuite.scala | 63 +++++++++++++++++++ 4 files changed, 140 insertions(+) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala index f24249629145c..a8741256a44cb 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala @@ -102,6 +102,40 @@ case class DenseVector( } } + /** Returns the outer product of the recipient and the argument + * + * + * @param other + * @return + * + * TODO: Dense x Dense should yield Dense, + * Sparse x Sparse should yield Sparse, + * Dense x Sparse (and the other way around) should yield Sparse + */ + override def outer(other: Vector): Matrix = { + val numRows = size + val numCols = other.size + + other match { + case SparseVector(size, indices, data_) => + val entries: Array[(Int, Int, Double)] = for { + i <- (0 until numRows).toArray + j <- indices + value = this(i) * other(j) + if value != 0 + } yield (i, j, value) + + SparseMatrix.fromCOO(numRows, numCols, entries) + case _ => + val values = for { + i <- (0 until numRows) + j <- (0 until numCols) + } yield this(i) * other(j) + + DenseMatrix(numRows, numCols, values.toArray) + } + } + /** Magnitude of a vector * * @return diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index 
8ad03698e2e7e..0d49378e1df57 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -85,6 +85,41 @@ case class SparseVector( } } + /** Returns the outer product of the recipient and the argument + * + * + * @param other + * @return + * + * TODO: Dense x Dense should yield Dense, + * Sparse x Sparse should yield Sparse, + * Dense x Sparse (and the other way around) should yield Sparse + */ + override def outer(other: Vector): Matrix = { + val numRows = size + val numCols = other.size + + other match { + case SparseVector(size, indices, data_) => + val entries: Array[(Int, Int, Double)] = for { + i <- (0 until numRows).toArray + j <- indices + value = this(i) * other(j) + if value != 0 + } yield (i, j, value) + + SparseMatrix.fromCOO(numRows, numCols, entries) + case _ => + val values = for { + i <- (0 until numRows) + j <- (0 until numCols) + } yield this(i) * other(j) + + DenseMatrix(numRows, numCols, values.toArray) + } + } + + /** Magnitude of a vector * * @return diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala index c3a9a3951dfde..e52328d37240c 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala @@ -58,6 +58,14 @@ trait Vector extends Serializable { */ def dot(other: Vector): Double + /** Returns the outer product of the recipient and the argument + * + * + * @param other a Vector + * @return a matrix + */ + def outer(other: Vector): Matrix + /** Magnitude of a vector * * @return diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala index c7a3dc0867dbd..941ee31a78ddd 
100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala @@ -78,6 +78,69 @@ class DenseVectorSuite extends FlatSpec with Matchers { } } + //==================================================================================================================== + it should "calculate outer product with DenseVector" in { + val vec1 = DenseVector(Array(1, 0, 1)) + val vec2 = DenseVector(Array(0, 1, 0)) + + vec1.outer(vec2) should be(DenseMatrix(3, 3, Array(0, 1, 0, 0, 0, 0, 0, 1, 0))) + } + + it should "calculate outer product with SparseVector" in { + val vec1 = DenseVector(Array(1, 0, 1)) + val vec2 = SparseVector.fromCOO(3, (0, 1), (1, 1)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 0, 1.0), (0, 1, 1.0), (2, 0, 1.0), (2, 1, 1.0))) + } + + it should "calculate outer product with SparseVector 2" in { + val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) + val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0))) + } + + it should "calculate right outer product with one-dimensional unit vector as identity" in { + val vec = DenseVector(Array(1, 0, 1, 0, 0)) + val unit = DenseVector(1) + + vec.outer(unit) should equal(DenseMatrix(vec.size, 1, vec.data)) + } + + it should "calculate left outer product with one-dimensional unit vector as identity" in { + val vec = DenseVector(Array(1, 2, 3, 4, 5)) + val unit = DenseVector(1) + + unit.outer(vec) should equal(DenseMatrix(1, vec.size, vec.data)) + } + + it should "calculate outer product of one-dimensional dense vectors as multiplication" in { + val vec1 = DenseVector(Array(2)) + val vec2 = DenseVector(Array(3)) + + vec1.outer(vec2) should be(DenseMatrix(1, 1, 2 * 3)) + } + + it should "calculate outer product of one-dimensional sparse vectors as multiplication" in { + val vec1 = 
SparseVector(1, Array(0), Array(2)) + val vec2 = DenseVector(Array(3)) + + // TODO: Next line fails due to dummy impl of outer for sparse vectors: + vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) + vec2.outer(vec1) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) + } + + +// it should "fail in case of calculation dot product with different size vector" in { +// val vec1 = DenseVector(Array(1, 0)) +// val vec2 = DenseVector(Array(0)) +// +// intercept[IllegalArgumentException] { +// vec1.dot(vec2) +// } +// } + //==================================================================================================================== + it should "calculate magnitude of vector" in { val vec = DenseVector(Array(1, 4, 8)) From 277771aee8d0e3aeea1b027bb70c71c5ea1aa66b Mon Sep 17 00:00:00 2001 From: daniel-pape Date: Fri, 21 Aug 2015 14:50:26 +0200 Subject: [PATCH 02/16] Implementation of outer product for sparse vectors. --- .../apache/flink/ml/math/SparseVector.scala | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index 0d49378e1df57..0ba0158616c2a 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -99,24 +99,19 @@ case class SparseVector( val numRows = size val numCols = other.size - other match { - case SparseVector(size, indices, data_) => - val entries: Array[(Int, Int, Double)] = for { - i <- (0 until numRows).toArray - j <- indices - value = this(i) * other(j) - if value != 0 - } yield (i, j, value) - - SparseMatrix.fromCOO(numRows, numCols, entries) - case _ => - val values = for { - i <- (0 until numRows) - j <- (0 until numCols) - } yield this(i) * other(j) - - DenseMatrix(numRows, numCols, 
values.toArray) + val otherIndices = other match { + case sv @ SparseVector(_, _, _) => sv.indices + case dv @ DenseVector(_) => (0 until dv.size).toArray } + + val entries = for { + i <- indices + j <- otherIndices + value = this(i) * other(j) + if value != 0 + } yield (i, j, value) + + SparseMatrix.fromCOO(numRows, numCols, entries) } From 0e9a608feb305ef254d896e9f39f58f98e236dba Mon Sep 17 00:00:00 2001 From: daniel-pape Date: Fri, 21 Aug 2015 14:51:40 +0200 Subject: [PATCH 03/16] Test cases for outer product computation. For dense as well as sparse vectors, More tests are to come. --- .../flink/ml/math/DenseVectorSuite.scala | 49 +++++++++++++------ .../flink/ml/math/SparseVectorSuite.scala | 34 +++++++++++++ 2 files changed, 68 insertions(+), 15 deletions(-) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala index 941ee31a78ddd..0c92ad3eaa3fb 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala @@ -79,58 +79,77 @@ class DenseVectorSuite extends FlatSpec with Matchers { } //==================================================================================================================== - it should "calculate outer product with DenseVector" in { + it should "calculate outer product with DenseVectors correctly as DenseMatrix" in { val vec1 = DenseVector(Array(1, 0, 1)) val vec2 = DenseVector(Array(0, 1, 0)) + vec1.outer(vec2) should be(an[DenseMatrix]) vec1.outer(vec2) should be(DenseMatrix(3, 3, Array(0, 1, 0, 0, 0, 0, 0, 1, 0))) } - it should "calculate outer product with SparseVector" in { + it should "calculate outer product with SparseVector correctly as SparseMatrix" in { val vec1 = DenseVector(Array(1, 0, 1)) - val vec2 = SparseVector.fromCOO(3, (0, 1), (1, 1)) + val vec2 = 
SparseVector(3, Array(1), Array(1)) - vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 0, 1.0), (0, 1, 1.0), (2, 0, 1.0), (2, 1, 1.0))) + vec1.outer(vec2) should be(an[SparseMatrix]) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1))) } - it should "calculate outer product with SparseVector 2" in { + //==================================================================================================================== + + it should "calculate outer product of a DenseVector with a SparseVector" in { val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1)) - vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0))) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1), (0, 4, 1), (2, 2, 1), (2, 4, 1))) } - it should "calculate right outer product with one-dimensional unit vector as identity" in { + //==================================================================================================================== + + it should "calculate right outer product with DenseVector with one-dimensional unit DenseVector as identity" in { val vec = DenseVector(Array(1, 0, 1, 0, 0)) val unit = DenseVector(1) vec.outer(unit) should equal(DenseMatrix(vec.size, 1, vec.data)) } - it should "calculate left outer product with one-dimensional unit vector as identity" in { + it should "calculate right outer product with DenseVector with one-dimensional unit SparseVector as identity" in { + val vec = DenseVector(Array(1, 0, 1, 0, 0)) + val unit = SparseVector(1, Array(0), Array(1)) + + vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1))) + } + + it should "calculate left outer product for DenseVector with one-dimensional unit DenseVector as identity" in { val vec = DenseVector(Array(1, 2, 3, 4, 5)) val unit = DenseVector(1) unit.outer(vec) should equal(DenseMatrix(1, vec.size, vec.data)) } - it should "calculate outer 
product of one-dimensional dense vectors as multiplication" in { + it should "calculate left outer product for SparseVector with one-dimensional unit DenseVector as identity" in { + val vec = SparseVector(5, Array(0, 1, 2, 3, 4), Array(1, 2, 3, 4, 5)) + val unit = DenseVector(1) + + unit.outer(vec) should equal(SparseMatrix.fromCOO(1, vec.size, (0, 0, 1), (0, 1, 2), (0, 2, 3), (0, 3, 4), (0, 4, 5))) + } + + //==================================================================================================================== + + it should "calculate outer product with DenseVector via multiplication if both vectors are one-dimensional" in { val vec1 = DenseVector(Array(2)) val vec2 = DenseVector(Array(3)) vec1.outer(vec2) should be(DenseMatrix(1, 1, 2 * 3)) } - it should "calculate outer product of one-dimensional sparse vectors as multiplication" in { - val vec1 = SparseVector(1, Array(0), Array(2)) - val vec2 = DenseVector(Array(3)) + it should "calculate outer product with SparseVector via multiplication if both vectors are one-dimensioan" in { + val vec1 = DenseVector(Array(2)) + val vec2 = SparseVector(1, Array(0), Array(3)) - // TODO: Next line fails due to dummy impl of outer for sparse vectors: vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) - vec2.outer(vec1) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) } - // it should "fail in case of calculation dot product with different size vector" in { // val vec1 = DenseVector(Array(1, 0)) // val vec2 = DenseVector(Array(0)) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala index 5ed0b379eab07..1f60f12d102b6 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala @@ -119,6 +119,40 @@ class SparseVectorSuite extends 
FlatSpec with Matchers { vec1.dot(vec2) should be(0) } + //==================================================================================================================== + + it should "calculate outer product with SparseVector correctly as SparseMatrix" in { + val vec1 = SparseVector(3, Array(0, 2), Array(1, 1)) + val vec2 = SparseVector(3, Array(1), Array(1)) + + vec1.outer(vec2) should be(an[SparseMatrix]) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1))) + } + + it should "calculate outer product with DenseVector correctly as SparseMatrix" in { + val vec1 = SparseVector(3, Array(0, 2), Array(1, 1)) + val vec2 = DenseVector(Array(0, 1, 0)) + + vec1.outer(vec2) should be(an[SparseMatrix]) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1))) + } + + it should "calculate outer product with SparseVector via multiplication if bothe vectors are one-dimensional" in { + val vec1 = SparseVector.fromCOO(1, (0, 2)) + val vec2 = SparseVector.fromCOO(1, (0, 3)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) + } + + it should "calculate outer product with DenseVector via multiplication if both vectors are one-dimensional" in { + val vec1 = SparseVector(1, Array(0), Array(2)) + val vec2 = DenseVector(Array(3)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) + } + + //==================================================================================================================== + it should "fail in case of calculation dot product with different size vector" in { val vec1 = SparseVector.fromCOO(4, (0, 1), (2, 1)) val vec2 = DenseVector(Array(0, 1, 0)) From d0eb80102ae4856236fce0b98c4e396183d86f3f Mon Sep 17 00:00:00 2001 From: daniel-pape Date: Fri, 21 Aug 2015 21:38:05 +0200 Subject: [PATCH 04/16] Added test case. 
--- .../org/apache/flink/ml/math/DenseVectorSuite.scala | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala index 0c92ad3eaa3fb..4cc2c927c0568 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala @@ -79,7 +79,7 @@ class DenseVectorSuite extends FlatSpec with Matchers { } //==================================================================================================================== - it should "calculate outer product with DenseVectors correctly as DenseMatrix" in { + it should "calculate outer product with DenseVector correctly as DenseMatrix" in { val vec1 = DenseVector(Array(1, 0, 1)) val vec2 = DenseVector(Array(0, 1, 0)) @@ -97,7 +97,14 @@ class DenseVectorSuite extends FlatSpec with Matchers { //==================================================================================================================== - it should "calculate outer product of a DenseVector with a SparseVector" in { + it should "calculate outer product with a DenseVector correctly as DenseMatrix 2" in { + val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) + val vec2 = DenseVector(Array(0, 0, 1, 0, 1)) + + vec1.outer(vec2) should be(DenseMatrix(5, 5, Array(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))) + } + + it should "calculate outer product with a SparseVector correctly as SparseMatrix 2" in { val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1)) From 97dd4f050e7d3abf7c419d904913979406abac05 Mon Sep 17 00:00:00 2001 From: Daniel Pape Date: Sun, 30 Aug 2015 22:11:53 +0200 Subject: [PATCH 05/16] Added method documentation for outer product methods. 
--- .../org/apache/flink/ml/math/DenseVector.scala | 14 ++++++-------- .../org/apache/flink/ml/math/SparseVector.scala | 16 +++++++--------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala index a8741256a44cb..a95117083d5d8 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala @@ -102,15 +102,13 @@ case class DenseVector( } } - /** Returns the outer product of the recipient and the argument + /** Returns the outer product (a.k.a. Kronecker product) of `this` + * with `other`. The result will be given in [[org.apache.flink.ml.math.SparseMatrix]] + * representation if `other` is sparse and as [[org.apache.flink.ml.math.DenseMatrix]] otherwise. * - * - * @param other - * @return - * - * TODO: Dense x Dense should yield Dense, - * Sparse x Sparse should yield Sparse, - * Dense x Sparse (and the other way around) should yield Sparse + * @param other a Vector + * @return the [[org.apache.flink.ml.math.Matrix]] which equals the outer product of `this` + * with `other`. */ override def outer(other: Vector): Matrix = { val numRows = size diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index 0ba0158616c2a..964b4dcb832c9 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -85,17 +85,15 @@ case class SparseVector( } } - /** Returns the outer product of the recipient and the argument + /** Returns the outer product (a.k.a. Kronecker product) of `this` + * with `other`. 
The result is given in [[org.apache.flink.ml.math.SparseMatrix]] + * representation. * - * - * @param other - * @return - * - * TODO: Dense x Dense should yield Dense, - * Sparse x Sparse should yield Sparse, - * Dense x Sparse (and the other way around) should yield Sparse + * @param other a Vector + * @return the [[org.apache.flink.ml.math.SparseMatrix]] which equals the outer product of `this` + * with `other.` */ - override def outer(other: Vector): Matrix = { + override def outer(other: Vector): SparseMatrix = { val numRows = size val numCols = other.size From 4dde9f86b300cd7c64c7f62feb11984267f45913 Mon Sep 17 00:00:00 2001 From: daniel-pape Date: Tue, 18 Aug 2015 20:29:06 +0200 Subject: [PATCH 06/16] Work in progress: Test cases and implementation for outer product of vectors. --- .../apache/flink/ml/math/DenseVector.scala | 34 ++++++++++ .../apache/flink/ml/math/SparseVector.scala | 35 +++++++++++ .../org/apache/flink/ml/math/Vector.scala | 8 +++ .../flink/ml/math/DenseVectorSuite.scala | 63 +++++++++++++++++++ 4 files changed, 140 insertions(+) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala index f24249629145c..a8741256a44cb 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala @@ -102,6 +102,40 @@ case class DenseVector( } } + /** Returns the outer product of the recipient and the argument + * + * + * @param other + * @return + * + * TODO: Dense x Dense should yield Dense, + * Sparse x Sparse should yield Sparse, + * Dense x Sparse (and the other way around) should yield Sparse + */ + override def outer(other: Vector): Matrix = { + val numRows = size + val numCols = other.size + + other match { + case SparseVector(size, indices, data_) => + val entries: Array[(Int, Int, Double)] = for { + i <- (0 until 
numRows).toArray + j <- indices + value = this(i) * other(j) + if value != 0 + } yield (i, j, value) + + SparseMatrix.fromCOO(numRows, numCols, entries) + case _ => + val values = for { + i <- (0 until numRows) + j <- (0 until numCols) + } yield this(i) * other(j) + + DenseMatrix(numRows, numCols, values.toArray) + } + } + /** Magnitude of a vector * * @return diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index 8ad03698e2e7e..0d49378e1df57 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -85,6 +85,41 @@ case class SparseVector( } } + /** Returns the outer product of the recipient and the argument + * + * + * @param other + * @return + * + * TODO: Dense x Dense should yield Dense, + * Sparse x Sparse should yield Sparse, + * Dense x Sparse (and the other way around) should yield Sparse + */ + override def outer(other: Vector): Matrix = { + val numRows = size + val numCols = other.size + + other match { + case SparseVector(size, indices, data_) => + val entries: Array[(Int, Int, Double)] = for { + i <- (0 until numRows).toArray + j <- indices + value = this(i) * other(j) + if value != 0 + } yield (i, j, value) + + SparseMatrix.fromCOO(numRows, numCols, entries) + case _ => + val values = for { + i <- (0 until numRows) + j <- (0 until numCols) + } yield this(i) * other(j) + + DenseMatrix(numRows, numCols, values.toArray) + } + } + + /** Magnitude of a vector * * @return diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala index c3a9a3951dfde..e52328d37240c 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala +++ 
b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala @@ -58,6 +58,14 @@ trait Vector extends Serializable { */ def dot(other: Vector): Double + /** Returns the outer product of the recipient and the argument + * + * + * @param other a Vector + * @return a matrix + */ + def outer(other: Vector): Matrix + /** Magnitude of a vector * * @return diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala index c7a3dc0867dbd..941ee31a78ddd 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala @@ -78,6 +78,69 @@ class DenseVectorSuite extends FlatSpec with Matchers { } } + //==================================================================================================================== + it should "calculate outer product with DenseVector" in { + val vec1 = DenseVector(Array(1, 0, 1)) + val vec2 = DenseVector(Array(0, 1, 0)) + + vec1.outer(vec2) should be(DenseMatrix(3, 3, Array(0, 1, 0, 0, 0, 0, 0, 1, 0))) + } + + it should "calculate outer product with SparseVector" in { + val vec1 = DenseVector(Array(1, 0, 1)) + val vec2 = SparseVector.fromCOO(3, (0, 1), (1, 1)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 0, 1.0), (0, 1, 1.0), (2, 0, 1.0), (2, 1, 1.0))) + } + + it should "calculate outer product with SparseVector 2" in { + val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) + val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0))) + } + + it should "calculate right outer product with one-dimensional unit vector as identity" in { + val vec = DenseVector(Array(1, 0, 1, 0, 0)) + val unit = DenseVector(1) + + vec.outer(unit) should equal(DenseMatrix(vec.size, 1, 
vec.data)) + } + + it should "calculate left outer product with one-dimensional unit vector as identity" in { + val vec = DenseVector(Array(1, 2, 3, 4, 5)) + val unit = DenseVector(1) + + unit.outer(vec) should equal(DenseMatrix(1, vec.size, vec.data)) + } + + it should "calculate outer product of one-dimensional dense vectors as multiplication" in { + val vec1 = DenseVector(Array(2)) + val vec2 = DenseVector(Array(3)) + + vec1.outer(vec2) should be(DenseMatrix(1, 1, 2 * 3)) + } + + it should "calculate outer product of one-dimensional sparse vectors as multiplication" in { + val vec1 = SparseVector(1, Array(0), Array(2)) + val vec2 = DenseVector(Array(3)) + + // TODO: Next line fails due to dummy impl of outer for sparse vectors: + vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) + vec2.outer(vec1) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) + } + + +// it should "fail in case of calculation dot product with different size vector" in { +// val vec1 = DenseVector(Array(1, 0)) +// val vec2 = DenseVector(Array(0)) +// +// intercept[IllegalArgumentException] { +// vec1.dot(vec2) +// } +// } + //==================================================================================================================== + it should "calculate magnitude of vector" in { val vec = DenseVector(Array(1, 4, 8)) From 9ea41fc721bb6983cd91ca102342ef31c4cd0732 Mon Sep 17 00:00:00 2001 From: daniel-pape Date: Fri, 21 Aug 2015 14:50:26 +0200 Subject: [PATCH 07/16] Implementation of outer product for sparse vectors. 
--- .../apache/flink/ml/math/SparseVector.scala | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index 0d49378e1df57..0ba0158616c2a 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -99,24 +99,19 @@ case class SparseVector( val numRows = size val numCols = other.size - other match { - case SparseVector(size, indices, data_) => - val entries: Array[(Int, Int, Double)] = for { - i <- (0 until numRows).toArray - j <- indices - value = this(i) * other(j) - if value != 0 - } yield (i, j, value) - - SparseMatrix.fromCOO(numRows, numCols, entries) - case _ => - val values = for { - i <- (0 until numRows) - j <- (0 until numCols) - } yield this(i) * other(j) - - DenseMatrix(numRows, numCols, values.toArray) + val otherIndices = other match { + case sv @ SparseVector(_, _, _) => sv.indices + case dv @ DenseVector(_) => (0 until dv.size).toArray } + + val entries = for { + i <- indices + j <- otherIndices + value = this(i) * other(j) + if value != 0 + } yield (i, j, value) + + SparseMatrix.fromCOO(numRows, numCols, entries) } From b021b1f4d6a31626cf5b1cfac7c9dbf025ff00a1 Mon Sep 17 00:00:00 2001 From: daniel-pape Date: Fri, 21 Aug 2015 14:51:40 +0200 Subject: [PATCH 08/16] Test cases for outer product computation. For dense as well as sparse vectors, More tests are to come. 
--- .../flink/ml/math/DenseVectorSuite.scala | 49 +++++++++++++------ .../flink/ml/math/SparseVectorSuite.scala | 34 +++++++++++++ 2 files changed, 68 insertions(+), 15 deletions(-) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala index 941ee31a78ddd..0c92ad3eaa3fb 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala @@ -79,58 +79,77 @@ class DenseVectorSuite extends FlatSpec with Matchers { } //==================================================================================================================== - it should "calculate outer product with DenseVector" in { + it should "calculate outer product with DenseVectors correctly as DenseMatrix" in { val vec1 = DenseVector(Array(1, 0, 1)) val vec2 = DenseVector(Array(0, 1, 0)) + vec1.outer(vec2) should be(an[DenseMatrix]) vec1.outer(vec2) should be(DenseMatrix(3, 3, Array(0, 1, 0, 0, 0, 0, 0, 1, 0))) } - it should "calculate outer product with SparseVector" in { + it should "calculate outer product with SparseVector correctly as SparseMatrix" in { val vec1 = DenseVector(Array(1, 0, 1)) - val vec2 = SparseVector.fromCOO(3, (0, 1), (1, 1)) + val vec2 = SparseVector(3, Array(1), Array(1)) - vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 0, 1.0), (0, 1, 1.0), (2, 0, 1.0), (2, 1, 1.0))) + vec1.outer(vec2) should be(an[SparseMatrix]) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1))) } - it should "calculate outer product with SparseVector 2" in { + //==================================================================================================================== + + it should "calculate outer product of a DenseVector with a SparseVector" in { val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) val vec2 = 
SparseVector.fromCOO(5, (2, 1), (4, 1)) - vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0))) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1), (0, 4, 1), (2, 2, 1), (2, 4, 1))) } - it should "calculate right outer product with one-dimensional unit vector as identity" in { + //==================================================================================================================== + + it should "calculate right outer product with DenseVector with one-dimensional unit DenseVector as identity" in { val vec = DenseVector(Array(1, 0, 1, 0, 0)) val unit = DenseVector(1) vec.outer(unit) should equal(DenseMatrix(vec.size, 1, vec.data)) } - it should "calculate left outer product with one-dimensional unit vector as identity" in { + it should "calculate right outer product with DenseVector with one-dimensional unit SparseVector as identity" in { + val vec = DenseVector(Array(1, 0, 1, 0, 0)) + val unit = SparseVector(1, Array(0), Array(1)) + + vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1))) + } + + it should "calculate left outer product for DenseVector with one-dimensional unit DenseVector as identity" in { val vec = DenseVector(Array(1, 2, 3, 4, 5)) val unit = DenseVector(1) unit.outer(vec) should equal(DenseMatrix(1, vec.size, vec.data)) } - it should "calculate outer product of one-dimensional dense vectors as multiplication" in { + it should "calculate left outer product for SparseVector with one-dimensional unit DenseVector as identity" in { + val vec = SparseVector(5, Array(0, 1, 2, 3, 4), Array(1, 2, 3, 4, 5)) + val unit = DenseVector(1) + + unit.outer(vec) should equal(SparseMatrix.fromCOO(1, vec.size, (0, 0, 1), (0, 1, 2), (0, 2, 3), (0, 3, 4), (0, 4, 5))) + } + + //==================================================================================================================== + + it should "calculate outer product with DenseVector via 
multiplication if both vectors are one-dimensional" in { val vec1 = DenseVector(Array(2)) val vec2 = DenseVector(Array(3)) vec1.outer(vec2) should be(DenseMatrix(1, 1, 2 * 3)) } - it should "calculate outer product of one-dimensional sparse vectors as multiplication" in { - val vec1 = SparseVector(1, Array(0), Array(2)) - val vec2 = DenseVector(Array(3)) + it should "calculate outer product with SparseVector via multiplication if both vectors are one-dimensioan" in { + val vec1 = DenseVector(Array(2)) + val vec2 = SparseVector(1, Array(0), Array(3)) - // TODO: Next line fails due to dummy impl of outer for sparse vectors: vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) - vec2.outer(vec1) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) } - // it should "fail in case of calculation dot product with different size vector" in { // val vec1 = DenseVector(Array(1, 0)) // val vec2 = DenseVector(Array(0)) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala index 5ed0b379eab07..1f60f12d102b6 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala @@ -119,6 +119,40 @@ class SparseVectorSuite extends FlatSpec with Matchers { vec1.dot(vec2) should be(0) } + //==================================================================================================================== + + it should "calculate outer product with SparseVector correctly as SparseMatrix" in { + val vec1 = SparseVector(3, Array(0, 2), Array(1, 1)) + val vec2 = SparseVector(3, Array(1), Array(1)) + + vec1.outer(vec2) should be(an[SparseMatrix]) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1))) + } + + it should "calculate outer product with DenseVector correctly as SparseMatrix" in { + 
val vec1 = SparseVector(3, Array(0, 2), Array(1, 1)) + val vec2 = DenseVector(Array(0, 1, 0)) + + vec1.outer(vec2) should be(an[SparseMatrix]) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1))) + } + + it should "calculate outer product with SparseVector via multiplication if bothe vectors are one-dimensional" in { + val vec1 = SparseVector.fromCOO(1, (0, 2)) + val vec2 = SparseVector.fromCOO(1, (0, 3)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) + } + + it should "calculate outer product with DenseVector via multiplication if both vectors are one-dimensional" in { + val vec1 = SparseVector(1, Array(0), Array(2)) + val vec2 = DenseVector(Array(3)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) + } + + //==================================================================================================================== + it should "fail in case of calculation dot product with different size vector" in { val vec1 = SparseVector.fromCOO(4, (0, 1), (2, 1)) val vec2 = DenseVector(Array(0, 1, 0)) From f70f5e0be5851d98cbbb4d0572abfb8294af3b0f Mon Sep 17 00:00:00 2001 From: daniel-pape Date: Fri, 21 Aug 2015 21:38:05 +0200 Subject: [PATCH 09/16] Added test case. 
--- .../org/apache/flink/ml/math/DenseVectorSuite.scala | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala index 0c92ad3eaa3fb..4cc2c927c0568 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala @@ -79,7 +79,7 @@ class DenseVectorSuite extends FlatSpec with Matchers { } //==================================================================================================================== - it should "calculate outer product with DenseVectors correctly as DenseMatrix" in { + it should "calculate outer product with DenseVector correctly as DenseMatrix" in { val vec1 = DenseVector(Array(1, 0, 1)) val vec2 = DenseVector(Array(0, 1, 0)) @@ -97,7 +97,14 @@ class DenseVectorSuite extends FlatSpec with Matchers { //==================================================================================================================== - it should "calculate outer product of a DenseVector with a SparseVector" in { + it should "calculate outer product with a DenseVector correctly as DenseMatrix 2" in { + val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) + val vec2 = DenseVector(Array(0, 0, 1, 0, 1)) + + vec1.outer(vec2) should be(DenseMatrix(5, 5, Array(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))) + } + + it should "calculate outer product with a SparseVector correctly as SparseMatrix 2" in { val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1)) From 503e4c04416c436da31f9340448420198b495d7b Mon Sep 17 00:00:00 2001 From: Daniel Pape Date: Sun, 30 Aug 2015 22:11:53 +0200 Subject: [PATCH 10/16] Added method documentation for outer product methods. 
--- .../org/apache/flink/ml/math/DenseVector.scala | 14 ++++++-------- .../org/apache/flink/ml/math/SparseVector.scala | 16 +++++++--------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala index a8741256a44cb..a95117083d5d8 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala @@ -102,15 +102,13 @@ case class DenseVector( } } - /** Returns the outer product of the recipient and the argument + /** Returns the outer product (a.k.a. Kronecker product) of `this` + * with `other`. The result will be given in [[org.apache.flink.ml.math.SparseMatrix]] + * representation if `other` is sparse and as [[org.apache.flink.ml.math.DenseMatrix]] otherwise. * - * - * @param other - * @return - * - * TODO: Dense x Dense should yield Dense, - * Sparse x Sparse should yield Sparse, - * Dense x Sparse (and the other way around) should yield Sparse + * @param other a Vector + * @return the [[org.apache.flink.ml.math.Matrix]] which equals the outer product of `this` + * with `other.` */ override def outer(other: Vector): Matrix = { val numRows = size diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index 0ba0158616c2a..964b4dcb832c9 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -85,17 +85,15 @@ case class SparseVector( } } - /** Returns the outer product of the recipient and the argument + /** Returns the outer product (a.k.a. Kronecker product) of `this` + * with `other`.
The result is given in [[org.apache.flink.ml.math.SparseMatrix]] + * representation. * - * - * @param other - * @return - * - * TODO: Dense x Dense should yield Dense, - * Sparse x Sparse should yield Sparse, - * Dense x Sparse (and the other way around) should yield Sparse + * @param other a Vector + * @return the [[org.apache.flink.ml.math.SparseMatrix]] which equals the outer product of `this` + * with `other.` */ - override def outer(other: Vector): Matrix = { + override def outer(other: Vector): SparseMatrix = { val numRows = size val numCols = other.size From 9f337f3d117d025e26578a96fafde2cdd7b2df72 Mon Sep 17 00:00:00 2001 From: Daniel Pape Date: Sun, 30 Aug 2015 22:46:11 +0200 Subject: [PATCH 11/16] Removed marker comments from test suites and also add the missing test to SparseVector suite that correspond to the one from the suite for DenseVector. --- .../flink/ml/math/DenseVectorSuite.scala | 17 ------ .../flink/ml/math/SparseVectorSuite.scala | 58 ++++++++++++++----- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala index 4cc2c927c0568..6173fe00d9685 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala @@ -78,7 +78,6 @@ class DenseVectorSuite extends FlatSpec with Matchers { } } - //==================================================================================================================== it should "calculate outer product with DenseVector correctly as DenseMatrix" in { val vec1 = DenseVector(Array(1, 0, 1)) val vec2 = DenseVector(Array(0, 1, 0)) @@ -95,8 +94,6 @@ class DenseVectorSuite extends FlatSpec with Matchers { vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1))) } - 
//==================================================================================================================== - it should "calculate outer product with a DenseVector correctly as DenseMatrix 2" in { val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) val vec2 = DenseVector(Array(0, 0, 1, 0, 1)) @@ -111,8 +108,6 @@ class DenseVectorSuite extends FlatSpec with Matchers { vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1), (0, 4, 1), (2, 2, 1), (2, 4, 1))) } - //==================================================================================================================== - it should "calculate right outer product with DenseVector with one-dimensional unit DenseVector as identity" in { val vec = DenseVector(Array(1, 0, 1, 0, 0)) val unit = DenseVector(1) @@ -141,8 +136,6 @@ class DenseVectorSuite extends FlatSpec with Matchers { unit.outer(vec) should equal(SparseMatrix.fromCOO(1, vec.size, (0, 0, 1), (0, 1, 2), (0, 2, 3), (0, 3, 4), (0, 4, 5))) } - //==================================================================================================================== - it should "calculate outer product with DenseVector via multiplication if both vectors are one-dimensional" in { val vec1 = DenseVector(Array(2)) val vec2 = DenseVector(Array(3)) @@ -157,16 +150,6 @@ class DenseVectorSuite extends FlatSpec with Matchers { vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) } -// it should "fail in case of calculation dot product with different size vector" in { -// val vec1 = DenseVector(Array(1, 0)) -// val vec2 = DenseVector(Array(0)) -// -// intercept[IllegalArgumentException] { -// vec1.dot(vec2) -// } -// } - //==================================================================================================================== - it should "calculate magnitude of vector" in { val vec = DenseVector(Array(1, 4, 8)) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala 
b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala index 1f60f12d102b6..06a24cbd93cf9 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala @@ -119,7 +119,14 @@ class SparseVectorSuite extends FlatSpec with Matchers { vec1.dot(vec2) should be(0) } - //==================================================================================================================== + it should "fail in case of calculation dot product with different size vector" in { + val vec1 = SparseVector.fromCOO(4, (0, 1), (2, 1)) + val vec2 = DenseVector(Array(0, 1, 0)) + + intercept[IllegalArgumentException] { + vec1.dot(vec2) + } + } it should "calculate outer product with SparseVector correctly as SparseMatrix" in { val vec1 = SparseVector(3, Array(0, 2), Array(1, 1)) @@ -137,7 +144,43 @@ class SparseVectorSuite extends FlatSpec with Matchers { vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1))) } - it should "calculate outer product with SparseVector via multiplication if bothe vectors are one-dimensional" in { + it should "calculate outer product with a DenseVector correctly as SparseMatrix 2" in { + val vec1 = SparseVector(5, Array(0, 2), Array(1, 1)) + val vec2 = DenseVector(Array(0, 0, 1, 0, 1)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1), (0, 4, 1), (2, 2, 1), (2, 4, 1))) + } + + it should "calculate outer product with a SparseVector correctly as SparseMatrix 2" in { + val vec1 = SparseVector(5, Array(0, 2), Array(1, 1)) + val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1)) + + vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1), (0, 4, 1), (2, 2, 1), (2, 4, 1))) + } + + + it should "calculate right outer product with DenseVector with one-dimensional unit DenseVector as identity" in { + val vec = SparseVector(5, Array(0, 2), Array(1, 1)) + val unit = 
DenseVector(1) + + vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1))) + } + + it should "calculate right outer product with DenseVector with one-dimensional unit SparseVector as identity" in { + val vec = SparseVector(5, Array(0, 2), Array(1, 1)) + val unit = SparseVector(1, Array(0), Array(1)) + + vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1))) + } + + it should "calculate left outer product for SparseVector with one-dimensional unit DenseVector as identity" in { + val vec = SparseVector(5, Array(0, 1, 2, 3, 4), Array(1, 2, 3, 4, 5)) + val unit = DenseVector(1) + + unit.outer(vec) should equal(SparseMatrix.fromCOO(1, vec.size, (0, 0, 1), (0, 1, 2), (0, 2, 3), (0, 3, 4), (0, 4, 5))) + } + + it should "calculate outer product with SparseVector via multiplication if both vectors are one-dimensional" in { val vec1 = SparseVector.fromCOO(1, (0, 2)) val vec2 = SparseVector.fromCOO(1, (0, 3)) @@ -151,17 +194,6 @@ class SparseVectorSuite extends FlatSpec with Matchers { vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) } - //==================================================================================================================== - - it should "fail in case of calculation dot product with different size vector" in { - val vec1 = SparseVector.fromCOO(4, (0, 1), (2, 1)) - val vec2 = DenseVector(Array(0, 1, 0)) - - intercept[IllegalArgumentException] { - vec1.dot(vec2) - } - } - it should "calculate magnitude of vector" in { val vec = SparseVector.fromCOO(3, (0, 1), (1, 4), (2, 8)) From 9be4005b5b477426399595b1d9dd6a3522ca31ce Mon Sep 17 00:00:00 2001 From: Daniel Pape Date: Sun, 27 Sep 2015 19:31:43 +0200 Subject: [PATCH 12/16] Incorporated suggestions from the review and polished code a little. Reduced warnings by removing val keyword from case class field and added missing parameter documentation. 
--- .../apache/flink/ml/math/DenseVector.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala index a95117083d5d8..d60e30a26c8b2 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala @@ -27,7 +27,7 @@ import breeze.linalg.{SparseVector => BreezeSparseVector, DenseVector => BreezeD * @param data Array of doubles to store the vector elements */ case class DenseVector( - val data: Array[Double]) + data: Array[Double]) extends Vector with Serializable { @@ -76,8 +76,8 @@ case class DenseVector( /** Updates the element at the given index with the provided value * - * @param index - * @param value + * @param index Index whose value is updated. + * @param value The value used to update the index. 
*/ override def update(index: Int, value: Double): Unit = { require(0 <= index && index < data.length, index + " not in [0, " + data.length + ")") @@ -115,19 +115,19 @@ case class DenseVector( val numCols = other.size other match { - case SparseVector(size, indices, data_) => - val entries: Array[(Int, Int, Double)] = for { - i <- (0 until numRows).toArray - j <- indices - value = this(i) * other(j) + case sv @ SparseVector(_, _, _) => + val entries = for { + i <- 0 until numRows + j <- sv.indices + value = this(i) * sv.data(sv.indices.indexOf(j)) if value != 0 } yield (i, j, value) SparseMatrix.fromCOO(numRows, numCols, entries) case _ => val values = for { - i <- (0 until numRows) - j <- (0 until numCols) + i <- 0 until numRows + j <- 0 until numCols } yield this(i) * other(j) DenseMatrix(numRows, numCols, values.toArray) From 7620dd7ca064f2415a35e9ed2486dbaf1c3b65b1 Mon Sep 17 00:00:00 2001 From: Daniel Pape Date: Sun, 27 Sep 2015 19:43:40 +0200 Subject: [PATCH 13/16] Replaced implementation of `outer' method in order to avoid call to `SparseVector.apply` (which involves binary search). Reduced warning by three: Removed unnecessary `val` keyword from case class fields. 
--- .../apache/flink/ml/math/SparseVector.scala | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index 964b4dcb832c9..40754bc8252f7 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -26,15 +26,16 @@ import scala.util.Sorting * indices of the non-zero vector entries and the other the corresponding vector entries */ case class SparseVector( - val size: Int, - val indices: Array[Int], - val data: Array[Double]) + size: Int, + indices: Array[Int], + data: Array[Double]) extends Vector with Serializable { + /** Updates the element at the given index with the provided value * - * @param index - * @param value + * @param index Index whose value is updated. + * @param value The value used to update the index. 
*/ override def update(index: Int, value: Double): Unit = { val resolvedIndex = locate(index) @@ -97,18 +98,23 @@ case class SparseVector( val numRows = size val numCols = other.size - val otherIndices = other match { - case sv @ SparseVector(_, _, _) => sv.indices - case dv @ DenseVector(_) => (0 until dv.size).toArray + val entries = other match { + case sv @ SparseVector(_, _, _) => + for { + i <- indices + j <- sv.indices + value = data(indices.indexOf(i)) * sv.data(sv.indices.indexOf(j)) + if value != 0 + } yield (i, j, value) + case _ => + for { + i <- indices + j <- 0 until numCols + value = data(indices.indexOf(i)) * other(j) + if value != 0 + } yield (i, j, value) } - val entries = for { - i <- indices - j <- otherIndices - value = this(i) * other(j) - if value != 0 - } yield (i, j, value) - SparseMatrix.fromCOO(numRows, numCols, entries) } From 856da854386fd0a6789c4073fe820545d2f61af2 Mon Sep 17 00:00:00 2001 From: dp Date: Wed, 30 Sep 2015 10:40:49 +0200 Subject: [PATCH 14/16] Build fix/style check: Adjusted line length to 100. 
--- .../flink/ml/math/DenseVectorSuite.scala | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala index 6173fe00d9685..114c617b9c67d 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala @@ -25,13 +25,13 @@ class DenseVectorSuite extends FlatSpec with Matchers { behavior of "Flink's DenseVector" it should "contain the initialization data" in { - val data = Array.range(1,10) + val data = Array.range(1, 10) val vector = DenseVector(data) assertResult(data.length)(vector.size) - data.zip(vector.map(_._2)).foreach{case (expected, actual) => assertResult(expected)(actual)} + data.zip(vector.map(_._2)).foreach { case (expected, actual) => assertResult(expected)(actual) } } it should "fail in case of an illegal element access" in { @@ -47,7 +47,7 @@ class DenseVectorSuite extends FlatSpec with Matchers { vector(size) } } - + it should "calculate dot product with DenseVector" in { val vec1 = DenseVector(Array(1, 0, 1)) val vec2 = DenseVector(Array(0, 1, 0)) @@ -98,52 +98,63 @@ class DenseVectorSuite extends FlatSpec with Matchers { val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) val vec2 = DenseVector(Array(0, 0, 1, 0, 1)) - vec1.outer(vec2) should be(DenseMatrix(5, 5, Array(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))) + val values = Array(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + vec1.outer(vec2) should be(DenseMatrix(5, 5, values)) } it should "calculate outer product with a SparseVector correctly as SparseMatrix 2" in { val vec1 = DenseVector(Array(1, 0, 1, 0, 0)) val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1)) - vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 
5, (0, 2, 1), (0, 4, 1), (2, 2, 1), (2, 4, 1))) + val entries = Iterable((0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0)) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, entries)) } - it should "calculate right outer product with DenseVector with one-dimensional unit DenseVector as identity" in { + + + it should s"""calculate right outer product with DenseVector + |with one-dimensional unit DenseVector as identity""".stripMargin in { val vec = DenseVector(Array(1, 0, 1, 0, 0)) val unit = DenseVector(1) vec.outer(unit) should equal(DenseMatrix(vec.size, 1, vec.data)) } - it should "calculate right outer product with DenseVector with one-dimensional unit SparseVector as identity" in { + it should s"""calculate right outer product with DenseVector + |with one-dimensional unit SparseVector as identity""".stripMargin in { val vec = DenseVector(Array(1, 0, 1, 0, 0)) val unit = SparseVector(1, Array(0), Array(1)) vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1))) } - it should "calculate left outer product for DenseVector with one-dimensional unit DenseVector as identity" in { + it should s"""calculate left outer product for DenseVector + |with one-dimensional unit DenseVector as identity""".stripMargin in { val vec = DenseVector(Array(1, 2, 3, 4, 5)) val unit = DenseVector(1) unit.outer(vec) should equal(DenseMatrix(1, vec.size, vec.data)) } - it should "calculate left outer product for SparseVector with one-dimensional unit DenseVector as identity" in { + it should s"""calculate left outer product for SparseVector + |with one-dimensional unit DenseVector as identity""".stripMargin in { val vec = SparseVector(5, Array(0, 1, 2, 3, 4), Array(1, 2, 3, 4, 5)) val unit = DenseVector(1) - unit.outer(vec) should equal(SparseMatrix.fromCOO(1, vec.size, (0, 0, 1), (0, 1, 2), (0, 2, 3), (0, 3, 4), (0, 4, 5))) + val entries = Iterable((0, 0, 1.0), (0, 1, 2.0), (0, 2, 3.0), (0, 3, 4.0), (0, 4, 5.0)) + unit.outer(vec) should 
equal(SparseMatrix.fromCOO(1, vec.size, entries)) } - it should "calculate outer product with DenseVector via multiplication if both vectors are one-dimensional" in { + it should s"""calculate outer product with DenseVector + |via multiplication if both vectors are one-dimensional""".stripMargin in { val vec1 = DenseVector(Array(2)) val vec2 = DenseVector(Array(3)) vec1.outer(vec2) should be(DenseMatrix(1, 1, 2 * 3)) } - it should "calculate outer product with SparseVector via multiplication if both vectors are one-dimensioan" in { + it should s"""calculate outer product with SparseVector + |via multiplication if both vectors are one-dimensional""".stripMargin in { val vec1 = DenseVector(Array(2)) val vec2 = SparseVector(1, Array(0), Array(3)) From 5f559a3156bb2afe04dd492cc6439118b7549173 Mon Sep 17 00:00:00 2001 From: dp Date: Wed, 30 Sep 2015 10:49:29 +0200 Subject: [PATCH 15/16] Build fix/style check: Adjusted line length to 100. --- .../flink/ml/math/SparseVectorSuite.scala | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala index 06a24cbd93cf9..11609caf3125b 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala @@ -29,7 +29,7 @@ class SparseVectorSuite extends FlatSpec with Matchers { sparseVector(0) should equal(1) - for(index <- 1 until 3) { + for (index <- 1 until 3) { sparseVector(index) should equal(0) } } @@ -53,13 +53,15 @@ class SparseVectorSuite extends FlatSpec with Matchers { denseVector should equal(expectedDenseVector) val dataMap = data. - groupBy{_._1}. - mapValues{ + groupBy { + _._1 + }. 
+ mapValues { entries => entries.map(_._2).reduce(_ + _) } - for(index <- 0 until size) { + for (index <- 0 until size) { sparseVector(index) should be(dataMap.getOrElse(index, 0)) } } @@ -82,7 +84,7 @@ class SparseVectorSuite extends FlatSpec with Matchers { } intercept[IllegalArgumentException] { - val sparseVector = SparseVector.fromCOO(5, (0, 1), (4,3), (5, 1)) + val sparseVector = SparseVector.fromCOO(5, (0, 1), (4, 3), (5, 1)) } } @@ -95,7 +97,7 @@ class SparseVectorSuite extends FlatSpec with Matchers { copy(3) = 3 - sparseVector should not equal(copy) + sparseVector should not equal (copy) } it should "calculate dot product with SparseVector" in { @@ -148,46 +150,54 @@ class SparseVectorSuite extends FlatSpec with Matchers { val vec1 = SparseVector(5, Array(0, 2), Array(1, 1)) val vec2 = DenseVector(Array(0, 0, 1, 0, 1)) - vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1), (0, 4, 1), (2, 2, 1), (2, 4, 1))) + val entries = Iterable((0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0)) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, entries)) } it should "calculate outer product with a SparseVector correctly as SparseMatrix 2" in { val vec1 = SparseVector(5, Array(0, 2), Array(1, 1)) val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1)) - vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, (0, 2, 1), (0, 4, 1), (2, 2, 1), (2, 4, 1))) + val entries = Iterable((0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0)) + vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, entries)) } - it should "calculate right outer product with DenseVector with one-dimensional unit DenseVector as identity" in { + it should s"""calculate right outer product with DenseVector + |with one-dimensional unit DenseVector as identity""".stripMargin in { val vec = SparseVector(5, Array(0, 2), Array(1, 1)) val unit = DenseVector(1) vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1))) } - it should "calculate right outer product with 
DenseVector with one-dimensional unit SparseVector as identity" in { + it should s"""calculate right outer product with DenseVector + |with one-dimensional unit SparseVector as identity""".stripMargin in { val vec = SparseVector(5, Array(0, 2), Array(1, 1)) val unit = SparseVector(1, Array(0), Array(1)) vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1))) } - it should "calculate left outer product for SparseVector with one-dimensional unit DenseVector as identity" in { + it should s"""calculate left outer product for SparseVector + |with one-dimensional unit DenseVector as identity""".stripMargin in { val vec = SparseVector(5, Array(0, 1, 2, 3, 4), Array(1, 2, 3, 4, 5)) val unit = DenseVector(1) - unit.outer(vec) should equal(SparseMatrix.fromCOO(1, vec.size, (0, 0, 1), (0, 1, 2), (0, 2, 3), (0, 3, 4), (0, 4, 5))) + val entries = Iterable((0, 0, 1.0), (0, 1, 2.0), (0, 2, 3.0), (0, 3, 4.0), (0, 4, 5.0)) + unit.outer(vec) should equal(SparseMatrix.fromCOO(1, vec.size, entries)) } - it should "calculate outer product with SparseVector via multiplication if both vectors are one-dimensional" in { + it should s"""calculate outer product with SparseVector + |via multiplication if both vectors are one-dimensional""".stripMargin in { val vec1 = SparseVector.fromCOO(1, (0, 2)) val vec2 = SparseVector.fromCOO(1, (0, 3)) vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3))) } - it should "calculate outer product with DenseVector via multiplication if both vectors are one-dimensional" in { + it should s"""calculate outer product with DenseVector + |via multiplication if both vectors are one-dimensional""".stripMargin in { val vec1 = SparseVector(1, Array(0), Array(2)) val vec2 = DenseVector(Array(3)) From edbbdca4a317a72f274a340273683f85949f6253 Mon Sep 17 00:00:00 2001 From: dp Date: Wed, 23 Dec 2015 22:15:34 +0100 Subject: [PATCH 16/16] Incorporated suggestion from Till Rohrmann's code review ("avoid binary search by using 
zipWithIndex"). --- .../main/scala/org/apache/flink/ml/math/DenseVector.scala | 6 +++--- .../scala/org/apache/flink/ml/math/SparseVector.scala | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala index d60e30a26c8b2..5e70741ee3623 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala @@ -115,11 +115,11 @@ case class DenseVector( val numCols = other.size other match { - case sv @ SparseVector(_, _, _) => + case sv: SparseVector => val entries = for { i <- 0 until numRows - j <- sv.indices - value = this(i) * sv.data(sv.indices.indexOf(j)) + (j, k) <- sv.indices.zipWithIndex + value = this(i) * sv.data(k) if value != 0 } yield (i, j, value) diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index 40754bc8252f7..acee6077d6108 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -99,11 +99,11 @@ case class SparseVector( val numCols = other.size val entries = other match { - case sv @ SparseVector(_, _, _) => + case sv: SparseVector => for { - i <- indices - j <- sv.indices - value = data(indices.indexOf(i)) * sv.data(sv.indices.indexOf(j)) + (i, k) <- indices.zipWithIndex + (j, l) <- sv.indices.zipWithIndex + value = data(k) * sv.data(l) if value != 0 } yield (i, j, value) case _ =>