Remove last usage of jblas, in tests
srowen committed Mar 7, 2016
1 parent 03f57a6 commit c7eea5e
Showing 10 changed files with 107 additions and 121 deletions.
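The replacement is mechanical throughout: jblas DoubleMatrix values become Breeze DenseMatrix/DenseVector values or plain double[] arrays. As a reading aid for the hunks below — a hypothetical sketch, not code from this commit — the Breeze equivalents of the jblas idioms being removed look roughly like this:

    import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}

    // jblas and Breeze both store matrices column-major, so backing arrays carry over.
    val a = new BDM(2, 2, Array(1.0, 2.0, 3.0, 4.0))  // jblas: new DoubleMatrix(2, 2, data: _*)
    val x = new BDV(Array(1.0, 1.0))                  // jblas vectors were also DoubleMatrix

    val y = a * x                // jblas: a.mmul(x)
    val g = a.t * a              // jblas: a.transpose.mmul(a)
    val d = x.dot(x)             // jblas: x.dot(x)
    val q = (x.t * a * x) / 2.0  // jblas: x.transpose().mmul(a).mmul(x).mul(0.5).get(0)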
1 change: 0 additions & 1 deletion LICENSE
@@ -237,7 +237,6 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 
      (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
      (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
-     (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/)
      (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
      (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
      (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org)
2 changes: 1 addition & 1 deletion docs/mllib-data-types.md
@@ -11,7 +11,7 @@ MLlib supports local vectors and matrices stored on a single machine,
 as well as distributed matrices backed by one or more RDDs.
 Local vectors and local matrices are simple data models
 that serve as public interfaces. The underlying linear algebra operations are provided by
-[Breeze](http://www.scalanlp.org/) and [jblas](http://jblas.org/).
+[Breeze](http://www.scalanlp.org/).
 A training example used in supervised learning is called a "labeled point" in MLlib.
 
 ## Local vector
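The doc change only narrows the stated dependency; local vectors and matrices are still created through MLlib's own API, with Breeze as an internal detail. A minimal example using the standard MLlib API (not part of this diff):

    import org.apache.spark.mllib.linalg.{Vector, Vectors}

    // A dense local vector backed by a double[], and a sparse one built
    // from (index, value) pairs over a vector of the given size.
    val dense: Vector = Vectors.dense(1.0, 0.0, 3.0)
    val sparse: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))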
6 changes: 0 additions & 6 deletions mllib/pom.xml
@@ -62,12 +62,6 @@
       <artifactId>spark-graphx_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.jblas</groupId>
-      <artifactId>jblas</artifactId>
-      <version>${jblas.version}</version>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.scalanlp</groupId>
       <artifactId>breeze_${scala.binary.version}</artifactId>
@@ -24,7 +24,6 @@
 import scala.Tuple2;
 import scala.Tuple3;
 
-import org.jblas.DoubleMatrix;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -48,14 +47,14 @@ public void tearDown() {
     sc = null;
   }
 
-  void validatePrediction(
+  private void validatePrediction(
       MatrixFactorizationModel model,
       int users,
       int products,
-      DoubleMatrix trueRatings,
+      double[] trueRatings,
       double matchThreshold,
       boolean implicitPrefs,
-      DoubleMatrix truePrefs) {
+      double[] truePrefs) {
     List<Tuple2<Integer, Integer>> localUsersProducts = new ArrayList<>(users * products);
     for (int u=0; u < users; ++u) {
       for (int p=0; p < products; ++p) {
@@ -68,7 +67,7 @@ void validatePrediction(
     if (!implicitPrefs) {
       for (Rating r: predictedRatings) {
         double prediction = r.rating();
-        double correct = trueRatings.get(r.user(), r.product());
+        double correct = trueRatings[r.user() * products + r.product()];
         Assert.assertTrue(String.format("Prediction=%2.4f not below match threshold of %2.2f",
             prediction, matchThreshold), Math.abs(prediction - correct) < matchThreshold);
       }
@@ -79,9 +78,9 @@
       double denom = 0.0;
       for (Rating r: predictedRatings) {
         double prediction = r.rating();
-        double truePref = truePrefs.get(r.user(), r.product());
+        double truePref = truePrefs[r.user() * products + r.product()];
         double confidence = 1.0 +
-          /* alpha = */ 1.0 * Math.abs(trueRatings.get(r.user(), r.product()));
+          /* alpha = */ 1.0 * Math.abs(trueRatings[r.user() * products + r.product()]);
         double err = confidence * (truePref - prediction) * (truePref - prediction);
         sqErr += err;
         denom += confidence;
@@ -98,8 +97,8 @@ public void runALSUsingStaticMethods() {
     int iterations = 15;
     int users = 50;
     int products = 100;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
-      users, products, features, 0.7, false, false);
+    Tuple3<List<Rating>, double[], double[]> testData =
+      ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
     MatrixFactorizationModel model = ALS.train(data.rdd(), features, iterations);
@@ -112,8 +111,8 @@ public void runALSUsingConstructor() {
     int iterations = 15;
     int users = 100;
     int products = 200;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
-      users, products, features, 0.7, false, false);
+    Tuple3<List<Rating>, double[], double[]> testData =
+      ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
 
@@ -129,8 +128,8 @@ public void runImplicitALSUsingStaticMethods() {
     int iterations = 15;
     int users = 80;
     int products = 160;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
-      users, products, features, 0.7, true, false);
+    Tuple3<List<Rating>, double[], double[]> testData =
+      ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
     MatrixFactorizationModel model = ALS.trainImplicit(data.rdd(), features, iterations);
@@ -143,8 +142,8 @@ public void runImplicitALSUsingConstructor() {
     int iterations = 15;
     int users = 100;
     int products = 200;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
-      users, products, features, 0.7, true, false);
+    Tuple3<List<Rating>, double[], double[]> testData =
+      ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
 
@@ -161,8 +160,8 @@ public void runImplicitALSWithNegativeWeight() {
     int iterations = 15;
     int users = 80;
     int products = 160;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
-      users, products, features, 0.7, true, true);
+    Tuple3<List<Rating>, double[], double[]> testData =
+      ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, true);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
     MatrixFactorizationModel model = new ALS().setRank(features)
@@ -179,9 +178,9 @@ public void runRecommend() {
     int iterations = 10;
     int users = 200;
     int products = 50;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
-      users, products, features, 0.7, true, false);
-    JavaRDD<Rating> data = sc.parallelize(testData._1());
+    List<Rating> testData = ALSSuite.generateRatingsAsJava(
+      users, products, features, 0.7, true, false)._1();
+    JavaRDD<Rating> data = sc.parallelize(testData);
     MatrixFactorizationModel model = new ALS().setRank(features)
       .setIterations(iterations)
       .setImplicitPrefs(true)
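In the ALS suite above, the true rating and preference matrices are now passed as flat double[] arrays rather than DoubleMatrix, so the two-argument get(user, product) lookup becomes explicit row-major indexing. A small sketch of the equivalence, assuming (as the new indexing implies) that the generator lays the users-by-products matrix out row by row — the values and names here are illustrative, not from the suite:

    // Entry (u, p) of a users-by-products matrix flattened row-major sits at u * products + p.
    val users = 2
    val products = 3
    val trueRatings = Array(
      1.0, 2.0, 3.0,  // user 0
      4.0, 5.0, 6.0)  // user 1

    def ratingOf(u: Int, p: Int): Double = trueRatings(u * products + p)

    assert(ratingOf(1, 2) == 6.0)  // replaces trueRatings.get(1, 2)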
@@ -19,14 +19,13 @@
 
 import java.io.Serializable;
 import java.util.List;
+import java.util.Random;
 
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
-import org.jblas.DoubleMatrix;
-
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.util.LinearDataGenerator;
@@ -45,7 +44,8 @@ public void tearDown() {
     sc = null;
   }
 
-  double predictionError(List<LabeledPoint> validationData, RidgeRegressionModel model) {
+  private static double predictionError(List<LabeledPoint> validationData,
+                                        RidgeRegressionModel model) {
     double errorSum = 0;
     for (LabeledPoint point: validationData) {
       Double prediction = model.predict(point.features());
@@ -54,11 +54,14 @@ public void tearDown() {
     return errorSum / validationData.size();
   }
 
-  List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
-    org.jblas.util.Random.seed(42);
+  private static List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
     // Pick weights as random values distributed uniformly in [-0.5, 0.5]
-    DoubleMatrix w = DoubleMatrix.rand(numFeatures, 1).subi(0.5);
-    return LinearDataGenerator.generateLinearInputAsList(0.0, w.data, numPoints, 42, std);
+    Random random = new Random(42);
+    double[] w = new double[numFeatures];
+    for (int i = 0; i < w.length; i++) {
+      w[i] = random.nextDouble() - 0.5;
+    }
+    return LinearDataGenerator.generateLinearInputAsList(0.0, w, numPoints, 42, std);
   }
 
   @Test
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.classification
 import scala.collection.JavaConverters._
 import scala.util.Random
 
-import org.jblas.DoubleMatrix
+import breeze.linalg.{DenseVector => BDV}
 
 import org.apache.spark.{SparkException, SparkFunSuite}
 import org.apache.spark.mllib.linalg.Vectors
@@ -45,12 +45,11 @@ object SVMSuite {
       nPoints: Int,
       seed: Int): Seq[LabeledPoint] = {
     val rnd = new Random(seed)
-    val weightsMat = new DoubleMatrix(1, weights.length, weights: _*)
+    val weightsMat = new BDV(weights)
     val x = Array.fill[Array[Double]](nPoints)(
       Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
     val y = x.map { xi =>
-      val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) +
-        intercept + 0.01 * rnd.nextGaussian()
+      val yD = new BDV(xi).dot(weightsMat) + intercept + 0.01 * rnd.nextGaussian()
       if (yD < 0) 0.0 else 1.0
     }
     y.zip(x).map(p => LabeledPoint(p._1, Vectors.dense(p._2)))
@@ -19,28 +19,22 @@ package org.apache.spark.mllib.optimization
 
 import scala.util.Random
 
-import org.jblas.{DoubleMatrix, SimpleBlas}
+import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.util.TestingUtils._
 
 class NNLSSuite extends SparkFunSuite {
   /** Generate an NNLS problem whose optimal solution is the all-ones vector. */
-  def genOnesData(n: Int, rand: Random): (DoubleMatrix, DoubleMatrix) = {
-    val A = new DoubleMatrix(n, n, Array.fill(n*n)(rand.nextDouble()): _*)
-    val b = A.mmul(DoubleMatrix.ones(n, 1))
-
-    val ata = A.transpose.mmul(A)
-    val atb = A.transpose.mmul(b)
-
-    (ata, atb)
+  def genOnesData(n: Int, rand: Random): (BDM[Double], BDV[Double]) = {
+    val A = new BDM(n, n, Array.fill(n*n)(rand.nextDouble()))
+    val b = A * new BDV(Array.fill(n)(1.0))
+    (A.t * A, A.t * b)
   }
 
   /** Compute the objective value */
-  def computeObjectiveValue(ata: DoubleMatrix, atb: DoubleMatrix, x: DoubleMatrix): Double = {
-    val res = (x.transpose().mmul(ata).mmul(x)).mul(0.5).sub(atb.dot(x))
-    res.get(0)
-  }
+  def computeObjectiveValue(ata: BDM[Double], atb: BDV[Double], x: BDV[Double]): Double =
+    (x.t * ata * x) / 2.0 - atb.dot(x)
 
   test("NNLS: exact solution cases") {
     val n = 20
@@ -54,33 +48,34 @@
 
     for (k <- 0 until 100) {
       val (ata, atb) = genOnesData(n, rand)
-      val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+      val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
       assert(x.length === n)
-      val answer = DoubleMatrix.ones(n, 1)
-      SimpleBlas.axpy(-1.0, answer, x)
-      val solved = (x.norm2 < 1e-2) && (x.normmax < 1e-3)
-      if (solved) numSolved = numSolved + 1
+      val answer = new BDV(Array.fill(n)(1.0))
+      val solved =
+        (breeze.linalg.norm(x - answer) < 0.01) && // L2 norm
+        ((x - answer).toArray.map(_.abs).max < 0.001) // inf norm
+      if (solved) {
+        numSolved += 1
+      }
     }
 
     assert(numSolved > 50)
   }
 
   test("NNLS: nonnegativity constraint active") {
     val n = 5
-    // scalastyle:off
-    val ata = new DoubleMatrix(Array(
-      Array( 4.377, -3.531, -1.306, -0.139, 3.418),
-      Array(-3.531, 4.344, 0.934, 0.305, -2.140),
-      Array(-1.306, 0.934, 2.644, -0.203, -0.170),
-      Array(-0.139, 0.305, -0.203, 5.883, 1.428),
-      Array( 3.418, -2.140, -0.170, 1.428, 4.684)))
-    // scalastyle:on
-    val atb = new DoubleMatrix(Array(-1.632, 2.115, 1.094, -1.025, -0.636))
+    val ata = Array(
+      4.377, -3.531, -1.306, -0.139, 3.418,
+      -3.531, 4.344, 0.934, 0.305, -2.140,
+      -1.306, 0.934, 2.644, -0.203, -0.170,
+      -0.139, 0.305, -0.203, 5.883, 1.428,
+      3.418, -2.140, -0.170, 1.428, 4.684)
+    val atb = Array(-1.632, 2.115, 1.094, -1.025, -0.636)
 
     val goodx = Array(0.13025, 0.54506, 0.2874, 0.0, 0.028628)
 
     val ws = NNLS.createWorkspace(n)
-    val x = NNLS.solve(ata.data, atb.data, ws)
+    val x = NNLS.solve(ata, atb, ws)
    for (i <- 0 until n) {
      assert(x(i) ~== goodx(i) absTol 1E-3)
      assert(x(i) >= 0)
@@ -89,23 +84,21 @@
 
   test("NNLS: objective value test") {
     val n = 5
-    val ata = new DoubleMatrix(5, 5
-      , 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283
-      , 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884
-      , -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049
-      , 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819
-      , -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814
-    )
-    val atb = new DoubleMatrix(5, 1,
-      -31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017)
+    val ata = new BDM(5, 5, Array(
+      517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283,
+      242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884,
+      -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049,
+      130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819,
+      -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814))
+    val atb = new BDV(Array(-31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017))
 
     /** reference solution obtained from matlab function quadprog */
-    val refx = new DoubleMatrix(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
+    val refx = new BDV(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
     val refObj = computeObjectiveValue(ata, atb, refx)
 
 
     val ws = NNLS.createWorkspace(n)
-    val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+    val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
     val obj = computeObjectiveValue(ata, atb, x)
 
     assert(obj < refObj + 1E-5)
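For context, a small usage sketch of the rewritten NNLS helpers — assuming the suite's genOnesData and Spark's package-private NNLS are in scope. Because b = A * 1, the normal equations A^T A x = A^T b are solved exactly by the all-ones vector, which is what the exact-solution test expects NNLS to recover on most random instances:

    import scala.util.Random
    import breeze.linalg.{norm, DenseVector => BDV}

    val n = 10
    val rand = new Random(0)
    val (ata, atb) = genOnesData(n, rand)  // helper defined in the suite above
    val ws = NNLS.createWorkspace(n)
    val x = new BDV(NNLS.solve(ata.data, atb.data, ws))

    // Small when A^T A is well conditioned; the suite only requires a majority
    // of its 100 random instances to land this close.
    val err = norm(x - BDV.ones[Double](n))
    println(s"L2 distance from the all-ones solution: $err")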