From 596aba6c80bb2c9c5f90f6cdeb5a0c20e3590f55 Mon Sep 17 00:00:00 2001 From: Xusen Yin Date: Thu, 14 Jul 2016 16:40:55 -0700 Subject: [PATCH 1/2] fix vector --- .../scala/org/apache/spark/examples/mllib/LDAExample.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index 3fbf8e03339e8..f8d43e085e5b6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -24,8 +24,10 @@ import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel, RegexTokenizer, StopWordsRemover} +import org.apache.spark.ml.linalg.{Vector => MLVector} import org.apache.spark.mllib.clustering.{DistributedLDAModel, EMLDAOptimizer, LDA, OnlineLDAOptimizer} import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.mllib.linalg.VectorImplicits import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SparkSession} @@ -225,7 +227,7 @@ object LDAExample { val documents = model.transform(df) .select("features") .rdd - .map { case Row(features: Vector) => features } + .map { case Row(features: MLVector) => VectorImplicits.mlVectorToMLlibVector(features) } .zipWithIndex() .map(_.swap) From de5eeabfb10a684b2c8f938d88ebc3dec88c387c Mon Sep 17 00:00:00 2001 From: Xusen Yin Date: Thu, 21 Jul 2016 14:55:27 -0700 Subject: [PATCH 2/2] use API instead of implicits --- .../scala/org/apache/spark/examples/mllib/LDAExample.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index f8d43e085e5b6..ef67841f0cbee 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -26,8 +26,7 @@ import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel, RegexTokenizer, StopWordsRemover} import org.apache.spark.ml.linalg.{Vector => MLVector} import org.apache.spark.mllib.clustering.{DistributedLDAModel, EMLDAOptimizer, LDA, OnlineLDAOptimizer} -import org.apache.spark.mllib.linalg.Vector -import org.apache.spark.mllib.linalg.VectorImplicits +import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SparkSession} @@ -227,7 +226,7 @@ object LDAExample { val documents = model.transform(df) .select("features") .rdd - .map { case Row(features: MLVector) => VectorImplicits.mlVectorToMLlibVector(features) } + .map { case Row(features: MLVector) => Vectors.fromML(features) } .zipWithIndex() .map(_.swap)