From 95d4939900f327ecf51dabdb18a3c5852bc4999a Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Thu, 18 Jun 2015 19:41:50 -0700 Subject: [PATCH 01/10] Working DCT for 1D Doubles --- .../feature/DiscreteCosineTransformer.scala | 71 ++++++++++++++++++ .../DiscreteCosineTransformerSuite.scala | 74 +++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala create mode 100644 mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala new file mode 100644 index 0000000000000..0b616fbb69300 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import edu.emory.mathcs.jtransforms.dct._ + +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.UnaryTransformer +import org.apache.spark.ml.param.BooleanParam +import org.apache.spark.ml.util.Identifiable +import org.apache.spark.sql.types.{ArrayType, DataType, DoubleType} + +/** + * :: Experimental :: + * A feature transformer that takes the 1D discrete cosine transform of a real vector. + * It returns a real vector of the same length representing the DCT. The return vector is scaled + * such that the transform matrix is unitary. + * + * More information: https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II + */ +@Experimental +// TODO: extend to work for any numeric class +// TODO: handle 2D and 3D transforms +class DiscreteCosineTransformer(override val uid: String) + extends UnaryTransformer[Seq[Double], Seq[Double], DiscreteCosineTransformer] { + + def this() = this(Identifiable.randomUID("dct")) + + /** + * Indicates whether to perform the inverse DCT (true) or forward DCT (false). + * @return + */ + def inverse: BooleanParam = new BooleanParam( + this, "inverse", "Set transformer to perform inverse DCT") + + /** @group setParam */ + def setInverse(value: Boolean): this.type = set(inverse, value) + + /** @group getParam */ + def getInverse: Boolean = $(inverse) + + override protected def createTransformFunc: Seq[Double] => Seq[Double] = { vec : Seq[Double] => + val res = vec.toArray + val jTransformer = new DoubleDCT_1D(vec.length) + if ($(inverse)) jTransformer.inverse(res, true) else jTransformer.forward(res, true) + res + } + + override protected def validateInputType(inputType: DataType): Unit = { + require( + inputType == ArrayType(DoubleType, false), + s"Input type must be ArrayType(DoubleType, false) but got $inputType.") + } + + override protected def outputDataType: DataType = new ArrayType(DoubleType, false) +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala new file mode 100644 index 0000000000000..be3e2e16d47e3 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import scala.beans.BeanInfo +import scala.collection.mutable.ArrayBuffer + +import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D + +import org.apache.spark.SparkFunSuite +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.sql.{DataFrame, Row} + +@BeanInfo +case class DCTTestData(vec: Array[Double], wantedVec: Array[Double]) + +class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkContext { + import org.apache.spark.ml.feature.DiscreteCosineTransformerSuite._ + + test("forward transform of discrete cosine matches jTransforms result") { + val transformer = new DiscreteCosineTransformer() + .setInputCol("vec") + .setOutputCol("resultVec") + .setInverse(false) + val data = (0 until 128).map(_ => 2D * math.random - 1D).toArray + val expectedResult = data.clone() + (new DoubleDCT_1D(data.length)).forward(expectedResult, true) + val dataset = sqlContext.createDataFrame(Seq( + DCTTestData(data, expectedResult) + )) + testDCT(transformer, dataset) + } + + test("inverse transform of discrete cosine matches jTransforms result") { + val transformer = new DiscreteCosineTransformer() + .setInputCol("vec") + .setOutputCol("resultVec") + .setInverse(true) + val data = (0 until 128).map(_ => 2D * math.random - 1D).toArray + val expectedResult = data.clone() + (new DoubleDCT_1D(data.length)).inverse(expectedResult, true) + val dataset = sqlContext.createDataFrame(Seq( + DCTTestData(data, expectedResult) + )) + testDCT(transformer, dataset) + } +} + +object DiscreteCosineTransformerSuite extends SparkFunSuite { + + def testDCT(t: DiscreteCosineTransformer, dataset: DataFrame): Unit = { + t.transform(dataset) + .select("resultVec", "wantedVec") + .collect() + .foreach { case Row(resultVec: ArrayBuffer[Double], wantedVec : ArrayBuffer[Double]) => + assert(resultVec.zip(wantedVec).map(x => Math.pow(x._1 - x._2, 2)).sum < 1e-4) + } + } +} From 195d7aa9380195979bcb8d9df4f05bb2a41aed68 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Fri, 19 Jun 2015 20:59:54 -0700 Subject: [PATCH 02/10] Implement support for arbitrary numeric types --- .../feature/DiscreteCosineTransformer.scala | 38 +++++++++++++------ .../DiscreteCosineTransformerSuite.scala | 21 +++++----- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala index 0b616fbb69300..ac5ade4da0d5d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala @@ -23,7 +23,7 @@ import org.apache.spark.annotation.Experimental import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.BooleanParam import org.apache.spark.ml.util.Identifiable -import org.apache.spark.sql.types.{ArrayType, DataType, DoubleType} +import org.apache.spark.sql.types.{NumericType, ArrayType, DataType, DoubleType} /** * :: Experimental :: @@ -34,16 +34,15 @@ import org.apache.spark.sql.types.{ArrayType, DataType, DoubleType} * More information: https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II */ @Experimental -// TODO: extend to work for any numeric class -// TODO: handle 2D and 3D transforms -class DiscreteCosineTransformer(override val uid: String) - extends UnaryTransformer[Seq[Double], Seq[Double], DiscreteCosineTransformer] { +class DiscreteCosineTransformer[IN : Numeric](override val uid: String) + extends UnaryTransformer[Seq[IN], Seq[Double], DiscreteCosineTransformer[IN]] { def this() = this(Identifiable.randomUID("dct")) /** * Indicates whether to perform the inverse DCT (true) or forward DCT (false). - * @return + * Default: false + * @group param */ def inverse: BooleanParam = new BooleanParam( this, "inverse", "Set transformer to perform inverse DCT") @@ -54,17 +53,34 @@ class DiscreteCosineTransformer(override val uid: String) /** @group getParam */ def getInverse: Boolean = $(inverse) - override protected def createTransformFunc: Seq[Double] => Seq[Double] = { vec : Seq[Double] => - val res = vec.toArray + /** + * Indicates whether output type should be double (true) or single (false) floating point. + * Default: true + * @group param + */ + def doublePrecision: BooleanParam = new BooleanParam( + this, "doublePrecision", "Set transformer to use double floating point precision") + + /** @group setParam */ + def setDoublePrecision(value: Boolean): this.type = set(doublePrecision, value) + + /** @group getParam */ + def getDoublePrecision: Boolean = $(doublePrecision) + + setDefault(inverse -> false, doublePrecision -> true) + + override protected def createTransformFunc: Seq[IN] => Seq[Double] = { vec : Seq[IN] => + val res = vec.map(implicitly[Numeric[IN]].toDouble(_)).toArray val jTransformer = new DoubleDCT_1D(vec.length) if ($(inverse)) jTransformer.inverse(res, true) else jTransformer.forward(res, true) res } override protected def validateInputType(inputType: DataType): Unit = { - require( - inputType == ArrayType(DoubleType, false), - s"Input type must be ArrayType(DoubleType, false) but got $inputType.") + require(inputType match { + case ArrayType(innerType, false) => innerType.isInstanceOf[NumericType] + }, + s"Input type must be subtype of ArrayType(NumericType, false) but got $inputType.") } override protected def outputDataType: DataType = new ArrayType(DoubleType, false) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala index be3e2e16d47e3..f8ffbf4f9c483 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala @@ -27,13 +27,13 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.{DataFrame, Row} @BeanInfo -case class DCTTestData(vec: Array[Double], wantedVec: Array[Double]) +case class DCTTestData[T](vec: Array[T], wantedVec: Array[T]) class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkContext { import org.apache.spark.ml.feature.DiscreteCosineTransformerSuite._ test("forward transform of discrete cosine matches jTransforms result") { - val transformer = new DiscreteCosineTransformer() + val transformer = new DiscreteCosineTransformer[Double]() .setInputCol("vec") .setOutputCol("resultVec") .setInverse(false) @@ -41,13 +41,13 @@ class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkCo val expectedResult = data.clone() (new DoubleDCT_1D(data.length)).forward(expectedResult, true) val dataset = sqlContext.createDataFrame(Seq( - DCTTestData(data, expectedResult) + DCTTestData[Double](data, expectedResult) )) - testDCT(transformer, dataset) + testDCT[Double](transformer, dataset) } test("inverse transform of discrete cosine matches jTransforms result") { - val transformer = new DiscreteCosineTransformer() + val transformer = new DiscreteCosineTransformer[Double]() .setInputCol("vec") .setOutputCol("resultVec") .setInverse(true) @@ -55,20 +55,21 @@ class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkCo val expectedResult = data.clone() (new DoubleDCT_1D(data.length)).inverse(expectedResult, true) val dataset = sqlContext.createDataFrame(Seq( - DCTTestData(data, expectedResult) + DCTTestData[Double](data, expectedResult) )) - testDCT(transformer, dataset) + testDCT[Double](transformer, dataset) } } object DiscreteCosineTransformerSuite extends SparkFunSuite { - def testDCT(t: DiscreteCosineTransformer, dataset: DataFrame): Unit = { + def testDCT[T : Numeric](t: DiscreteCosineTransformer[_], dataset: DataFrame): Unit = { + import Numeric.Implicits._ t.transform(dataset) .select("resultVec", "wantedVec") .collect() - .foreach { case Row(resultVec: ArrayBuffer[Double], wantedVec : ArrayBuffer[Double]) => - assert(resultVec.zip(wantedVec).map(x => Math.pow(x._1 - x._2, 2)).sum < 1e-4) + .foreach { case Row(resultVec: ArrayBuffer[T], wantedVec : ArrayBuffer[T]) => + assert(resultVec.zip(wantedVec).map(x => math.pow((x._1 - x._2).toDouble, 2)).sum < 1e-4) } } } From 530983ab569e479dd0a4dd746b6520380496c80d Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Mon, 22 Jun 2015 10:31:28 -0700 Subject: [PATCH 03/10] Tests for other numeric datatypes --- .../feature/DiscreteCosineTransformer.scala | 1 + .../DiscreteCosineTransformerSuite.scala | 41 +++++++++++++++---- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala index ac5ade4da0d5d..468f90a5de160 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.types.{NumericType, ArrayType, DataType, DoubleType} * More information: https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II */ @Experimental +// TODO: explore variance -IN (and +OUT in UnaryTransformer) class DiscreteCosineTransformer[IN : Numeric](override val uid: String) extends UnaryTransformer[Seq[IN], Seq[Double], DiscreteCosineTransformer[IN]] { diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala index f8ffbf4f9c483..dea4d6804da14 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.{DataFrame, Row} @BeanInfo -case class DCTTestData[T](vec: Array[T], wantedVec: Array[T]) +case class DCTTestData[T](vec: Array[T], wantedVec: Array[Double]) class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkContext { import org.apache.spark.ml.feature.DiscreteCosineTransformerSuite._ @@ -43,7 +43,7 @@ class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkCo val dataset = sqlContext.createDataFrame(Seq( DCTTestData[Double](data, expectedResult) )) - testDCT[Double](transformer, dataset) + testDCT(transformer, dataset) } test("inverse transform of discrete cosine matches jTransforms result") { @@ -57,19 +57,46 @@ class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkCo val dataset = sqlContext.createDataFrame(Seq( DCTTestData[Double](data, expectedResult) )) - testDCT[Double](transformer, dataset) + testDCT(transformer, dataset) + } + + test("handle float datatype") { + val transformer = new DiscreteCosineTransformer[Float]() + .setInputCol("vec") + .setOutputCol("resultVec") + .setInverse(true) + val data = (0 until 128).map(_ => (2D * math.random - 1D).toFloat).toArray + val expectedResult = data.clone().map(_.toDouble) + (new DoubleDCT_1D(data.length)).inverse(expectedResult, true) + val dataset = sqlContext.createDataFrame(Seq( + DCTTestData[Float](data, expectedResult) + )) + testDCT(transformer, dataset) + } + + test("handle integer datatype") { + val transformer = new DiscreteCosineTransformer[Int]() + .setInputCol("vec") + .setOutputCol("resultVec") + .setInverse(true) + val data = (0 until 128).map(_ => (2D * math.random - 1D).toInt).toArray + val expectedResult = data.clone().map(_.toDouble) + (new DoubleDCT_1D(data.length)).inverse(expectedResult, true) + val dataset = sqlContext.createDataFrame(Seq( + DCTTestData[Int](data, expectedResult) + )) + testDCT(transformer, dataset) } } object DiscreteCosineTransformerSuite extends SparkFunSuite { - def testDCT[T : Numeric](t: DiscreteCosineTransformer[_], dataset: DataFrame): Unit = { - import Numeric.Implicits._ + def testDCT(t: DiscreteCosineTransformer[_], dataset: DataFrame): Unit = { t.transform(dataset) .select("resultVec", "wantedVec") .collect() - .foreach { case Row(resultVec: ArrayBuffer[T], wantedVec : ArrayBuffer[T]) => - assert(resultVec.zip(wantedVec).map(x => math.pow((x._1 - x._2).toDouble, 2)).sum < 1e-4) + .foreach { case Row(resultVec: ArrayBuffer[Double], wantedVec : ArrayBuffer[Double]) => + assert(resultVec.zip(wantedVec).map(x => math.pow(x._1 - x._2, 2)).sum < 1e-4) } } } From b5ac19cf23f0f47697e4bb4cd8075e13cb07361d Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Wed, 24 Jun 2015 11:17:53 -0700 Subject: [PATCH 04/10] Use Vector types, add Java test --- .../feature/DiscreteCosineTransformer.scala | 48 ++++-------- .../JavaDiscreteCosineTransformerSuite.java | 77 +++++++++++++++++++ .../DiscreteCosineTransformerSuite.scala | 68 ++++++---------- 3 files changed, 118 insertions(+), 75 deletions(-) create mode 100644 mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala index 468f90a5de160..a2f4d59f81c44 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala @@ -23,20 +23,21 @@ import org.apache.spark.annotation.Experimental import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.BooleanParam import org.apache.spark.ml.util.Identifiable -import org.apache.spark.sql.types.{NumericType, ArrayType, DataType, DoubleType} +import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} +import org.apache.spark.sql.types.DataType /** * :: Experimental :: - * A feature transformer that takes the 1D discrete cosine transform of a real vector. + * A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero + * padding is performed on the input vector. * It returns a real vector of the same length representing the DCT. The return vector is scaled - * such that the transform matrix is unitary. + * such that the transform matrix is unitary (aka scaled DCT-II). * - * More information: https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II + * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]]. */ @Experimental -// TODO: explore variance -IN (and +OUT in UnaryTransformer) -class DiscreteCosineTransformer[IN : Numeric](override val uid: String) - extends UnaryTransformer[Seq[IN], Seq[Double], DiscreteCosineTransformer[IN]] { +class DiscreteCosineTransformer(override val uid: String) + extends UnaryTransformer[Vector, Vector, DiscreteCosineTransformer] { def this() = this(Identifiable.randomUID("dct")) @@ -54,35 +55,18 @@ class DiscreteCosineTransformer[IN : Numeric](override val uid: String) /** @group getParam */ def getInverse: Boolean = $(inverse) - /** - * Indicates whether output type should be double (true) or single (false) floating point. - * Default: true - * @group param - */ - def doublePrecision: BooleanParam = new BooleanParam( - this, "doublePrecision", "Set transformer to use double floating point precision") - - /** @group setParam */ - def setDoublePrecision(value: Boolean): this.type = set(doublePrecision, value) - - /** @group getParam */ - def getDoublePrecision: Boolean = $(doublePrecision) - - setDefault(inverse -> false, doublePrecision -> true) + setDefault(inverse -> false) - override protected def createTransformFunc: Seq[IN] => Seq[Double] = { vec : Seq[IN] => - val res = vec.map(implicitly[Numeric[IN]].toDouble(_)).toArray - val jTransformer = new DoubleDCT_1D(vec.length) - if ($(inverse)) jTransformer.inverse(res, true) else jTransformer.forward(res, true) - res + override protected def createTransformFunc: Vector => Vector = { vec => + val result = vec.toArray + val jTransformer = new DoubleDCT_1D(result.length) + if ($(inverse)) jTransformer.inverse(result, true) else jTransformer.forward(result, true) + Vectors.dense(result) } override protected def validateInputType(inputType: DataType): Unit = { - require(inputType match { - case ArrayType(innerType, false) => innerType.isInstanceOf[NumericType] - }, - s"Input type must be subtype of ArrayType(NumericType, false) but got $inputType.") + require(inputType.isInstanceOf[VectorUDT], s"Input type must be VectorUDT but got $inputType.") } - override protected def outputDataType: DataType = new ArrayType(DoubleType, false) + override protected def outputDataType: DataType = new VectorUDT } diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java new file mode 100644 index 0000000000000..b8c0c4329abfa --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature; + +import com.google.common.collect.Lists; +import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class JavaDiscreteCosineTransformerSuite { + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaDiscreteCosineTransformerSuite"); + jsql = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void javaCompatibilityTest() { + double[] input = new double[] {1D, 2D, 3D, 4D}; + JavaRDD data = jsc.parallelize(Lists.newArrayList( + RowFactory.create(Vectors.dense(input)) + )); + DataFrame dataset = jsql.createDataFrame(data, new StructType(new StructField[]{ + new StructField("vec", (new VectorUDT()), false, Metadata.empty()) + })); + + double[] expectedResult= input.clone(); + (new DoubleDCT_1D(input.length)).forward(expectedResult, true); + + DiscreteCosineTransformer DCT = new DiscreteCosineTransformer() + .setInputCol("vec") + .setOutputCol("resultVec"); + + Row[] result = DCT.transform(dataset).select("resultVec").collect(); + Vector resultVec = result[0].getAs("resultVec"); + + Assert.assertArrayEquals(expectedResult, resultVec.toArray(), 1e-6); + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala index dea4d6804da14..0bc74333036db 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala @@ -18,85 +18,67 @@ package org.apache.spark.ml.feature import scala.beans.BeanInfo -import scala.collection.mutable.ArrayBuffer import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D import org.apache.spark.SparkFunSuite +import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.{DataFrame, Row} @BeanInfo -case class DCTTestData[T](vec: Array[T], wantedVec: Array[Double]) +case class DCTTestData(vec: Vector, wantedVec: Vector) class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkContext { import org.apache.spark.ml.feature.DiscreteCosineTransformerSuite._ test("forward transform of discrete cosine matches jTransforms result") { - val transformer = new DiscreteCosineTransformer[Double]() - .setInputCol("vec") - .setOutputCol("resultVec") - .setInverse(false) - val data = (0 until 128).map(_ => 2D * math.random - 1D).toArray - val expectedResult = data.clone() - (new DoubleDCT_1D(data.length)).forward(expectedResult, true) + val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray) + + val expectedResultBuffer = data.toArray.clone() + (new DoubleDCT_1D(data.size)).forward(expectedResultBuffer, true) + val expectedResult = Vectors.dense(expectedResultBuffer) + val dataset = sqlContext.createDataFrame(Seq( - DCTTestData[Double](data, expectedResult) + DCTTestData(data, expectedResult) )) - testDCT(transformer, dataset) - } - test("inverse transform of discrete cosine matches jTransforms result") { - val transformer = new DiscreteCosineTransformer[Double]() + val transformer = new DiscreteCosineTransformer() .setInputCol("vec") .setOutputCol("resultVec") - .setInverse(true) - val data = (0 until 128).map(_ => 2D * math.random - 1D).toArray - val expectedResult = data.clone() - (new DoubleDCT_1D(data.length)).inverse(expectedResult, true) - val dataset = sqlContext.createDataFrame(Seq( - DCTTestData[Double](data, expectedResult) - )) + .setInverse(false) + testDCT(transformer, dataset) } - test("handle float datatype") { - val transformer = new DiscreteCosineTransformer[Float]() - .setInputCol("vec") - .setOutputCol("resultVec") - .setInverse(true) - val data = (0 until 128).map(_ => (2D * math.random - 1D).toFloat).toArray - val expectedResult = data.clone().map(_.toDouble) - (new DoubleDCT_1D(data.length)).inverse(expectedResult, true) + test("inverse transform of discrete cosine matches jTransforms result") { + val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray) + + val expectedResultBuffer = data.toArray.clone() + (new DoubleDCT_1D(data.size)).inverse(expectedResultBuffer, true) + val expectedResult = Vectors.dense(expectedResultBuffer) + val dataset = sqlContext.createDataFrame(Seq( - DCTTestData[Float](data, expectedResult) + DCTTestData(data, expectedResult) )) - testDCT(transformer, dataset) - } - test("handle integer datatype") { - val transformer = new DiscreteCosineTransformer[Int]() + val transformer = new DiscreteCosineTransformer() .setInputCol("vec") .setOutputCol("resultVec") .setInverse(true) - val data = (0 until 128).map(_ => (2D * math.random - 1D).toInt).toArray - val expectedResult = data.clone().map(_.toDouble) - (new DoubleDCT_1D(data.length)).inverse(expectedResult, true) - val dataset = sqlContext.createDataFrame(Seq( - DCTTestData[Int](data, expectedResult) - )) + testDCT(transformer, dataset) } } object DiscreteCosineTransformerSuite extends SparkFunSuite { - def testDCT(t: DiscreteCosineTransformer[_], dataset: DataFrame): Unit = { + def testDCT(t: DiscreteCosineTransformer, dataset: DataFrame): Unit = { t.transform(dataset) .select("resultVec", "wantedVec") .collect() - .foreach { case Row(resultVec: ArrayBuffer[Double], wantedVec : ArrayBuffer[Double]) => - assert(resultVec.zip(wantedVec).map(x => math.pow(x._1 - x._2, 2)).sum < 1e-4) + .foreach { case Row(resultVec: Vector, wantedVec : Vector) => + assert(Vectors.sqdist(resultVec, wantedVec) < 1e-6) } } } From 91e9636d26ea4fcbfc802cdcb8a2740a829a5c7f Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Wed, 24 Jun 2015 16:17:48 -0700 Subject: [PATCH 05/10] Style guide and test helper refactor --- .../JavaDiscreteCosineTransformerSuite.java | 11 ++++---- .../DiscreteCosineTransformerSuite.scala | 28 +++++++++++-------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java index b8c0c4329abfa..28bc5f65e0532 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java @@ -19,6 +19,11 @@ import com.google.common.collect.Lists; import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Vector; @@ -31,10 +36,6 @@ import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; public class JavaDiscreteCosineTransformerSuite { private transient JavaSparkContext jsc; @@ -62,7 +63,7 @@ public void javaCompatibilityTest() { new StructField("vec", (new VectorUDT()), false, Metadata.empty()) })); - double[] expectedResult= input.clone(); + double[] expectedResult = input.clone(); (new DoubleDCT_1D(input.length)).forward(expectedResult, true); DiscreteCosineTransformer DCT = new DiscreteCosineTransformer() diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala index 0bc74333036db..cc22a915aca4e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala @@ -34,38 +34,32 @@ class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkCo test("forward transform of discrete cosine matches jTransforms result") { val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray) - - val expectedResultBuffer = data.toArray.clone() - (new DoubleDCT_1D(data.size)).forward(expectedResultBuffer, true) - val expectedResult = Vectors.dense(expectedResultBuffer) + val inverse = false val dataset = sqlContext.createDataFrame(Seq( - DCTTestData(data, expectedResult) + DCTTestData(data, makeExpectedResult(data, inverse)) )) val transformer = new DiscreteCosineTransformer() .setInputCol("vec") .setOutputCol("resultVec") - .setInverse(false) + .setInverse(inverse) testDCT(transformer, dataset) } test("inverse transform of discrete cosine matches jTransforms result") { val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray) - - val expectedResultBuffer = data.toArray.clone() - (new DoubleDCT_1D(data.size)).inverse(expectedResultBuffer, true) - val expectedResult = Vectors.dense(expectedResultBuffer) + val inverse = true val dataset = sqlContext.createDataFrame(Seq( - DCTTestData(data, expectedResult) + DCTTestData(data, makeExpectedResult(data, inverse)) )) val transformer = new DiscreteCosineTransformer() .setInputCol("vec") .setOutputCol("resultVec") - .setInverse(true) + .setInverse(inverse) testDCT(transformer, dataset) } @@ -73,6 +67,16 @@ class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkCo object DiscreteCosineTransformerSuite extends SparkFunSuite { + def makeExpectedResult(data: Vector, inverse: Boolean): Vector = { + val expectedResultBuffer = data.toArray.clone() + if (inverse) { + (new DoubleDCT_1D(data.size)).inverse(expectedResultBuffer, true) + } else { + (new DoubleDCT_1D(data.size)).forward(expectedResultBuffer, true) + } + Vectors.dense(expectedResultBuffer) + } + def testDCT(t: DiscreteCosineTransformer, dataset: DataFrame): Unit = { t.transform(dataset) .select("resultVec", "wantedVec") From 433dbc72943ea64fe295c8dfc0a343bbbf815696 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Tue, 30 Jun 2015 11:03:37 -0700 Subject: [PATCH 06/10] Test refactoring --- .../DiscreteCosineTransformerSuite.scala | 49 +++++++------------ 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala index cc22a915aca4e..ed0fc11f78f69 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala @@ -30,59 +30,44 @@ import org.apache.spark.sql.{DataFrame, Row} case class DCTTestData(vec: Vector, wantedVec: Vector) class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkContext { - import org.apache.spark.ml.feature.DiscreteCosineTransformerSuite._ test("forward transform of discrete cosine matches jTransforms result") { val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray) val inverse = false - val dataset = sqlContext.createDataFrame(Seq( - DCTTestData(data, makeExpectedResult(data, inverse)) - )) - - val transformer = new DiscreteCosineTransformer() - .setInputCol("vec") - .setOutputCol("resultVec") - .setInverse(inverse) - - testDCT(transformer, dataset) + testDCT(data, inverse) } test("inverse transform of discrete cosine matches jTransforms result") { val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray) val inverse = true - val dataset = sqlContext.createDataFrame(Seq( - DCTTestData(data, makeExpectedResult(data, inverse)) - )) - - val transformer = new DiscreteCosineTransformer() - .setInputCol("vec") - .setOutputCol("resultVec") - .setInverse(inverse) - - testDCT(transformer, dataset) + testDCT(data, inverse) } -} - -object DiscreteCosineTransformerSuite extends SparkFunSuite { - def makeExpectedResult(data: Vector, inverse: Boolean): Vector = { + private def testDCT(data: Vector, inverse: Boolean): Unit = { val expectedResultBuffer = data.toArray.clone() if (inverse) { (new DoubleDCT_1D(data.size)).inverse(expectedResultBuffer, true) } else { (new DoubleDCT_1D(data.size)).forward(expectedResultBuffer, true) } - Vectors.dense(expectedResultBuffer) - } + val expectedResult = Vectors.dense(expectedResultBuffer) + + val dataset = sqlContext.createDataFrame(Seq( + DCTTestData(data, expectedResult) + )) - def testDCT(t: DiscreteCosineTransformer, dataset: DataFrame): Unit = { - t.transform(dataset) + val transformer = new DiscreteCosineTransformer() + .setInputCol("vec") + .setOutputCol("resultVec") + .setInverse(inverse) + + transformer.transform(dataset) .select("resultVec", "wantedVec") .collect() - .foreach { case Row(resultVec: Vector, wantedVec : Vector) => - assert(Vectors.sqdist(resultVec, wantedVec) < 1e-6) - } + .foreach { case Row(resultVec: Vector, wantedVec: Vector) => + assert(Vectors.sqdist(resultVec, wantedVec) < 1e-6) + } } } From 894d0b2154751836f03ec1354cd727787b0e4762 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Tue, 30 Jun 2015 15:27:46 -0700 Subject: [PATCH 07/10] Rename DiscreteCosineTransformer to DCT --- .../feature/{DiscreteCosineTransformer.scala => DCT.scala} | 4 ++-- ...iscreteCosineTransformerSuite.java => JavaDCTSuite.java} | 6 +++--- ...{DiscreteCosineTransformerSuite.scala => DCTSuite.scala} | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) rename mllib/src/main/scala/org/apache/spark/ml/feature/{DiscreteCosineTransformer.scala => DCT.scala} (95%) rename mllib/src/test/java/org/apache/spark/ml/feature/{JavaDiscreteCosineTransformerSuite.java => JavaDCTSuite.java} (92%) rename mllib/src/test/scala/org/apache/spark/ml/feature/{DiscreteCosineTransformerSuite.scala => DCTSuite.scala} (94%) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala similarity index 95% rename from mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala rename to mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index a2f4d59f81c44..228347635c92b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -36,8 +36,8 @@ import org.apache.spark.sql.types.DataType * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]]. */ @Experimental -class DiscreteCosineTransformer(override val uid: String) - extends UnaryTransformer[Vector, Vector, DiscreteCosineTransformer] { +class DCT(override val uid: String) + extends UnaryTransformer[Vector, Vector, DCT] { def this() = this(Identifiable.randomUID("dct")) diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java similarity index 92% rename from mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java rename to mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java index 28bc5f65e0532..34f8cbbe1eb62 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java @@ -37,13 +37,13 @@ import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -public class JavaDiscreteCosineTransformerSuite { +public class JavaDCTSuite { private transient JavaSparkContext jsc; private transient SQLContext jsql; @Before public void setUp() { - jsc = new JavaSparkContext("local", "JavaDiscreteCosineTransformerSuite"); + jsc = new JavaSparkContext("local", "JavaDCTSuite"); jsql = new SQLContext(jsc); } @@ -66,7 +66,7 @@ public void javaCompatibilityTest() { double[] expectedResult = input.clone(); (new DoubleDCT_1D(input.length)).forward(expectedResult, true); - DiscreteCosineTransformer DCT = new DiscreteCosineTransformer() + DCT DCT = new DCT() .setInputCol("vec") .setOutputCol("resultVec"); diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala similarity index 94% rename from mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala rename to mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala index ed0fc11f78f69..37ed2367c33f7 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.{DataFrame, Row} @BeanInfo case class DCTTestData(vec: Vector, wantedVec: Vector) -class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkContext { +class DCTSuite extends SparkFunSuite with MLlibTestSparkContext { test("forward transform of discrete cosine matches jTransforms result") { val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray) @@ -58,7 +58,7 @@ class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkCo DCTTestData(data, expectedResult) )) - val transformer = new DiscreteCosineTransformer() + val transformer = new DCT() .setInputCol("vec") .setOutputCol("resultVec") .setInverse(inverse) From f9a8958fa468320a5fe20b72c7c5b8dada3982df Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Tue, 30 Jun 2015 16:02:51 -0700 Subject: [PATCH 08/10] Remove old files --- .../feature/DiscreteCosineTransformer.scala | 72 ----------------- .../JavaDiscreteCosineTransformerSuite.java | 78 ------------------- .../DiscreteCosineTransformerSuite.scala | 73 ----------------- 3 files changed, 223 deletions(-) delete mode 100644 mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala delete mode 100644 mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java delete mode 100644 mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala deleted file mode 100644 index a2f4d59f81c44..0000000000000 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.ml.feature - -import edu.emory.mathcs.jtransforms.dct._ - -import org.apache.spark.annotation.Experimental -import org.apache.spark.ml.UnaryTransformer -import org.apache.spark.ml.param.BooleanParam -import org.apache.spark.ml.util.Identifiable -import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} -import org.apache.spark.sql.types.DataType - -/** - * :: Experimental :: - * A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero - * padding is performed on the input vector. - * It returns a real vector of the same length representing the DCT. The return vector is scaled - * such that the transform matrix is unitary (aka scaled DCT-II). - * - * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]]. - */ -@Experimental -class DiscreteCosineTransformer(override val uid: String) - extends UnaryTransformer[Vector, Vector, DiscreteCosineTransformer] { - - def this() = this(Identifiable.randomUID("dct")) - - /** - * Indicates whether to perform the inverse DCT (true) or forward DCT (false). - * Default: false - * @group param - */ - def inverse: BooleanParam = new BooleanParam( - this, "inverse", "Set transformer to perform inverse DCT") - - /** @group setParam */ - def setInverse(value: Boolean): this.type = set(inverse, value) - - /** @group getParam */ - def getInverse: Boolean = $(inverse) - - setDefault(inverse -> false) - - override protected def createTransformFunc: Vector => Vector = { vec => - val result = vec.toArray - val jTransformer = new DoubleDCT_1D(result.length) - if ($(inverse)) jTransformer.inverse(result, true) else jTransformer.forward(result, true) - Vectors.dense(result) - } - - override protected def validateInputType(inputType: DataType): Unit = { - require(inputType.isInstanceOf[VectorUDT], s"Input type must be VectorUDT but got $inputType.") - } - - override protected def outputDataType: DataType = new VectorUDT -} diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java deleted file mode 100644 index 28bc5f65e0532..0000000000000 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.ml.feature; - -import com.google.common.collect.Lists; -import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.mllib.linalg.VectorUDT; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.sql.DataFrame; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.RowFactory; -import org.apache.spark.sql.SQLContext; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; - -public class JavaDiscreteCosineTransformerSuite { - private transient JavaSparkContext jsc; - private transient SQLContext jsql; - - @Before - public void setUp() { - jsc = new JavaSparkContext("local", "JavaDiscreteCosineTransformerSuite"); - jsql = new SQLContext(jsc); - } - - @After - public void tearDown() { - jsc.stop(); - jsc = null; - } - - @Test - public void javaCompatibilityTest() { - double[] input = new double[] {1D, 2D, 3D, 4D}; - JavaRDD data = jsc.parallelize(Lists.newArrayList( - RowFactory.create(Vectors.dense(input)) - )); - DataFrame dataset = jsql.createDataFrame(data, new StructType(new StructField[]{ - new StructField("vec", (new VectorUDT()), false, Metadata.empty()) - })); - - double[] expectedResult = input.clone(); - (new DoubleDCT_1D(input.length)).forward(expectedResult, true); - - DiscreteCosineTransformer DCT = new DiscreteCosineTransformer() - .setInputCol("vec") - .setOutputCol("resultVec"); - - Row[] result = DCT.transform(dataset).select("resultVec").collect(); - Vector resultVec = result[0].getAs("resultVec"); - - Assert.assertArrayEquals(expectedResult, resultVec.toArray(), 1e-6); - } -} diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala deleted file mode 100644 index ed0fc11f78f69..0000000000000 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.ml.feature - -import scala.beans.BeanInfo - -import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D - -import org.apache.spark.SparkFunSuite -import org.apache.spark.mllib.linalg.{Vector, Vectors} -import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{DataFrame, Row} - -@BeanInfo -case class DCTTestData(vec: Vector, wantedVec: Vector) - -class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkContext { - - test("forward transform of discrete cosine matches jTransforms result") { - val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray) - val inverse = false - - testDCT(data, inverse) - } - - test("inverse transform of discrete cosine matches jTransforms result") { - val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray) - val inverse = true - - testDCT(data, inverse) - } - - private def testDCT(data: Vector, inverse: Boolean): Unit = { - val expectedResultBuffer = data.toArray.clone() - if (inverse) { - (new DoubleDCT_1D(data.size)).inverse(expectedResultBuffer, true) - } else { - (new DoubleDCT_1D(data.size)).forward(expectedResultBuffer, true) - } - val expectedResult = Vectors.dense(expectedResultBuffer) - - val dataset = sqlContext.createDataFrame(Seq( - DCTTestData(data, expectedResult) - )) - - val transformer = new DiscreteCosineTransformer() - .setInputCol("vec") - .setOutputCol("resultVec") - .setInverse(inverse) - - transformer.transform(dataset) - .select("resultVec", "wantedVec") - .collect() - .foreach { case Row(resultVec: Vector, wantedVec: Vector) => - assert(Vectors.sqdist(resultVec, wantedVec) < 1e-6) - } - } -} From 9d5c9e475df088feb479fe0fa4ff7465ba58aeeb Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Tue, 30 Jun 2015 16:20:45 -0700 Subject: [PATCH 09/10] Lowercase JavaDCTSuite variable --- .../src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java index 34f8cbbe1eb62..87366ce002c10 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java @@ -66,7 +66,7 @@ public void javaCompatibilityTest() { double[] expectedResult = input.clone(); (new DoubleDCT_1D(input.length)).forward(expectedResult, true); - DCT DCT = new DCT() + DCT dct = new DCT() .setInputCol("vec") .setOutputCol("resultVec"); From e547b3ed209bd58602372c39f673686367ae27b0 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Tue, 30 Jun 2015 16:51:11 -0700 Subject: [PATCH 10/10] Fix renaming bug --- .../src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java index 87366ce002c10..845eed61c45c6 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java @@ -70,7 +70,7 @@ public void javaCompatibilityTest() { .setInputCol("vec") .setOutputCol("resultVec"); - Row[] result = DCT.transform(dataset).select("resultVec").collect(); + Row[] result = dct.transform(dataset).select("resultVec").collect(); Vector resultVec = result[0].getAs("resultVec"); Assert.assertArrayEquals(expectedResult, resultVec.toArray(), 1e-6);