[SPARK-8472] [ML] [PySpark] Python API for DCT
Add Python API for ml.feature.DCT.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #8485 from yanboliang/spark-8472.
yanboliang authored and mengxr committed Aug 31, 2015
1 parent 23e39cc commit 5b3245d
Showing 1 changed file with 64 additions and 1 deletion.
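As a quick orientation before the diff, here is a minimal sketch of how the new transformer is driven from Python; the SparkContext/SQLContext setup mirrors the doctest added below, and the Pipeline wiring at the end is an extra illustration (an assumption, not part of this commit):

from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.ml import Pipeline
from pyspark.ml.feature import DCT
from pyspark.mllib.linalg import Vectors

sc = SparkContext(appName="dct-example")
sqlContext = SQLContext(sc)

# A single row holding a dense vector, as in the doctest added below.
df = sqlContext.createDataFrame([(Vectors.dense([5.0, 8.0, 6.0]),)], ["vec"])

# Forward DCT; inverse=True (or dct.setInverse(True)) selects the inverse transform.
dct = DCT(inverse=False, inputCol="vec", outputCol="resultVec")
print(dct.transform(df).head().resultVec)

# DCT is a plain Transformer, so it can also be used as a Pipeline stage.
pipeline = Pipeline(stages=[dct])
print(pipeline.fit(df).transform(df).head().resultVec)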
65 changes: 64 additions & 1 deletion python/pyspark/ml/feature.py
@@ -26,7 +26,7 @@
from pyspark.mllib.common import inherit_doc
from pyspark.mllib.linalg import _convert_to_vector

-__all__ = ['Binarizer', 'Bucketizer', 'ElementwiseProduct', 'HashingTF', 'IDF', 'IDFModel',
+__all__ = ['Binarizer', 'Bucketizer', 'DCT', 'ElementwiseProduct', 'HashingTF', 'IDF', 'IDFModel',
'NGram', 'Normalizer', 'OneHotEncoder', 'PolynomialExpansion', 'RegexTokenizer',
'StandardScaler', 'StandardScalerModel', 'StringIndexer', 'StringIndexerModel',
'Tokenizer', 'VectorAssembler', 'VectorIndexer', 'Word2Vec', 'Word2VecModel',
@@ -166,6 +166,69 @@ def getSplits(self):
return self.getOrDefault(self.splits)


@inherit_doc
class DCT(JavaTransformer, HasInputCol, HasOutputCol):
"""
A feature transformer that takes the 1D discrete cosine transform
of a real vector. No zero padding is performed on the input vector.
It returns a real vector of the same length representing the DCT.
The return vector is scaled such that the transform matrix is
unitary (aka scaled DCT-II).
More information on
`https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia`.
>>> from pyspark.mllib.linalg import Vectors
>>> df1 = sqlContext.createDataFrame([(Vectors.dense([5.0, 8.0, 6.0]),)], ["vec"])
>>> dct = DCT(inverse=False, inputCol="vec", outputCol="resultVec")
>>> df2 = dct.transform(df1)
>>> df2.head().resultVec
DenseVector([10.969..., -0.707..., -2.041...])
>>> df3 = DCT(inverse=True, inputCol="resultVec", outputCol="origVec").transform(df2)
>>> df3.head().origVec
DenseVector([5.0, 8.0, 6.0])
"""

# a placeholder to make it appear in the generated doc
inverse = Param(Params._dummy(), "inverse", "Set transformer to perform inverse DCT, " +
"default False.")

@keyword_only
def __init__(self, inverse=False, inputCol=None, outputCol=None):
"""
__init__(self, inverse=False, inputCol=None, outputCol=None)
"""
super(DCT, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.DCT", self.uid)
self.inverse = Param(self, "inverse", "Set transformer to perform inverse DCT, " +
"default False.")
self._setDefault(inverse=False)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)

@keyword_only
def setParams(self, inverse=False, inputCol=None, outputCol=None):
"""
setParams(self, inverse=False, inputCol=None, outputCol=None)
Sets params for this DCT.
"""
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)

def setInverse(self, value):
"""
Sets the value of :py:attr:`inverse`.
"""
self._paramMap[self.inverse] = value
return self

def getInverse(self):
"""
Gets the value of inverse or its default value.
"""
return self.getOrDefault(self.inverse)


@inherit_doc
class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
"""

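A side note on the "scaled DCT-II" wording in the docstring: the doctest values can be cross-checked against an orthonormal DCT-II computed with SciPy. SciPy is an assumption used only for this check; pyspark.ml does not depend on it.

import numpy as np
from scipy.fftpack import dct

vec = np.array([5.0, 8.0, 6.0])

forward = dct(vec, type=2, norm='ortho')        # unitary ("scaled") DCT-II
print(forward)                                  # approx [10.9697, -0.7071, -2.0412]

recovered = dct(forward, type=3, norm='ortho')  # DCT-III is the inverse of DCT-II
print(recovered)                                # approx [5.0, 8.0, 6.0]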