Skip to content

Commit

Permalink
[SPARK-10094] Pyspark ML Feature transformers marked as experimental
Browse files Browse the repository at this point in the history
Modified class-level docstrings to mark all feature transformers in pyspark.ml as experimental.

Author: noelsmith <mail@noelsmith.com>

Closes #8623 from noel-smith/SPARK-10094-mark-pyspark-ml-trans-exp.
  • Loading branch information
noel-smith authored and mengxr committed Sep 9, 2015
1 parent 3a11e50 commit 0e2f216
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions python/pyspark/ml/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
@inherit_doc
class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
Binarize a column of continuous features given a threshold.
>>> df = sqlContext.createDataFrame([(0.5,)], ["values"])
Expand Down Expand Up @@ -92,6 +94,8 @@ def getThreshold(self):
@inherit_doc
class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
Maps a column of continuous features to a column of feature buckets.
>>> df = sqlContext.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"])
Expand Down Expand Up @@ -169,6 +173,8 @@ def getSplits(self):
@inherit_doc
class DCT(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
A feature transformer that takes the 1D discrete cosine transform
of a real vector. No zero padding is performed on the input vector.
It returns a real vector of the same length representing the DCT.
Expand Down Expand Up @@ -232,6 +238,8 @@ def getInverse(self):
@inherit_doc
class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
Outputs the Hadamard product (i.e., the element-wise product) of each input vector
with a provided "weight" vector. In other words, it scales each column of the dataset
by a scalar multiplier.
Expand Down Expand Up @@ -289,6 +297,8 @@ def getScalingVec(self):
@inherit_doc
class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
"""
.. note:: Experimental
Maps a sequence of terms to their term frequencies using the
hashing trick.
Expand Down Expand Up @@ -327,6 +337,8 @@ def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None):
@inherit_doc
class IDF(JavaEstimator, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
Compute the Inverse Document Frequency (IDF) given a collection of documents.
>>> from pyspark.mllib.linalg import DenseVector
Expand Down Expand Up @@ -387,6 +399,8 @@ def _create_model(self, java_model):

class IDFModel(JavaModel):
"""
.. note:: Experimental
Model fitted by IDF.
"""

Expand All @@ -395,6 +409,8 @@ class IDFModel(JavaModel):
@ignore_unicode_prefix
class NGram(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
A feature transformer that converts the input array of strings into an array of n-grams. Null
values in the input array are ignored.
It returns an array of n-grams where each n-gram is represented by a space-separated string of
Expand Down Expand Up @@ -463,6 +479,8 @@ def getN(self):
@inherit_doc
class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
Normalize a vector to have unit norm using the given p-norm.
>>> from pyspark.mllib.linalg import Vectors
Expand Down Expand Up @@ -519,6 +537,8 @@ def getP(self):
@inherit_doc
class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
A one-hot encoder that maps a column of category indices to a
column of binary vectors, with at most a single one-value per row
that indicates the input category index.
Expand Down Expand Up @@ -591,6 +611,8 @@ def getDropLast(self):
@inherit_doc
class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
Perform feature expansion in a polynomial space. As described in the Wikipedia article on
polynomial expansion (`http://en.wikipedia.org/wiki/Polynomial_expansion`), "In mathematics, an
expansion of a product of sums expresses it as a sum of products by using the fact that
Expand Down Expand Up @@ -649,6 +671,8 @@ def getDegree(self):
@ignore_unicode_prefix
class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
A regex based tokenizer that extracts tokens either by using the
provided regex pattern (in Java dialect) to split the text
(default) or repeatedly matching the regex (if gaps is false).
Expand Down Expand Up @@ -746,6 +770,8 @@ def getPattern(self):
@inherit_doc
class SQLTransformer(JavaTransformer):
"""
.. note:: Experimental
Implements the transformations defined by a SQL statement.
Currently we only support SQL syntax like 'SELECT ... FROM __THIS__'
where '__THIS__' represents the underlying table of the input dataset.
Expand Down Expand Up @@ -797,6 +823,8 @@ def getStatement(self):
@inherit_doc
class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
Standardizes features by removing the mean and scaling to unit variance using column summary
statistics on the samples in the training set.
Expand Down Expand Up @@ -870,6 +898,8 @@ def _create_model(self, java_model):

class StandardScalerModel(JavaModel):
"""
.. note:: Experimental
Model fitted by StandardScaler.
"""

Expand All @@ -891,6 +921,8 @@ def mean(self):
@inherit_doc
class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
A label indexer that maps a string column of labels to an ML column of label indices.
If the input column is numeric, we cast it to string and index the string values.
The indices are in [0, numLabels), ordered by label frequencies.
Expand Down Expand Up @@ -929,6 +961,8 @@ def _create_model(self, java_model):

class StringIndexerModel(JavaModel):
"""
.. note:: Experimental
Model fitted by StringIndexer.
"""

Expand Down Expand Up @@ -1006,6 +1040,8 @@ def getCaseSensitive(self):
@ignore_unicode_prefix
class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
A tokenizer that converts the input string to lowercase and then
splits it by white spaces.
Expand Down Expand Up @@ -1051,6 +1087,8 @@ def setParams(self, inputCol=None, outputCol=None):
@inherit_doc
class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
"""
.. note:: Experimental
A feature transformer that merges multiple columns into a vector column.
>>> df = sqlContext.createDataFrame([(1, 0, 3)], ["a", "b", "c"])
Expand Down Expand Up @@ -1087,6 +1125,8 @@ def setParams(self, inputCols=None, outputCol=None):
@inherit_doc
class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
Class for indexing categorical feature columns in a dataset of `Vector`.
This has 2 usage modes:
Expand Down Expand Up @@ -1186,6 +1226,8 @@ def _create_model(self, java_model):

class VectorIndexerModel(JavaModel):
"""
.. note:: Experimental
Model fitted by VectorIndexer.
"""

Expand All @@ -1194,6 +1236,8 @@ class VectorIndexerModel(JavaModel):
@ignore_unicode_prefix
class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
Word2Vec trains a model of `Map(String, Vector)`, i.e. it transforms a word into a code for use in
further natural language processing or machine learning tasks.
Expand Down Expand Up @@ -1307,6 +1351,8 @@ def _create_model(self, java_model):

class Word2VecModel(JavaModel):
"""
.. note:: Experimental
Model fitted by Word2Vec.
"""

Expand All @@ -1332,6 +1378,8 @@ def findSynonyms(self, word, num):
@inherit_doc
class PCA(JavaEstimator, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
PCA trains a model to project vectors to a low-dimensional space using PCA.
>>> from pyspark.mllib.linalg import Vectors
Expand Down Expand Up @@ -1387,6 +1435,8 @@ def _create_model(self, java_model):

class PCAModel(JavaModel):
"""
.. note:: Experimental
Model fitted by PCA.
"""

Expand Down Expand Up @@ -1470,6 +1520,8 @@ def _create_model(self, java_model):

class RFormulaModel(JavaModel):
"""
.. note:: Experimental
Model fitted by :py:class:`RFormula`.
"""

Expand Down

0 comments on commit 0e2f216

Please sign in to comment.