Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MINOR] [MLLIB] [ML] [DOC] Minor doc fixes for StringIndexer and MetadataUtils #8679

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,10 @@ class StringIndexerModel (

/**
* :: Experimental ::
* A [[Transformer]] that maps a column of string indices back to a new column of corresponding
* string values using either the ML attributes of the input column, or if provided using the labels
* supplied by the user.
* All original columns are kept during transformation.
* A [[Transformer]] that maps a column of indices back to a new column of corresponding
* string values.
* The index-string mapping is either from the ML attributes of the input column,
* or from user-supplied labels (which take precedence over ML attributes).
*
* @see [[StringIndexer]] for converting strings into indices
*/
Expand All @@ -202,32 +202,23 @@ class IndexToString private[ml] (
/** @group setParam */
def setOutputCol(value: String): this.type = set(outputCol, value)

/**
* Optional labels to be provided by the user, if not supplied column
* metadata is read for labels. The default value is an empty array,
* but the empty array is ignored and column metadata used instead.
* @group setParam
*/
/** @group setParam */
def setLabels(value: Array[String]): this.type = set(labels, value)

/**
* Param for array of labels.
* Optional labels to be provided by the user.
* Default: Empty array, in which case column metadata is used for labels.
* Optional param for array of labels specifying index-string mapping.
*
* Default: Empty array, in which case [[inputCol]] metadata is used for labels.
* @group param
*/
final val labels: StringArrayParam = new StringArrayParam(this, "labels",
"array of labels, if not provided metadata from inputCol is used instead.")
"Optional array of labels specifying index-string mapping." +
" If not provided or if empty, then metadata from inputCol is used instead.")
setDefault(labels, Array.empty[String])

/**
* Optional labels to be provided by the user, if not supplied column
* metadata is read for labels.
* @group getParam
*/
/** @group getParam */
final def getLabels: Array[String] = $(labels)

/** Transform the schema for the inverse transformation */
override def transformSchema(schema: StructType): StructType = {
val inputColName = $(inputCol)
val inputDataType = schema(inputColName).dataType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructField


/**
* Helper utilities for tree-based algorithms
* Helper utilities for algorithms using ML metadata
*/
private[spark] object MetadataUtils {

Expand Down
16 changes: 8 additions & 8 deletions python/pyspark/ml/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,17 +984,17 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental

A :py:class:`Transformer` that maps a column of string indices back to a new column of
corresponding string values using either the ML attributes of the input column, or if
provided using the labels supplied by the user.
All original columns are kept during transformation.
A :py:class:`Transformer` that maps a column of indices back to a new column of
corresponding string values.
The index-string mapping is either from the ML attributes of the input column,
or from user-supplied labels (which take precedence over ML attributes).
See :py:class:`StringIndexer` for converting strings into indices.
"""

# a placeholder to make the labels show up in generated doc
labels = Param(Params._dummy(), "labels",
"Optional array of labels to be provided by the user, if not supplied or " +
"empty, column metadata is read for labels")
"Optional array of labels specifying index-string mapping." +
" If not provided or if empty, then metadata from inputCol is used instead.")

@keyword_only
def __init__(self, inputCol=None, outputCol=None, labels=None):
Expand All @@ -1005,8 +1005,8 @@ def __init__(self, inputCol=None, outputCol=None, labels=None):
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IndexToString",
self.uid)
self.labels = Param(self, "labels",
"Optional array of labels to be provided by the user, if not " +
"supplied or empty, column metadata is read for labels")
"Optional array of labels specifying index-string mapping. If not" +
" provided or if empty, then metadata from inputCol is used instead.")
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)

Expand Down