From a633c22cc11f29c6c54899dd499318ed17068654 Mon Sep 17 00:00:00 2001
From: "Joseph K. Bradley" <joseph@databricks.com>
Date: Wed, 9 Sep 2015 19:48:15 -0700
Subject: [PATCH 1/2] StringIndexer doc fixes

---
 .../spark/ml/feature/StringIndexer.scala      | 31 +++++++------------
 python/pyspark/ml/feature.py                  | 16 +++++-----
 2 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index b6482ffe0b2ee..3a4ab9a857648 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -181,10 +181,10 @@ class StringIndexerModel (
 
 /**
  * :: Experimental ::
- * A [[Transformer]] that maps a column of string indices back to a new column of corresponding
- * string values using either the ML attributes of the input column, or if provided using the labels
- * supplied by the user.
- * All original columns are kept during transformation.
+ * A [[Transformer]] that maps a column of indices back to a new column of corresponding
+ * string values.
+ * The index-string mapping is either from the ML attributes of the input column,
+ * or from user-supplied labels (which take precedence over ML attributes).
  *
  * @see [[StringIndexer]] for converting strings into indices
  */
@@ -202,32 +202,23 @@ class IndexToString private[ml] (
   /** @group setParam */
   def setOutputCol(value: String): this.type = set(outputCol, value)
 
-  /**
-   * Optional labels to be provided by the user, if not supplied column
-   * metadata is read for labels. The default value is an empty array,
-   * but the empty array is ignored and column metadata used instead.
-   * @group setParam
-   */
+  /** @group setParam */
   def setLabels(value: Array[String]): this.type = set(labels, value)
 
   /**
-   * Param for array of labels.
-   * Optional labels to be provided by the user.
-   * Default: Empty array, in which case column metadata is used for labels.
+   * Optional param for array of labels specifying index-string mapping.
+   *
+   * Default: Empty array, in which case [[inputCol]] metadata is used for labels.
    * @group param
    */
   final val labels: StringArrayParam = new StringArrayParam(this, "labels",
-    "array of labels, if not provided metadata from inputCol is used instead.")
+    "Optional array of labels specifying index-string mapping." +
+      " If not provided or if empty, then metadata from inputCol is used instead.")
   setDefault(labels, Array.empty[String])
 
-  /**
-   * Optional labels to be provided by the user, if not supplied column
-   * metadata is read for labels.
-   * @group getParam
-   */
+  /** @group getParam */
   final def getLabels: Array[String] = $(labels)
 
-  /** Transform the schema for the inverse transformation */
   override def transformSchema(schema: StructType): StructType = {
     val inputColName = $(inputCol)
     val inputDataType = schema(inputColName).dataType
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 1c423486be8d9..de032e40f4e63 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -984,17 +984,17 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
     """
     .. note:: Experimental
 
-    A :py:class:`Transformer` that maps a column of string indices back to a new column of
-    corresponding string values using either the ML attributes of the input column, or if
-    provided using the labels supplied by the user.
-    All original columns are kept during transformation.
+    A :py:class:`Transformer` that maps a column of indices back to a new column of
+    corresponding string values.
+    The index-string mapping is either from the ML attributes of the input column,
+    or from user-supplied labels (which take precedence over ML attributes).
     See L{StringIndexer} for converting strings into indices.
     """
 
     # a placeholder to make the labels show up in generated doc
     labels = Param(Params._dummy(), "labels",
-                   "Optional array of labels to be provided by the user, if not supplied or " +
-                   "empty, column metadata is read for labels")
+                   "Optional array of labels specifying index-string mapping." +
+                   " If not provided or if empty, then metadata from inputCol is used instead.")
 
     @keyword_only
     def __init__(self, inputCol=None, outputCol=None, labels=None):
@@ -1005,8 +1005,8 @@ def __init__(self, inputCol=None, outputCol=None, labels=None):
         self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IndexToString",
                                             self.uid)
         self.labels = Param(self, "labels",
-                            "Optional array of labels to be provided by the user, if not " +
-                            "supplied or empty, column metadata is read for labels")
+                            "Optional array of labels specifying index-string mapping. If not" +
+                            " provided or if empty, then metadata from inputCol is used instead.")
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
 

From 316f60dea5b44e8aa83f1fd96c0ca00b73d821e5 Mon Sep 17 00:00:00 2001
From: "Joseph K. Bradley" <joseph@databricks.com>
Date: Wed, 9 Sep 2015 19:52:34 -0700
Subject: [PATCH 2/2] minor doc fix

---
 .../src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala
index fcb517b5f735e..96a38a3bde960 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructField
 
 
 /**
- * Helper utilities for tree-based algorithms
+ * Helper utilities for algorithms using ML metadata
  */
 private[spark] object MetadataUtils {