[SPARK-10509][PYSPARK] Reduce excessive param boilerplate code #10216

Closed
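This PR replaces each hand-written per-instance Param construction with a single call to _copy_new_parent on the class-level placeholder. The helper's implementation is not visible in this view; below is a minimal sketch of what such a method might do, using a simplified stand-in for pyspark.ml.param.Param (the None-parent placeholder test is an assumption for illustration, not pyspark's exact representation):

    import copy

    class Param(object):
        """Simplified stand-in for pyspark.ml.param.Param."""

        def __init__(self, parent, name, doc):
            self.parent = parent  # None marks a class-level placeholder
            self.name = name
            self.doc = doc

        def _copy_new_parent(self, parent):
            # Only a placeholder (dummy parent) may be copied; the copy is
            # re-parented to the concrete instance that will own it.
            if self.parent is None:
                param = copy.copy(self)
                param.parent = parent
                return param
            raise ValueError("Cannot copy from non-dummy parent %s." % self.parent)

With a helper like this, each class declares its Param once as a class attribute, and __init__ shrinks to one line per param, as the hunks below show.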
34 changes: 7 additions & 27 deletions python/pyspark/ml/classification.py
@@ -72,7 +72,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
.. versionadded:: 1.3.0
"""

- # a placeholder to make it appear in the generated doc
threshold = Param(Params._dummy(), "threshold",
Member: This can hopefully remain the same.
"Threshold in binary classification prediction, in range [0, 1]." +
" If threshold and thresholds are both set, they must match.")
@@ -93,9 +92,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.LogisticRegression", self.uid)
#: param for threshold in binary classification, in range [0, 1].
- self.threshold = Param(self, "threshold",
-     "Threshold in binary classification prediction, in range [0, 1]." +
-     " If threshold and thresholds are both set, they must match.")
+ self.threshold = LogisticRegression.threshold._copy_new_parent(self)
Contributor: LogisticRegression.threshold is a placeholder to make the param appear in the generated doc. After this change it has more uses; should we also change the annotation of that dummy param?

Contributor (Author): Oh, good point - I'll remove the comments about them just being dummy params.

Member: I hope we can eliminate the need for these calls in concrete classes' __init__ methods.

self._setDefault(maxIter=100, regParam=0.1, tol=1E-6, threshold=0.5)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
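
One way the reviewer's hope above could be realized (a hypothetical sketch reusing the simplified Param from the introduction, not code from this PR): let the shared base class walk its class attributes once and re-parent every placeholder automatically, so concrete __init__ methods need no per-param calls at all:

    class Params(object):
        """Base-class sketch: copy every class-level placeholder Param onto
        the instance during construction, removing per-param boilerplate."""

        def __init__(self):
            for name in dir(type(self)):
                attr = getattr(type(self), name)
                if isinstance(attr, Param):
                    # The class attribute stays a placeholder; the instance
                    # gets its own re-parented copy under the same name.
                    setattr(self, name, attr._copy_new_parent(self))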
@@ -232,7 +229,6 @@ class TreeClassifierParams(object):
"""
supportedImpurities = ["entropy", "gini"]

- # a placeholder to make it appear in the generated doc
impurity = Param(Params._dummy(), "impurity",
"Criterion used for information gain calculation (case-insensitive). " +
"Supported options: " +
@@ -241,9 +237,7 @@ def __init__(self):
def __init__(self):
super(TreeClassifierParams, self).__init__()
#: param for Criterion used for information gain calculation (case-insensitive).
- self.impurity = Param(self, "impurity", "Criterion used for information " +
-     "gain calculation (case-insensitive). Supported options: " +
-     ", ".join(self.supportedImpurities))
+ self.impurity = TreeClassifierParams.impurity._copy_new_parent(self)

@since("1.6.0")
def setImpurity(self, value):
@@ -485,7 +479,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
.. versionadded:: 1.4.0
"""

- # a placeholder to make it appear in the generated doc
lossType = Param(Params._dummy(), "lossType",
"Loss function which GBT tries to minimize (case-insensitive). " +
"Supported options: " + ", ".join(GBTParams.supportedLossTypes))
@@ -505,9 +498,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.GBTClassifier", self.uid)
#: param for Loss function which GBT tries to minimize (case-insensitive).
- self.lossType = Param(self, "lossType",
-     "Loss function which GBT tries to minimize (case-insensitive). " +
-     "Supported options: " + ", ".join(GBTParams.supportedLossTypes))
+ self.lossType = GBTClassifier.lossType._copy_new_parent(self)
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
lossType="logistic", maxIter=20, stepSize=0.1)
@@ -597,7 +588,6 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
.. versionadded:: 1.5.0
"""

- # a placeholder to make it appear in the generated doc
smoothing = Param(Params._dummy(), "smoothing", "The smoothing parameter, should be >= 0, " +
"default is 1.0")
modelType = Param(Params._dummy(), "modelType", "The model type which is a string " +
@@ -616,12 +606,9 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.NaiveBayes", self.uid)
#: param for the smoothing parameter.
- self.smoothing = Param(self, "smoothing", "The smoothing parameter, should be >= 0, " +
-     "default is 1.0")
+ self.smoothing = NaiveBayes.smoothing._copy_new_parent(self)
#: param for the model type.
- self.modelType = Param(self, "modelType", "The model type which is a string " +
-     "(case-sensitive). Supported options: multinomial (default) " +
-     "and bernoulli.")
+ self.modelType = NaiveBayes.modelType._copy_new_parent(self)
self._setDefault(smoothing=1.0, modelType="multinomial")
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@@ -734,7 +721,6 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
.. versionadded:: 1.6.0
"""

- # a placeholder to make it appear in the generated doc
layers = Param(Params._dummy(), "layers", "Sizes of layers from input layer to output layer " +
"E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with 100 " +
"neurons and output layer of 10 neurons, default is [1, 1].")
@@ -753,14 +739,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
super(MultilayerPerceptronClassifier, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
- self.layers = Param(self, "layers", "Sizes of layers from input layer to output layer " +
-     "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with " +
-     "100 neurons and output layer of 10 neurons, default is [1, 1].")
- self.blockSize = Param(self, "blockSize", "Block size for stacking input data in " +
-     "matrices. Data is stacked within partitions. If block size is " +
-     "more than remaining data in a partition then it is adjusted to " +
-     "the size of this data. Recommended size is between 10 and 1000, " +
-     "default is 128.")
+ self.layers = MultilayerPerceptronClassifier.layers._copy_new_parent(self)
+ self.blockSize = MultilayerPerceptronClassifier.blockSize._copy_new_parent(self)
self._setDefault(maxIter=100, tol=1E-4, layers=[1, 1], blockSize=128)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
10 changes: 3 additions & 7 deletions python/pyspark/ml/clustering.py
@@ -73,7 +73,6 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol
.. versionadded:: 1.5.0
"""

- # a placeholder to make it appear in the generated doc
k = Param(Params._dummy(), "k", "number of clusters to create")
initMode = Param(Params._dummy(), "initMode",
"the initialization algorithm. This can be either \"random\" to " +
@@ -90,12 +89,9 @@ def __init__(self, featuresCol="features", predictionCol="prediction", k=2,
"""
super(KMeans, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.KMeans", self.uid)
- self.k = Param(self, "k", "number of clusters to create")
- self.initMode = Param(self, "initMode",
-     "the initialization algorithm. This can be either \"random\" to " +
-     "choose random points as initial cluster centers, or \"k-means||\" " +
-     "to use a parallel variant of k-means++")
- self.initSteps = Param(self, "initSteps", "steps for k-means initialization mode")
+ self.k = KMeans.k._copy_new_parent(self)
+ self.initMode = KMeans.initMode._copy_new_parent(self)
+ self.initSteps = KMeans.initSteps._copy_new_parent(self)
self._setDefault(k=2, initMode="k-means||", initSteps=5, tol=1e-4, maxIter=20)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
12 changes: 3 additions & 9 deletions python/pyspark/ml/evaluation.py
@@ -124,7 +124,6 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction
.. versionadded:: 1.4.0
"""

- # a placeholder to make it appear in the generated doc
metricName = Param(Params._dummy(), "metricName",
"metric name in evaluation (areaUnderROC|areaUnderPR)")

@@ -139,8 +138,7 @@ def __init__(self, rawPredictionCol="rawPrediction", labelCol="label",
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.BinaryClassificationEvaluator", self.uid)
#: param for metric name in evaluation (areaUnderROC|areaUnderPR)
- self.metricName = Param(self, "metricName",
-     "metric name in evaluation (areaUnderROC|areaUnderPR)")
+ self.metricName = BinaryClassificationEvaluator.metricName._copy_new_parent(self)
self._setDefault(rawPredictionCol="rawPrediction", labelCol="label",
metricName="areaUnderROC")
kwargs = self.__init__._input_kwargs
@@ -211,8 +209,7 @@ def __init__(self, predictionCol="prediction", labelCol="label",
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.RegressionEvaluator", self.uid)
#: param for metric name in evaluation (mse|rmse|r2|mae)
- self.metricName = Param(self, "metricName",
-     "metric name in evaluation (mse|rmse|r2|mae)")
+ self.metricName = RegressionEvaluator.metricName._copy_new_parent(self)
self._setDefault(predictionCol="prediction", labelCol="label",
metricName="rmse")
kwargs = self.__init__._input_kwargs
@@ -265,7 +262,6 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio

.. versionadded:: 1.5.0
"""
- # a placeholder to make it appear in the generated doc
metricName = Param(Params._dummy(), "metricName",
"metric name in evaluation "
"(f1|precision|recall|weightedPrecision|weightedRecall)")
@@ -281,9 +277,7 @@ def __init__(self, predictionCol="prediction", labelCol="label",
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator", self.uid)
# param for metric name in evaluation (f1|precision|recall|weightedPrecision|weightedRecall)
- self.metricName = Param(self, "metricName",
-     "metric name in evaluation"
-     " (f1|precision|recall|weightedPrecision|weightedRecall)")
+ self.metricName = MulticlassClassificationEvaluator.metricName._copy_new_parent(self)
self._setDefault(predictionCol="prediction", labelCol="label",
metricName="f1")
kwargs = self.__init__._input_kwargs
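Taken together, the net effect across these files: every estimator and evaluator declares its params once at class level and copies them once in __init__. A hypothetical check of the resulting behavior (semantics follow the sketches above, not necessarily pyspark's exact internal representation):

    lr1 = LogisticRegression()
    lr2 = LogisticRegression()
    assert lr1.threshold is not lr2.threshold        # each instance owns its copy
    assert lr1.threshold.name == "threshold"         # name and doc are preserved
    assert lr1.threshold.doc == LogisticRegression.threshold.doc
    assert lr1.threshold.parent is lr1               # re-parented (sketch semantics)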