From fd7c005d6536ca996acd0c1e659d5cec9fd5223a Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Mon, 13 Jul 2015 17:08:43 -0700 Subject: [PATCH 1/4] [SPARK-8924] Added @since tags to mllib.tree --- .../spark/mllib/tree/DecisionTree.scala | 13 +++++++ .../mllib/tree/GradientBoostedTrees.scala | 9 +++++ .../spark/mllib/tree/RandomForest.scala | 10 +++++ .../spark/mllib/tree/configuration/Algo.scala | 1 + .../tree/configuration/BoostingStrategy.scala | 6 +++ .../tree/configuration/FeatureType.scala | 1 + .../tree/configuration/QuantileStrategy.scala | 1 + .../mllib/tree/configuration/Strategy.scala | 20 +++++++++- .../spark/mllib/tree/impurity/Entropy.scala | 4 ++ .../spark/mllib/tree/impurity/Gini.scala | 4 ++ .../spark/mllib/tree/impurity/Impurity.scala | 3 ++ .../spark/mllib/tree/impurity/Variance.scala | 4 ++ .../spark/mllib/tree/loss/AbsoluteError.scala | 2 + .../spark/mllib/tree/loss/LogLoss.scala | 2 + .../apache/spark/mllib/tree/loss/Loss.scala | 3 ++ .../apache/spark/mllib/tree/loss/Losses.scala | 6 +++ .../spark/mllib/tree/loss/SquaredError.scala | 2 + .../mllib/tree/model/DecisionTreeModel.scala | 27 ++++++++++++++ .../tree/model/InformationGainStats.scala | 10 +++++ .../apache/spark/mllib/tree/model/Node.scala | 6 +++ .../spark/mllib/tree/model/Predict.scala | 10 +++++ .../apache/spark/mllib/tree/model/Split.scala | 4 ++ .../mllib/tree/model/treeEnsembleModels.scala | 37 +++++++++++++++++++ .../org/apache/spark/mllib/tree/package.scala | 1 + 24 files changed, 185 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index cecd1fed896d5..e5200b86fddd4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -43,6 +43,7 @@ import org.apache.spark.util.random.XORShiftRandom * @param strategy The configuration parameters for the tree algorithm which specify the type * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. + * @since 1.0.0 */ @Experimental class DecisionTree (private val strategy: Strategy) extends Serializable with Logging { @@ -53,6 +54,7 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo * Method to train a decision tree model over an RDD * @param input Training data: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]] * @return DecisionTreeModel that can be used for prediction + * @since 1.2.0 */ def run(input: RDD[LabeledPoint]): DecisionTreeModel = { // Note: random seed will not be used since numTrees = 1. @@ -62,6 +64,9 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo } } +/** + * @since 1.0.0 + */ object DecisionTree extends Serializable with Logging { /** @@ -79,6 +84,7 @@ object DecisionTree extends Serializable with Logging { * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. * @return DecisionTreeModel that can be used for prediction + * @since 1.0.0 */ def train(input: RDD[LabeledPoint], strategy: Strategy): DecisionTreeModel = { new DecisionTree(strategy).run(input) @@ -100,6 +106,7 @@ object DecisionTree extends Serializable with Logging { * @param maxDepth Maximum depth of the tree. * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. 
* @return DecisionTreeModel that can be used for prediction + * @since 1.0.0 */ def train( input: RDD[LabeledPoint], @@ -127,6 +134,7 @@ object DecisionTree extends Serializable with Logging { * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. * @param numClasses number of classes for classification. Default value of 2. * @return DecisionTreeModel that can be used for prediction + * @since 1.2.0 */ def train( input: RDD[LabeledPoint], @@ -160,6 +168,7 @@ object DecisionTree extends Serializable with Logging { * E.g., an entry (n -> k) indicates that feature n is categorical * with k categories indexed from 0: {0, 1, ..., k-1}. * @return DecisionTreeModel that can be used for prediction + * @since 1.0.0 */ def train( input: RDD[LabeledPoint], @@ -192,6 +201,7 @@ object DecisionTree extends Serializable with Logging { * @param maxBins maximum number of bins used for splitting features * (suggested value: 32) * @return DecisionTreeModel that can be used for prediction + * @since 1.1.0 */ def trainClassifier( input: RDD[LabeledPoint], @@ -207,6 +217,7 @@ object DecisionTree extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] + * @since 1.1.0 */ def trainClassifier( input: JavaRDD[LabeledPoint], @@ -236,6 +247,7 @@ object DecisionTree extends Serializable with Logging { * @param maxBins maximum number of bins used for splitting features * (suggested value: 32) * @return DecisionTreeModel that can be used for prediction + * @since 1.1.0 */ def trainRegressor( input: RDD[LabeledPoint], @@ -249,6 +261,7 @@ object DecisionTree extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] + * @since 1.1.0 */ def trainRegressor( input: JavaRDD[LabeledPoint], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala index a835f96d5d0e3..272462b6f6e3c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala @@ -47,6 +47,7 @@ import org.apache.spark.storage.StorageLevel * for other loss functions. * * @param boostingStrategy Parameters for the gradient boosting algorithm. + * @since 1.2.0 */ @Experimental class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) @@ -73,6 +74,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#run]]. + * @since 1.2.0 */ def run(input: JavaRDD[LabeledPoint]): GradientBoostedTreesModel = { run(input.rdd) @@ -87,6 +89,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) * E.g., these two datasets could be created from an original dataset * by using [[org.apache.spark.rdd.RDD.randomSplit()]] * @return a gradient boosted trees model that can be used for prediction + * @since 1.4.0 */ def runWithValidation( input: RDD[LabeledPoint], @@ -110,6 +113,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#runWithValidation]]. 
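For reference, a minimal usage sketch of the `DecisionTree.trainClassifier` overload annotated above; the SparkContext `sc`, the input path, and all parameter values are illustrative assumptions:

```scala
import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.mllib.util.MLUtils

// Load training data in LIBSVM format (hypothetical path).
val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
// Binary classification with no categorical features, using the
// suggested maxBins value from the scaladoc above.
val model = DecisionTree.trainClassifier(data, numClasses = 2,
  categoricalFeaturesInfo = Map[Int, Int](), impurity = "gini",
  maxDepth = 5, maxBins = 32)
```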
+ * @since 1.4.0 */ def runWithValidation( input: JavaRDD[LabeledPoint], @@ -118,6 +122,9 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) } } +/** + * @since 1.2.0 + */ object GradientBoostedTrees extends Logging { /** @@ -128,6 +135,7 @@ object GradientBoostedTrees extends Logging { * For regression, labels are real numbers. * @param boostingStrategy Configuration options for the boosting algorithm. * @return a gradient boosted trees model that can be used for prediction + * @since 1.2.0 */ def train( input: RDD[LabeledPoint], @@ -137,6 +145,7 @@ object GradientBoostedTrees extends Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees$#train]] + * @since 1.2.0 */ def train( input: JavaRDD[LabeledPoint], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala index 069959976a188..9f3230656acc5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala @@ -260,6 +260,9 @@ private class RandomForest ( } +/** + * @since 1.2.0 + */ object RandomForest extends Serializable with Logging { /** @@ -276,6 +279,7 @@ object RandomForest extends Serializable with Logging { * if numTrees > 1 (forest) set to "sqrt". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction + * @since 1.2.0 */ def trainClassifier( input: RDD[LabeledPoint], @@ -313,6 +317,7 @@ object RandomForest extends Serializable with Logging { * (suggested value: 100) * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction + * @since 1.2.0 */ def trainClassifier( input: RDD[LabeledPoint], @@ -332,6 +337,7 @@ object RandomForest extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainClassifier]] + * @since 1.2.0 */ def trainClassifier( input: JavaRDD[LabeledPoint], @@ -362,6 +368,7 @@ object RandomForest extends Serializable with Logging { * if numTrees > 1 (forest) set to "onethird". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction + * @since 1.2.0 */ def trainRegressor( input: RDD[LabeledPoint], @@ -398,6 +405,7 @@ object RandomForest extends Serializable with Logging { * (suggested value: 100) * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction + * @since 1.2.0 */ def trainRegressor( input: RDD[LabeledPoint], @@ -416,6 +424,7 @@ object RandomForest extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainRegressor]] + * @since 1.2.0 */ def trainRegressor( input: JavaRDD[LabeledPoint], @@ -433,6 +442,7 @@ object RandomForest extends Serializable with Logging { /** * List of supported feature subset sampling strategies. 
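A companion sketch for `RandomForest.trainClassifier`, reusing `data` from the previous sketch and picking `featureSubsetStrategy` from the supported values listed above:

```scala
import org.apache.spark.mllib.tree.RandomForest

// "auto" resolves to "sqrt" for classification when numTrees > 1, per the scaladoc above.
val forest = RandomForest.trainClassifier(data, numClasses = 2,
  categoricalFeaturesInfo = Map[Int, Int](), numTrees = 100,
  featureSubsetStrategy = "auto", impurity = "gini",
  maxDepth = 4, maxBins = 32, seed = 12345)
```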
+ * @since 1.2.0 */ val supportedFeatureSubsetStrategies: Array[String] = Array("auto", "all", "sqrt", "log2", "onethird") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index b6099259971b7..d9a49aa71fcfb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: * Enum to select the algorithm for the decision tree + * @since 1.0.0 */ @Experimental object Algo extends Enumeration { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala index 2d6b01524ff3d..c17d17966f30a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala @@ -37,6 +37,7 @@ import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss} * @param validationTol Useful when runWithValidation is used. If the error rate on the * validation input between two iterations is less than the validationTol * then stop. Ignored when [[run]] is used. + * @since 1.2.0 */ @Experimental case class BoostingStrategy( @@ -69,6 +70,9 @@ case class BoostingStrategy( } } +/** + * @since 1.2.0 + */ @Experimental object BoostingStrategy { @@ -76,6 +80,7 @@ object BoostingStrategy { * Returns default configuration for the boosting algorithm * @param algo Learning goal. Supported: "Classification" or "Regression" * @return Configuration for boosting algorithm + * @since 1.2.0 */ def defaultParams(algo: String): BoostingStrategy = { defaultParams(Algo.fromString(algo)) @@ -87,6 +92,7 @@ object BoostingStrategy { * [[org.apache.spark.mllib.tree.configuration.Algo.Classification]], * [[org.apache.spark.mllib.tree.configuration.Algo.Regression]] * @return Configuration for boosting algorithm + * @since 1.3.0 */ def defaultParams(algo: Algo): BoostingStrategy = { val treeStrategy = Strategy.defaultStategy(algo) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala index f4c877232750f..0684cafa486bd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: * Enum to describe whether a feature is "continuous" or "categorical" + * @since 1.0.0 */ @Experimental object FeatureType extends Enumeration { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala index 7da976e55a722..2daa63c4d2771 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: * Enum for selecting the quantile calculation strategy + * @since 1.0.0 */ @Experimental object QuantileStrategy extends Enumeration { diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index ada227c200a79..4679959ef4fee 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -66,6 +66,7 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ * E.g. 10 means that the cache will get checkpointed every 10 updates. If * the checkpoint directory is not set in * [[org.apache.spark.SparkContext]], this setting is ignored. + * @since 1.0.0 */ @Experimental class Strategy ( @@ -83,16 +84,23 @@ class Strategy ( @BeanProperty var useNodeIdCache: Boolean = false, @BeanProperty var checkpointInterval: Int = 10) extends Serializable { + /** + * @since 1.2.0 + */ def isMulticlassClassification: Boolean = { algo == Classification && numClasses > 2 } + /** + * @since 1.2.0 + */ def isMulticlassWithCategoricalFeatures: Boolean = { isMulticlassClassification && (categoricalFeaturesInfo.size > 0) } /** * Java-friendly constructor for [[org.apache.spark.mllib.tree.configuration.Strategy]] + * @since 1.1.0 */ def this( algo: Algo, @@ -107,6 +115,7 @@ class Strategy ( /** * Sets Algorithm using a String. + * @since 1.2.0 */ def setAlgo(algo: String): Unit = algo match { case "Classification" => setAlgo(Classification) @@ -115,6 +124,7 @@ class Strategy ( /** * Sets categoricalFeaturesInfo using a Java Map. + * @since 1.2.0 */ def setCategoricalFeaturesInfo( categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]): Unit = { @@ -162,7 +172,10 @@ class Strategy ( s"$subsamplingRate") } - /** Returns a shallow copy of this instance. */ + /** + * Returns a shallow copy of this instance. + * @since 1.2.0 + */ def copy: Strategy = { new Strategy(algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo, minInstancesPerNode, minInfoGain, @@ -170,12 +183,16 @@ class Strategy ( } } +/** + * @since 1.2.0 + */ @Experimental object Strategy { /** * Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]] * @param algo "Classification" or "Regression" + * @since 1.2.0 */ def defaultStrategy(algo: String): Strategy = { defaultStategy(Algo.fromString(algo)) @@ -184,6 +201,7 @@ object Strategy { /** * Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]] * @param algo Algo.Classification or Algo.Regression + * @since 1.3.0 */ def defaultStategy(algo: Algo): Strategy = algo match { case Algo.Classification => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index 5ac10f3fd32dd..a5f12578358ae 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -23,6 +23,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} * :: Experimental :: * Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during * binary classification. 
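The configuration factories annotated in the Strategy hunks above pair naturally with the mutable `@BeanProperty` fields; a small sketch with arbitrary values:

```scala
import org.apache.spark.mllib.tree.configuration.{BoostingStrategy, Strategy}

// Start from the defaults, then tweak individual fields.
val treeStrategy = Strategy.defaultStrategy("Classification")
treeStrategy.maxDepth = 4

val boostingStrategy = BoostingStrategy.defaultParams("Regression")
boostingStrategy.numIterations = 10
boostingStrategy.treeStrategy.maxBins = 64
```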
+ * @since 1.0.0 */ @Experimental object Entropy extends Impurity { @@ -35,6 +36,7 @@ object Entropy extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 + * @since 1.1.0 */ @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = { @@ -62,6 +64,7 @@ object Entropy extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 + * @since 1.0.0 */ @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = @@ -70,6 +73,7 @@ object Entropy extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. + * @since 1.1.0 */ def instance: this.type = this diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index 19d318203c344..3cba65a67aa45 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -24,6 +24,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} * Class for calculating the * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] * during binary classification. + * @since 1.0.0 */ @Experimental object Gini extends Impurity { @@ -34,6 +35,7 @@ object Gini extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 + * @since 1.1.0 */ @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = { @@ -58,6 +60,7 @@ object Gini extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 + * @since 1.0.0 */ @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = @@ -66,6 +69,7 @@ object Gini extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. + * @since 1.1.0 */ def instance: this.type = this diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index 72eb24c49264a..1b1790b0ddeff 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -25,6 +25,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} * This trait is used for * (a) setting the impurity parameter in [[org.apache.spark.mllib.tree.configuration.Strategy]] * (b) calculating impurity values from sufficient statistics. 
+ * @since 1.0.0 */ @Experimental trait Impurity extends Serializable { @@ -35,6 +36,7 @@ trait Impurity extends Serializable { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 + * @since 1.1.0 */ @DeveloperApi def calculate(counts: Array[Double], totalCount: Double): Double @@ -46,6 +48,7 @@ trait Impurity extends Serializable { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 + * @since 1.0.0 */ @DeveloperApi def calculate(count: Double, sum: Double, sumSquares: Double): Double diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index 7104a7fa4dd4c..df6c568f07eb2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} /** * :: Experimental :: * Class for calculating variance during regression + * @since 1.0.0 */ @Experimental object Variance extends Impurity { @@ -32,6 +33,7 @@ object Variance extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 + * @since 1.1.0 */ @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = @@ -44,6 +46,7 @@ object Variance extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 + * @since 1.0.0 */ @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = { @@ -57,6 +60,7 @@ object Variance extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. + * @since 1.0.0 */ def instance: this.type = this diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala index 2bdef73c4a8f1..c6e3d0d824dd7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala @@ -29,6 +29,7 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel * The absolute (L1) error is defined as: * |y - F(x)| * where y is the label and F(x) is the model prediction for features x. + * @since 1.2.0 */ @DeveloperApi object AbsoluteError extends Loss { @@ -40,6 +41,7 @@ object AbsoluteError extends Loss { * @param prediction Predicted label. * @param label True label. * @return Loss gradient + * @since 1.2.0 */ override def gradient(prediction: Double, label: Double): Double = { if (label - prediction < 0) 1.0 else -1.0 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala index 778c24526de70..eee58445a1ec1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala @@ -31,6 +31,7 @@ import org.apache.spark.mllib.util.MLUtils * The log loss is defined as: * 2 log(1 + exp(-2 y F(x))) * where y is a label in {-1, 1} and F(x) is the model prediction for features x. 
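The gradient implemented just below follows from differentiating this loss with respect to the prediction: d/dF [ 2 log(1 + exp(-2 y F)) ] = -4 y / (1 + exp(2 y F)). A self-contained finite-difference check, with illustrative values for the label and margin:

```scala
// Compare a central difference against the closed form used by LogLoss.gradient.
val (y, f, eps) = (1.0, 0.3, 1e-6)
def loss(m: Double): Double = 2.0 * math.log(1.0 + math.exp(-2.0 * y * m))
val numeric = (loss(f + eps) - loss(f - eps)) / (2.0 * eps)
val analytic = -4.0 * y / (1.0 + math.exp(2.0 * y * f))
assert(math.abs(numeric - analytic) < 1e-6)
```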
+ * @since 1.2.0 */ @DeveloperApi object LogLoss extends Loss { @@ -42,6 +43,7 @@ object LogLoss extends Loss { * @param prediction Predicted label. * @param label True label. * @return Loss gradient + * @since 1.2.0 */ override def gradient(prediction: Double, label: Double): Double = { - 4.0 * label / (1.0 + math.exp(2.0 * label * prediction)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala index 64ffccbce073f..7c9fb924645c8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD /** * :: DeveloperApi :: * Trait for adding "pluggable" loss functions for the gradient boosting algorithm. + * @since 1.2.0 */ @DeveloperApi trait Loss extends Serializable { @@ -35,6 +36,7 @@ trait Loss extends Serializable { * @param prediction Predicted feature * @param label true label. * @return Loss gradient. + * @since 1.2.0 */ def gradient(prediction: Double, label: Double): Double @@ -45,6 +47,7 @@ trait Loss extends Serializable { * @param model Model of the weak learner. * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * @return Measure of model error on data + * @since 1.2.0 */ def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = { data.map(point => computeError(model.predict(point.features), point.label)).mean() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala index 42c9ead9884b4..47dc94cde7e03 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala @@ -17,8 +17,14 @@ package org.apache.spark.mllib.tree.loss +/** + * @since 1.2.0 + */ object Losses { + /** + * @since 1.2.0 + */ def fromString(name: String): Loss = name match { case "leastSquaresError" => SquaredError case "leastAbsoluteError" => AbsoluteError diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala index a5582d3ef3324..a8473594b8810 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala @@ -29,6 +29,7 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel * The squared (L2) error is defined as: * (y - F(x))**2 * where y is the label and F(x) is the model prediction for features x. + * @since 1.2.0 */ @DeveloperApi object SquaredError extends Loss { @@ -40,6 +41,7 @@ object SquaredError extends Loss { * @param prediction Predicted label. * @param label True label. * @return Loss gradient + * @since 1.2.0 */ override def gradient(prediction: Double, label: Double): Double = { 2.0 * (prediction - label) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index f2c78bbabff0b..fab296bd9188d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -40,6 +40,7 @@ import org.apache.spark.util.Utils * This model stores the decision tree structure and parameters. 
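Looking back at the `Losses.fromString` factory annotated above: the string keys map to the singleton loss objects, which can be checked directly:

```scala
import org.apache.spark.mllib.tree.loss.{AbsoluteError, Losses, SquaredError}

// Names exactly as matched in Losses.fromString above.
assert(Losses.fromString("leastSquaresError") == SquaredError)
assert(Losses.fromString("leastAbsoluteError") == AbsoluteError)
```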
* @param topNode root node * @param algo algorithm type -- classification or regression + * @since 1.0.0 */ @Experimental class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable with Saveable { @@ -49,6 +50,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features array representing a single data point * @return Double prediction from the trained model + * @since 1.0.0 */ def predict(features: Vector): Double = { topNode.predict(features) @@ -59,6 +61,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features RDD representing data points to be predicted * @return RDD of predictions for each of the given data points + * @since 1.0.0 */ def predict(features: RDD[Vector]): RDD[Double] = { features.map(x => predict(x)) @@ -69,6 +72,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features JavaRDD representing data points to be predicted * @return JavaRDD of predictions for each of the given data points + * @since 1.2.0 */ def predict(features: JavaRDD[Vector]): JavaRDD[Double] = { predict(features.rdd) @@ -76,6 +80,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Get number of nodes in tree, including leaf nodes. + * @since 1.1.0 */ def numNodes: Int = { 1 + topNode.numDescendants @@ -84,6 +89,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Get depth of tree. * E.g.: Depth 0 means 1 leaf node. Depth 1 means 1 internal node and 2 leaf nodes. + * @since 1.1.0 */ def depth: Int = { topNode.subtreeDepth @@ -91,6 +97,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Print a summary of the model. + * @since 1.1.0 */ override def toString: String = algo match { case Classification => @@ -103,19 +110,32 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Print the full model to a string. + * @since 1.2.0 */ def toDebugString: String = { val header = toString + "\n" header + topNode.subtreeToString(2) } + /** + * @param sc Spark context used to save model data. + * @param path Path specifying the directory in which to save this model. + * If the directory already exists, this method throws an exception. + * @since 1.3.0 + */ override def save(sc: SparkContext, path: String): Unit = { DecisionTreeModel.SaveLoadV1_0.save(sc, path, this) } + /** + * @since 1.3.0 + */ override protected def formatVersion: String = DecisionTreeModel.formatVersion } +/** + * @since 1.3.0 + */ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { private[spark] def formatVersion: String = "1.0" @@ -297,6 +317,13 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { } } + /** + * + * @param sc Spark context used for loading model files. + * @param path Path specifying the directory to which the model was saved. 
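A sketch of the prediction and persistence APIs annotated above, reusing `model`, `data`, and `sc` from the earlier sketches (the save path is hypothetical):

```scala
import org.apache.spark.mllib.tree.model.DecisionTreeModel

// Score every point, then round-trip the model through save/load.
val predictions = data.map(p => model.predict(p.features))
model.save(sc, "target/tmp/decisionTreeModel") // throws if the directory already exists
val reloaded = DecisionTreeModel.load(sc, "target/tmp/decisionTreeModel")
```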
+ * @return Model instance + * @since 1.3.0 + */ override def load(sc: SparkContext, path: String): DecisionTreeModel = { implicit val formats = DefaultFormats val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala index 2d087c967f679..d8589e7bcc425 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala @@ -28,6 +28,7 @@ import org.apache.spark.annotation.DeveloperApi * @param rightImpurity right node impurity * @param leftPredict left node predict * @param rightPredict right node predict + * @since 1.0.0 */ @DeveloperApi class InformationGainStats( @@ -38,11 +39,17 @@ class InformationGainStats( val leftPredict: Predict, val rightPredict: Predict) extends Serializable { + /** + * @since 1.0.0 + */ override def toString: String = { s"gain = $gain, impurity = $impurity, left impurity = $leftImpurity, " + s"right impurity = $rightImpurity" } + /** + * @since 1.2.0 + */ override def equals(o: Any): Boolean = o match { case other: InformationGainStats => gain == other.gain && @@ -55,6 +62,9 @@ class InformationGainStats( case _ => false } + /** + * @since 1.4.0 + */ override def hashCode: Int = { com.google.common.base.Objects.hashCode( gain: java.lang.Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index a6d1398fc267b..f52c93be3e432 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -38,6 +38,7 @@ import org.apache.spark.mllib.linalg.Vector * @param leftNode left child * @param rightNode right child * @param stats information gain stats + * @since 1.0.0 */ @DeveloperApi class Node ( @@ -50,6 +51,9 @@ class Node ( var rightNode: Option[Node], var stats: Option[InformationGainStats]) extends Serializable with Logging { + /** + * @since 1.0.0 + */ override def toString: String = { s"id = $id, isLeaf = $isLeaf, predict = $predict, impurity = $impurity, " + s"split = $split, stats = $stats" @@ -58,6 +62,7 @@ class Node ( /** * build the left node and right nodes if not leaf * @param nodes array of nodes + * @since 1.0.0 */ @deprecated("build should no longer be used since trees are constructed on-the-fly in training", "1.2.0") @@ -79,6 +84,7 @@ class Node ( * predict value if node is not leaf * @param features feature value * @return predicted value + * @since 1.0.0 */ def predict(features: Vector) : Double = { if (isLeaf) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala index 5cbe7c280dbee..b48556c0001b8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala @@ -23,14 +23,21 @@ import org.apache.spark.annotation.DeveloperApi * Predicted value for a node * @param predict predicted value * @param prob probability of the label (classification only) + * @since 1.2.0 */ @DeveloperApi class Predict( val predict: Double, val prob: Double = 0.0) extends Serializable { + /** + * @since 1.2.0 + */ override def toString: String = s"$predict (prob = $prob)" + /** + * @since 1.3.0 
+ */ override def equals(other: Any): Boolean = { other match { case p: Predict => predict == p.predict && prob == p.prob @@ -38,6 +45,9 @@ class Predict( } } + /** + * @since 1.4.0 + */ override def hashCode: Int = { com.google.common.base.Objects.hashCode(predict: java.lang.Double, prob: java.lang.Double) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index be6c9b3de5479..9a0e029019597 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -30,6 +30,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType * Split left if feature <= threshold, else right. * @param featureType type of feature -- categorical or continuous * @param categories Split left if categorical feature value is in this set, else right. + * @since 1.0.0 */ @DeveloperApi case class Split( @@ -38,6 +39,9 @@ case class Split( featureType: FeatureType, categories: List[Double]) { + /** + * @since 1.0.0 + */ override def toString: String = { s"Feature = $feature, threshold = $threshold, featureType = $featureType, " + s"categories = $categories" diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 905c5fb42bd44..0c629b12a84df 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -45,6 +45,7 @@ import org.apache.spark.util.Utils * * @param algo algorithm for the ensemble model, either Classification or Regression * @param trees tree ensembles + * @since 1.2.0 */ @Experimental class RandomForestModel(override val algo: Algo, override val trees: Array[DecisionTreeModel]) @@ -54,6 +55,13 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis require(trees.forall(_.algo == algo)) + /** + * + * @param sc Spark context used to save model data. + * @param path Path specifying the directory in which to save this model. + * If the directory already exists, this method throws an exception. + * @since 1.3.0 + */ override def save(sc: SparkContext, path: String): Unit = { TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this, RandomForestModel.SaveLoadV1_0.thisClassName) @@ -62,10 +70,20 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis override protected def formatVersion: String = RandomForestModel.formatVersion } +/** + * @since 1.3.0 + */ object RandomForestModel extends Loader[RandomForestModel] { private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion + /** + * + * @param sc Spark context used for loading model files. + * @param path Path specifying the directory to which the model was saved. 
+ * @return Model instance + * @since 1.3.0 + */ override def load(sc: SparkContext, path: String): RandomForestModel = { val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path) val classNameV1_0 = SaveLoadV1_0.thisClassName @@ -96,6 +114,7 @@ object RandomForestModel extends Loader[RandomForestModel] { * @param algo algorithm for the ensemble model, either Classification or Regression * @param trees tree ensembles * @param treeWeights tree ensemble weights + * @since 1.2.0 */ @Experimental class GradientBoostedTreesModel( @@ -107,6 +126,12 @@ class GradientBoostedTreesModel( require(trees.length == treeWeights.length) + /** + * @param sc Spark context used to save model data. + * @param path Path specifying the directory in which to save this model. + * If the directory already exists, this method throws an exception. + * @since 1.3.0 + */ override def save(sc: SparkContext, path: String): Unit = { TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this, GradientBoostedTreesModel.SaveLoadV1_0.thisClassName) @@ -118,6 +143,7 @@ class GradientBoostedTreesModel( * @param loss evaluation metric. * @return an array with index i having the losses or errors for the ensemble * containing the first i+1 trees + * @since 1.4.0 */ def evaluateEachIteration( data: RDD[LabeledPoint], @@ -159,6 +185,9 @@ class GradientBoostedTreesModel( override protected def formatVersion: String = GradientBoostedTreesModel.formatVersion } +/** + * @since 1.3.0 + */ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { /** @@ -170,6 +199,7 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { * @param loss: evaluation metric. * @return a RDD with each element being a zip of the prediction and error * corresponding to every sample. + * @since 1.4.0 */ def computeInitialPredictionAndError( data: RDD[LabeledPoint], @@ -193,6 +223,7 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { * @param loss: evaluation metric. * @return a RDD with each element being a zip of the prediction and error * corresponding to each sample. + * @since 1.4.0 */ def updatePredictionError( data: RDD[LabeledPoint], @@ -213,6 +244,12 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion + /** + * @param sc Spark context used for loading model files. + * @param path Path specifying the directory to which the model was saved. + * @return Model instance + * @since 1.3.0 + */ override def load(sc: SparkContext, path: String): GradientBoostedTreesModel = { val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path) val classNameV1_0 = SaveLoadV1_0.thisClassName diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala index bcaacc1b1f191..f520b3a1b7c72 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala @@ -24,6 +24,7 @@ package org.apache.spark.mllib * - information loss calculation with entropy and Gini for classification and * variance for regression, * - both continuous and categorical features. 
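Tying the annotated ensemble APIs together: a sketch that trains with a held-out set via `runWithValidation` and inspects per-iteration error, reusing `data` and `boostingStrategy` from earlier sketches (split weights are illustrative):

```scala
import org.apache.spark.mllib.tree.GradientBoostedTrees

val Array(trainingData, validationData) = data.randomSplit(Array(0.8, 0.2))
val gbtModel = new GradientBoostedTrees(boostingStrategy)
  .runWithValidation(trainingData, validationData)
// One entry per iteration: error of the ensemble built from the first i+1 trees.
val errorPerIteration = gbtModel.evaluateEachIteration(validationData, boostingStrategy.loss)
```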
+ * @since 1.0.0 */ package object tree { } From df4f8c5b072725b63c525ef4522bef7e43c56608 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 13 Aug 2015 12:32:54 -0700 Subject: [PATCH 2/4] resolved conflict with merge --- .../spark/mllib/tree/configuration/BoostingStrategy.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala index 0f88d69218453..88e5f57e9ab32 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala @@ -36,13 +36,9 @@ import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss} * learning rate should be between in the interval (0, 1] * @param validationTol Useful when runWithValidation is used. If the error rate on the * validation input between two iterations is less than the validationTol -<<<<<<< HEAD - * then stop. Ignored when [[run]] is used. - * @since 1.2.0 -======= * then stop. Ignored when * [[org.apache.spark.mllib.tree.GradientBoostedTrees.run()]] is used. ->>>>>>> master + * @since 1.2.0 */ @Experimental case class BoostingStrategy( From c7eadbd2481f3a7fe02a841714eaa63a7fc2d8d7 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 13 Aug 2015 13:01:46 -0700 Subject: [PATCH 3/4] [SPARK-8924] Fixed problems with since tags --- .../org/apache/spark/mllib/tree/GradientBoostedTrees.scala | 1 + .../org/apache/spark/mllib/tree/model/DecisionTreeModel.scala | 3 --- .../main/scala/org/apache/spark/mllib/tree/model/Node.scala | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala index e04101313e2a0..143617098637a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala @@ -58,6 +58,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) * Method to train a gradient boosting model * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. 
* @return a gradient boosted trees model that can be used for prediction + * @since 1.2.0 */ def run(input: RDD[LabeledPoint]): GradientBoostedTreesModel = { val algo = boostingStrategy.treeStrategy.algo diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index fab296bd9188d..749e860fa72c2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -127,9 +127,6 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable DecisionTreeModel.SaveLoadV1_0.save(sc, path, this) } - /** - * @since 1.3.0 - */ override protected def formatVersion: String = DecisionTreeModel.formatVersion } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index f52c93be3e432..a0d59919c2769 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -84,7 +84,7 @@ class Node ( * predict value if node is not leaf * @param features feature value * @return predicted value - * @since 1.0.0 + * @since 1.1.0 */ def predict(features: Vector) : Double = { if (isLeaf) { From 1e24dc5d16d6002c0bfb1d140660bcb3f1ae1cf6 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 13 Aug 2015 13:14:13 -0700 Subject: [PATCH 4/4] [SPARK-8924] Removed uneeded since tags for hashCode, equals, toString and toDebugString --- .../spark/mllib/tree/model/DecisionTreeModel.scala | 2 -- .../spark/mllib/tree/model/InformationGainStats.scala | 9 --------- .../scala/org/apache/spark/mllib/tree/model/Node.scala | 3 --- .../org/apache/spark/mllib/tree/model/Predict.scala | 9 --------- .../scala/org/apache/spark/mllib/tree/model/Split.scala | 3 --- 5 files changed, 26 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 749e860fa72c2..0f386a26601ce 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -97,7 +97,6 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Print a summary of the model. - * @since 1.1.0 */ override def toString: String = algo match { case Classification => @@ -110,7 +109,6 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Print the full model to a string. 
- * @since 1.2.0 */ def toDebugString: String = { val header = toString + "\n" diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala index 96cc4002cc7e1..23f0363639120 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala @@ -40,17 +40,11 @@ class InformationGainStats( val leftPredict: Predict, val rightPredict: Predict) extends Serializable { - /** - * @since 1.0.0 - */ override def toString: String = { s"gain = $gain, impurity = $impurity, left impurity = $leftImpurity, " + s"right impurity = $rightImpurity" } - /** - * @since 1.2.0 - */ override def equals(o: Any): Boolean = o match { case other: InformationGainStats => gain == other.gain && @@ -63,9 +57,6 @@ class InformationGainStats( case _ => false } - /** - * @since 1.4.0 - */ override def hashCode: Int = { com.google.common.base.Objects.hashCode( gain: java.lang.Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index a0d59919c2769..aca3350c2e535 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -51,9 +51,6 @@ class Node ( var rightNode: Option[Node], var stats: Option[InformationGainStats]) extends Serializable with Logging { - /** - * @since 1.0.0 - */ override def toString: String = { s"id = $id, isLeaf = $isLeaf, predict = $predict, impurity = $impurity, " + s"split = $split, stats = $stats" diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala index b48556c0001b8..be819b59e7048 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala @@ -30,14 +30,8 @@ class Predict( val predict: Double, val prob: Double = 0.0) extends Serializable { - /** - * @since 1.2.0 - */ override def toString: String = s"$predict (prob = $prob)" - /** - * @since 1.3.0 - */ override def equals(other: Any): Boolean = { other match { case p: Predict => predict == p.predict && prob == p.prob @@ -45,9 +39,6 @@ class Predict( } } - /** - * @since 1.4.0 - */ override def hashCode: Int = { com.google.common.base.Objects.hashCode(predict: java.lang.Double, prob: java.lang.Double) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index 9a0e029019597..18d40530aee1e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -39,9 +39,6 @@ case class Split( featureType: FeatureType, categories: List[Double]) { - /** - * @since 1.0.0 - */ override def toString: String = { s"Feature = $feature, threshold = $threshold, featureType = $featureType, " + s"categories = $categories"