update @Since annotation for mllib.classification
mengxr committed Aug 25, 2015
1 parent 69c9c17 commit b2dce80
Showing 5 changed files with 58 additions and 21 deletions.
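Note: @Since is Spark's internal Scala annotation recording the version in which a public API was first exposed; the doc build renders it as a "Since" tag, and this commit adds or corrects those stamps across mllib.classification. A rough sketch of the annotation's shape, reconstructed from memory of org.apache.spark.annotation and not part of this diff (the meta-annotations and private[spark] visibility here are assumptions):

package org.apache.spark.annotation

import scala.annotation.StaticAnnotation
import scala.annotation.meta._

// Records the Spark version in which the annotated definition was added.
// The meta-annotations let it attach to constructor parameters, fields,
// and generated bean accessors, which is how it lands on `val` parameters below.
@param @field @getter @setter @beanGetter @beanSetter
private[spark] class Since(version: String) extends StaticAnnotation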
mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -30,14 +30,15 @@ import org.apache.spark.rdd.RDD
* belongs. The categories are represented by double values: 0.0, 1.0, 2.0, etc.
*/
@Experimental
@Since("0.8.0")
trait ClassificationModel extends Serializable {
/**
* Predict values for the given data set using the model trained.
*
* @param testData RDD representing data points to be predicted
* @return an RDD[Double] where each entry contains the corresponding prediction
*/
@Since("0.8.0")
@Since("1.0.0")
def predict(testData: RDD[Vector]): RDD[Double]

/**
@@ -46,15 +47,15 @@ trait ClassificationModel extends Serializable {
* @param testData array representing a single data point
* @return predicted category from the trained model
*/
@Since("0.8.0")
@Since("1.0.0")
def predict(testData: Vector): Double

/**
* Predict values for examples stored in a JavaRDD.
* @param testData JavaRDD representing data points to be predicted
* @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
*/
@Since("0.8.0")
@Since("1.0.0")
def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
}
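For reference, the trait above leaves only the two Scala predict overloads abstract; the JavaRDD variant delegates to the RDD version. A minimal sketch of a custom implementation (SignModel and its decision rule are hypothetical, purely illustrative):

import org.apache.spark.mllib.classification.ClassificationModel
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD

// Toy model: predicts 1.0 when the first feature is positive, 0.0 otherwise.
class SignModel extends ClassificationModel {
  override def predict(testData: RDD[Vector]): RDD[Double] =
    testData.map(v => predict(v))
  override def predict(testData: Vector): Double =
    if (testData(0) > 0.0) 1.0 else 0.0
}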
mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -41,11 +41,12 @@ import org.apache.spark.rdd.RDD
* Multinomial Logistic Regression. By default, it is binary logistic regression
* so numClasses will be set to 2.
*/
-class LogisticRegressionModel (
-override val weights: Vector,
-override val intercept: Double,
-val numFeatures: Int,
-val numClasses: Int)
+@Since("0.8.0")
+class LogisticRegressionModel @Since("1.3.0") (
+@Since("1.0.0") override val weights: Vector,
+@Since("1.0.0") override val intercept: Double,
+@Since("1.3.0") val numFeatures: Int,
+@Since("1.3.0") val numClasses: Int)
extends GeneralizedLinearModel(weights, intercept) with ClassificationModel with Serializable
with Saveable with PMMLExportable {

@@ -75,6 +76,7 @@ class LogisticRegressionModel (
/**
* Constructs a [[LogisticRegressionModel]] with weights and intercept for binary classification.
*/
@Since("1.0.0")
def this(weights: Vector, intercept: Double) = this(weights, intercept, weights.size, 2)

private var threshold: Option[Double] = Some(0.5)
@@ -166,12 +168,12 @@ class LogisticRegressionModel (

override protected def formatVersion: String = "1.0"

@Since("1.4.0")
override def toString: String = {
s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}"
}
}

@Since("1.3.0")
object LogisticRegressionModel extends Loader[LogisticRegressionModel] {

@Since("1.3.0")
@@ -207,6 +209,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
* for a k-class multiclass classification problem.
* Using [[LogisticRegressionWithLBFGS]] is recommended over this.
*/
@Since("0.8.0")
class LogisticRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
@@ -216,6 +219,7 @@ class LogisticRegressionWithSGD private[mllib] (

private val gradient = new LogisticGradient()
private val updater = new SquaredL2Updater()
@Since("0.8.0")
override val optimizer = new GradientDescent(gradient, updater)
.setStepSize(stepSize)
.setNumIterations(numIterations)
@@ -227,6 +231,7 @@ class LogisticRegressionWithSGD private[mllib] (
* Construct a LogisticRegression object with default parameters: {stepSize: 1.0,
* numIterations: 100, regParam: 0.01, miniBatchFraction: 1.0}.
*/
@Since("0.8.0")
def this() = this(1.0, 100, 0.01, 1.0)

override protected[mllib] def createModel(weights: Vector, intercept: Double) = {
@@ -238,6 +243,7 @@ class LogisticRegressionWithSGD private[mllib] (
* Top-level methods for calling Logistic Regression using Stochastic Gradient Descent.
* NOTE: Labels used in Logistic Regression should be {0, 1}
*/
@Since("0.8.0")
object LogisticRegressionWithSGD {
// NOTE(shivaram): We use multiple train methods instead of default arguments to support
// Java programs.
@@ -333,11 +339,13 @@ object LogisticRegressionWithSGD {
* NOTE: Labels used in Logistic Regression should be {0, 1, ..., k - 1}
* for a k-class multiclass classification problem.
*/
@Since("1.1.0")
class LogisticRegressionWithLBFGS
extends GeneralizedLinearAlgorithm[LogisticRegressionModel] with Serializable {

this.setFeatureScaling(true)

@Since("1.1.0")
override val optimizer = new LBFGS(new LogisticGradient, new SquaredL2Updater)

override protected val validators = List(multiLabelValidator)
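Since the scaladoc above recommends LogisticRegressionWithLBFGS over the SGD variant, a short usage sketch (assumes an active SparkContext named sc; the toy data is made up):

import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

val training = sc.parallelize(Seq(
  LabeledPoint(1.0, Vectors.dense(1.0, 0.5)),
  LabeledPoint(0.0, Vectors.dense(-1.0, -0.5))))

// Binary by default; setNumClasses(k) switches to multinomial logistic regression.
val model = new LogisticRegressionWithLBFGS().setNumClasses(2).run(training)
val label = model.predict(Vectors.dense(0.8, 0.2))  // 0.0 or 1.0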
mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -41,11 +41,12 @@ import org.apache.spark.sql.{DataFrame, SQLContext}
* where D is the number of features
* @param modelType The type of NB model to fit; can be "multinomial" or "bernoulli"
*/
@Since("0.9.0")
class NaiveBayesModel private[spark] (
-val labels: Array[Double],
-val pi: Array[Double],
-val theta: Array[Array[Double]],
-val modelType: String)
+@Since("1.0.0") val labels: Array[Double],
+@Since("0.9.0") val pi: Array[Double],
+@Since("0.9.0") val theta: Array[Array[Double]],
+@Since("1.4.0") val modelType: String)
extends ClassificationModel with Serializable with Saveable {

import NaiveBayes.{Bernoulli, Multinomial, supportedModelTypes}
@@ -83,6 +84,7 @@ class NaiveBayesModel private[spark] (
throw new UnknownError(s"Invalid modelType: $modelType.")
}

@Since("1.0.0")
override def predict(testData: RDD[Vector]): RDD[Double] = {
val bcModel = testData.context.broadcast(this)
testData.mapPartitions { iter =>
@@ -91,6 +93,7 @@ class NaiveBayesModel private[spark] (
}
}

@Since("1.0.0")
override def predict(testData: Vector): Double = {
modelType match {
case Multinomial =>
@@ -107,6 +110,7 @@ class NaiveBayesModel private[spark] (
* @return an RDD[Vector] where each entry contains the predicted posterior class probabilities,
* in the same order as class labels
*/
@Since("1.5.0")
def predictProbabilities(testData: RDD[Vector]): RDD[Vector] = {
val bcModel = testData.context.broadcast(this)
testData.mapPartitions { iter =>
@@ -122,6 +126,7 @@ class NaiveBayesModel private[spark] (
* @return predicted posterior class probabilities from the trained model,
* in the same order as class labels
*/
@Since("1.5.0")
def predictProbabilities(testData: Vector): Vector = {
modelType match {
case Multinomial =>
@@ -158,6 +163,7 @@ class NaiveBayesModel private[spark] (
new DenseVector(scaledProbs.map(_ / probSum))
}

@Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
val data = NaiveBayesModel.SaveLoadV2_0.Data(labels, pi, theta, modelType)
NaiveBayesModel.SaveLoadV2_0.save(sc, path, data)
@@ -166,6 +172,7 @@ class NaiveBayesModel private[spark] (
override protected def formatVersion: String = "2.0"
}

@Since("1.3.0")
object NaiveBayesModel extends Loader[NaiveBayesModel] {

import org.apache.spark.mllib.util.Loader._
@@ -199,6 +206,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] {
dataRDD.write.parquet(dataPath(path))
}

@Since("1.3.0")
def load(sc: SparkContext, path: String): NaiveBayesModel = {
val sqlContext = new SQLContext(sc)
// Load Parquet data.
@@ -301,30 +309,35 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] {
* document classification. By making every vector a 0-1 vector, it can also be used as
* Bernoulli NB ([[http://tinyurl.com/p7c96j6]]). The input feature values must be nonnegative.
*/

@Since("0.9.0")
class NaiveBayes private (
private var lambda: Double,
private var modelType: String) extends Serializable with Logging {

import NaiveBayes.{Bernoulli, Multinomial}

@Since("1.4.0")
def this(lambda: Double) = this(lambda, NaiveBayes.Multinomial)

@Since("0.9.0")
def this() = this(1.0, NaiveBayes.Multinomial)

/** Set the smoothing parameter. Default: 1.0. */
@Since("0.9.0")
def setLambda(lambda: Double): NaiveBayes = {
this.lambda = lambda
this
}

/** Get the smoothing parameter. */
@Since("1.4.0")
def getLambda: Double = lambda

/**
* Set the model type using a string (case-sensitive).
* Supported options: "multinomial" (default) and "bernoulli".
*/
@Since("1.4.0")
def setModelType(modelType: String): NaiveBayes = {
require(NaiveBayes.supportedModelTypes.contains(modelType),
s"NaiveBayes was created with an unknown modelType: $modelType.")
@@ -333,13 +346,15 @@ class NaiveBayes private (
}

/** Get the model type. */
@Since("1.4.0")
def getModelType: String = this.modelType

/**
* Run the algorithm with the configured parameters on an input RDD of LabeledPoint entries.
*
* @param data RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
*/
@Since("0.9.0")
def run(data: RDD[LabeledPoint]): NaiveBayesModel = {
val requireNonnegativeValues: Vector => Unit = (v: Vector) => {
val values = v match {
@@ -423,6 +438,7 @@ class NaiveBayes private (
/**
* Top-level methods for calling naive Bayes.
*/
@Since("0.9.0")
object NaiveBayes {

/** String name for multinomial model type. */
@@ -485,7 +501,7 @@ object NaiveBayes {
* @param modelType The type of NB model to fit from the enumeration NaiveBayesModels, can be
* multinomial or bernoulli
*/
@Since("0.9.0")
@Since("1.4.0")
def train(input: RDD[LabeledPoint], lambda: Double, modelType: String): NaiveBayesModel = {
require(supportedModelTypes.contains(modelType),
s"NaiveBayes was created with an unknown modelType: $modelType.")
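A usage sketch for the train method annotated above (assumes an active SparkContext named sc; note the scaladoc's requirement that feature values be nonnegative):

import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

val training = sc.parallelize(Seq(
  LabeledPoint(0.0, Vectors.dense(1.0, 0.0, 0.0)),
  LabeledPoint(1.0, Vectors.dense(0.0, 2.0, 1.0))))

val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")
val label = model.predict(Vectors.dense(1.0, 0.0, 0.0))
// Posterior class probabilities, in the same order as model.labels.
val posterior = model.predictProbabilities(Vectors.dense(1.0, 0.0, 0.0))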
mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -33,9 +33,10 @@ import org.apache.spark.rdd.RDD
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
*/
-class SVMModel (
-override val weights: Vector,
-override val intercept: Double)
+@Since("0.8.0")
+class SVMModel @Since("1.1.0") (
+@Since("1.0.0") override val weights: Vector,
+@Since("0.8.0") override val intercept: Double)
extends GeneralizedLinearModel(weights, intercept) with ClassificationModel with Serializable
with Saveable with PMMLExportable {

@@ -47,7 +48,7 @@ class SVMModel (
* with prediction score greater than or equal to this threshold is identified as a positive,
* and negative otherwise. The default value is 0.0.
*/
@Since("1.3.0")
@Since("1.0.0")
@Experimental
def setThreshold(threshold: Double): this.type = {
this.threshold = Some(threshold)
@@ -92,12 +93,12 @@ class SVMModel (

override protected def formatVersion: String = "1.0"

@Since("1.4.0")
override def toString: String = {
s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}"
}
}

@Since("1.3.0")
object SVMModel extends Loader[SVMModel] {

@Since("1.3.0")
@@ -132,6 +133,7 @@ object SVMModel extends Loader[SVMModel] {
* regularization is used, which can be changed via [[SVMWithSGD.optimizer]].
* NOTE: Labels used in SVM should be {0, 1}.
*/
@Since("0.8.0")
class SVMWithSGD private (
private var stepSize: Double,
private var numIterations: Int,
@@ -141,6 +143,7 @@ class SVMWithSGD private (

private val gradient = new HingeGradient()
private val updater = new SquaredL2Updater()
@Since("0.8.0")
override val optimizer = new GradientDescent(gradient, updater)
.setStepSize(stepSize)
.setNumIterations(numIterations)
@@ -152,6 +155,7 @@ class SVMWithSGD private (
* Construct an SVM object with default parameters: {stepSize: 1.0, numIterations: 100,
* regParam: 0.01, miniBatchFraction: 1.0}.
*/
@Since("0.8.0")
def this() = this(1.0, 100, 0.01, 1.0)

override protected def createModel(weights: Vector, intercept: Double) = {
@@ -162,6 +166,7 @@ class SVMWithSGD private (
/**
* Top-level methods for calling SVM. NOTE: Labels used in SVM should be {0, 1}.
*/
@Since("0.8.0")
object SVMWithSGD {

/**
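A usage sketch covering SVMWithSGD.train and the threshold methods whose @Since versions this commit corrects (assumes an active SparkContext named sc; toy data):

import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

val training = sc.parallelize(Seq(
  LabeledPoint(1.0, Vectors.dense(1.0)),
  LabeledPoint(0.0, Vectors.dense(-1.0))))

val model = SVMWithSGD.train(training, numIterations = 100)
model.setThreshold(0.0)   // margin >= threshold is classified as positive
val label = model.predict(Vectors.dense(0.5))
model.clearThreshold()    // predict now returns raw margins instead of 0/1 labels
val margin = model.predict(Vectors.dense(0.5))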
mllib/src/main/scala/org/apache/spark/mllib/classification/StreamingLogisticRegression.scala
@@ -17,7 +17,7 @@

package org.apache.spark.mllib.classification

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.StreamingLinearAlgorithm

@@ -44,6 +44,7 @@ import org.apache.spark.mllib.regression.StreamingLinearAlgorithm
* }}}
*/
@Experimental
@Since("1.3.0")
class StreamingLogisticRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
@@ -58,6 +59,7 @@ class StreamingLogisticRegressionWithSGD private[mllib] (
* Initial weights must be set before using trainOn or predictOn
* (see `StreamingLinearAlgorithm`)
*/
@Since("1.3.0")
def this() = this(0.1, 50, 1.0, 0.0)

protected val algorithm = new LogisticRegressionWithSGD(
@@ -66,30 +68,35 @@ class StreamingLogisticRegressionWithSGD private[mllib] (
protected var model: Option[LogisticRegressionModel] = None

/** Set the step size for gradient descent. Default: 0.1. */
@Since("1.3.0")
def setStepSize(stepSize: Double): this.type = {
this.algorithm.optimizer.setStepSize(stepSize)
this
}

/** Set the number of iterations of gradient descent to run per update. Default: 50. */
@Since("1.3.0")
def setNumIterations(numIterations: Int): this.type = {
this.algorithm.optimizer.setNumIterations(numIterations)
this
}

/** Set the fraction of each batch to use for updates. Default: 1.0. */
@Since("1.3.0")
def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction)
this
}

/** Set the regularization parameter. Default: 0.0. */
@Since("1.3.0")
def setRegParam(regParam: Double): this.type = {
this.algorithm.optimizer.setRegParam(regParam)
this
}

/** Set the initial weights. Default: [0.0, 0.0]. */
@Since("1.3.0")
def setInitialWeights(initialWeights: Vector): this.type = {
this.model = Some(algorithm.createModel(initialWeights, 0.0))
this
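A streaming usage sketch tying together the setters annotated above (assumes an active StreamingContext named ssc plus streams trainingStream: DStream[LabeledPoint] and testStream: DStream[Vector]; all of these names are hypothetical):

import org.apache.spark.mllib.classification.StreamingLogisticRegressionWithSGD
import org.apache.spark.mllib.linalg.Vectors

val model = new StreamingLogisticRegressionWithSGD()
  .setStepSize(0.5)
  .setNumIterations(10)
  .setInitialWeights(Vectors.dense(0.0, 0.0))  // required before trainOn/predictOn

model.trainOn(trainingStream)        // update the model on each training batch
model.predictOn(testStream).print()  // emit a DStream[Double] of predictions
ssc.start()
ssc.awaitTermination()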
