Skip to content

Commit

Permalink
Fixed bugs from previous commit.
Browse files: browse the repository at this point in the history
  • Loading branch information
jkbradley committed Feb 5, 2015
1 parent 1c61723 commit 934f97b
Show file tree
Hide file tree
Showing 15 changed files with 43 additions and 172 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public static void main(String[] args) {
// 'probability' column since we renamed the lr.probabilityCol parameter previously.
model2.transform(test).registerTempTable("results");
DataFrame results =
jsql.sql("SELECT features, label, probability, prediction FROM results");
jsql.sql("SELECT features, label, myProbability, prediction FROM results");
for (Row r: results.collect()) {
System.out.println("(" + r.get(0) + ", " + r.get(1) + ") -> prob=" + r.get(2)
+ ", prediction=" + r.get(3));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ object DeveloperApiExample {
import sqlContext._

// Prepare training data.
// We use LabeledPoint, which is a case class. Spark SQL can convert RDDs of case classes
// into SchemaRDDs, where it uses the case class metadata to infer the schema.
val training = sparkContext.parallelize(Seq(
LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ object SimpleParamsExample {
model2.transform(test)
.select('features, 'label, 'myProbability, 'prediction)
.collect()
.foreach { case Row(features: Vector, label: Double, prob: Double, prediction: Double) =>
.foreach { case Row(features: Vector, label: Double, prob: Vector, prediction: Double) =>
println("(" + features + ", " + label + ") -> prob=" + prob + ", prediction=" + prediction)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

package org.apache.spark.ml.classification

import scala.reflect.runtime.universe._

import org.apache.spark.annotation.{DeveloperApi, AlphaComponent}
import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor, PredictorParams}
import org.apache.spark.ml.param.{Params, ParamMap, HasRawPredictionCol}
Expand Down Expand Up @@ -62,8 +60,6 @@ abstract class Classifier[
extends Predictor[FeaturesType, Learner, M]
with ClassifierParams {

setRawPredictionCol("") // Do not output by default

def setRawPredictionCol(value: String): Learner =
set(rawPredictionCol, value).asInstanceOf[Learner]

Expand All @@ -82,8 +78,6 @@ abstract class Classifier[
abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[FeaturesType, M]]
extends PredictionModel[FeaturesType, M] with ClassifierParams {

setRawPredictionCol("") // Do not output by default

def setRawPredictionCol(value: String): M = set(rawPredictionCol, value).asInstanceOf[M]

/** Number of classes (values which the label can take). */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.ml.classification
import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.ml.param._
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.linalg.{VectorUDT, Vectors, BLAS, Vector}
import org.apache.spark.mllib.linalg.{BLAS, Vector, VectorUDT, Vectors}
import org.apache.spark.sql._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
Expand All @@ -35,6 +35,7 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas

/**
* :: AlphaComponent ::
*
* Logistic regression.
* Currently, this class only supports binary classification.
*/
Expand Down Expand Up @@ -86,6 +87,7 @@ class LogisticRegression

/**
* :: AlphaComponent ::
*
* Model produced by [[LogisticRegression]].
*/
@AlphaComponent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

package org.apache.spark.ml.classification

import scala.reflect.runtime.universe._

import org.apache.spark.annotation.{AlphaComponent, DeveloperApi}
import org.apache.spark.ml.param.{HasProbabilityCol, ParamMap, Params}
import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
Expand All @@ -42,8 +40,10 @@ private[classification] trait ProbabilisticClassifierParams
}
}


/**
* :: AlphaComponent ::
*
* Single-label binary or multiclass classifier which can output class conditional probabilities.
*
* @tparam FeaturesType Type of input features. E.g., [[Vector]]
Expand All @@ -57,13 +57,13 @@ abstract class ProbabilisticClassifier[
M <: ProbabilisticClassificationModel[FeaturesType, M]]
extends Classifier[FeaturesType, Learner, M] with ProbabilisticClassifierParams {

setProbabilityCol("") // Do not output by default

def setProbabilityCol(value: String): Learner = set(probabilityCol, value).asInstanceOf[Learner]
}


/**
* :: AlphaComponent ::
*
* Model produced by a [[ProbabilisticClassifier]].
* Classes are indexed {0, 1, ..., numClasses - 1}.
*
Expand All @@ -76,8 +76,6 @@ abstract class ProbabilisticClassificationModel[
M <: ProbabilisticClassificationModel[FeaturesType, M]]
extends ClassificationModel[FeaturesType, M] with ProbabilisticClassifierParams {

setProbabilityCol("") // Do not output by default

def setProbabilityCol(value: String): M = set(probabilityCol, value).asInstanceOf[M]

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import org.apache.spark.sql.types.DoubleType

/**
* :: AlphaComponent ::
*
* Evaluator for binary classification, which expects two input columns: score and label.
*/
@AlphaComponent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.ml.impl.estimator

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.annotation.{AlphaComponent, DeveloperApi}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.mllib.linalg.Vector
Expand Down Expand Up @@ -62,6 +62,8 @@ trait PredictorParams extends Params
}

/**
* :: AlphaComponent ::
*
* Abstraction for prediction problems (regression and classification).
*
* @tparam FeaturesType Type of features.
Expand All @@ -71,7 +73,7 @@ trait PredictorParams extends Params
* @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type
* parameter to specify the concrete type for the corresponding model.
*/
@DeveloperApi
@AlphaComponent
abstract class Predictor[
FeaturesType,
Learner <: Predictor[FeaturesType, Learner, M],
Expand Down Expand Up @@ -124,7 +126,18 @@ abstract class Predictor[
}
}

private[ml] abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]]
/**
* :: AlphaComponent ::
*
* Abstraction for a model for prediction tasks (regression and classification).
*
* @tparam FeaturesType Type of features.
* E.g., [[org.apache.spark.mllib.linalg.Vector]] for vector features.
* @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type
* parameter to specify the concrete type for the corresponding model.
*/
@AlphaComponent
abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]]
extends Model[M] with PredictorParams {

def setFeaturesCol(value: String): M = set(featuresCol, value).asInstanceOf[M]
Expand Down
4 changes: 1 addition & 3 deletions mllib/src/main/scala/org/apache/spark/ml/param/params.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@ package org.apache.spark.ml.param

import scala.annotation.varargs
import scala.collection.mutable
import scala.reflect.runtime.universe._

import java.lang.reflect.Modifier

import org.apache.spark.annotation.{DeveloperApi, AlphaComponent}
import org.apache.spark.annotation.{AlphaComponent, DeveloperApi}
import org.apache.spark.ml.Identifiable
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.ScalaReflection

/**
* :: AlphaComponent ::
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ private[regression] trait LinearRegressionParams extends RegressorParams

/**
* :: AlphaComponent ::
* Logistic regression.
*
* Linear regression.
*/
@AlphaComponent
class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegressionModel]
Expand Down Expand Up @@ -78,6 +79,7 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress

/**
* :: AlphaComponent ::
*
* Model produced by [[LinearRegression]].
*/
@AlphaComponent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ trait RegressorParams extends PredictorParams

/**
* :: AlphaComponent ::
*
* Single-label regression.
*
* @tparam FeaturesType Type of input features. E.g., [[org.apache.spark.mllib.linalg.Vector]]
Expand All @@ -49,6 +50,7 @@ abstract class Regressor[

/**
* :: AlphaComponent ::
*
* Model produced by a [[Regressor]].
*
* @tparam FeaturesType Type of input features. E.g., [[org.apache.spark.mllib.linalg.Vector]]
Expand Down
78 changes: 0 additions & 78 deletions mllib/src/test/java/org/apache/spark/ml/JavaLabeledPointSuite.java

This file was deleted.

59 changes: 0 additions & 59 deletions mllib/src/test/scala/org/apache/spark/ml/LabeledPointSuite.scala

This file was deleted.

Loading

0 comments on commit 934f97b

Please sign in to comment.