Skip to content

Commit

Permalink
Modified ParamMap to sort parameters in toString. Cleaned up classes …
Browse files Browse the repository at this point in the history
…in class hierarchy, before implementing tests and examples.
  • Loading branch information
jkbradley committed Feb 5, 2015
1 parent d705e87 commit 601e792
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 25 deletions.
27 changes: 27 additions & 0 deletions mllib/src/main/scala/org/apache/spark/ml/LabeledPoint.scala
Original file line number Diff line number Diff line change
@@ -1,21 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.ml

import scala.beans.BeanInfo

import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.mllib.linalg.Vector

/**
 * :: AlphaComponent ::
 * A single labeled instance (data point) used in prediction tasks.
 *
 * @param label    Label to predict
 * @param features Features describing this instance
 * @param weight   Weight of this instance
 */
@AlphaComponent
@BeanInfo
case class LabeledPoint(label: Double, features: Vector, weight: Double) {

  /** Renders this point in the tuple-like form `(label,features,weight)`. */
  override def toString: String = s"($label,$features,$weight)"
}

/**
* :: AlphaComponent ::
*/
@AlphaComponent
object LabeledPoint {
/** Constructor which sets instance weight to 1.0 */
def apply(label: Double, features: Vector) = new LabeledPoint(label, features, 1.0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,40 @@ import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor, PredictorParams}
import org.apache.spark.mllib.linalg.Vector

@AlphaComponent
/**
* Params for classification.
* Currently empty, but may add functionality later.
*/
private[classification] trait ClassifierParams extends PredictorParams

/**
* :: AlphaComponent ::
* Single-label binary or multiclass classification.
* Classes are indexed {0, 1, ..., numClasses - 1}.
*
* @tparam Learner Concrete classifier type; bounded by `Classifier[Learner, M]`, so it refers
*                 back to the subclass itself.
* @tparam M Model type; bounded by [[ClassificationModel]].
*/
@AlphaComponent
abstract class Classifier[Learner <: Classifier[Learner, M], M <: ClassificationModel[M]]
extends Predictor[Learner, M]
with ClassifierParams {

// TODO: defaultEvaluator (follow-up PR)
}


private[ml] abstract class ClassificationModel[M <: ClassificationModel[M]]
/**
* :: AlphaComponent ::
* Model produced by a [[Classifier]].
* Classes are indexed {0, 1, ..., numClasses - 1}.
*
* @tparam M Model type.
*/
@AlphaComponent
abstract class ClassificationModel[M <: ClassificationModel[M]]
extends PredictionModel[M] with ClassifierParams {

/** Number of classes (values which the label can take). */
def numClasses: Int

/**
* Predict label for the given features. Labels are indexed {0, 1, ..., numClasses - 1}.
* Predict label for the given features.
* This default implementation for classification predicts the index of the maximum value
* from [[predictRaw()]].
*/
Expand All @@ -50,8 +63,12 @@ private[ml] abstract class ClassificationModel[M <: ClassificationModel[M]]
}

/**
* Raw prediction for each possible label
* @return vector where element i is the raw score for label i
* Raw prediction for each possible label.
* The meaning of a "raw" prediction may vary between algorithms, but it intuitively gives
* a magnitude of confidence in each possible label.
* @return vector where element i is the raw prediction for label i.
* This raw prediction may be any real number, where a larger value indicates greater
* confidence for that label.
*/
def predictRaw(features: Vector): Vector

Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.ml.impl.estimator

import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.ml.{Estimator, LabeledPoint, Model}
import org.apache.spark.ml.param._
import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.analysis.Star

@AlphaComponent
private[ml] trait PredictorParams extends Params
with HasLabelCol with HasFeaturesCol with HasPredictionCol {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.ml.impl.estimator

import org.apache.spark.mllib.linalg.Vector

/**
* Trait for a [[org.apache.spark.ml.classification.ClassificationModel]] which can output
* class conditional probabilities.
*/
private[ml] trait ProbabilisticClassificationModel {

/**
* Predict the probability of each label.
* Predict the probability of each class given the features.
* These predictions are also called class conditional probabilities.
*
* WARNING: Not all models output well-calibrated probability estimates! These probabilities
* should be treated as confidences, not precise probabilities.
*/
def predictProbabilities(features: Vector): Vector

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) exten
def copy: ParamMap = new ParamMap(map.clone())

override def toString: String = {
map.map { case (param, value) =>
map.toSeq.sorted.map { case (param, value) =>
s"\t${param.parent.uid}-${param.name}: $value"
}.mkString("{\n", ",\n", "\n}")
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.ml.regression

import org.apache.spark.annotation.AlphaComponent
Expand All @@ -9,22 +26,20 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

/**
* :: AlphaComponent ::
* Params for linear regression.
*/
@AlphaComponent
private[regression] trait LinearRegressionParams extends RegressorParams
with HasRegParam with HasMaxIter


/**
* :: AlphaComponent ::
* Linear regression.
*/
@AlphaComponent
class LinearRegression extends Regressor[LinearRegression, LinearRegressionModel]
with LinearRegressionParams {

// TODO: Extend IterativeEstimator

setRegParam(0.1)
setMaxIter(100)

Expand Down Expand Up @@ -52,7 +67,6 @@ class LinearRegression extends Regressor[LinearRegression, LinearRegressionModel
}
}


/**
* :: AlphaComponent ::
* Model produced by [[LinearRegression]].
Expand Down
43 changes: 33 additions & 10 deletions mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala
Original file line number Diff line number Diff line change
@@ -1,28 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.ml.regression

import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.ml.Evaluator
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.impl.estimator.{PredictionModel, HasDefaultEvaluator, Predictor,
PredictorParams}
import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor, PredictorParams}
import org.apache.spark.mllib.linalg.Vector

@AlphaComponent
/**
* Params for regression.
* Currently empty, but may add functionality later.
*/
private[regression] trait RegressorParams extends PredictorParams

/**
* :: AlphaComponent ::
* Single-label regression
*/
@AlphaComponent
abstract class Regressor[Learner <: Regressor[Learner, M], M <: RegressionModel[M]]
extends Predictor[Learner, M]
with RegressorParams
with HasDefaultEvaluator {
with RegressorParams {

override def defaultEvaluator: Evaluator = new RegressionEvaluator
// TODO: defaultEvaluator (follow-up PR)
}


private[ml] abstract class RegressionModel[M <: RegressionModel[M]]
/**
* :: AlphaComponent ::
* Model produced by a [[Regressor]].
* @tparam M Model type.
*/
@AlphaComponent
abstract class RegressionModel[M <: RegressionModel[M]]
extends PredictionModel[M] with RegressorParams {

/**
Expand Down

0 comments on commit 601e792

Please sign in to comment.