Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-7833][ml] Add python wrapper for RegressionEvaluator #6365

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -31,14 +31,14 @@ import org.apache.spark.sql.types.DoubleType
* Evaluator for regression, which expects two input columns: prediction and label.
*/
@AlphaComponent
class RegressionEvaluator(override val uid: String)
final class RegressionEvaluator(override val uid: String)
extends Evaluator with HasPredictionCol with HasLabelCol {

def this() = this(Identifiable.randomUID("regEval"))

/**
* param for metric name in evaluation
* @group param
* @group param supports mse, rmse, r2, mae as valid metric names.
*/
val metricName: Param[String] = {
val allowedParams = ParamValidators.inArray(Array("mse", "rmse", "r2", "mae"))
Expand Down
Expand Up @@ -39,6 +39,7 @@ class RegressionEvaluatorSuite extends FunSuite with MLlibTestSparkContext {
val dataset = sqlContext.createDataFrame(
sc.parallelize(LinearDataGenerator.generateLinearInput(
6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 100, 42, 0.1), 2))

/**
* Using the following R code to load the data, train the model and evaluate metrics.
*
Expand Down
68 changes: 66 additions & 2 deletions python/pyspark/ml/evaluation.py
Expand Up @@ -19,11 +19,11 @@

from pyspark.ml.wrapper import JavaWrapper
from pyspark.ml.param import Param, Params
from pyspark.ml.param.shared import HasLabelCol, HasRawPredictionCol
from pyspark.ml.param.shared import HasLabelCol, HasPredictionCol, HasRawPredictionCol
from pyspark.ml.util import keyword_only
from pyspark.mllib.common import inherit_doc

__all__ = ['Evaluator', 'BinaryClassificationEvaluator']
__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator']


@inherit_doc
Expand Down Expand Up @@ -148,6 +148,70 @@ def setParams(self, rawPredictionCol="rawPrediction", labelCol="label",
return self._set(**kwargs)


@inherit_doc
class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
"""
Evaluator for Regression, which expects two input
columns: prediction and label.

>>> scoreAndLabels = [(-28.98343821, -27.0), (20.21491975, 21.5),
... (-25.98418959, -22.0), (30.69731842, 33.0), (74.69283752, 71.0)]
>>> dataset = sqlContext.createDataFrame(scoreAndLabels, ["raw", "label"])
...
>>> evaluator = RegressionEvaluator(predictionCol="raw")
>>> evaluator.evaluate(dataset)
2.842...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "r2"})
0.993...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "mae"})
2.649...
"""
# a placeholder to make it appear in the generated doc
metricName = Param(Params._dummy(), "metricName",
"metric name in evaluation (mse|rmse|r2|mae)")

@keyword_only
def __init__(self, predictionCol="prediction", labelCol="label",
metricName="rmse"):
"""
__init__(self, predictionCol="prediction", labelCol="label", \
metricName="rmse")
"""
super(RegressionEvaluator, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.evaluation.RegressionEvaluator", self.uid)
#: param for metric name in evaluation (mse|rmse|r2|mae)
self.metricName = Param(self, "metricName",
"metric name in evaluation (mse|rmse|r2|mae)")
self._setDefault(predictionCol="prediction", labelCol="label",
metricName="rmse")
kwargs = self.__init__._input_kwargs
self._set(**kwargs)

def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
"""
self._paramMap[self.metricName] = value
return self

def getMetricName(self):
"""
Gets the value of metricName or its default value.
"""
return self.getOrDefault(self.metricName)

@keyword_only
def setParams(self, predictionCol="prediction", labelCol="label",
metricName="rmse"):
"""
setParams(self, predictionCol="prediction", labelCol="label",
metricName="rmse")
Sets params for regression evaluator.
"""
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)

if __name__ == "__main__":
import doctest
from pyspark.context import SparkContext
Expand Down