From b41226fe88a86035a67669bd3bf9c727621dfd3a Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 2 Jun 2016 13:23:57 +0800 Subject: [PATCH 1/4] del precision,recall --- docs/mllib-evaluation-metrics.md | 16 +++------------- .../MulticlassClassificationEvaluator.scala | 2 -- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/docs/mllib-evaluation-metrics.md b/docs/mllib-evaluation-metrics.md index a269dbf030e7c..c49bc4ff124bd 100644 --- a/docs/mllib-evaluation-metrics.md +++ b/docs/mllib-evaluation-metrics.md @@ -140,7 +140,7 @@ definitions of positive and negative labels is straightforward. #### Label based metrics Opposed to binary classification where there are only two possible labels, multiclass classification problems have many -possible labels and so the concept of label-based metrics is introduced. Overall precision measures precision across all +possible labels and so the concept of label-based metrics is introduced. Accuracy measures precision across all labels - the number of times any class was predicted correctly (true positives) normalized by the number of data points. Precision by label considers only one class, and measures the number of time a specific label was predicted correctly normalized by the number of times that label appears in the output. @@ -182,20 +182,10 @@ $$\hat{\delta}(x) = \begin{cases}1 & \text{if $x = 0$}, \\ 0 & \text{otherwise}. - Overall Precision - $PPV = \frac{TP}{TP + FP} = \frac{1}{N}\sum_{i=0}^{N-1} \hat{\delta}\left(\hat{\mathbf{y}}_i - - \mathbf{y}_i\right)$ - - - Overall Recall - $TPR = \frac{TP}{TP + FN} = \frac{1}{N}\sum_{i=0}^{N-1} \hat{\delta}\left(\hat{\mathbf{y}}_i - + Accuracy + $ACC = \frac{TP}{TP + FP} = \frac{1}{N}\sum_{i=0}^{N-1} \hat{\delta}\left(\hat{\mathbf{y}}_i - \mathbf{y}_i\right)$ - - Overall F1-measure - $F1 = 2 \cdot \left(\frac{PPV \cdot TPR} - {PPV + TPR}\right)$ - Precision by label $PPV(\ell) = \frac{TP}{TP + FP} = diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala index 0b84e0a3fa784..897137c17be98 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala @@ -82,8 +82,6 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid val metrics = new MulticlassMetrics(predictionAndLabels) val metric = $(metricName) match { case "f1" => metrics.weightedFMeasure - case "precision" => metrics.accuracy - case "recall" => metrics.accuracy case "weightedPrecision" => metrics.weightedPrecision case "weightedRecall" => metrics.weightedRecall case "accuracy" => metrics.accuracy From 0427cf827d5b6c09bb3f7c970880e7d22db9bae7 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 2 Jun 2016 14:23:40 +0800 Subject: [PATCH 2/4] update doc --- .../evaluation/MulticlassClassificationEvaluator.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala index 897137c17be98..794b1e7d9d881 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala @@ -39,16 +39,16 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid def this() = this(Identifiable.randomUID("mcEval")) /** - * param for metric name in evaluation (supports `"f1"` (default), `"precision"`, `"recall"`, - * `"weightedPrecision"`, `"weightedRecall"`, `"accuracy"`) + * param for metric name in evaluation (supports `"f1"` (default), `"weightedPrecision"`, + * `"weightedRecall"`, `"accuracy"`) * @group param */ @Since("1.5.0") val metricName: Param[String] = { - val allowedParams = ParamValidators.inArray(Array("f1", "precision", - "recall", "weightedPrecision", "weightedRecall", "accuracy")) + val allowedParams = ParamValidators.inArray(Array("f1", "weightedPrecision", + "weightedRecall", "accuracy")) new Param(this, "metricName", "metric name in evaluation " + - "(f1|precision|recall|weightedPrecision|weightedRecall|accuracy)", allowedParams) + "(f1|weightedPrecision|weightedRecall|accuracy)", allowedParams) } /** @group getParam */ From 42ed0b97690545279b2faa99166007b56ef29a3b Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 2 Jun 2016 14:29:37 +0800 Subject: [PATCH 3/4] fix py --- python/pyspark/ml/evaluation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index b8b2b37af553d..c480525e9bd3a 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -258,9 +258,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio >>> evaluator = MulticlassClassificationEvaluator(predictionCol="prediction") >>> evaluator.evaluate(dataset) 0.66... - >>> evaluator.evaluate(dataset, {evaluator.metricName: "precision"}) - 0.66... - >>> evaluator.evaluate(dataset, {evaluator.metricName: "recall"}) + >>> evaluator.evaluate(dataset, {evaluator.metricName: "accuracy"}) 0.66... .. versionadded:: 1.5.0 From b4d5811d2889c4238c29df59c9cbce256d325367 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 2 Jun 2016 15:35:09 +0800 Subject: [PATCH 4/4] fix test --- .../ml/evaluation/MulticlassClassificationEvaluatorSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala index 522f6675d7f46..1a3a8a13a2d09 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala @@ -33,7 +33,7 @@ class MulticlassClassificationEvaluatorSuite val evaluator = new MulticlassClassificationEvaluator() .setPredictionCol("myPrediction") .setLabelCol("myLabel") - .setMetricName("recall") + .setMetricName("accuracy") testDefaultReadWrite(evaluator) }