From cb5dce1565edca67a3763b7610137b48545ea998 Mon Sep 17 00:00:00 2001
From: Sandeep Kumar Choudhary <tssandeepkumarchoudhary@gmail.com>
Date: Thu, 1 Mar 2018 21:45:12 +0530
Subject: [PATCH] Adding more binary classification evaluation metrics

---
 ...avaBinaryClassificationMetricsExample.java | 24 +++++++
 .../BinaryClassificationMetricsExample.scala  | 37 +++++++++++
 .../BinaryClassificationMetrics.scala         | 33 +++++++++-
 .../BinaryClassificationMetricComputers.scala | 63 +++++++++++++++++++
 4 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaBinaryClassificationMetricsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaBinaryClassificationMetricsExample.java
index b9d0313c6bb56..e068e24f051e0 100644
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaBinaryClassificationMetricsExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaBinaryClassificationMetricsExample.java
@@ -68,6 +68,30 @@ public static void main(String[] args) {
     JavaRDD<?> recall = metrics.recallByThreshold().toJavaRDD();
     System.out.println("Recall by threshold: " + recall.collect());
 
+    // False omission rate
+    JavaRDD<?> falseOmissionRate = metrics.forByThreshold().toJavaRDD();
+    System.out.println("False omission rate by threshold: " + falseOmissionRate.collect());
+
+    // False discovery rate
+    JavaRDD<?> falseDiscoveryRate = metrics.fdrByThreshold().toJavaRDD();
+    System.out.println("False discovery rate by threshold: " + falseDiscoveryRate.collect());
+
+    // Negative predictive value (short local name keeps the println within 100 columns)
+    JavaRDD<?> npv = metrics.npvByThreshold().toJavaRDD();
+    System.out.println("Negative predictive value by threshold: " + npv.collect());
+
+    // False negative rate
+    JavaRDD<?> falseNegativeRate = metrics.fnrByThreshold().toJavaRDD();
+    System.out.println("False negative rate by threshold: " + falseNegativeRate.collect());
+
+    // True negative rate (specificity)
+    JavaRDD<?> trueNegativeRate = metrics.specificityByThreshold().toJavaRDD();
+    System.out.println("True negative rate by threshold: " + trueNegativeRate.collect());
+
+    // False positive rate
+    JavaRDD<?> falsePositiveRate = metrics.fprByThreshold().toJavaRDD();
+    System.out.println("False positive rate by threshold: " + falsePositiveRate.collect());
+
     // F Score by threshold
     JavaRDD<?> f1Score = metrics.fMeasureByThreshold().toJavaRDD();
     System.out.println("F1 Score by threshold: " + f1Score.collect());
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala
index b9263ac6fcff6..5b204664ff367 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala
@@ -69,6 +69,43 @@ object BinaryClassificationMetricsExample {
       println(s"Threshold: $t, Recall: $r")
     }
 
+    // False omission rate
+    val falseOmissionRate = metrics.forByThreshold()
+    falseOmissionRate.foreach { case (t, r) =>
+      println(s"Threshold: $t, False omission rate: $r")
+    }
+
+    // False discovery rate
+    val falseDiscoveryRate = metrics.fdrByThreshold()
+    falseDiscoveryRate.foreach { case (t, r) =>
+      println(s"Threshold: $t, False discovery rate: $r")
+    }
+
+    // Negative predictive value
+    val negativePredictiveValue = metrics.npvByThreshold()
+    negativePredictiveValue.foreach { case (t, r) =>
+      println(s"Threshold: $t, Negative predictive value: $r")
+    }
+
+    // False negative rate
+    val falseNegativeRate = metrics.fnrByThreshold()
+    falseNegativeRate.foreach { case (t, r) =>
+      println(s"Threshold: $t, False negative rate: $r")
+    }
+
+    // True negative rate
+    val trueNegativeRate = metrics.specificityByThreshold()
+    trueNegativeRate.foreach { case (t, r) =>
+      println(s"Threshold: $t, True negative rate: $r")
+    }
+
+    // False positive rate
+    val falsePositiveRate = metrics.fprByThreshold()
+    falsePositiveRate.foreach { case (t, r) =>
+      println(s"Threshold: $t, False positive rate: $r")
+    }
+
+
     // Precision-Recall Curve
     val PRC = metrics.pr
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
index 2cfcf38eb4ca8..7fd4c74ccaee8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
@@ -143,7 +143,38 @@ class BinaryClassificationMetrics @Since("1.3.0") (
   @Since("1.0.0")
   def recallByThreshold(): RDD[(Double, Double)] = createCurve(Recall)
 
-  private lazy val (
+  /** Returns the (threshold, false omission rate) curve. */
+  @Since("2.4.0")
+  def forByThreshold(): RDD[(Double, Double)] =
+    createCurve(FalseOmissionRate)
+
+  /** Returns the (threshold, false discovery rate) curve. */
+  @Since("2.4.0")
+  def fdrByThreshold(): RDD[(Double, Double)] =
+    createCurve(FalseDiscoveryRate)
+
+  /** Returns the (threshold, negative predictive value) curve. */
+  @Since("2.4.0")
+  def npvByThreshold(): RDD[(Double, Double)] =
+    createCurve(NegativePredictiveValue)
+
+  /** Returns the (threshold, false negative rate) curve. */
+  @Since("2.4.0")
+  def fnrByThreshold(): RDD[(Double, Double)] =
+    createCurve(FalseNegativeRate)
+
+  /** Returns the (threshold, true negative rate) curve; the true negative rate is specificity. */
+  @Since("2.4.0")
+  def specificityByThreshold(): RDD[(Double, Double)] =
+    createCurve(TrueNegativeRate)
+
+  /** Returns the (threshold, false positive rate) curve. */
+  @Since("2.4.0")
+  def fprByThreshold(): RDD[(Double, Double)] =
+    createCurve(FalsePositiveRate)
+
+
+  private lazy val (
     cumulativeCounts: RDD[(Double, BinaryLabelCounter)],
     confusions: RDD[(Double, BinaryConfusionMatrix)]) = {
     // Create a bin for each distinct score value, count positives and negatives within each bin,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/binary/BinaryClassificationMetricComputers.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/binary/BinaryClassificationMetricComputers.scala
index 5a4c6aef50b7b..bfda826c08d57 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/binary/BinaryClassificationMetricComputers.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/binary/BinaryClassificationMetricComputers.scala
@@ -47,6 +47,7 @@ private[evaluation] object FalsePositiveRate extends BinaryClassificationMetricC
   }
 }
 
+
 /** Recall. Defined as 0.0 when there are no positive examples. */
 private[evaluation] object Recall extends BinaryClassificationMetricComputer {
   override def apply(c: BinaryConfusionMatrix): Double = {
@@ -58,6 +59,68 @@ private[evaluation] object Recall extends BinaryClassificationMetricComputer {
   }
 }
 
+
+/**
+ * False Omission Rate: FN / (TN + FN). Defined as 0.0 when no examples are predicted negative.
+ */
+private[evaluation] object FalseOmissionRate extends BinaryClassificationMetricComputer {
+  override def apply(c: BinaryConfusionMatrix): Double = {
+    // TN + FN is the number of examples predicted negative. The guard tests this divisor
+    // directly so the result can never be NaN (0.0 / 0).
+    val totalNegatives = c.numTrueNegatives + c.numFalseNegatives
+    if (totalNegatives == 0) 0.0 else c.numFalseNegatives.toDouble / totalNegatives
+  }
+}
+
+/**
+ * False Discovery Rate: FP / (TP + FP). Defined as 0.0 when no examples are predicted positive.
+ */
+private[evaluation] object FalseDiscoveryRate extends BinaryClassificationMetricComputer {
+  override def apply(c: BinaryConfusionMatrix): Double = {
+    // Divide by predicted positives (TP + FP), not by the actual negatives, and guard on it.
+    val totalPositives = c.numTruePositives + c.numFalsePositives
+    if (totalPositives == 0) 0.0 else c.numFalsePositives.toDouble / totalPositives
+  }
+}
+
+/**
+ * Negative Predictive Value: TN / (TN + FN). Defined as 0.0 when no examples are predicted
+ * negative.
+ */
+private[evaluation] object NegativePredictiveValue extends BinaryClassificationMetricComputer {
+  override def apply(c: BinaryConfusionMatrix): Double = {
+    // Guard on the divisor (predicted negatives) so the result can never be NaN (0.0 / 0).
+    val totalNegatives = c.numTrueNegatives + c.numFalseNegatives
+    if (totalNegatives == 0) 0.0 else c.numTrueNegatives.toDouble / totalNegatives
+  }
+}
+
+/**
+ * False Negative Rate: numFalseNegatives / numPositives. Defined as 0.0 when numPositives is 0.
+ */
+private[evaluation] object FalseNegativeRate extends BinaryClassificationMetricComputer {
+  override def apply(c: BinaryConfusionMatrix): Double = {
+    // The divisor is numPositives, so that is the count that must be checked for zero.
+    val positives = c.numPositives
+    if (positives == 0) 0.0 else c.numFalseNegatives.toDouble / positives
+  }
+}
+
+
+/**
+ * True Negative Rate (specificity): numTrueNegatives / numNegatives. Defined as 0.0 when
+ * there are no negative examples.
+ */
+private[evaluation] object TrueNegativeRate extends BinaryClassificationMetricComputer {
+  override def apply(c: BinaryConfusionMatrix): Double = {
+    val negatives = c.numNegatives
+    if (negatives != 0) c.numTrueNegatives.toDouble / negatives else 0.0
+  }
+}
+
+
+
+
 /**
  * F-Measure. Defined as 0 if both precision and recall are 0. EG in the case that all examples
  * are false positives.