Skip to content

Commit

Permalink
Merge with previous updates
Browse files Browse the repository at this point in the history
  • Loading branch information
avulanov committed Oct 31, 2014
2 parents 43a613e + 517a594 commit fc8175e
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,126 +33,125 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
labels}.distinct().count()

/**
* Returns strict Accuracy
* Returns subset accuracy
* (for equal sets of labels)
*/
lazy val strictAccuracy: Double = predictionAndLabels.filter { case (predictions, labels) =>
predictions.deep == labels.deep }.count().toDouble / numDocs
/**
 * Returns subset accuracy: the number of documents whose predicted labels are exactly
 * the true labels, divided by numDocs.
 *
 * The two arrays are compared as sets, so the order in which labels are listed does not
 * matter; this agrees with the order-insensitive intersect/diff used by the other metrics.
 */
lazy val subsetAccuracy: Double = predictionAndLabels.filter { case (predictions, labels) =>
  predictions.toSet == labels.toSet
}.count().toDouble / numDocs

/**
 * Returns accuracy: per document, the number of labels shared by prediction and ground
 * truth divided by the size of their union; the per-document values are summed and
 * divided by numDocs.
 */
lazy val accuracy: Double = predictionAndLabels.map { case (predictions, labels) =>
  // Compute the intersection once instead of twice per document.
  val common = labels.intersect(predictions).size
  val union = labels.size + predictions.size - common
  // A document with neither labels nor predictions would give 0.0 / 0 = NaN and turn the
  // whole sum into NaN; score it 0, as the other metrics here do for a zero denominator.
  if (union == 0) 0.0 else common.toDouble / union
}.sum / numDocs


/**
 * Returns Hamming loss: per document, the number of labels that appear in exactly one of
 * prediction and ground truth; the counts are summed and divided by numDocs * numLabels.
 */
lazy val hammingLoss: Double = predictionAndLabels.map { case (predictions, labels) =>
  val shared = labels.intersect(predictions).size
  // |labels| + |predictions| - 2 * |shared| is the size of the symmetric difference.
  labels.size + predictions.size - 2 * shared
}.sum / (numDocs * numLabels)

/**
 * Returns document-based precision averaged by the number of documents.
 * A document with no predictions contributes 0.
 */
lazy val precision: Double = predictionAndLabels.map { case (predictions, labels) =>
  if (predictions.isEmpty) {
    0.0
  } else {
    val hits = predictions.intersect(labels).size
    hits.toDouble / predictions.size
  }
}.sum / numDocs

/**
* Returns Document-based Recall averaged by the number of documents
*/
lazy val macroRecallDoc: Double = predictionAndLabels.map { case (predictions, labels) =>
labels.intersect(predictions).size.toDouble / labels.size}.sum / numDocs

/**
* Returns Document-based F1-measure averaged by the number of documents
* Returns document-based recall averaged by the number of documents
*/
lazy val macroF1MeasureDoc: Double = predictionAndLabels.map { case (predictions, labels) =>
2.0 * predictions.intersect(labels).size / (predictions.size + labels.size)}.sum / numDocs

/**
* Returns micro-averaged document-based Precision
* (equals to label-based microPrecision)
*/
lazy val microPrecisionDoc: Double = microPrecisionClass

/**
* Returns micro-averaged document-based Recall
* (equals to label-based microRecall)
*/
lazy val microRecallDoc: Double = microRecallClass
/**
 * Returns document-based recall: per document, the share of true labels that were
 * predicted; the per-document values are summed and divided by numDocs.
 */
lazy val recall: Double = predictionAndLabels.map { case (predictions, labels) =>
  // Guard documents without labels: 0.0 / 0 is NaN and would make the whole result NaN.
  // Score them 0, mirroring how document-based precision treats empty predictions.
  if (labels.isEmpty) 0.0 else labels.intersect(predictions).size.toDouble / labels.size
}.sum / numDocs

/**
* Returns micro-averaged document-based F1-measure
* (equals to label-based microF1measure)
* Returns document-based f1-measure averaged by the number of documents
*/
lazy val microF1MeasureDoc: Double = microF1MeasureClass
/**
 * Returns document-based f1-measure: per document, twice the number of correctly
 * predicted labels divided by the combined size of prediction and ground truth; the
 * per-document values are summed and divided by numDocs.
 */
lazy val f1Measure: Double = predictionAndLabels.map { case (predictions, labels) =>
  val total = predictions.size + labels.size
  // Both arrays empty would give 0.0 / 0 = NaN; score such a document 0 instead.
  if (total == 0) 0.0 else 2.0 * predictions.intersect(labels).size / total
}.sum / numDocs

// True positives per label: occurrences of a label in both the prediction and the
// ground truth of a document, counted over all documents and keyed by label.
private lazy val tpPerClass = predictionAndLabels.flatMap { case (predicted, actual) =>
  predicted.intersect(actual)
}.countByValue()

private lazy val fpPerClass = predictionAndLabels.flatMap { case(predictions, labels) =>
predictions.diff(labels).map(category => (category, 1))}.reduceByKey(_ + _).collectAsMap()
// False positives per label: labels that were predicted for a document but are absent
// from its ground truth, counted over all documents and keyed by label.
private lazy val fpPerClass = predictionAndLabels.flatMap { case (predicted, actual) =>
  predicted.diff(actual)
}.countByValue()

private lazy val fnPerClass = predictionAndLabels.flatMap{ case(predictions, labels) =>
labels.diff(predictions).map(category => (category, 1))}.reduceByKey(_ + _).collectAsMap()
// False negatives per label: labels in a document's ground truth that were not
// predicted for it, counted over all documents and keyed by label.
private lazy val fnPerClass = predictionAndLabels.flatMap { case (predicted, actual) =>
  actual.diff(predicted)
}.countByValue()

/**
 * Returns precision for a given label (category)
 * @param label the label.
 * @return tp / (tp + fp) for the label, or 0 when the label has neither true nor false
 *         positives.
 */
def precision(label: Double): Double = {
  // A label without any true positive has no entry in tpPerClass; applying the map
  // directly would throw NoSuchElementException, so default the count to 0 instead.
  val tp = tpPerClass.getOrElse(label, 0L)
  val fp = fpPerClass.getOrElse(label, 0L)
  if (tp + fp == 0) 0.0 else tp.toDouble / (tp + fp)
}

/**
 * Returns recall for a given label (category)
 * @param label the label.
 * @return tp / (tp + fn) for the label, or 0 when the label has neither true positives
 *         nor false negatives.
 */
def recall(label: Double): Double = {
  // A label without any true positive has no entry in tpPerClass; applying the map
  // directly would throw NoSuchElementException, so default the count to 0 instead.
  val tp = tpPerClass.getOrElse(label, 0L)
  val fn = fnPerClass.getOrElse(label, 0L)
  if (tp + fn == 0) 0.0 else tp.toDouble / (tp + fn)
}

/**
 * Returns f1-measure for a given label (category): the harmonic mean of the label's
 * precision and recall, or 0 when both are 0.
 * @param label the label.
 */
def f1Measure(label: Double) = {
  val prec = precision(label)
  val rec = recall(label)
  val denom = prec + rec
  if (denom == 0) 0 else 2 * prec * rec / denom
}

private lazy val sumTp = tpPerClass.foldLeft(0L){ case (sum, (_, tp)) => sum + tp}
private lazy val sumFpClass = fpPerClass.foldLeft(0L){ case (sum, (_, fp)) => sum + fp}
private lazy val sumFnClass = fnPerClass.foldLeft(0L){ case (sum, (_, fn)) => sum + fn}
// Totals of the per-label true-positive, false-positive and false-negative counts.
private lazy val sumTp = tpPerClass.values.foldLeft(0L)(_ + _)
private lazy val sumFpClass = fpPerClass.values.foldLeft(0L)(_ + _)
private lazy val sumFnClass = fnPerClass.values.foldLeft(0L)(_ + _)

/**
 * Returns micro-averaged label-based precision
 * (equal to micro-averaged document-based precision):
 * total true positives divided by total true plus false positives.
 */
lazy val microPrecision: Double = {
  // Reuse the shared false-positive total rather than folding fpPerClass a second time.
  val predicted = sumTp + sumFpClass
  // No predictions at all would give 0.0 / 0 = NaN; report 0 like the per-label metrics.
  if (predicted == 0) 0.0 else sumTp.toDouble / predicted
}

/**
 * Returns micro-averaged label-based recall
 * (equal to micro-averaged document-based recall):
 * total true positives divided by total true positives plus false negatives.
 */
lazy val microRecall: Double = {
  // Reuse the shared false-negative total (a Long) rather than re-folding fnPerClass
  // with a Double accumulator.
  val relevant = sumTp + sumFnClass
  // No true labels at all would give 0.0 / 0 = NaN; report 0 like the per-label metrics.
  if (relevant == 0) 0.0 else sumTp.toDouble / relevant
}

/**
 * Returns micro-averaged label-based f1-measure
 * (equal to micro-averaged document-based f1-measure):
 * 2 * tp / (2 * tp + fn + fp) over the totals of all labels.
 */
lazy val microF1Measure: Double = {
  val denominator = 2 * sumTp + sumFnClass + sumFpClass
  // With no labels and no predictions the ratio is 0.0 / 0 = NaN; report 0 instead.
  if (denominator == 0) 0.0 else 2.0 * sumTp / denominator
}

/**
* Returns the sequence of labels in ascending order
*/
lazy val microF1MeasureClass = 2.0 * sumTp / (2 * sumTp + sumFnClass + sumFpClass)
// NOTE(review): built from the keys of tpPerClass only, so a label that never occurs as a
// true positive is not listed here - confirm whether such labels should be included.
lazy val labels: Array[Double] = tpPerClass.keys.toArray.sorted
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,23 +80,24 @@ class MultilabelMetricsSuite extends FunSuite with LocalSparkContext {
val hammingLoss = (1.0 / (7 * 3)) * (2 + 2 + 1 + 0 + 0 + 1 + 1)
val strictAccuracy = 2.0 / 7
val accuracy = 1.0 / 7 * (1.0 / 3 + 1.0 /3 + 0 + 1.0 / 1 + 2.0 / 2 + 2.0 / 3 + 1.0 / 2)
assert(math.abs(metrics.precisionClass(0.0) - precision0) < delta)
assert(math.abs(metrics.precisionClass(1.0) - precision1) < delta)
assert(math.abs(metrics.precisionClass(2.0) - precision2) < delta)
assert(math.abs(metrics.recallClass(0.0) - recall0) < delta)
assert(math.abs(metrics.recallClass(1.0) - recall1) < delta)
assert(math.abs(metrics.recallClass(2.0) - recall2) < delta)
assert(math.abs(metrics.f1MeasureClass(0.0) - f1measure0) < delta)
assert(math.abs(metrics.f1MeasureClass(1.0) - f1measure1) < delta)
assert(math.abs(metrics.f1MeasureClass(2.0) - f1measure2) < delta)
assert(math.abs(metrics.microPrecisionClass - microPrecisionClass) < delta)
assert(math.abs(metrics.microRecallClass - microRecallClass) < delta)
assert(math.abs(metrics.microF1MeasureClass - microF1MeasureClass) < delta)
assert(math.abs(metrics.macroPrecisionDoc - macroPrecisionDoc) < delta)
assert(math.abs(metrics.macroRecallDoc - macroRecallDoc) < delta)
assert(math.abs(metrics.macroF1MeasureDoc - macroF1MeasureDoc) < delta)
assert(math.abs(metrics.precision(0.0) - precision0) < delta)
assert(math.abs(metrics.precision(1.0) - precision1) < delta)
assert(math.abs(metrics.precision(2.0) - precision2) < delta)
assert(math.abs(metrics.recall(0.0) - recall0) < delta)
assert(math.abs(metrics.recall(1.0) - recall1) < delta)
assert(math.abs(metrics.recall(2.0) - recall2) < delta)
assert(math.abs(metrics.f1Measure(0.0) - f1measure0) < delta)
assert(math.abs(metrics.f1Measure(1.0) - f1measure1) < delta)
assert(math.abs(metrics.f1Measure(2.0) - f1measure2) < delta)
assert(math.abs(metrics.microPrecision - microPrecisionClass) < delta)
assert(math.abs(metrics.microRecall - microRecallClass) < delta)
assert(math.abs(metrics.microF1Measure - microF1MeasureClass) < delta)
assert(math.abs(metrics.precision - macroPrecisionDoc) < delta)
assert(math.abs(metrics.recall - macroRecallDoc) < delta)
assert(math.abs(metrics.f1Measure - macroF1MeasureDoc) < delta)
assert(math.abs(metrics.hammingLoss - hammingLoss) < delta)
assert(math.abs(metrics.strictAccuracy - strictAccuracy) < delta)
assert(math.abs(metrics.subsetAccuracy - strictAccuracy) < delta)
assert(math.abs(metrics.accuracy - accuracy) < delta)
assert(metrics.labels.sameElements(Array(0.0, 1.0, 2.0)))
}
}

0 comments on commit fc8175e

Please sign in to comment.