craffel · craffel · Mar 3, 2017 · Oct 22, 2016 · Feb 28, 2017 · Feb 28, 2017
diff --git a/mir_eval/segment.py b/mir_eval/segment.py
@@ -28,21 +28,48 @@
 
 * :func:`mir_eval.segment.detection`: An estimated boundary is considered
   correct if it falls within a window around a reference boundary
+  [#turnbull2007]_
 * :func:`mir_eval.segment.deviation`: Computes the median absolute time
   difference from a reference boundary to its nearest estimated boundary, and
-  vice versa
+  vice versa [#turnbull2007]_
 * :func:`mir_eval.segment.pairwise`: For classifying pairs of sampled time
-  instants as belonging to the same structural component
+  instants as belonging to the same structural component [#levy2008]_
 * :func:`mir_eval.segment.rand_index`: Clusters reference and estimated
   annotations and compares them by the Rand Index
 * :func:`mir_eval.segment.ari`: Computes the Rand index, adjusted for chance
 * :func:`mir_eval.segment.nce`: Interprets sampled reference and estimated
   labels as samples of random variables :math:`Y_R, Y_E` from which the
   conditional entropy of :math:`Y_R` given :math:`Y_E` (Under-Segmentation) and
   :math:`Y_E` given :math:`Y_R` (Over-Segmentation) are estimated
+  [#lukashevich2008]_
 * :func:`mir_eval.segment.mutual_information`: Computes the standard,
   normalized, and adjusted mutual information of sampled reference and
   estimated segments
+* :func:`mir_eval.segment.vmeasure`: Computes the V-Measure, which is similar
+  to the conditional entropy metrics, but uses the marginal distributions
+  as normalization rather than the maximum entropy distribution
+  [#rosenberg2007]_
+
+
+References
+----------
+    .. [#turnbull2007] Turnbull, D., Lanckriet, G. R., Pampalk, E.,
+        & Goto, M. (2007). A Supervised Approach for Detecting Boundaries in
+        Music Using Difference Features and Boosting. In ISMIR (pp. 51-54).
+
+    .. [#levy2008] Levy, M., & Sandler, M. (2008).
+        Structural segmentation of musical audio by constrained clustering.
+        IEEE transactions on audio, speech, and language processing, 16(2),
+        318-326.
+
+    .. [#lukashevich2008] Lukashevich, H. M. (2008).
+        Towards Quantitative Measures of Evaluating Song Segmentation.
+        In ISMIR (pp. 375-380).
+
+    .. [#rosenberg2007] Rosenberg, A., & Hirschberg, J. (2007).
+        V-Measure: A Conditional Entropy-Based External Cluster Evaluation
+        Measure.
+        In EMNLP-CoNLL (Vol. 7, pp. 410-420).
 '''
 
 import collections
@@ -912,7 +939,7 @@ def mutual_information(reference_intervals, reference_labels,
 
 
 def nce(reference_intervals, reference_labels, estimated_intervals,
-        estimated_labels, frame_size=0.1, beta=1.0):
+        estimated_labels, frame_size=0.1, beta=1.0, marginal=False):
     """Frame-clustering segmentation: normalized conditional entropy
 
     Computes cross-entropy of cluster assignment, normalized by the
@@ -958,16 +985,31 @@ def nce(reference_intervals, reference_labels, estimated_intervals,
         beta for F-measure
         (Default value = 1.0)
 
+    marginal : bool
+        If `False`, normalize conditional entropy by uniform entropy.
+        If `True`, normalize conditional entropy by the marginal entropy.
+        (Default value = False)
+
     Returns
     -------
     S_over
         Over-clustering score:
-        ``1 - H(y_est | y_ref) / log(|y_est|)``
+
+        - For `marginal=False`, ``1 - H(y_est | y_ref) / log(|y_est|)``
+
+        - For `marginal=True`, ``1 - H(y_est | y_ref) / H(y_est)``
+
         If `|y_est|==1`, then `S_over` will be 0.
+
     S_under
         Under-clustering score:
-        ``1 - H(y_ref | y_est) / log(|y_ref|)``
+
+        - For `marginal=False`, ``1 - H(y_ref | y_est) / log(|y_ref|)``
+
+        - For `marginal=True`, ``1 - H(y_ref | y_est) / H(y_ref)``
+
         If `|y_ref|==1`, then `S_under` will be 0.
+
     S_F
         F-measure for (S_over, S_under)
 
@@ -1009,25 +1051,104 @@ def nce(reference_intervals, reference_labels, estimated_intervals,
     # sum_i P[true = i | estimated = j] log P[true = i | estimated = j]
     # entropy sums over axis=0, which is true labels
 
-    # The following scipy.stats.entropy calls are equivalent to
-    # scipy.stats.entropy(contingency, base=2)
-    # However the `base` kwarg has only been introduced in scipy 0.14.0
-    true_given_est = p_est.dot(scipy.stats.entropy(contingency) / np.log(2))
-    pred_given_ref = p_ref.dot(scipy.stats.entropy(contingency.T) / np.log(2))
+    true_given_est = p_est.dot(scipy.stats.entropy(contingency, base=2))
+    pred_given_ref = p_ref.dot(scipy.stats.entropy(contingency.T, base=2))
+
+    if marginal:
+        # Normalize conditional entropy by marginal entropy
+        z_ref = scipy.stats.entropy(p_ref, base=2)
+        z_est = scipy.stats.entropy(p_est, base=2)
+    else:
+        z_ref = np.log2(contingency.shape[0])
+        z_est = np.log2(contingency.shape[1])
 
     score_under = 0.0
-    if contingency.shape[0] > 1:
-        score_under = 1. - true_given_est / np.log2(contingency.shape[0])
+    if z_ref > 0:
+        score_under = 1. - true_given_est / z_ref
 
     score_over = 0.0
-    if contingency.shape[1] > 1:
-        score_over = 1. - pred_given_ref / np.log2(contingency.shape[1])
+    if z_est > 0:
+        score_over = 1. - pred_given_ref / z_est
 
     f_measure = util.f_measure(score_over, score_under, beta=beta)
 
     return score_over, score_under, f_measure
 
 
+def vmeasure(reference_intervals, reference_labels, estimated_intervals,
+             estimated_labels, frame_size=0.1, beta=1.0):
+    """Frame-clustering segmentation: V-measure
+
+    Computes conditional entropy of cluster assignment, normalized by the
+    marginal entropy.
+
+    This is equivalent to `nce(..., marginal=True)`.
+
+    Examples
+    --------
+    >>> (ref_intervals,
+    ...  ref_labels) = mir_eval.io.load_labeled_intervals('ref.lab')
+    >>> (est_intervals,
+    ...  est_labels) = mir_eval.io.load_labeled_intervals('est.lab')
+    >>> # Trim or pad the estimate to match reference timing
+    >>> (ref_intervals,
+    ...  ref_labels) = mir_eval.util.adjust_intervals(ref_intervals,
+    ...                                               ref_labels,
+    ...                                               t_min=0)
+    >>> (est_intervals,
+    ...  est_labels) = mir_eval.util.adjust_intervals(
+    ...     est_intervals, est_labels, t_min=0, t_max=ref_intervals.max())
+    >>> V_precision, V_recall, V_F = mir_eval.segment.vmeasure(ref_intervals,
+    ...                                                        ref_labels,
+    ...                                                        est_intervals,
+    ...                                                        est_labels)
+
+    Parameters
+    ----------
+    reference_intervals : np.ndarray, shape=(n, 2)
+        reference segment intervals, in the format returned by
+        :func:`mir_eval.io.load_labeled_intervals`.
+    reference_labels : list, shape=(n,)
+        reference segment labels, in the format returned by
+        :func:`mir_eval.io.load_labeled_intervals`.
+    estimated_intervals : np.ndarray, shape=(m, 2)
+        estimated segment intervals, in the format returned by
+        :func:`mir_eval.io.load_labeled_intervals`.
+    estimated_labels : list, shape=(m,)
+        estimated segment labels, in the format returned by
+        :func:`mir_eval.io.load_labeled_intervals`.
+    frame_size : float > 0
+        length (in seconds) of frames for clustering
+        (Default value = 0.1)
+    beta : float > 0
+        beta for F-measure
+        (Default value = 1.0)
+
+    Returns
+    -------
+    V_precision
+        Over-clustering score:
+        ``1 - H(y_est | y_ref) / H(y_est)``
+
+        If `|y_est|==1`, then `V_precision` will be 0.
+
+    V_recall
+        Under-clustering score:
+        ``1 - H(y_ref | y_est) / H(y_ref)``
+
+        If `|y_ref|==1`, then `V_recall` will be 0.
+
+    V_F
+        F-measure for (V_precision, V_recall)
+
+    """
+
+    return nce(reference_intervals, reference_labels,
+               estimated_intervals, estimated_labels,
+               frame_size=frame_size, beta=beta,
+               marginal=True)
+
+
 def evaluate(ref_intervals, ref_labels, est_intervals, est_labels, **kwargs):
     """Compute all metrics for the given reference and estimated annotations.
 
@@ -1122,4 +1243,9 @@ def evaluate(ref_intervals, ref_labels, est_intervals, est_labels, **kwargs):
         util.filter_kwargs(nce, ref_intervals, ref_labels, est_intervals,
                            est_labels, **kwargs)
 
+    # V-measure metrics
+    scores['V Precision'], scores['V Recall'], scores['V-measure'] = \
+        util.filter_kwargs(vmeasure, ref_intervals, ref_labels, est_intervals,
+                           est_labels, **kwargs)
+
     return scores
diff --git a/setup.py b/setup.py
@@ -25,7 +25,7 @@
     license='MIT',
     install_requires=[
         'numpy >= 1.7.0',
-        'scipy >= 0.9.0',
+        'scipy >= 0.14.0',
         'future',
         'six'
     ],

diff --git a/tests/data/segment/output00.json b/tests/data/segment/output00.json
@@ -1 +1 @@
-{"Precision@0.5": 0.3333333333333333, "Recall@0.5": 0.6, "F-measure@0.5": 0.42857142857142855, "Precision@3.0": 0.5, "Recall@3.0": 0.9, "F-measure@3.0": 0.6428571428571429, "Ref-to-est deviation": 0.39899999999999958, "Est-to-ref deviation": 2.8474999999999997, "Pairwise Precision": 0.87442744102258263, "Pairwise Recall": 0.48068285131997418, "Pairwise F-measure": 0.6203513883678935, "Rand Index": 0.82890773118312333, "Adjusted Rand Index": 0.52166883969499556, "Mutual Information": 1.1375211793868052, "Adjusted Mutual Information": 0.58737925867857188, "Normalized Mutual Information": 0.6939260940391484, "NCE Over": 0.61991743813038624, "NCE Under": 0.85694667760167753, "NCE F-measure": 0.71941105933073668}
+{"Precision@0.5": 0.3333333333333333, "Recall@0.5": 0.6, "F-measure@0.5": 0.42857142857142855, "Precision@3.0": 0.5, "Recall@3.0": 0.9, "F-measure@3.0": 0.6428571428571429, "Ref-to-est deviation": 0.3989999999999996, "Est-to-ref deviation": 2.8474999999999997, "Pairwise Precision": 0.8744274410225826, "Pairwise Recall": 0.4806828513199742, "Pairwise F-measure": 0.6203513883678935, "Rand Index": 0.8289077311831233, "Adjusted Rand Index": 0.5216688396951437, "Mutual Information": 1.1375211793868052, "Adjusted Mutual Information": 0.587379258678572, "Normalized Mutual Information": 0.6939260940391484, "NCE Over": 0.6199174381303865, "NCE Under": 0.8569466776016775, "NCE F-measure": 0.719411059330737, "V Precision": 0.590037137766286, "V Recall": 0.8161069755903478, "V-measure": 0.6848991073026666}
diff --git a/tests/data/segment/output01.json b/tests/data/segment/output01.json
@@ -1 +1 @@
-{"Precision@0.5": 0.5454545454545454, "Recall@0.5": 0.4, "F-measure@0.5": 0.4615384615384615, "Precision@3.0": 0.5454545454545454, "Recall@3.0": 0.4, "F-measure@3.0": 0.4615384615384615, "Ref-to-est deviation": 13.036000000000001, "Est-to-ref deviation": 0.33599999999999852, "Pairwise Precision": 0.49939016579260981, "Pairwise Recall": 0.61221319846233357, "Pairwise F-measure": 0.55007615218121908, "Rand Index": 0.75410460884145092, "Adjusted Rand Index": 0.38328718826533731, "Mutual Information": 0.82985003780999334, "Adjusted Mutual Information": 0.54769702660154485, "Normalized Mutual Information": 0.57269506013770199, "NCE Over": 0.71234956156918239, "NCE Under": 0.61984178395916567, "NCE F-measure": 0.66288378847027396}
+{"Precision@0.5": 0.5454545454545454, "Recall@0.5": 0.4, "F-measure@0.5": 0.4615384615384615, "Precision@3.0": 0.5454545454545454, "Recall@3.0": 0.4, "F-measure@3.0": 0.4615384615384615, "Ref-to-est deviation": 13.036000000000001, "Est-to-ref deviation": 0.3359999999999985, "Pairwise Precision": 0.4993901657926098, "Pairwise Recall": 0.6122131984623336, "Pairwise F-measure": 0.5500761521812191, "Rand Index": 0.7541046088414509, "Adjusted Rand Index": 0.383287188265407, "Mutual Information": 0.8298500378099933, "Adjusted Mutual Information": 0.5476970266015448, "Normalized Mutual Information": 0.572695060137702, "NCE Over": 0.7123495615691824, "NCE Under": 0.6198417839591657, "NCE F-measure": 0.662883788470274, "V Precision": 0.59718972940023, "V Recall": 0.5492050779833786, "V-measure": 0.572193156831662}
diff --git a/tests/data/segment/output02.json b/tests/data/segment/output02.json
@@ -1 +1 @@
-{"Precision@0.5": 0.08333333333333333, "Recall@0.5": 0.18181818181818182, "F-measure@0.5": 0.1142857142857143, "Precision@3.0": 0.20833333333333334, "Recall@3.0": 0.45454545454545453, "F-measure@3.0": 0.28571428571428575, "Ref-to-est deviation": 2.4509999999999934, "Est-to-ref deviation": 7.9399999999999995, "Pairwise Precision": 0.26723175107093661, "Pairwise Recall": 0.81680356275045607, "Pairwise F-measure": 0.40270984454432168, "Rand Index": 0.53199299824956237, "Adjusted Rand Index": 0.15746191367526505, "Mutual Information": 0.5875580090173057, "Adjusted Mutual Information": 0.31916695064697315, "Normalized Mutual Information": 0.45897667736089354, "NCE Over": 0.84069380192324406, "NCE Under": 0.4364389820106126, "NCE F-measure": 0.57458637302198445}
+{"Precision@0.5": 0.08333333333333333, "Recall@0.5": 0.18181818181818182, "F-measure@0.5": 0.1142857142857143, "Precision@3.0": 0.20833333333333334, "Recall@3.0": 0.45454545454545453, "F-measure@3.0": 0.28571428571428575, "Ref-to-est deviation": 2.4509999999999934, "Est-to-ref deviation": 7.9399999999999995, "Pairwise Precision": 0.2672317510709366, "Pairwise Recall": 0.8168035627504561, "Pairwise F-measure": 0.4027098445443217, "Rand Index": 0.5319929982495624, "Adjusted Rand Index": 0.15746191367518272, "Mutual Information": 0.5875580090173058, "Adjusted Mutual Information": 0.31916695064697315, "Normalized Mutual Information": 0.4589766773608936, "NCE Over": 0.840693801923244, "NCE Under": 0.4364389820106126, "NCE F-measure": 0.5745863730219845, "V Precision": 0.6546216709365502, "V Recall": 0.32180356947221267, "V-measure": 0.4314914888375012}
diff --git a/tests/data/segment/output03.json b/tests/data/segment/output03.json
@@ -1 +1 @@
-{"Precision@0.5": 0.18181818181818182, "Recall@0.5": 0.15384615384615385, "F-measure@0.5": 0.16666666666666669, "Precision@3.0": 0.5454545454545454, "Recall@3.0": 0.46153846153846156, "F-measure@3.0": 0.4999999999999999, "Ref-to-est deviation": 2.8320000000000078, "Est-to-ref deviation": 1.6999999999999957, "Pairwise Precision": 0.79542476842193111, "Pairwise Recall": 0.20372577015066376, "Pairwise F-measure": 0.32437258908988192, "Rand Index": 0.61716466119017321, "Adjusted Rand Index": 0.17206814668514248, "Mutual Information": 0.70699805971863017, "Adjusted Mutual Information": 0.32288437024230487, "Normalized Mutual Information": 0.47183488839912169, "NCE Over": 0.36093183448445643, "NCE Under": 0.79892409459630565, "NCE F-measure": 0.49722923657425966}
+{"Precision@0.5": 0.18181818181818182, "Recall@0.5": 0.15384615384615385, "F-measure@0.5": 0.16666666666666669, "Precision@3.0": 0.5454545454545454, "Recall@3.0": 0.46153846153846156, "F-measure@3.0": 0.4999999999999999, "Ref-to-est deviation": 2.832000000000008, "Est-to-ref deviation": 1.6999999999999957, "Pairwise Precision": 0.7954247684219311, "Pairwise Recall": 0.20372577015066376, "Pairwise F-measure": 0.3243725890898819, "Rand Index": 0.6171646611901732, "Adjusted Rand Index": 0.17206814668546003, "Mutual Information": 0.7069980597186302, "Adjusted Mutual Information": 0.3228843702423049, "Normalized Mutual Information": 0.4718348883991217, "NCE Over": 0.36093183448445665, "NCE Under": 0.7989240945963056, "NCE F-measure": 0.4972292365742599, "V Precision": 0.32453331344779024, "V Recall": 0.6859947890878926, "V-measure": 0.44061745804392516}
diff --git a/tests/data/segment/output04.json b/tests/data/segment/output04.json
@@ -1 +1 @@
-{"Precision@0.5": 0.2, "Recall@0.5": 0.5, "F-measure@0.5": 0.28571428571428575, "Precision@3.0": 0.3, "Recall@3.0": 0.75, "F-measure@3.0": 0.4285714285714285, "Ref-to-est deviation": 0.28300000000000125, "Est-to-ref deviation": 5.5510000000000055, "Pairwise Precision": 0.92595043607953142, "Pairwise Recall": 0.35218495476813033, "Pairwise F-measure": 0.51028367539674546, "Rand Index": 0.76348025201081549, "Adjusted Rand Index": 0.39329787297570201, "Mutual Information": 0.97537746604020625, "Adjusted Mutual Information": 0.46809981616245017, "Normalized Mutual Information": 0.65315661235201594, "NCE Over": 0.49896823783207223, "NCE Under": 0.91017263905977475, "NCE F-measure": 0.64457322228326619}
+{"Precision@0.5": 0.2, "Recall@0.5": 0.5, "F-measure@0.5": 0.28571428571428575, "Precision@3.0": 0.3, "Recall@3.0": 0.75, "F-measure@3.0": 0.4285714285714285, "Ref-to-est deviation": 0.28300000000000125, "Est-to-ref deviation": 5.5510000000000055, "Pairwise Precision": 0.9259504360795314, "Pairwise Recall": 0.35218495476813033, "Pairwise F-measure": 0.5102836753967455, "Rand Index": 0.7634802520108155, "Adjusted Rand Index": 0.3932978729758545, "Mutual Information": 0.9753774660402063, "Adjusted Mutual Information": 0.46809981616245017, "Normalized Mutual Information": 0.6531566123520159, "NCE Over": 0.4989682378320722, "NCE Under": 0.9101726390597747, "NCE F-measure": 0.6445732222832662, "V Precision": 0.4697768990421939, "V Recall": 0.9081194948686581, "V-measure": 0.619224438273351}
diff --git a/tests/data/segment/output05.json b/tests/data/segment/output05.json
@@ -1 +1 @@
-{"Precision@0.5": 0.38095238095238093, "Recall@0.5": 0.4444444444444444, "F-measure@0.5": 0.41025641025641024, "Precision@3.0": 0.47619047619047616, "Recall@3.0": 0.5555555555555556, "F-measure@3.0": 0.5128205128205129, "Ref-to-est deviation": 0.88450000000000273, "Est-to-ref deviation": 2.0920000000000414, "Pairwise Precision": 0.89361985851541192, "Pairwise Recall": 0.12923035832531626, "Pairwise F-measure": 0.22580591492517105, "Rand Index": 0.60576268180034554, "Adjusted Rand Index": 0.12775042299122708, "Mutual Information": 0.8118894611245796, "Adjusted Mutual Information": 0.28579555410413998, "Normalized Mutual Information": 0.4958259732497331, "NCE Over": 0.32765438933312696, "NCE Under": 0.90127193904104264, "NCE F-measure": 0.48059139102383153}
+{"Precision@0.5": 0.38095238095238093, "Recall@0.5": 0.4444444444444444, "F-measure@0.5": 0.41025641025641024, "Precision@3.0": 0.47619047619047616, "Recall@3.0": 0.5555555555555556, "F-measure@3.0": 0.5128205128205129, "Ref-to-est deviation": 0.8845000000000027, "Est-to-ref deviation": 2.0920000000000414, "Pairwise Precision": 0.8936198585154119, "Pairwise Recall": 0.12923035832531626, "Pairwise F-measure": 0.22580591492517105, "Rand Index": 0.6057626818003455, "Adjusted Rand Index": 0.12775042299090225, "Mutual Information": 0.8118894611245795, "Adjusted Mutual Information": 0.2857955541041399, "Normalized Mutual Information": 0.49582597324973304, "NCE Over": 0.32765438933312685, "NCE Under": 0.9012719390410426, "NCE F-measure": 0.48059139102383136, "V Precision": 0.28728701796701817, "V Recall": 0.8557414027572561, "V-measure": 0.43016147506335445}
diff --git a/tests/data/segment/output06.json b/tests/data/segment/output06.json
@@ -1 +1 @@
-{"Precision@0.5": 0.6, "Recall@0.5": 0.5, "F-measure@0.5": 0.5454545454545454, "Precision@3.0": 0.9, "Recall@3.0": 0.75, "F-measure@3.0": 0.8181818181818182, "Ref-to-est deviation": 0.37199999999999966, "Est-to-ref deviation": 0.13499999999999779, "Pairwise Precision": 0.79309065091812692, "Pairwise Recall": 0.42685807583415836, "Pairwise F-measure": 0.55500225835591688, "Rand Index": 0.77792616886153987, "Adjusted Rand Index": 0.42429770750008716, "Mutual Information": 1.0358589237639091, "Adjusted Mutual Information": 0.55223481569673849, "Normalized Mutual Information": 0.66743178237377987, "NCE Over": 0.62388170665293718, "NCE Under": 0.85624576821215703, "NCE F-measure": 0.72182441074576342}
+{"Precision@0.5": 0.6, "Recall@0.5": 0.5, "F-measure@0.5": 0.5454545454545454, "Precision@3.0": 0.9, "Recall@3.0": 0.75, "F-measure@3.0": 0.8181818181818182, "Ref-to-est deviation": 0.37199999999999966, "Est-to-ref deviation": 0.1349999999999978, "Pairwise Precision": 0.7930906509181269, "Pairwise Recall": 0.42685807583415836, "Pairwise F-measure": 0.5550022583559169, "Rand Index": 0.7779261688615399, "Adjusted Rand Index": 0.4242977075000336, "Mutual Information": 1.035858923763909, "Adjusted Mutual Information": 0.5522348156967383, "Normalized Mutual Information": 0.6674317823737799, "NCE Over": 0.6238817066529372, "NCE Under": 0.856245768212157, "NCE F-measure": 0.7218244107457634, "V Precision": 0.5562329768516343, "V Recall": 0.8008607951366766, "V-measure": 0.6564987524332658}