Fixed a bug where agreement could not be computed on boundary-set-format segmentations and segmentations containing multiple boundary types.
cfournie · Sep 11, 2013 · 0510ceb · 0510ceb
1 parent 3f2fdaf
commit 0510ceb
Show file tree

Hide file tree

Showing 7 changed files with 131 additions and 16 deletions.
diff --git a/segeval/__init__.py b/segeval/__init__.py
@@ -10,7 +10,7 @@
 
 
 # Package description
-__version_number__ = '2.0.8'
+__version_number__ = '2.0.9'
 __release__ = None
 __version__ = '-'.join((__version_number__, __release__)) if __release__ is not None else __version_number__
 __project__ = 'SegEval'

diff --git a/segeval/agreement/__init__.py b/segeval/agreement/__init__.py
@@ -7,6 +7,9 @@
 from ..data import get_coders
 from ..similarity import SIMILARITY_METRIC_DEFAULTS
 from ..similarity.boundary import boundary_similarity
+from ..format import (BoundaryFormat, boundary_string_from_masses,
+                      convert_positions_to_masses, convert_nltk_to_masses)
+from ..util import SegmentationMetricError
 
 
 AGREEMENT_METRIC_DEFAULTS = dict(SIMILARITY_METRIC_DEFAULTS)
@@ -19,9 +22,34 @@
 def __fnc_metric__(fnc_metric, dataset, **kwargs):
     metric_kwargs = dict(AGREEMENT_METRIC_DEFAULTS)
     metric_kwargs.update(kwargs)
+    # Settings carried by the dataset override defaults and caller kwargs
+    for name in ('boundary_types', 'boundary_format'):
+        if hasattr(dataset, name):
+            metric_kwargs[name] = getattr(dataset, name)
     return fnc_metric(dataset, **metric_kwargs)
 
 
+def __potential_boundaries__(segmentation, **kwargs):
+    '''
+    Count potential boundaries: boundary positions times boundary types.
+    '''
+    fmt = kwargs['boundary_format']
+    types = kwargs['boundary_types']
+    # NLTK and position inputs are first reduced to masses
+    if fmt == BoundaryFormat.nltk:
+        segmentation = convert_nltk_to_masses(segmentation)
+        fmt = BoundaryFormat.mass
+    if fmt == BoundaryFormat.position:
+        segmentation = convert_positions_to_masses(segmentation)
+        fmt = BoundaryFormat.mass
+    # Masses become a boundary string; set-format input is used as given
+    if fmt == BoundaryFormat.mass:
+        segmentation = boundary_string_from_masses(segmentation)
+    elif fmt != BoundaryFormat.sets:
+        raise SegmentationMetricError('Unsupported boundary format')
+    return len(segmentation) * len(types)
+
+
 def __actual_agreement_linear__(dataset, **kwargs):
     '''
     Calculate actual (i.e., observed or :math:`\\text{A}_a`), segmentation
@@ -78,7 +106,6 @@ def __actual_agreement_linear__(dataset, **kwargs):
     :mod:`segeval.data.Samples`.
 
     '''
-
     metric_kwargs = dict(kwargs)
     del metric_kwargs['fnc_compare']
     metric_kwargs['return_parts'] = True
@@ -102,7 +129,7 @@ def __actual_agreement_linear__(dataset, **kwargs):
                 numerator, denominator = \
                     fnc_compare(segs_a, segs_b, **metric_kwargs)[0:2]
                 # Obtain necessary values
-                pbs = sum(segs_a) - 1
+                pbs = __potential_boundaries__(segs_a, **metric_kwargs)
                 # Add all pbs
                 all_numerators.append(numerator)
                 all_denominators.append(denominator)

diff --git a/segeval/agreement/bias_test.py b/segeval/agreement/bias_test.py
@@ -7,7 +7,8 @@
 from decimal import Decimal
 from .bias import artstein_poesio_bias_linear
 from ..data.samples import (KAZANTSEVA2012_G5, KAZANTSEVA2012_G2,
-                            COMPLETE_AGREEMENT, LARGE_DISAGREEMENT)
+                            COMPLETE_AGREEMENT, LARGE_DISAGREEMENT,
+                            MULTIPLE_BOUNDARY_TYPES)
 
 
 class TestBias(unittest.TestCase):
@@ -65,6 +66,13 @@ def test_bias_complete(self):
         self.assertEqual(bias,
                          Decimal('0.01455229356727327645713789012'))
 
+    def test_multiple_boundary_types(self):
+        '''
+        Test bias upon a dataset containing multiple boundary types.
+        '''
+        bias = artstein_poesio_bias_linear(MULTIPLE_BOUNDARY_TYPES)
+        self.assertEqual(bias, Decimal('0.00'))
+
     def test_bias_large(self):
         '''
         Test bias upon a hypothetical dataset containing large disagreement.

diff --git a/segeval/agreement/kappa_test.py b/segeval/agreement/kappa_test.py
@@ -7,7 +7,8 @@
 from decimal import Decimal
 from .kappa import fleiss_kappa_linear
 from ..data.samples import (KAZANTSEVA2012_G5, KAZANTSEVA2012_G2,
-                            COMPLETE_AGREEMENT, LARGE_DISAGREEMENT)
+                            COMPLETE_AGREEMENT, LARGE_DISAGREEMENT,
+                            MULTIPLE_BOUNDARY_TYPES)
 
 
 class TestKappa(unittest.TestCase):
@@ -86,6 +87,13 @@ def test_fleiss_kappa_complete(self):
         kappa = fleiss_kappa_linear(data_complete)
         self.assertEqual(kappa, Decimal('1.0'))
 
+    def test_multiple_boundary_types(self):
+        '''
+        Test Fleiss' Kappa upon a dataset containing multiple boundary types.
+        '''
+        kappa = fleiss_kappa_linear(MULTIPLE_BOUNDARY_TYPES)
+        self.assertEqual(kappa, Decimal('0.3333333333333333333333333333'))
+
     def test_exception_coders(self):
         '''
         Test exception.

diff --git a/segeval/agreement/pi_test.py b/segeval/agreement/pi_test.py
@@ -7,7 +7,8 @@
 from decimal import Decimal
 from .pi import fleiss_pi_linear
 from ..data.samples import (KAZANTSEVA2012_G5, KAZANTSEVA2012_G2,
-                            COMPLETE_AGREEMENT, LARGE_DISAGREEMENT)
+                            COMPLETE_AGREEMENT, LARGE_DISAGREEMENT,
+                            MULTIPLE_BOUNDARY_TYPES)
 
 
 class TestPi(unittest.TestCase):
@@ -60,20 +61,20 @@ def test_fleiss_pi(self):
         Test Fleiss' Pi.
         '''
 
-        data1 = {'i1': {'c1': [2,8,2,1],
-                        'c2': [2,1,7,2,1]}}
+        data1 = {'i1': {'c1': [2, 8, 2, 1],
+                        'c2': [2, 1, 7, 2, 1]}}
         pi1 = fleiss_pi_linear(data1)
         pi1f = fleiss_pi_linear(data1)
         self.assertEqual(pi1,
                          Decimal('0.7090909090909090909090909091'))
-        self.assertEqual(pi1,pi1f)
+        self.assertEqual(pi1, pi1f)
         data2 = {'i1': {'c1': [2, 8, 2, 1],
                         'c2': [11, 2]}}
         pi2 = fleiss_pi_linear(data2)
         pi2f = fleiss_pi_linear(data2)
         self.assertEqual(pi2,
                          Decimal('0.1111111111111111111111111111'))
-        self.assertEqual(pi2,pi2f)
+        self.assertEqual(pi2, pi2f)
         self.assertTrue(pi2 < pi1)
 
     def test_fleiss_pi_complete(self):
@@ -85,6 +86,13 @@ def test_fleiss_pi_complete(self):
         pi = fleiss_pi_linear(data_complete)
         self.assertEqual(pi, Decimal('1'))
 
+    def test_multiple_boundary_types(self):
+        '''
+        Test Fleiss' Pi upon a dataset containing multiple boundary types.
+        '''
+        pi = fleiss_pi_linear(MULTIPLE_BOUNDARY_TYPES)
+        self.assertEqual(pi, Decimal('0.3333333333333333333333333333'))
+
     def test_exception_coders(self):
         '''
         Test exception.

diff --git a/segeval/agreement/test.py b/segeval/agreement/test.py
@@ -5,7 +5,9 @@
 '''
 import unittest
 from decimal import Decimal
-from . import actual_agreement_linear
+from . import actual_agreement_linear, __potential_boundaries__
+from .. import BoundaryFormat
+from ..util import SegmentationMetricError
 from ..data.samples import (KAZANTSEVA2012_G5, KAZANTSEVA2012_G2,
                             COMPLETE_AGREEMENT, LARGE_DISAGREEMENT)
 
@@ -75,3 +77,65 @@ def test_disagreement_large(self):
         self.assertTrue(agreement >= 0)
         self.assertEqual(agreement,
                          Decimal('0'))
+
+    def test_potential_boundaries_mass(self):
+        '''
+        Masses [2, 3] span five units, hence four potential boundaries.
+        '''
+        masses = [2, 3]
+        count = __potential_boundaries__(masses,
+                                         boundary_format=BoundaryFormat.mass,
+                                         boundary_types=(1,))
+        self.assertEqual(4, count)
+
+    def test_potential_boundaries_mass_multiple_types(self):
+        '''
+        Test that the potential boundary count scales with boundary types.
+        '''
+        kwargs = {
+            'boundary_format'   : BoundaryFormat.mass,
+            'boundary_types'    : (1, 2)
+        }
+        self.assertEqual(8, __potential_boundaries__([2,3], **kwargs))
+
+
+    def test_potential_boundaries_position(self):
+        '''
+        A five-unit position sequence has four potential boundaries.
+        '''
+        positions = [1, 1, 1, 2, 2]
+        count = __potential_boundaries__(positions,
+                                         boundary_format=BoundaryFormat.position,
+                                         boundary_types=(1,))
+        self.assertEqual(4, count)
+
+
+    def test_potential_boundaries_sets(self):
+        '''
+        A boundary-set sequence of length four has four potential boundaries.
+        '''
+        boundary_sets = [(), (), (1,), ()]
+        count = __potential_boundaries__(boundary_sets,
+                                         boundary_format=BoundaryFormat.sets,
+                                         boundary_types=(1,))
+        self.assertEqual(4, count)
+
+    def test_potential_boundaries_nltk(self):
+        '''
+        The NLTK-format string '0010' has four potential boundaries.
+        '''
+        nltk_string = '0010'
+        count = __potential_boundaries__(nltk_string,
+                                         boundary_format=BoundaryFormat.nltk,
+                                         boundary_types=(1,))
+        self.assertEqual(4, count)
+
+    def test_potential_boundaries_exception(self):
+        '''
+        An unrecognized boundary format must raise SegmentationMetricError.
+        '''
+        self.assertRaises(SegmentationMetricError,
+                          __potential_boundaries__,
+                          '0010',
+                          boundary_format='incorrect',
+                          boundary_types=(1,))
diff --git a/segeval/similarity/boundary_test.py b/segeval/similarity/boundary_test.py
@@ -64,7 +64,7 @@ def test_clustered_fps(self):
 
     def test_positions(self):
         '''
-        Test false negative.
+        Test position-format.
         '''
         a = [1,1,1,1,1,1,1,1,1,1,1,1,1]
         b = [1,1,1,1,2,2,2,2,3,3,3,3,3]
@@ -74,7 +74,7 @@ def test_positions(self):
 
     def test_format_exception(self):
         '''
-        Test false negative.
+        Test incorrect format exception.
         '''
         a = [1,1,1,1,1,1,1,1,1,1,1,1,1]
         b = [1,1,1,1,2,2,2,2,3,3,3,3,3]
@@ -83,7 +83,7 @@ def test_format_exception(self):
 
     def test_arg_exception(self):
         '''
-        Test false negative.
+        Test incorrect argument exception.
         '''
         a = [1,1,1,1,1,1,1,1,1,1,1,1,1]
         b = [1,1,1,1,2,2,2,2,3,3,3,3,3]
@@ -93,15 +93,15 @@ def test_arg_exception(self):
 
     def test_weight_t(self):
         '''
-        Test false negative.
+        Test transposition weighting.
         '''
         value = boundary_similarity([2, 3, 6], [5, 6],
                                     weight=(weight_a, weight_s, weight_t))
         self.assertEqual(0.5, value)
 
     def test_multiple_boundary_types(self):
         '''
-        Test false negative.
+        Test multiple boundaries.
         '''
         value = summarize(boundary_similarity(MULTIPLE_BOUNDARY_TYPES))
         self.assertEqual((Decimal('0.375'),