Skip to content

Commit

Permalink
Merge pull request #205 from nathans/diaeresis
Browse files Browse the repository at this point in the history
Diaeresis
  • Loading branch information
kylepjohnson committed Mar 17, 2016
2 parents 26df02e + 6a21329 commit d173243
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 7 deletions.
10 changes: 7 additions & 3 deletions cltk/corpus/greek/beta_to_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,13 @@
]

LOWER = [
(r'i\\\+', 'ῒ'),
(r'i/\+', 'ΐ'),
(r'i=\+', 'ῗ'),
(r'I\+', 'ϊ'),
(r'I\\\+', 'ῒ'),
(r'I/\+', 'ΐ'),
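# Also accept the diacritics in reverse order (diaeresis before acute), i.e. I+/
# NOTE(review): if these patterns are applied sequentially in list order, the
# bare I\+ entry above would match first and this entry would never fire — confirm.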
(r'I\+/', 'ΐ'),
(r'I=\+', 'ῗ'),
(r'U\+', 'ϋ'),
(r'U\\\+', 'ῢ'),
(r'U/\+', 'ΰ'),
(r'U=\+', 'ῧ'),
Expand Down
19 changes: 15 additions & 4 deletions cltk/tests/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,22 @@ def test_greek_betacode_to_unicode(self):
Note: assertEqual does not appear to compare certain characters
correctly (``ά`` and ``ί``, at least) — possibly because the tonos and
oxia forms are distinct code points; TODO confirm.
"""
beta_example = r"""O(/PWS OU)=N MH\ TAU)TO\ """
replacer = Replacer()
unicode = replacer.beta_code(beta_example)
target_unicode = 'ὅπως οὖν μὴ ταὐτὸ '
self.assertEqual(unicode, target_unicode)
# Generic test
beta_1 = r"""O(/PWS OU)=N MH\ TAU)TO\ """
unicode_1 = replacer.beta_code(beta_1)
target_1 = 'ὅπως οὖν μὴ ταὐτὸ '
self.assertEqual(unicode_1, target_1)
# Test for iota and diaeresis
beta_2 = r"""*XALDAI+KH\N"""
unicode_2 = replacer.beta_code(beta_2)
target_2 = 'Χαλδαϊκὴν'
self.assertEqual(unicode_2, target_2)
# Test for upsilon and diaeresis
beta_3 = r"""PROU+POTETAGME/NWN"""
unicode_3 = replacer.beta_code(beta_3)
target_3 = 'προϋποτεταγμένων'
self.assertEqual(unicode_3, target_3)

def test_tlgu_init(self):
"""Test constructors of TLGU module for check, import, and install."""
Expand Down

0 comments on commit d173243

Please sign in to comment.