Skip to content

Commit

Permalink
Merge pull request #809 from Sedictious/OE
Browse files Browse the repository at this point in the history
Add Old English syllabifier
  • Loading branch information
todd-cook committed Jul 31, 2018
2 parents ea37e7a + 0b7b2ff commit 24c2740
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 0 deletions.
35 changes: 35 additions & 0 deletions cltk/corpus/old_english/syllabifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
Sonority phoneme hierarchy for Old English
"""

# Maps each Old English grapheme to its sonority rank (1-5).
# Graphemes are grouped by rank below; the position of each group in the
# tuple (counting from 1) is the rank assigned to every grapheme in it.
# cltk.phonology.syllabify buckets the keys by ``rank - 1`` and treats the
# rank-1 bucket as the vowel set.
# NOTE(review): 'ð' is ranked 2 while 'þ' is ranked 3 -- if both letters are
# meant to stand for the same sound they should presumably share a rank;
# confirm before changing.
Syllabifier = {
    grapheme: rank
    for rank, graphemes in enumerate(
        (
            'aeiouøæ',     # rank 1
            'mnpbctdðkg',  # rank 2
            'fþshvxz',     # rank 3
            'ljwƿ',        # rank 4
            'r',           # rank 5
        ),
        start=1,
    )
    for grapheme in graphemes
}
15 changes: 15 additions & 0 deletions cltk/phonology/syllabify.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cltk.exceptions import InputError
from cltk.corpus.middle_english.syllabifier import Syllabifier as ME_Syllabifier
from cltk.corpus.middle_high_german.syllabifier import Syllabifier as MHG_Syllabifier
from cltk.corpus.old_english.syllabifier import Syllabifier as OE_Syllabifier
from cltk.corpus.old_norse.syllabifier import hierarchy as OLD_NORSE_HIERARCHY

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -80,6 +81,15 @@ def __init__(self, low_vowels=None, mid_vowels=None, high_vowels=None, flaps=Non

self.set_hierarchy(hierarchy)
self.set_vowels(hierarchy[0])

elif language == 'old english':
hierarchy = [[] for _ in range(len(set(OE_Syllabifier.values())))]

for k in OE_Syllabifier:
hierarchy[OE_Syllabifier[k] - 1].append(k)

self.set_hierarchy(hierarchy)
self.set_vowels(hierarchy[0])

elif language == 'middle high german':
hierarchy = [[] for _ in range(len(set(MHG_Syllabifier.values())))]
Expand Down Expand Up @@ -201,6 +211,11 @@ def syllabify_SSP(self, word):
>>> s.syllabify("huntyng")
['hun', 'tyng']
>>> s = Syllabifier(language='old english')
>>> s.syllabify("arcebiscop")
['ar', 'ce', 'bis', 'cop']
The break_geminants parameter ensures a breakpoint is placed between geminants:
>>> geminant_s = Syllabifier(break_geminants=True)
Expand Down

0 comments on commit 24c2740

Please sign in to comment.