Skip to content

Commit

Permalink
Merge pull request #822 from clemsciences/alliteration
Browse files Browse the repository at this point in the history
Alliteration for Old Norse poetry
  • Loading branch information
clemsciences committed Aug 15, 2018
2 parents fb5644d + 00e8c11 commit 5146a5c
Show file tree
Hide file tree
Showing 5 changed files with 415 additions and 103 deletions.
40 changes: 35 additions & 5 deletions cltk/phonology/syllabify.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from cltk.corpus.middle_high_german.syllabifier import Syllabifier as MHG_Syllabifier
from cltk.corpus.old_english.syllabifier import Syllabifier as OE_Syllabifier
from cltk.corpus.old_norse.syllabifier import hierarchy as OLD_NORSE_HIERARCHY
import cltk.phonology.utils as phut

LOG = logging.getLogger(__name__)
LOG.addHandler(logging.NullHandler())
Expand Down Expand Up @@ -246,7 +247,8 @@ def syllabify_SSP(self, word):

except KeyError:
LOG.error(
"The given string contains invalid characters. Make sure to define the mater of articulation for each phoneme.")
"The given string contains invalid characters. "
"Make sure to define the mater of articulation for each phoneme.")
raise InputError

while i < len(word) - 1:
Expand All @@ -261,7 +263,7 @@ def syllabify_SSP(self, word):
break

else:
#If the break_geminants parameter is set to True, prioritize geminants
# If the break_geminants parameter is set to True, prioritize geminants
if self.break_geminants and word[i-1] == word[i]:
syllables.append(i-1)
find_nucleus = True
Expand Down Expand Up @@ -347,8 +349,36 @@ def syllabify_IPA(self, word):
:param word: word to be syllabified
"""
word = word[1:-1]
word = ''.join(l for l in unicodedata.normalize('NFD', word)
if unicodedata.category(l) != 'Mn')
word = ''.join(l for l in unicodedata.normalize('NFD', word) if unicodedata.category(l) != 'Mn')

print(word)
return self.syllabify_SSP(word)

def syllabify_phonemes(self, phonological_word):
    """
    Group the phonemes of a word into syllables.

    The IPA representation of every phoneme (its ``ipar`` attribute) is
    concatenated into a single string, which is handed to
    ``self.syllabify_SSP``. Each syllable it returns is then mapped back onto
    the original phoneme objects: phonemes are consumed in order until their
    cumulated IPA length covers the length of the syllable.

    :param phonological_word: result of Transcriber().first_process in cltk.phonology.utils;
        an indexable sequence of objects exposing an ``ipar`` string
    :return: list of syllables, each syllable being a list of the phoneme
        objects taken from ``phonological_word``
    """
    ipa_chunks = [phoneme.ipar for phoneme in phonological_word]
    # A phoneme may be transcribed with more than one IPA character, so the
    # individual lengths are needed to realign syllables with phonemes.
    phoneme_lengths = [len(chunk) for chunk in ipa_chunks]
    syllabified_transcribed_word = self.syllabify_SSP("".join(ipa_chunks))

    syllabified_phonological_word = []
    counter = 0  # index of the next phoneme not yet assigned to a syllable
    for sts in syllabified_transcribed_word:
        syllable = []
        syllable_len = len(sts)
        covered = 0  # number of IPA characters of this syllable already covered
        while covered < syllable_len:
            covered += phoneme_lengths[counter]
            syllable.append(phonological_word[counter])
            counter += 1
        syllabified_phonological_word.append(syllable)

    return syllabified_phonological_word

0 comments on commit 5146a5c

Please sign in to comment.