Permalink
Browse files

Added methods for Akkadian nouns (#569)

* Added various methods for dealing with Akkadian nouns.  All the methods eventually lead to a simple declension method.

* More noun stuff

* Fixed circular dependency between stem.py and declension.py

* Fix tests for bound_form method
  • Loading branch information...
willismonroe authored and kylepjohnson committed Aug 14, 2017
1 parent 9f44816 commit 29687923aa22812248aee118dd8f8907b1ea1729
View
@@ -55,4 +55,7 @@ _templates
# Jupyter notebook
.ipynb_checkpoints
*.ipynb
*.ipynb
# VSCode folder
.vscode/*
@@ -0,0 +1,70 @@
"""
Return the bound form of a normalized Akkadian noun.
"""
__author__ = ['M. Willis Monroe <willismonroe@gmail.com>']
__license__ = 'MIT License. See LICENSE.'
from cltk.stem.akkadian.syllabifier import Syllabifier
from cltk.stem.akkadian.stem import Stemmer
from cltk.stem.akkadian.cv_pattern import CVPattern
class BoundForm(object):
    """
    Return the bound form of a noun, suitable for adding suffixed pronouns.

    The rules follow the numbering of Huehnergard, Appendix 6.C, and are
    applied to the stem produced by the stemmer together with its
    consonant/vowel pattern and the syllable count of the declined noun.
    """

    def __init__(self):
        self.syllabifier = Syllabifier()
        self.stemmer = Stemmer()
        self.cv_patterner = CVPattern()

    def get_bound_form(self, noun, gender):
        """Return the bound form of ``noun``, given its gender.

        :param noun: a declined noun, e.g. ``'awīlum'``
        :param gender: gender flag handed on to the stemmer
        :return: the bound form as a string, or ``None`` when no rule
            applies (weak nouns with a base in -V are not handled, and
            stems shorter than two characters cannot be classified)
        """
        syllables = self.syllabifier.syllabify(noun)
        stem = self.stemmer.get_stem(noun, gender)
        cv_pattern = self.cv_patterner.get_cv_pattern(stem)
        syllable_count = len(syllables)

        # Every rule below inspects the last two segments of the stem.
        if len(cv_pattern) < 2:
            return None
        penult, last = cv_pattern[-2], cv_pattern[-1]

        # Lexical exception: nakrum > naker.  Checked before the generic
        # rules, which would otherwise return the bare stem first.
        if stem == 'nakr':
            return 'naker'

        # Based on Huehnergard Appendix 6.C.1: base in -VC
        if [penult[0], last[0]] == ['V', 'C']:
            # c. abum, aḫum: must precede the syllable-count cases, because
            # these nouns have two syllables and would match them.
            if stem in ('ab', 'aḫ'):
                return stem + 'i'
            # a./b. awīlum > awīl, bēlum > bēl
            if syllable_count > 1:
                return stem

        # Appendix 6.C.2: base in -C₁C₁
        if last[:2] == penult[:2]:
            # a. 1-syllable base
            if syllable_count == 2:
                return stem + 'i'
            if syllable_count > 2:
                # b. 2-syllable base in -tt
                if last[2] + penult[2] == 'tt':
                    return stem + 'i'
                # c. 2-syllable base, other: drop the doubled consonant
                return stem[:-1]

        # Appendix 6.C.3: base in -C₁C₂, C₂ ≠ t, i.e. pVrs
        # The "≠ t" restriction is required so that -Ct bases reach 6.C.4.
        if last[0] == penult[0] and last[1] != penult[1] and last[2] != 't':
            # Repeat the vowel at index 1 between the final consonants.
            return stem[:-1] + stem[1] + stem[-1]

        # Appendix 6.C.4: base in -Ct (fem.)
        if last[2] == 't' and penult[0] == 'C':
            if syllable_count > 2:
                return stem + 'i'
            # Need to deal with fem. Ptcpl. māḫirtum -> māḫirat
            if syllable_count > 1:
                # These are case by case
                if stem == 'qīšt':
                    return stem + 'i'
                if stem == 'mārt':
                    return stem[:-1] + stem[1] + stem[-1]

        # Appendix 6.C.5: base in -V
        # Weak nouns are not handled yet.
        return None
@@ -0,0 +1,66 @@
"""
Return a CV patterned string based on the word.
"""
__author__ = ['M. Willis Monroe <willismonroe@gmail.com>']
__license__ = 'MIT License. See LICENSE.'
from cltk.stem.akkadian.syllabifier import AKKADIAN
class CVPattern(object):
    """Return a patterned string representing the consonants
    and vowels of the input word."""

    # Unicode subscript digits; position in the string equals digit value.
    _SUBSCRIPT_DIGITS = '₀₁₂₃₄₅₆₇₈₉'

    def __init__(self):
        self.akkadian = AKKADIAN

    @classmethod
    def _subscript(cls, number):
        """Render a non-negative integer with subscript digits (12 -> '₁₂')."""
        return ''.join(cls._SUBSCRIPT_DIGITS[int(digit)] for digit in str(number))

    def get_cv_pattern(self, word, pprint=False):
        """Classify every character of ``word`` as consonant or vowel.

        Each character becomes a tuple ``(kind, index, char)`` where ``kind``
        is ``'C'`` or ``'V'``, ``char`` is the character with vowel length
        removed, and ``index`` numbers distinct consonants and distinct
        vowels separately in order of first appearance.  A repeated
        character reuses the index of its first occurrence.  Any character
        that is not a listed consonant is treated as a vowel.

        input = iparras
        pattern = [('V', 1, 'i'), ('C', 1, 'p'), ('V', 2, 'a'), ('C', 2, 'r'),
        ('C', 2, 'r'), ('V', 2, 'a'), ('C', 3, 's')]
        pprint = V₁C₁V₂C₂C₂V₂C₃

        :param word: the word to analyse
        :param pprint: when true, return the compact string form instead
            of the list of tuples
        :return: list of ``(kind, index, char)`` tuples, or a string when
            ``pprint`` is true
        """
        consonants = self.akkadian['consonants']
        short_vowels = self.akkadian['short_vowels']
        macron_vowels = self.akkadian['macron_vowels']
        circumflex_vowels = self.akkadian['circumflex_vowels']

        pattern = []
        first_index = {}              # normalised char -> index of first use
        next_index = {'C': 1, 'V': 1}  # separate counters per kind
        for char in word:
            cv = 'C' if char in consonants else 'V'
            # remove length: map long vowels onto their short counterpart
            if char in macron_vowels:
                char = short_vowels[macron_vowels.index(char)]
            elif char in circumflex_vowels:
                char = short_vowels[circumflex_vowels.index(char)]
            if char not in first_index:
                first_index[char] = next_index[cv]
                next_index[cv] += 1
            pattern.append((cv, first_index[char], char))

        if pprint:
            return ''.join(kind + self._subscript(index)
                           for kind, index, _ in pattern)
        return pattern
@@ -0,0 +1,54 @@
"""
Decline an Akkadian noun.
"""
__author__ = ['M. Willis Monroe <willismonroe@gmail.com>']
__license__ = 'MIT License. See LICENSE.'
from cltk.stem.akkadian.stem import Stemmer
from cltk.stem.akkadian.stem import ENDINGS
from cltk.phonology.akkadian.stress import AKKADIAN
class NaiveDecliner(object):
    """Simple noun decliner"""

    def __init__(self):
        self.endings = ENDINGS
        self.akkadian = AKKADIAN
        self.stemmer = Stemmer()

    def decline_noun(self, noun, gender, mimation=True):
        """Return a list of all possible declined forms given any form
        of a noun and its gender.

        :param noun: any declined form of the noun
        :param gender: key into the endings table; ``'m'`` selects the
            masculine paradigm, any other key is declined as feminine
        :param mimation: currently unused; kept so existing callers that
            pass it keep working
        :return: list of ``(form, {'case': ..., 'number': ...})`` tuples,
            singular first, then dual, then plural
        """
        stem = self.stemmer.get_stem(noun, gender)
        paradigm = self.endings[gender]
        masculine = gender == 'm'
        declension = []

        # Singular and dual share one shape: the stem plus the ending.  The
        # non-masculine stem already ends in the 't' that opens the
        # feminine endings, so that leading letter is dropped.
        for number in ('singular', 'dual'):
            for case, ending in paradigm[number].items():
                if not masculine:
                    ending = ending[1:]
                declension.append((stem + ending,
                                   {'case': case, 'number': number}))

        for case, ending in paradigm['plural'].items():
            if masculine:
                form = stem + ending
            else:
                # Feminine plural entries are lists of candidate endings.
                form = self._feminine_plural(stem, ending)
            declension.append((form, {'case': case, 'number': 'plural'}))
        return declension

    def _feminine_plural(self, stem, candidates):
        """Attach the matching plural ending from ``candidates`` to ``stem``."""
        macron_vowels = self.akkadian['macron_vowels']

        # The ending's vowel follows a long vowel three characters from the
        # end of the stem; otherwise (including stems too short to have
        # that position) the default 'ā' is used.
        theme_vowel = 'ā'
        if len(stem) >= 3 and stem[-3] in macron_vowels:
            theme_vowel = stem[-3]
        matching = [end for end in candidates if end[:1] == theme_vowel]
        if not matching:
            # No ending exists for this long vowel (e.g. 'ū'): use 'ā'.
            matching = [end for end in candidates if end[:1] == 'ā']
        ending = matching[0]

        has_penult = len(stem) >= 2
        if has_penult and stem[-2] in self.akkadian['short_vowels']:
            # Drop the short vowel and the final consonant of the stem.
            return stem[:-2] + ending
        if (has_penult and stem[-1] in self.akkadian['consonants']
                and stem[-2] in macron_vowels):
            return stem + ending
        # Otherwise only the final character of the stem is replaced.
        return stem[:-1] + ending
View
@@ -0,0 +1,79 @@
"""
Get the stem of a word, given a declined form and its gender.
TODO: Check this logic with von Soden's Grundriss der akkadischen Grammatik.
TODO: Deal with j/y issue.
"""
__author__ = ['M. Willis Monroe <willismonroe@gmail.com>']
__license__ = 'MIT License. See LICENSE.'
# Case endings keyed by gender ('m'/'f'), then number, then case.
# Masculine plural and all singular/dual entries are single strings; the
# feminine plural entries are lists with one variant per theme vowel
# (ā, ē, ī), in the same order for both cases.
ENDINGS = {
    'm': {
        'singular': {
            'nominative': 'um',
            'accusative': 'am',
            'genitive': 'im'
        },
        'dual': {
            'nominative': 'ān',
            'oblique': 'īn'
        },
        'plural': {
            'nominative': 'ū',
            'oblique': 'ī'
        }
    },
    'f': {
        'singular': {
            'nominative': 'tum',
            'accusative': 'tam',
            'genitive': 'tim'
        },
        'dual': {
            'nominative': 'tān',
            'oblique': 'tīn'
        },
        'plural': {
            'nominative': ['ātum', 'ētum', 'ītum'],
            # Oblique forms end in -im; the ī-variant is 'ītim', not the
            # nominative 'ītum'.
            'oblique': ['ātim', 'ētim', 'ītim']
        }
    }
}
class Stemmer(object):
    """Stem Akkadian words with a simple algorithm based on Huehnergard"""

    def __init__(self):
        self.endings = ENDINGS

    def get_stem(self, noun, gender, mimation=True):
        """Return the stem of a noun, given its gender.

        The longest matching case ending is removed.  For feminine nouns
        whose ending contains the feminine ``t``, that ``t`` is kept as
        the last letter of the stem.

        :param noun: a declined noun
        :param gender: ``'m'`` or ``'f'``
        :param mimation: currently has no effect; kept so existing callers
            that pass it keep working
        :return: the stem, or ``''`` when the ending or the gender is not
            recognised (a message is printed in that case)
        """
        stem = ''
        if gender == 'm':
            masc = self.endings['m']
            two_letter = list(masc['singular'].values()) + list(masc['dual'].values())
            if noun[-2:] in two_letter:
                stem = noun[:-2]
            # Slice rather than index so an empty noun is reported as
            # unknown instead of raising IndexError.
            elif noun[-1:] in list(masc['plural'].values()):
                stem = noun[:-1]
            else:
                print("Unknown masculine noun: {}".format(noun))
        elif gender == 'f':
            masc = self.endings['m']
            fem = self.endings['f']
            if noun[-4:] in fem['plural']['nominative'] + fem['plural']['oblique']:
                stem = noun[:-4] + 't'
            elif noun[-3:] in list(fem['singular'].values()) + \
                    list(fem['dual'].values()):
                stem = noun[:-3] + 't'
            # Feminine nouns without the -t marker take the masculine
            # singular/dual endings.
            elif noun[-2:] in list(masc['singular'].values()) + \
                    list(masc['dual'].values()):
                stem = noun[:-2]
            else:
                print("Unknown feminine noun: {}".format(noun))
        else:
            print("Unknown noun: {}".format(noun))
        return stem
View
@@ -10,6 +10,10 @@
from cltk.stem.latin.declension import CollatinusDecliner
from cltk.exceptions import UnknownLemma
from cltk.stem.sanskrit.indian_syllabifier import Syllabifier as IndianSyllabifier
from cltk.stem.akkadian.bound_form import BoundForm as AkkadianBoundForm
from cltk.stem.akkadian.cv_pattern import CVPattern as AkkadianCVPattern
from cltk.stem.akkadian.declension import NaiveDecliner as AkkadianNaiveDecliner
from cltk.stem.akkadian.stem import Stemmer as AkkadianStemmer
from cltk.stem.akkadian.syllabifier import Syllabifier as AkkadianSyllabifier
import os
@@ -217,6 +221,44 @@ def test_coordinated_range(self):
current1 = syllabifier.in_coordinated_range_offset(current)
self.assertTrue(current1)
def test_akkadian_bound_form(self):
    """Test Akkadian bound form method"""
    bound_former = AkkadianBoundForm()
    word = "awīlum"
    bound_form = bound_former.get_bound_form(word, 'm')
    target = "awīl"
    # assertEqual: the assertEquals alias is deprecated and was removed
    # in Python 3.12.
    self.assertEqual(bound_form, target)
def test_akkadian_cv_pattern(self):
    """Test Akkadian CV pattern method"""
    cv_patterner = AkkadianCVPattern()
    word = "iparras"
    cv_pattern = cv_patterner.get_cv_pattern(word, pprint=True)
    target = "V₁C₁V₂C₂C₂V₂C₃"
    # assertEqual: the assertEquals alias is deprecated and was removed
    # in Python 3.12.
    self.assertEqual(cv_pattern, target)
def test_akkadian_declension(self):
    """Test Akkadian noun declension"""
    decliner = AkkadianNaiveDecliner()
    word = "iltum"
    declension = decliner.decline_noun(word, 'f')
    target = [('iltim', {'case': 'genitive', 'number': 'singular'}),
              ('iltum', {'case': 'nominative', 'number': 'singular'}),
              ('iltam', {'case': 'accusative', 'number': 'singular'}),
              ('iltīn', {'case': 'oblique', 'number': 'dual'}),
              ('iltān', {'case': 'nominative', 'number': 'dual'}),
              ('ilātim', {'case': 'oblique', 'number': 'plural'}),
              ('ilātum', {'case': 'nominative', 'number': 'plural'})]
    # Both sides are sorted so the check does not depend on the order in
    # which the decliner emits the forms.  assertEqual replaces the
    # assertEquals alias, which was removed in Python 3.12.
    self.assertEqual(sorted(declension), sorted(target))
def test_akkadian_stemmer(self):
    """Test Akkadian stemmer"""
    stemmer = AkkadianStemmer()
    word = "šarrū"
    stem = stemmer.get_stem(word, 'm')
    target = "šarr"
    # assertEqual: the assertEquals alias is deprecated and was removed
    # in Python 3.12.
    self.assertEqual(stem, target)
def test_akkadian_syllabifier(self):
"""Test Akkadian syllabifier"""
syllabifier = AkkadianSyllabifier()

0 comments on commit 2968792

Please sign in to comment.