Skip to content

Commit

Permalink
Merge pull request #145 from diyclassics/master
Browse files Browse the repository at this point in the history
Update Latin tokenizer to handle 'neque'
  • Loading branch information
kylepjohnson committed Feb 25, 2016
2 parents edcc16a + 5ab276f commit 1586286
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
7 changes: 5 additions & 2 deletions cltk/tests/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,12 @@ def test_latin_word_tokenizer(self):
# - V. Aen. 1.1
# - Prop. 2.5.1-2
# - Ov. Am. 1.8.65-66
# - Cic. Phil. 13.14

tests = ['Arma virumque cano, Troiae qui primus ab oris.',
'Hoc verumst, tota te ferri, Cynthia, Roma, et non ignota vivere nequitia?',
'Nec te decipiant veteres circum atria cerae. Tolle tuos tecum, pauper amator, avos!']
'Nec te decipiant veteres circum atria cerae. Tolle tuos tecum, pauper amator, avos!',
'Neque enim, quod quisque potest, id ei licet, nec, si non obstatur, propterea etiam permittitur.']

results = []

Expand All @@ -73,7 +75,8 @@ def test_latin_word_tokenizer(self):

target = [['Arma', 'que', 'virum', 'cano', ',', 'Troiae', 'qui', 'primus', 'ab', 'oris.'],
['Hoc', 'verum', 'est', ',', 'tota', 'te', 'ferri', ',', 'Cynthia', ',', 'Roma', ',', 'et', 'non', 'ignota', 'vivere', 'nequitia', '?'],
['Nec', 'te', 'decipiant', 'veteres', 'circum', 'atria', 'cerae.', 'Tolle', 'tuos', 'cum', 'te', ',', 'pauper', 'amator', ',', 'avos', '!']]
['Nec', 'te', 'decipiant', 'veteres', 'circum', 'atria', 'cerae.', 'Tolle', 'tuos', 'cum', 'te', ',', 'pauper', 'amator', ',', 'avos', '!'],
['que', 'Ne', 'enim', ',', 'quod', 'quisque', 'potest', ',', 'id', 'ei', 'licet', ',', 'nec', ',', 'si', 'non', 'obstatur', ',', 'propterea', 'etiam', 'permittitur.']]

self.assertEqual(results, target)

Expand Down
2 changes: 1 addition & 1 deletion cltk/tokenize/word.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(self, language):
que_exceptions += ['absque', 'abusque', 'adaeque', 'adusque', 'aeque', 'antique', 'atque',
'circumundique', 'conseque', 'cumque', 'cunque', 'denique', 'deque',
'donique', 'hucusque', 'inique', 'inseque', 'itaque', 'longinque',
'namque', 'neque', 'oblique', 'peraeque', 'praecoque', 'propinque',
'namque', 'oblique', 'peraeque', 'praecoque', 'propinque',
'qualiscumque', 'quandocumque', 'quandoque', 'quantuluscumque',
'quantumcumque', 'quantuscumque', 'quinque', 'quocumque',
'quomodocumque', 'quomque', 'quotacumque', 'quotcumque',
Expand Down

0 comments on commit 1586286

Please sign in to comment.