Skip to content
Permalink
Browse files

added __init__.py for OE lemmatizer submodule (#931)

  • Loading branch information...
free-variation authored and kylepjohnson committed Aug 14, 2019
1 parent 7530ad8 commit 0a954fee427b3c72e3efa3c068174bfb0b2d5bf9
No changes.
@@ -128,4 +128,29 @@ def evaluate(self, filename):

return lemma_count/token_count

def evaluate_conll(self, filename, verbose=True):
    """Score the lemmatizer against a tab-separated CoNLL-style file.

    Every non-blank, non-comment line is split on tabs; the second field
    is taken as the word form and the third as the gold lemma.  The word
    is passed to ``self.lemmatize(word, best_guess=False)`` and the
    candidate list of the first returned token is compared with the gold
    lemma.
    (Assumes ``lemmatize`` returns ``[(token, [candidate, ...]), ...]``
    when ``best_guess`` is False -- confirm against ``lemmatize``.)

    :param filename: path to the annotated file (read as UTF-8).
    :param verbose: when True (the default), print the word, the gold
        lemma and the predicted candidates for every scored line.
    :return: ``(tp, fp, fn, precision, recall)`` where ``tp`` counts words
        whose first candidate equals the gold lemma, ``fp`` counts words
        whose first candidate differs from it, and ``fn`` counts words
        with no candidate at all.  Precision and recall are ``0.0`` when
        their denominator is zero.
    """
    # Old English text contains non-ASCII letters, so do not depend on the
    # platform default encoding.
    with open(filename, 'r', encoding='utf-8') as infile:
        lines = infile.read().splitlines()

    tp, fp, fn = 0, 0, 0

    for line in lines:
        # Blank lines separate sentences; lines starting with '#' are
        # comments.  Neither carries a token to score.
        if not line.strip() or line.startswith('#'):
            continue

        word, true_lemma = line.split('\t')[1:3]
        pred_lemma = self.lemmatize(word, best_guess=False)[0][1]

        if verbose:
            print(word, true_lemma, pred_lemma)

        if not pred_lemma:
            fn += 1
        elif true_lemma == pred_lemma[0]:
            tp += 1
        else:
            fp += 1

    # Guard the ratios: an empty file, or one where nothing was predicted
    # or matched, must not raise ZeroDivisionError.
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0

    return tp, fp, fn, precision, recall



@@ -150,7 +150,7 @@ A basic lemmatizer is provided, based on a hand-built dictionary of word forms.
.. code-block:: python
In [1]: import cltk.lemmatize.old_english.lemma as oe_l
In [2]: lemmatizer = oe_l.OldEnglishDictioraryLemmatizer()
In [2]: lemmatizer = oe_l.OldEnglishDictionaryLemmatizer()
In [3]: lemmatizer.lemmatize('Næs him fruma æfre, or geworden, ne nu ende cymþ ecean')
Out [3]: [('Næs', 'næs'), ('him', 'he'), ('fruma', 'fruma'), ('æfre', 'æfre'), (',', ','), ('or', 'or'), ('geworden', 'weorþan'), (',', ','), ('ne', 'ne'), ('nu', 'nu'), ('ende', 'ende'), ('cymþ', 'cuman'), ('ecean', 'ecean')]

0 comments on commit 0a954fe

Please sign in to comment.
You can’t perform that action at this time.