Skip to content

Commit

Permalink
Merge pull request #912 from free-variation/oe_test_fix
Browse files (browse the repository at this point in the history)
OE lemmatizer test fix
  • Loading branch information
clemsciences committed May 20, 2019
2 parents fb787df + fd832e3 commit 7605f3a
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions cltk/lemmatize/old_english/lemma.py
@@ -1,6 +1,7 @@
import os
import re
import math
+import codecs

from numpy import argmax
from nltk.tokenize import wordpunct_tokenize
Expand Down / Expand Up
@@ -28,7 +29,7 @@ def _load_forms_and_lemmas(self):

self.lemma_dict = {}

-with open(path, 'r') as infile:
+with codecs.open(path, 'r', encoding="utf-8") as infile:
lines = infile.read().splitlines()
for line in lines:
forms = line.split('\t')
Expand Down / Expand Up
@@ -83,7 +84,7 @@ def _lemmatize_token(self, token, best_guess=True, return_frequencies=False):
if lemmas == None:
lemma = token
elif len(lemmas) > 1:
-counts = [self.type_counts[word] for word in lemmas]
+counts = [self.type_counts.get(word,0) for word in lemmas]
lemma = lemmas[argmax(counts)]
else:
lemma = lemmas[0]
Expand Down

0 comments on commit 7605f3a

Please sign in to comment.