diff --git a/pie_extended/models/fro/classes.py b/pie_extended/models/fro/classes.py index 2507f21..bb32636 100644 --- a/pie_extended/models/fro/classes.py +++ b/pie_extended/models/fro/classes.py @@ -18,7 +18,7 @@ class MemorizingTokenizer(SourceMemorizingTokenizer): - re_add_space_around_punct = re.compile(r"(\s*)([^\w\s\'’ʼ]+)(\s*)") + re_add_space_around_punct = re.compile(r"(\s*)(\.+|[^\w\s\'’ʼ])(\s*)") re_add_space_after_apostrophe = re.compile(r"(\s*)([\'’ʼ])(\s*)") _sentence_boundaries = re.compile( r"([" + _Dots_except_apostrophe + r"]+\s*)+" @@ -95,7 +95,7 @@ def rule_based(cls, token): pos = "PONfrt" else: pos = "PONfbl" - return [token, lemma, pos, "MORPH=empty"] + return [token, lemma, pos, "MORPH=empty", token] def format_line(self, token, tags, ignored=False): tags = list(tags)