Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions apertium/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ class ModeNotInstalled(ValueError):
pass


def update_modes(pair_path): # type: (str) -> None
def _update_modes(pair_path): # type: (str) -> None
"""
Args:
pair_path (str)
"""
modes = search_path(pair_path)
if modes['pair']:
for path, lang_src, lang_trg in modes['pair']:
Expand All @@ -26,13 +30,17 @@ def update_modes(pair_path): # type: (str) -> None


def append_pair_path(pair_path):  # type: (str) -> None
    """
    Register an additional directory to search for installed modes.

    The path is appended to the module-level ``pair_paths`` list and then
    handed to ``_update_modes`` (the renamed, now-private ``update_modes``).

    Args:
        pair_path (str): Directory to add to ``pair_paths``.
    """
    pair_paths.append(pair_path)
    # Only the renamed helper is called; the pre-rename `update_modes`
    # call was a leftover and no longer refers to an existing function.
    _update_modes(pair_path)


# Directories searched for installed modes when the package is imported.
pair_paths = ['/usr/share/apertium', '/usr/local/share/apertium']
# Module-level registries. Analyzer reads `analyzers[lang]` as a
# (path, mode) tuple and Translator reads `pairs['l1-l2']` as a mode-file
# path; presumably they are filled in by _update_modes (its full body is
# not visible here).
analyzers = {}  # type: Dict[str, Tuple[str, str]]
generators = {}  # type: Dict[str, Tuple[str, str]]
pairs = {}  # type: Dict[str, str]
for pair_path in pair_paths:
    # Call only the renamed helper; the stale `update_modes` call is gone.
    _update_modes(pair_path)
40 changes: 38 additions & 2 deletions apertium/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@


class Analyzer:
"""
Attributes:
analyzer_cmds (Dict[str, List[List[str]]])
lang (str)
path (str)
mode (str)
"""

def __init__(self, lang): # type: (Analyzer, str) -> None
"""
Args:
lang (str)
"""
self.analyzer_cmds = {} # type: Dict[str, List[List[str]]]
self.lang = to_alpha3_code(lang) # type: str
if self.lang not in apertium.analyzers:
Expand All @@ -17,27 +27,53 @@ def __init__(self, lang): # type: (Analyzer, str) -> None
self.path, self.mode = apertium.analyzers[self.lang]

def _get_commands(self):  # type: (Analyzer) -> List[List[str]]
    """
    Return the parsed mode-file commands for ``self.lang``.

    The output of ``parse_mode_file`` is stored in ``self.analyzer_cmds``
    so that, on this instance, the mode file is parsed only on the first
    request for a language.

    Returns:
        List[List[str]]
    """
    lang = self.lang
    cache = self.analyzer_cmds
    if lang in cache:
        return cache[lang]
    # Cache miss: locate the mode file via the package-level registry.
    mode_path, mode = apertium.analyzers[lang]
    cache[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode')
    return cache[lang]

def _postproc_text(self, result):  # type: (Analyzer, str) -> List[LexicalUnit]
    """
    Postprocess the raw analyzer output.

    Args:
        result (str): Text handed to ``parse``.

    Returns:
        List[LexicalUnit]: Everything yielded by ``parse(result)``,
        collected into a list.
    """
    return list(parse(result))

def analyze(self, in_text, formatting='txt'):  # type: (Analyzer, str, str) -> List[LexicalUnit]
    """
    Run apertium to analyze the input.

    Args:
        in_text (str): Text to analyze.
        formatting (str): Value passed to the ``apertium -f`` option.

    Returns:
        List[LexicalUnit]
    """
    # A single-stage pipeline: the `apertium` wrapper run against this
    # analyzer's data directory and mode.
    apertium_cmd = ['apertium', '-d', self.path, '-f', formatting, self.mode]
    raw_output = execute(in_text, [apertium_cmd])
    return self._postproc_text(raw_output)


def analyze(lang, in_text, formatting='txt'):  # type: (str, str, str) -> List[LexicalUnit]
    """
    Analyze text with a one-off ``Analyzer`` built for ``lang``.

    Args:
        lang (str): Language code passed to ``Analyzer``.
        in_text (str): Text to analyze.
        formatting (str): Forwarded to ``Analyzer.analyze``.

    Returns:
        List[LexicalUnit]
    """
    return Analyzer(lang).analyze(in_text, formatting)
33 changes: 32 additions & 1 deletion apertium/generation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,39 @@


class Generator:
"""
Attributes:
generator_cmds (Dict[str, List[List[str]]])
lang (str)
"""

def __init__(self, lang):  # type: (Generator, str) -> None
    """
    Create a generator for one language.

    Args:
        lang (str): Language code; stored exactly as given here.
    """
    self.lang = lang  # type: str
    # Per-instance cache of parsed mode-file commands, keyed by language.
    self.generator_cmds = {}  # type: Dict[str, List[List[str]]]

def _get_commands(self):  # type: (Generator) -> List[List[str]]
    """
    Return the parsed mode-file commands for ``self.lang``.

    The output of ``parse_mode_file`` is stored in ``self.generator_cmds``
    so that, on this instance, the mode file is parsed only on the first
    request for a language.

    Returns:
        List[List[str]]
    """
    lang = self.lang
    cache = self.generator_cmds
    if lang in cache:
        return cache[lang]
    # Cache miss: locate the mode file via the package-level registry.
    mode_path, mode = apertium.generators[lang]
    cache[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode')
    return cache[lang]

def generate(self, in_text, formatting='none'): # type: (Generator, str, str) -> Union[str, List[str]]
"""
Args:
in_text (str)
formatting (str)

Returns:
Union[str, List[str]]
"""
self.lang = to_alpha3_code(self.lang)

if self.lang in apertium.generators:
Expand All @@ -29,6 +51,15 @@ def generate(self, in_text, formatting='none'): # type: (Generator, str, str) -
raise apertium.ModeNotInstalled(self.lang)


def generate(lang, in_text, formatting='none'):  # type: (str, str, str) -> Union[str, List[str]]
    """
    Generate text with a one-off ``Generator`` built for ``lang``.

    Args:
        lang (str): Language code passed to ``Generator``.
        in_text (str): Input forwarded to ``Generator.generate``.
        formatting (str): Forwarded to ``Generator.generate``.

    Returns:
        Union[str, List[str]]
    """
    generator = Generator(lang)
    return generator.generate(in_text, formatting)
17 changes: 17 additions & 0 deletions apertium/mode_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@


def is_loop(dirpath, rootpath, real_root=None): # type: (str, str, Union[None, str]) -> bool
"""
Args:
dirpath (str)
rootpath (str)
real_root (Union[None, str])

Returns:
bool
"""
if os.path.islink(dirpath):
# We just descended into a directory via a symbolic link
# Check if we're referring to a directory that is
Expand All @@ -31,6 +40,14 @@ def is_loop(dirpath, rootpath, real_root=None): # type: (str, str, Union[None,


def search_path(rootpath, include_pairs=True): # type: (str, bool) -> Dict[str, List[Tuple[str, str, str]]]
"""
Args:
rootpath (str)
include_pairs (bool)

Returns:
Dict[str, List[Tuple[str, str, str]]]
"""
lang_code = r'[a-z]{2,3}(?:_[A-Za-z]+)?'
type_re = {
'analyzer': re.compile(r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)),
Expand Down
99 changes: 91 additions & 8 deletions apertium/translation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,47 @@


class Translator:
"""
Attributes:
translation_cmds (Dict[Tuple[str, str], List[List[str]]])
l1 (str)
l2 (str)
"""

def __init__(self, l1, l2):  # type: (Translator, str, str) -> None
    """
    Create a translator for the language pair ``l1``-``l2``.

    Args:
        l1 (str): First language code of the pair.
        l2 (str): Second language code of the pair.
    """
    self.l1, self.l2 = l1, l2
    # Per-instance cache of parsed mode-file commands, keyed by (l1, l2).
    self.translation_cmds = {}  # type: Dict[Tuple[str, str], List[List[str]]]

def _get_commands(self, l1, l2):  # type: (Translator, str, str) -> List[List[str]]
    """
    Return the parsed mode-file commands for the pair ``l1``-``l2``.

    The output of ``parse_mode_file`` is stored in
    ``self.translation_cmds`` under the key ``(l1, l2)``, so the mode file
    is parsed only on the first request for a pair on this instance.

    Args:
        l1 (str): First language code of the pair.
        l2 (str): Second language code of the pair.

    Returns:
        List[List[str]]
    """
    pair = (l1, l2)
    cache = self.translation_cmds
    if pair in cache:
        return cache[pair]
    # Cache miss: the registry maps 'l1-l2' to the pair's mode file.
    mode_path = apertium.pairs['%s-%s' % pair]
    cache[pair] = parse_mode_file(mode_path)
    return cache[pair]

def _get_format(self, format, deformat, reformat):
# type: (Translator, Optional[str], Optional[str], Optional[str]) -> Tuple[Optional[str], Optional[str]]
def _get_format(self, format, deformat, reformat): # type: (Translator, Optional[str], Optional[str], Optional[str]) -> Tuple[Optional[str], Optional[str]]
"""
Args:
format (Optional[str])
deformat (Optional[str])
reformat (Optional[str])

Returns:
Tuple[Optional[str], Optional[str]]
"""
if format:
deformat = 'apertium-des' + format
reformat = 'apertium-re' + format
Expand All @@ -34,12 +62,31 @@ def _get_format(self, format, deformat, reformat):
return deformat, reformat

def _check_ret_code(self, proc):  # type: (Translator, Popen) -> None
    """
    Raise if a finished subprocess reported failure.

    Args:
        proc (Popen): Process whose ``returncode`` is inspected.

    Raises:
        CalledProcessError: If ``proc.returncode`` is non-zero. The
            exception carries the return code and the command line.
    """
    if proc.returncode != 0:
        # CalledProcessError requires (returncode, cmd); constructing it
        # with no arguments raised TypeError instead of the intended error.
        raise CalledProcessError(proc.returncode, proc.args)

def _validate_formatters(self, deformat, reformat):
# type: (Translator, Optional[str], Optional[str]) -> Tuple[Union[str, object], Union[str, object]]
def _validate_formatters(self, deformat, reformat): # type: (Translator, Optional[str], Optional[str]) -> Tuple[Union[str, object], Union[str, object]]
"""
Args:
deformat (Optional[str])
reformat (Optional[str])

Returns:
Tuple[Union[str, object], Union[str, object]]
"""
def valid1(elt, lst): # type: (Optional[str], List[object]) -> Union[str, object]
"""
Args:
elt (Optional[str])
lst (List[object])

Returns:
Union[str, object]
"""
if elt in lst:
return elt
else:
Expand All @@ -61,6 +108,14 @@ def valid1(elt, lst): # type: (Optional[str], List[object]) -> Union[str, objec
return valid1(deformat, deformatters), valid1(reformat, reformatters)

def _get_deformat(self, deformat, text): # type: (Translator, str, str) -> str
"""
Args:
deformat (str)
text (str)

Returns:
str
"""
if deformat:
proc_deformat = Popen(deformat, stdin=PIPE, stdout=PIPE)
proc_deformat.stdin.write(bytes(text, 'utf-8'))
Expand All @@ -73,6 +128,14 @@ def _get_deformat(self, deformat, text): # type: (Translator, str, str) -> str
return res

def _get_reformat(self, reformat, text): # type: (Translator, str, str) -> str
"""
Args:
reformat (str)
text (str)

Returns:
str
"""
if reformat:
proc_reformat = Popen(reformat, stdin=PIPE, stdout=PIPE)
proc_reformat.stdin.write(bytes(text, 'utf-8'))
Expand All @@ -82,8 +145,18 @@ def _get_reformat(self, reformat, text): # type: (Translator, str, str) -> str
result = re.sub(rb'\0$', b'', text) # type: ignore
return result # type: ignore

def translate(self, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'):
# type: (Translator, str, bool, Optional[str], str, str) -> str
def translate(self, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): # type: (Translator, str, bool, Optional[str], str, str) -> str
"""
Args:
text (str)
mark_unknown (bool)
format (Optional[str])
deformat (str)
reformat (str)

Returns:
str
"""
if '%s-%s' % tuple(map(to_alpha3_code, [self.l1, self.l2])) in apertium.pairs: # type: ignore
pair = map(to_alpha3_code, [self.l1, self.l2])
else:
Expand All @@ -100,7 +173,17 @@ def translate(self, text, mark_unknown=False, format=None, deformat='txt', refor
return result.decode() # type: ignore


def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'):  # type: (str, str, str, bool, Optional[str], str, str) -> str
    """
    Translate text with a one-off ``Translator`` for the pair ``l1``-``l2``.

    Args:
        l1 (str): First language code of the pair.
        l2 (str): Second language code of the pair.
        text (str): Text to translate.
        mark_unknown (bool): Forwarded to ``Translator.translate``.
        format (Optional[str]): Forwarded to ``Translator.translate``.
        deformat (str): Forwarded to ``Translator.translate``.
        reformat (str): Forwarded to ``Translator.translate``.

    Returns:
        str
    """
    translator = apertium.Translator(l1, l2)
    return translator.translate(text, mark_unknown, format, deformat, reformat)
22 changes: 22 additions & 0 deletions apertium/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@


def to_alpha3_code(code): # type: (str) -> str
"""
Args:
code (str)

Returns:
str
"""
if '_' in code:
code, variant = code.split('_')
return '%s_%s' % ((iso639_codes_inverse[code], variant) if code in iso639_codes_inverse else (code, variant))
Expand All @@ -20,6 +27,14 @@ def to_alpha3_code(code): # type: (str) -> str


def execute(inp, commands): # type: (str, List[List[str]]) -> str
"""
Args:
inp (str)
commands (List[List[str]])

Returns:
str
"""
procs = []
end = inp.encode()
for i, command in enumerate(commands):
Expand All @@ -31,6 +46,13 @@ def execute(inp, commands): # type: (str, List[List[str]]) -> str


def parse_mode_file(mode_path): # type: (str) -> List[List[str]]
"""
Args:
mode_path (str)

Returns:
List[List[str]]
"""
mode_str = open(mode_path, 'r').read().strip()
if mode_str:
commands = []
Expand Down
Loading