Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,16 @@ In [2]: apertium.append_pair_path('..')

### Translation
Performing Translations
Method 1:
```python
In [1]: import apertium
In [2]: t = apertium.Translator('eng', 'spa')
In [3]: t.translate('cats')
Out[3]: 'Gatos'
```
Method 2:
```python
In [1]: import apertium
In [2]: apertium.translate('eng', 'spa', 'I love you')
Out[2]: 'Te quieres'
```
12 changes: 10 additions & 2 deletions apertium/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,16 @@ class ModeNotInstalled(ValueError):
pass


def update_modes(pair_path): # type: (str) -> None
modes = search_path(pair_path)
def update_modes(path): # type: (str) -> None
"""Updates the installed modes
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

install => installed


Args:
path (str): Absolute location of the modes to be installed. Modes found there are added to the ones already registered; previously registered paths are not removed.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't make it clear that this path will be added, the old ones won't be removed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

path(str) => path (str)


Note:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yelids => Yields

This won't be picked up by the parser.

Moreover, it should probably be returns.

Actually, does it even return anything?

Updates the pairs, analyzers, generator dictionaries with entries
"""
modes = search_path(path)
if modes['pair']:
for path, lang_src, lang_trg in modes['pair']:
pairs['%s-%s' % (lang_src, lang_trg)] = path
Expand Down
28 changes: 23 additions & 5 deletions apertium/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,20 @@


class Analyzer:
"""An Analyzer object containing its analysis mode and language
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's => its

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

langugage => language


Attributes:
analyzer_cmds (Dict[str, List[List[str]]]): stores the commands for various analyzers to run successfully.
lang (str): Language of the text which is morphologically analyzed.
path (str): Location to the analyzer mode for a particular language.
mode (str): Name of the mode for a particular linguistic task.
"""
def __init__(self, lang): # type: (Analyzer, str) -> None
"""initializes the Analyzer object

Args:
lang (str): Language of the morphological analyzer
"""
self.analyzer_cmds = {} # type: Dict[str, List[List[str]]]
self.lang = to_alpha3_code(lang) # type: str
if self.lang not in apertium.analyzers:
Expand All @@ -17,27 +30,32 @@ def __init__(self, lang): # type: (Analyzer, str) -> None
self.path, self.mode = apertium.analyzers[self.lang]

def _get_commands(self): # type: (Analyzer) -> List[List[str]]
"""
Yeilds: the commands to run for the analysis mode
"""
if self.lang not in self.analyzer_cmds:
mode_path, mode = apertium.analyzers[self.lang]
self.analyzer_cmds[self.lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode')
return self.analyzer_cmds[self.lang]

def _postproc_text(self, result):  # type: (Analyzer, str) -> List[LexicalUnit]
    """Turn raw analyzer output into a list of lexical units.

    Args:
        result (str): Output of the analysis pipeline.

    Returns:
        List[LexicalUnit]: Every unit produced by ``parse(result)``,
        materialized into a list.
    """
    return list(parse(result))

def analyze(self, in_text, formatting='txt'):  # type: (Analyzer, str, str) -> List[LexicalUnit]
    """Run apertium to analyze the input.

    Args:
        in_text (str): The text whose morphological analysis has to be generated.
        formatting (str): The type of formatting for the analysis; passed
            to the ``apertium`` command as its ``-f`` option.

    Returns:
        List[LexicalUnit]: The pipeline output after ``_postproc_text``.
    """
    # A single ``apertium`` invocation drives the whole analysis mode.
    commands = [['apertium', '-d', self.path, '-f', formatting, self.mode]]
    result = execute(in_text, commands)
    return self._postproc_text(result)


def analyze(lang, in_text, formatting='txt'):  # type: (str, str, str) -> List[LexicalUnit]
    """Analyze text in one call, without keeping an Analyzer around.

    Args:
        lang (str): Language handed to the ``Analyzer`` constructor.
        in_text (str): The text to analyze.
        formatting (str): Forwarded unchanged to ``Analyzer.analyze``.

    Returns:
        List[LexicalUnit]: Whatever ``Analyzer.analyze`` returns.
    """
    return Analyzer(lang).analyze(in_text, formatting)
28 changes: 28 additions & 0 deletions apertium/generation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,43 @@


class Generator:
"""A Generator object containing its generation mode and language. The language is taken as input
and then all the generators corresponding to the particular language that are installed are looked up
and used.

Attributes:
generator_cmds (Dict[str, List[List[str]]]): stores the commands for various generators to run successfully.
lang (str): Language of the text which is morphologically generated.
"""
def __init__(self, lang): # type: (Generator, str) -> None
self.generator_cmds = {} # type: Dict[str, List[List[str]]]
self.lang = lang # type: str

def _get_commands(self): # type: (Generator) -> List[List[str]]
"""returns the commands to run for the generation

Args:
Object of class Generator

Yeilds:
A List[List[str]] having the commands that need to be run for the particular mode execution.
"""
if self.lang not in self.generator_cmds:
mode_path, mode = apertium.generators[self.lang]
self.generator_cmds[self.lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode')
return self.generator_cmds[self.lang]

def generate(self, in_text, formatting='none'): # type: (Generator, str, str) -> Union[str, List[str]]
"""generates the word form for the analysis provided

Args:
in_text (str): The analysis from which a wordform has to be generated
formatting (str): The output format of the generated wordform

Returns:
Union[str, List[str]]: The generated wordform or wordforms for the input.

"""
self.lang = to_alpha3_code(self.lang)

if self.lang in apertium.generators:
Expand All @@ -30,5 +56,7 @@ def generate(self, in_text, formatting='none'): # type: (Generator, str, str) -


def generate(lang, in_text, formatting='none'):  # type: (str, str, str) -> Union[str, List[str]]
    """Generate wordforms in one call, without keeping a Generator around.

    Args:
        lang (str): Language handed to the ``Generator`` constructor.
        in_text (str): The analysis to generate a wordform from.
        formatting (str): Forwarded unchanged to ``Generator.generate``.

    Returns:
        Union[str, List[str]]: Whatever ``Generator.generate`` returns.
    """
    return Generator(lang).generate(in_text, formatting)
36 changes: 36 additions & 0 deletions apertium/translation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,39 @@


class Translator:
"""A Translator object containing its translation mode and the language pair. The language pair is taken as input
and then the installed translator corresponding to that particular language pair is looked up
and used.

Attributes:
translation_cmds (Dict[Tuple[str, str], List[List[str]]]): stores the commands for various translators to run successfully.
l1 (str): The language in which the input text is provided
l2 (str): The language into which the input text is to be translated.
"""
def __init__(self, l1, l2): # type: (Translator, str, str) -> None
self.translation_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]]
self.l1 = l1
self.l2 = l2

def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[str]]
"""returns the commands to run for the translation

Args:
Object of class Translator

Yeilds:
A List[List[str]] having the commands that need to be run for the particular mode execution.
"""
if (l1, l2) not in self.translation_cmds:
mode_path = apertium.pairs['%s-%s' % (l1, l2)]
self.translation_cmds[(l1, l2)] = parse_mode_file(mode_path)
return self.translation_cmds[(l1, l2)]

def _get_format(self, format, deformat, reformat):
# type: (Translator, Optional[str], Optional[str], Optional[str]) -> Tuple[Optional[str], Optional[str]]
"""
returns the appropriate deformat and reformat arguments
"""
if format:
deformat = 'apertium-des' + format
reformat = 'apertium-re' + format
Expand All @@ -34,11 +54,17 @@ def _get_format(self, format, deformat, reformat):
return deformat, reformat

def _check_ret_code(self, proc): # type: (Translator, Popen) -> None
"""
validates if the process was executed succesfully
"""
if proc.returncode != 0:
raise CalledProcessError() # type: ignore

def _validate_formatters(self, deformat, reformat):
# type: (Translator, Optional[str], Optional[str]) -> Tuple[Union[str, object], Union[str, object]]
"""
returns validated formatting arguments
"""
def valid1(elt, lst): # type: (Optional[str], List[object]) -> Union[str, object]
if elt in lst:
return elt
Expand Down Expand Up @@ -84,6 +110,13 @@ def _get_reformat(self, reformat, text): # type: (Translator, str, str) -> str

def translate(self, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'):
# type: (Translator, str, bool, Optional[str], str, str) -> str
"""
Args:
text (str): The text to be translated from l1 to l2

Returns:
str: The translated text
"""
if '%s-%s' % tuple(map(to_alpha3_code, [self.l1, self.l2])) in apertium.pairs: # type: ignore
pair = map(to_alpha3_code, [self.l1, self.l2])
else:
Expand All @@ -102,5 +135,8 @@ def translate(self, text, mark_unknown=False, format=None, deformat='txt', refor

def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'):
    # type: (str, str, str, bool, Optional[str], str, str) -> str
    """Translate text in one call, without keeping a Translator around.

    Args:
        l1 (str): The language in which ``text`` is provided.
        l2 (str): The language into which ``text`` is to be translated.
        text (str): The text to be translated.
        mark_unknown (bool): Forwarded unchanged to ``Translator.translate``.
        format (Optional[str]): Forwarded unchanged to ``Translator.translate``.
        deformat (str): Forwarded unchanged to ``Translator.translate``.
        reformat (str): Forwarded unchanged to ``Translator.translate``.

    Returns:
        str: Whatever ``Translator.translate`` returns.
    """
    return apertium.Translator(l1, l2).translate(
        text,
        mark_unknown=mark_unknown,
        format=format,
        deformat=deformat,
        reformat=reformat,
    )
17 changes: 17 additions & 0 deletions apertium/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ def to_alpha3_code(code): # type: (str) -> str


def execute(inp, commands): # type: (str, List[List[str]]) -> str
"""executes the commands in a pipeline fashion and returns the output

Args:
inp (str): The input to the command that is to be executed
commands (List[List[str]]): A list of the commands to be executed in pipeline manner

Returns:
stringified output when the command is executed
"""
procs = []
end = inp.encode()
for i, command in enumerate(commands):
Expand All @@ -31,6 +40,14 @@ def execute(inp, commands): # type: (str, List[List[str]]) -> str


def parse_mode_file(mode_path): # type: (str) -> List[List[str]]
"""parses the mode file and returns the commands to execute for a given mode.

Args:
mode_path (str): Path to the mode file to be parsed

Returns:
commands (List[List[str]]): The commands that need to be run for the execution of the mode
"""
mode_str = open(mode_path, 'r').read().strip()
if mode_str:
commands = []
Expand Down