Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Translation of strings starter #5031

Draft
wants to merge 29 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f3ce98b
add test and change parameters
Felienne Jan 25, 2024
8b8c855
add rule
Felienne Jan 25, 2024
8f2af2a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 25, 2024
0eaa5c6
add rule and text token
Felienne Jan 25, 2024
6353e3d
Merge branch 'translate-starter' of https://github.com/hedyorg/hedy i…
Felienne Jan 25, 2024
53cc886
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 25, 2024
2f7aeff
update token
Felienne Jan 25, 2024
f290f68
Merge branch 'translate-starter' of https://github.com/hedyorg/hedy i…
Felienne Jan 25, 2024
e2b5b71
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 25, 2024
68db33f
call fake api
Felienne Jan 25, 2024
ae41207
Merge branch 'translate-starter' of https://github.com/hedyorg/hedy i…
Felienne Jan 25, 2024
d30581d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 25, 2024
7e054c4
use parameter
Felienne Jan 25, 2024
51790a6
Merge branch 'translate-starter' of https://github.com/hedyorg/hedy i…
Felienne Jan 25, 2024
051e55e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 25, 2024
e6bd89d
skip Nones
Felienne Jan 25, 2024
82e83c4
Merge branch 'translate-starter' of https://github.com/hedyorg/hedy i…
Felienne Jan 25, 2024
b8ce7b9
only add strings if parameter is set
Felienne Jan 26, 2024
a7e2b55
Merge branch 'main' into translate-starter
Felienne May 3, 2024
8a15c63
call google api
Felienne May 3, 2024
2fcf524
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 3, 2024
51f4cd0
Merge branch 'main' into translate-starter
Felienne May 8, 2024
b4eaace
add api
Felienne May 8, 2024
81424be
better organize api calls, also translate in ASK
Felienne May 8, 2024
5e9b7a3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 8, 2024
ffb5fef
update on working of ask
Felienne May 8, 2024
56642c7
Merge branch 'translate-starter' of https://github.com/hedyorg/hedy i…
Felienne May 8, 2024
d15983f
trial in the front end!
Felienne May 9, 2024
35309f0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2289,7 +2289,7 @@ def translate_keywords():
body = request.json
try:
translated_code = hedy_translation.translate_keywords(body.get('code'), body.get(
'start_lang'), body.get('goal_lang'), level=int(body.get('level', 1)))
'start_lang'), body.get('goal_lang'), level=int(body.get('level', 1)), translate_strings=True)
if translated_code or translated_code == '': # empty string is False, so explicitly allow it
session["previous_keyword_lang"] = body.get("start_lang")
session["keyword_lang"] = body.get("goal_lang")
Expand Down
92 changes: 72 additions & 20 deletions hedy_translation.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
from collections import namedtuple
from lark import Token, Visitor
from lark import Token, Visitor, Tree
from lark.exceptions import VisitError
import hedy
import operator
from os import path
import hedy_content
from website.yaml_file import YamlFile
import copy
from googletrans import Translator


# Holds the token that needs to be translated, its line number, start and
# end indexes and its value (e.g. ", ").
Rule = namedtuple("Rule", "keyword line start end value")

# stores the connection to Google Translate
translator = Translator()


def keywords_to_dict(lang="nl"):
""" "Return a dictionary of keywords from language of choice. Key is english value is lang of choice"""
Expand Down Expand Up @@ -73,7 +78,12 @@ def get_target_keyword(keyword_dict, keyword):
return keyword


def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
def translate_string(string, from_lang, to_lang):
    """Translate a piece of free text using the module-level Google Translate client.

    Args:
        string: The text to translate.
        from_lang: Language code of ``string``; passed to the client as ``src``.
        to_lang: Language code to translate into; passed to the client as ``dest``.

    Returns:
        The ``text`` attribute of the translation result, or ``string`` unchanged
        when it is empty, whitespace-only or ``None``.
    """
    # There is nothing to translate, so do not bother the translation service.
    if not string or not string.strip():
        return string

    # NOTE(review): presumably from_lang/to_lang are Hedy language codes; confirm
    # that every one of them is accepted by googletrans.
    result = translator.translate(string, src=from_lang, dest=to_lang)
    return result.text


def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1, translate_strings=False):
""" "Return code with keywords translated to language of choice in level of choice"""

if input_string == "":
Expand All @@ -96,7 +106,7 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):

program_root = parser.parse(processed_input + "\n").children[0]

translator = Translator(processed_input)
translator = Translator(processed_input, translate_strings)
translator.visit(program_root)
ordered_rules = reversed(sorted(translator.rules, key=operator.attrgetter("line", "start")))

Expand All @@ -112,8 +122,17 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
target = get_target_keyword(keyword_dict_to, rule.keyword)
replaced_line = replace_token_in_line(line, rule, original, target)
result = replace_line(lines, rule.line - 1, replaced_line)

# For now the needed post processing is only removing the 'end-block's added during pre-processing
else:
if translate_strings:
# this is not a keyword, so (for now) that means a text string that needs to be translated
lines = result.splitlines() # do we need to do this for each rule??
line = lines[rule.line - 1]
original = rule.value
target = translate_string(original, from_lang, to_lang)
replaced_line = replace_token_in_line(line, rule, original, target)
result = replace_line(lines, rule.line - 1, replaced_line)

# For now the needed post processing is only removing the 'end-block's added during pre-processing
result = "\n".join([line for line in result.splitlines()])
result = result.replace("#ENDBLOCK", "")

Expand Down Expand Up @@ -146,11 +165,15 @@ def replace_line(lines, index, line):

def replace_token_in_line(line, rule, original, target):
    """Replaces a token in a line from the user input with its translated equivalent

    Args:
        line: The full source line that contains the token.
        rule: A Rule whose ``keyword``, ``start``, ``end`` and ``value`` describe the token.
        original: The substring of ``rule.value`` that has to be replaced.
        target: The replacement for ``original``.

    Returns:
        ``line`` with the span described by ``rule`` replaced by the translated value.
    """
    before = line[:rule.start]

    if rule.keyword == 'text':
        # Text rules are created with end = token.end_column, whereas keyword rules
        # use token.end_column - 2; hence the different offsets for the remainder.
        after = line[rule.end - 1:]
    else:
        after = line[rule.end + 1:]

    # Note that we need to replace the target value in the original value because some
    # grammar rules have ambiguous length and value, e.g. _COMMA: _SPACES*
    # (latin_comma | arabic_comma) _SPACES*
    return before + rule.value.replace(original, target) + after


Expand Down Expand Up @@ -194,9 +217,10 @@ class Translator(Visitor):
in the user input string and original value. The information is later used to replace the token in
the original user input with the translated token value."""

def __init__(self, input_string):
def __init__(self, input_string, translate_strings=False):
self.input_string = input_string
self.rules = []
self.translate_strings = translate_strings

def define(self, tree):
self.add_rule("_DEFINE", "define", tree)
Expand All @@ -216,13 +240,31 @@ def returns(self, tree):
def print(self, tree):
    """Record the `print` keyword and, when string translation is on, its text arguments."""
    self.add_rule("_PRINT", "print", tree)

    if self.translate_strings:
        self.add_text_rules(tree)

def print_empty_brackets(self, tree):
    """Treat a `print` with empty brackets exactly like a regular `print`."""
    self.print(tree)

def ask(self, tree):
    """Record the `is` and `ask` keywords and, when string translation is on, the text arguments."""
    self.add_rule("_IS", "is", tree)
    self.add_rule("_ASK", "ask", tree)

    if self.translate_strings:
        self.add_text_rules(tree)

def add_text_rules(self, tree):
    """Add a "text" rule for every direct `text` subtree of a command node.

    In addition to keywords, plain text strings such as print and ask arguments
    are added to the list of things that need to be translated. Shared by
    `print` and `ask` so the logic lives in one place.
    """
    # Only look for arguments when the node has more than one child.
    # NOTE(review): presumably the first child is the keyword token itself - confirm.
    if len(tree.children) <= 1:
        return

    for argument in tree.children:
        if isinstance(argument, Tree) and argument.data == 'text':
            # add_rule only records the first token of each text subtree
            self.add_rule("text", "text", argument)

def echo(self, tree):
self.add_rule("_ECHO", "echo", tree)

Expand Down Expand Up @@ -354,17 +396,27 @@ def pressed(self, tree):
self.add_rule("_PRESSED", "pressed", tree)

def add_rule(self, token_name, token_keyword, tree):
    """Append a Rule describing where a token sits in the user input.

    Args:
        token_name: Token type to look for among the children of ``tree``, or
            "text" when ``tree`` itself is a text subtree.
        token_keyword: Keyword name stored in the Rule for non-text tokens.
        tree: The parse tree node to inspect.

    Nothing is appended when no (non-empty) token is found.
    """
    if token_name == "text":  # this is not superduper pretty but for now it works!
        # The tree is the text subtree itself; its first child is used as the token.
        # An empty subtree has nothing to record.
        token = tree.children[0] if tree.children else None
        if token:
            # Text rules keep token.end_column as-is; replace_token_in_line slices accordingly.
            self.rules.append(
                Rule(token_name, token.line, token.column - 1, token.end_column, token.value)
            )
        return

    token = self.get_keyword_token(token_name, tree)
    if token:
        self.rules.append(
            Rule(token_keyword, token.line, token.column - 1, token.end_column - 2, token.value)
        )

def get_keyword_token(self, token_type, tree):
    """Return the first child of ``tree`` matching ``token_type``, or None.

    A child matches when it is a Token of that type, or a Tree whose ``data``
    equals ``token_type``; in the latter case the subtree's first child is returned.
    """
    for c in tree.children:
        if isinstance(c, Token) and c.type == token_type:
            return c
        if isinstance(c, Tree) and c.data == token_type:
            return c.children[0]
    return None

def get_keyword_tokens(self, token_type, node):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ doit_watch>=0.1.0
uflash>=2.0.0
pyinstaller==6.3.0
commonmark==0.9.1
googletrans==3.1.0a0
18 changes: 18 additions & 0 deletions tests/test_translation_level/test_translation_level_01.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ def test_print_english_dutch(self):

self.assertEqual(expected, result)

def test_print_english_dutch_including_string(self):
code = 'print Hello welcome to Hedy!'

result = hedy_translation.translate_keywords(
code, from_lang="en", to_lang="nl", level=self.level, translate_strings=True)
expected = 'print Hallo welkom bij Hedy!'

self.assertEqual(expected, result)

def test_ask_english_dutch(self):
code = "ask Hallo welkom bij Hedy!"

Expand All @@ -33,6 +42,15 @@ def test_ask_english_dutch(self):

self.assertEqual(expected, result)

def test_ask_english_dutch_including_string(self):
code = "ask Hello welcome to Hedy!!"

result = hedy_translation.translate_keywords(code, from_lang="en", to_lang="nl",
level=self.level, translate_strings=True)
expected = "vraag Hallo welkom bij Hedy!!"

self.assertEqual(expected, result)

def test_echo_english_dutch(self):
code = "ask Hallo welkom bij Hedy!\necho"

Expand Down
38 changes: 34 additions & 4 deletions tests/test_translation_level/test_translation_level_02.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ class TestsTranslationLevel2(HedyTester):
level = 2
all_keywords = hedy_translation.all_keywords_to_dict()

def test_print(self):
code = "print Hallo welkom bij Hedy!"
def test_print_including_string(self):
code = "print Hallo, welkom Hedy!"

result = hedy_translation.translate_keywords(
code, "nl", "en", self.level)
expected = "print Hallo welkom bij Hedy!"
code, "nl", "en", self.level, translate_strings=True)
expected = "print Hello, welcome Hedy!"

self.assertEqual(expected, result)

Expand Down Expand Up @@ -69,6 +69,15 @@ def test_print_var_text(self):

self.assertEqual(expected, result)

# def test_print_var_text_including_string(self):
# code = "welcome is Hi welcome to Hedy\nprint welcome Enjoy!"
#
# result = hedy_translation.translate_keywords(
# code, "en", "nl", self.level, translate_strings=True)
# expected = "welkom is Hallo welkom bij Hedy\nprint welkom Veel plezier"

self.assertEqual(expected, result)

def test_ask_kewords(self):
code = "hedy is vraag print ask echo"

Expand Down Expand Up @@ -96,6 +105,27 @@ def test_ask_assign_dutch_english(self):

self.assertEqual(expected, result)

def test_ask_assign_dutch_english_including_string(self):
code = textwrap.dedent("""\
naam is vraag Hoe heet jij?
print Dus het is naam""")

result = hedy_translation.translate_keywords(
code, "nl", "en", self.level, translate_strings=True)

expected = textwrap.dedent("""\
naam is ask How is called you?
print So It is name""")

# The result sounds silly because all words are translated separately.
# In levels 2 and 3 this is needed because words in between can be vars;
# in level 4 it will magically be better.

# This should, of course, be changed, but that's not easy, and for now it is better than what we have.
# Also: the result still contains `naam` because ask vars are not yet translated!

self.assertEqual(expected, result)

def test_translate_back(self):
code = "print welkom bij Hedy\nnaam is ask what is your name\nprint naam"

Expand Down
Loading