hedyorg · Felienne · Jan 25, 2024 · Jan 25, 2024 · Jan 25, 2024 · Jan 25, 2024
diff --git a/app.py b/app.py
@@ -2289,7 +2289,7 @@ def translate_keywords():
     body = request.json
     try:
         translated_code = hedy_translation.translate_keywords(body.get('code'), body.get(
-            'start_lang'), body.get('goal_lang'), level=int(body.get('level', 1)))
+            'start_lang'), body.get('goal_lang'), level=int(body.get('level', 1)), translate_strings=True)
         if translated_code or translated_code == '':  # empty string is False, so explicitly allow it
             session["previous_keyword_lang"] = body.get("start_lang")
             session["keyword_lang"] = body.get("goal_lang")

diff --git a/hedy_translation.py b/hedy_translation.py
@@ -1,17 +1,22 @@
 from collections import namedtuple
-from lark import Token, Visitor
+from lark import Token, Visitor, Tree
 from lark.exceptions import VisitError
 import hedy
 import operator
 from os import path
 import hedy_content
 from website.yaml_file import YamlFile
 import copy
+from googletrans import Translator as GoogleTranslator
+
 
 # Holds the token that needs to be translated, its line number, start and
 # end indexes and its value (e.g. ", ").
 Rule = namedtuple("Rule", "keyword line start end value")
 
+# Google Translate client; aliased on import because this module's own Translator visitor class reuses the name
+translator = GoogleTranslator()
+
 
 def keywords_to_dict(lang="nl"):
     """ "Return a dictionary of keywords from language of choice. Key is english value is lang of choice"""
@@ -73,7 +78,12 @@ def get_target_keyword(keyword_dict, keyword):
         return keyword
 
 
-def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
+def translate_string(string, from_lang, to_lang):
+    """Return `string` translated from `from_lang` to `to_lang` by the module-level googletrans client."""
+    return translator.translate(string, src=from_lang, dest=to_lang).text
+
+
+def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1, translate_strings=False):
     """ "Return code with keywords translated to language of choice in level of choice"""
 
     if input_string == "":
@@ -96,7 +106,7 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
 
         program_root = parser.parse(processed_input + "\n").children[0]
 
-        translator = Translator(processed_input)
+        translator = Translator(processed_input, translate_strings)
         translator.visit(program_root)
         ordered_rules = reversed(sorted(translator.rules, key=operator.attrgetter("line", "start")))
 
@@ -112,8 +122,17 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1):
                 target = get_target_keyword(keyword_dict_to, rule.keyword)
                 replaced_line = replace_token_in_line(line, rule, original, target)
                 result = replace_line(lines, rule.line - 1, replaced_line)
-
-        # For now the needed post processing is only removing the 'end-block's added during pre-processing
+            else:
+                if translate_strings:
+                    # this is not a keyword, so (for now) that means a text string that needs to be translated
+                    lines = result.splitlines()  # do we need to do this for each rule??
+                    line = lines[rule.line - 1]
+                    original = rule.value
+                    target = translate_string(original, from_lang, to_lang)
+                    replaced_line = replace_token_in_line(line, rule, original, target)
+                    result = replace_line(lines, rule.line - 1, replaced_line)
+
+        # For now the needed post processing is only removing the 'end-block's added during pre-processing
         result = "\n".join([line for line in result.splitlines()])
         result = result.replace("#ENDBLOCK", "")
 
@@ -146,11 +165,15 @@ def replace_line(lines, index, line):
 
 def replace_token_in_line(line, rule, original, target):
     """Replaces a token in a line from the user input with its translated equivalent"""
-    before = "" if rule.start == 0 else line[0: rule.start]
-    after = "" if rule.end == len(line) - 1 else line[rule.end + 1:]
-    # Note that we need to replace the target value in the original value because some
-    # grammar rules have ambiguous length and value, e.g. _COMMA: _SPACES*
-    # (latin_comma | arabic_comma) _SPACES*
+    if rule.keyword == 'text':
+        before = line[:rule.start]
+        after = line[rule.end-1:]
+    else:
+        before = "" if rule.start == 0 else line[0: rule.start]
+        after = "" if rule.end == len(line) - 1 else line[rule.end + 1:]
+        # Note that we need to replace the target value in the original value because some
+        # grammar rules have ambiguous length and value, e.g. _COMMA: _SPACES*
+        # (latin_comma | arabic_comma) _SPACES*
     return before + rule.value.replace(original, target) + after
 
 
@@ -194,9 +217,10 @@ class Translator(Visitor):
     in the user input string and original value. The information is later used to replace the token in
     the original user input with the translated token value."""
 
-    def __init__(self, input_string):
+    def __init__(self, input_string, translate_strings=False):
         self.input_string = input_string
         self.rules = []
+        self.translate_strings = translate_strings
 
     def define(self, tree):
         self.add_rule("_DEFINE", "define", tree)
@@ -216,13 +240,31 @@ def returns(self, tree):
     def print(self, tree):
         self.add_rule("_PRINT", "print", tree)
 
+        if self.translate_strings:
+            # in addition to keywords, we are now also adding plain text strings
+            # like print arguments to the list of things that need to be translated
+            if len(tree.children) > 1:
+                # argument = str(tree.children[1].children[0])
+                for argument in tree.children:
+                    if type(argument) is Tree and argument.data == 'text':
+                        self.add_rule("text", "text", argument)  # this of course only supports 1 string
+
     def print_empty_brackets(self, tree):
         self.print(tree)
 
     def ask(self, tree):
         self.add_rule("_IS", "is", tree)
         self.add_rule("_ASK", "ask", tree)
 
+        if self.translate_strings:  # it'd be nicer of course if this was not copy-paste from PRINT!
+            # in addition to keywords, we are now also adding plain text strings
+            # like ask arguments to the list of things that need to be translated
+            if len(tree.children) > 1:
+                # argument = str(tree.children[1].children[0])
+                for argument in tree.children:
+                    if type(argument) is Tree and argument.data == 'text':
+                        self.add_rule("text", "text", argument)  # this of course only supports 1 string
+
     def echo(self, tree):
         self.add_rule("_ECHO", "echo", tree)
 
@@ -354,17 +396,27 @@ def pressed(self, tree):
         self.add_rule("_PRESSED", "pressed", tree)
 
     def add_rule(self, token_name, token_keyword, tree):
-        token = self.get_keyword_token(token_name, tree)
-        if token:
-            rule = Rule(
-                token_keyword, token.line, token.column - 1, token.end_column - 2, token.value
-            )
-            self.rules.append(rule)
-
-    def get_keyword_token(self, token_type, node):
-        for c in node.children:
+        if token_name == "text":  # this is not superduper pretty but for now it works!
+            token = tree.children[0]
+            if token:
+                rule = Rule(
+                    token_name, token.line, token.column - 1, token.end_column, token.value
+                )
+                self.rules.append(rule)
+        else:
+            token = self.get_keyword_token(token_name, tree)
+            if token:
+                rule = Rule(
+                    token_keyword, token.line, token.column - 1, token.end_column - 2, token.value
+                )
+                self.rules.append(rule)
+
+    def get_keyword_token(self, token_type, tree):
+        for c in tree.children:
             if type(c) is Token and c.type == token_type:
                 return c
+            if type(c) is Tree and c.data == token_type:
+                return c.children[0]
         return None
 
     def get_keyword_tokens(self, token_type, node):

diff --git a/requirements.txt b/requirements.txt
@@ -35,3 +35,4 @@ doit_watch>=0.1.0
 uflash>=2.0.0
 pyinstaller==6.3.0
 commonmark==0.9.1
+googletrans==3.1.0a0
diff --git a/tests/test_translation_level/test_translation_level_01.py b/tests/test_translation_level/test_translation_level_01.py
@@ -25,6 +25,15 @@ def test_print_english_dutch(self):
 
         self.assertEqual(expected, result)
 
+    def test_print_english_dutch_including_string(self):
+        code = 'print Hello welcome to Hedy!'
+
+        result = hedy_translation.translate_keywords(
+            code, from_lang="en", to_lang="nl", level=self.level, translate_strings=True)
+        expected = 'print Hallo welkom bij Hedy!'
+
+        self.assertEqual(expected, result)
+
     def test_ask_english_dutch(self):
         code = "ask Hallo welkom bij Hedy!"
 
@@ -33,6 +42,15 @@ def test_ask_english_dutch(self):
 
         self.assertEqual(expected, result)
 
+    def test_ask_english_dutch_including_string(self):
+        code = "ask Hello welcome to Hedy!!"
+
+        result = hedy_translation.translate_keywords(code, from_lang="en", to_lang="nl",
+                                                     level=self.level, translate_strings=True)
+        expected = "vraag Hallo welkom bij Hedy!!"
+
+        self.assertEqual(expected, result)
+
     def test_echo_english_dutch(self):
         code = "ask Hallo welkom bij Hedy!\necho"
 

diff --git a/tests/test_translation_level/test_translation_level_02.py b/tests/test_translation_level/test_translation_level_02.py
@@ -15,12 +15,12 @@ class TestsTranslationLevel2(HedyTester):
     level = 2
     all_keywords = hedy_translation.all_keywords_to_dict()
 
-    def test_print(self):
-        code = "print Hallo welkom bij Hedy!"
+    def test_print_including_string(self):
+        code = "print Hallo, welkom Hedy!"
 
         result = hedy_translation.translate_keywords(
-            code, "nl", "en", self.level)
-        expected = "print Hallo welkom bij Hedy!"
+            code, "nl", "en", self.level, translate_strings=True)
+        expected = "print Hello, welcome Hedy!"
 
         self.assertEqual(expected, result)
 
@@ -69,6 +69,15 @@ def test_print_var_text(self):
 
         self.assertEqual(expected, result)
 
+    # def test_print_var_text_including_string(self):
+    #     code = "welcome is Hi welcome to Hedy\nprint welcome Enjoy!"
+    #
+    #     result = hedy_translation.translate_keywords(
+    #         code, "en", "nl", self.level, translate_strings=True)
+    #     expected = "welkom is Hallo welkom bij Hedy\nprint welkom Veel plezier"
+    #
+    #     self.assertEqual(expected, result)
+
     def test_ask_kewords(self):
         code = "hedy is vraag print ask echo"
 
@@ -96,6 +105,27 @@ def test_ask_assign_dutch_english(self):
 
         self.assertEqual(expected, result)
 
+    def test_ask_assign_dutch_english_including_string(self):
+        code = textwrap.dedent("""\
+            naam is vraag Hoe heet jij?
+            print Dus het is naam""")
+
+        result = hedy_translation.translate_keywords(
+            code, "nl", "en", self.level, translate_strings=True)
+
+        expected = textwrap.dedent("""\
+            naam is ask How is called you?
+            print So It is name""")
+
+        # the result sounds silly because all words are translated separately
+        # in levels 2 and 3, this is needed because words in between can be vars
+        # in level 4 it will magically be better
+
+        # we should, of course, change this, but that's not easy, and for now it is better than what we have
+        # also: result is naam because ask vars are not yet translated!
+
+        self.assertEqual(expected, result)
+
     def test_translate_back(self):
         code = "print welkom bij Hedy\nnaam is ask what is your name\nprint naam"