In [None]:
#| hide
import os

In [None]:
#| hide
# Detect Google Colab by checking whether the string form of the IPython shell object
# mentions "google.colab", then choose the directory that data and secrets are read from.
running_in_colab = 'google.colab' in str(get_ipython())
if running_in_colab:
    # Mount Google Drive and use its "MyDrive" folder as the home directory.
    from google.colab import drive
    drive.mount('/content/drive')
    homedir = "/content/drive/MyDrive"
else:
    # expanduser("~") yields the same value as $HOME when that variable is set, but still
    # resolves a directory when it is not; os.getenv('HOME') would return None in that case
    # and every later `homedir + "/..."` concatenation would fail with a TypeError.
    homedir = os.path.expanduser("~")

In [None]:
#| hide
if running_in_colab:
    !pip3 install nbdev
    !pip3 install mteval

In [None]:
#| hide
from dotenv import load_dotenv

In [None]:
#| hide
if running_in_colab:
    # Colab doesn't have a mechanism to set environment variables other than python-dotenv,
    # so the variables are loaded from a .env file kept under the mounted home directory.
    # NOTE(review): presumably this file holds the MT service credentials - confirm.
    env_file = homedir+'/secrets/.env'
    # Register python-dotenv's IPython extension, which provides the %dotenv magic.
    %load_ext dotenv
    # IPython substitutes {env_file} with the variable's value before the magic runs.
    %dotenv {env_file}

In [None]:
from mteval.dataset import *
from mteval.awsmt import *
from mteval.deeplmt import *
from mteval.googlemt import *
from mteval.microsoftmt import *
from mteval.modernmt import *

In [None]:
import sys
from tqdm import tqdm
from pathlib import Path

def translate_test_set(mt,base_path,source_lines,sourcelang,targetlang,mtengine_name,test_set_name,test_date):
    """Translate a test set line by line and write the translations to a hypothesis file.

    The output file is
    ``<base_path>/<sourcelang>_<targetlang>/<test_date>/<test_set_name>/hyp_<mtengine_name>.<sourcelang>-<targetlang>.<targetlang>``.
    Missing directories are created. The file is written as UTF-8, one translation per line.

    Args:
        mt: Object providing ``translate_text(sourcelang, targetlang, text)``.
        base_path: Root data directory (string or path-like). A trailing path
            separator is accepted but not required.
        source_lines: Iterable of source segments to translate.
        sourcelang: Source language code, passed to ``mt`` and used in the output path.
        targetlang: Target language code, passed to ``mt`` and used in the output path.
        mtengine_name: Engine name used in the output file name.
        test_set_name: Name of the test set; used as a directory name.
        test_date: Date string; used as a directory name.

    Returns:
        The list of translated lines in input order, or ``None`` if the output
        file already existed (in which case nothing is translated).
    """
    translate_path = Path(base_path,sourcelang+"_"+targetlang,test_date,test_set_name)
    translate_path.mkdir(parents=True,exist_ok=True)
    output_filename = "hyp_"+mtengine_name+"."+sourcelang+"-"+targetlang+"."+targetlang
    # Derive the file path from the directory created above so that both always agree,
    # whether or not base_path ends with a path separator.
    translate_file = translate_path/output_filename
    # The MT engine output file might already exist from a previous run of the notebook on the same day.
    # Translation is skipped to avoid extra charges. If the file is corrupted or incorrect,
    # it has to be deleted manually.
    if translate_file.exists():
        print("Translations file already exists:"+str(translate_file),file=sys.stderr)
        return None

    target_lines = []
    # Explicit UTF-8: MT output routinely contains non-ASCII text, and the locale's default
    # encoding is not guaranteed to be able to represent it.
    with translate_file.open(mode="w",encoding="utf-8") as target_file:
        for source_line in tqdm(source_lines):
            target_line = mt.translate_text(sourcelang,targetlang,source_line)
            target_lines.append(target_line)
            print(target_line,file=target_file)
    return target_lines


In [None]:
import ipywidgets as widgets
from IPython.display import display

# Multi-select list for choosing which MT engines to run; "aws" is preselected.
mtengines_selection = widgets.SelectMultiple(
    options=["aws", "deepl", "google", "microsoft", "modernmt"],
    value=["aws"],
    description="MT Engines:",
    disabled=False,
)

display(mtengines_selection)


In [None]:
# Show the engines currently selected in the widget; the translation loop below iterates over this value.
print(mtengines_selection.value)

In [None]:
import csv
import sys
from datetime import date

base_path = homedir+"/mtd_data/"
translateset_fname = "translate_sets.csv"

# Translate every test set listed in the CSV file with every MT engine selected in the widget.
# Each non-blank CSV row must hold exactly three fields:
# source language code, target language code, test set name.
# newline='' lets the csv module handle line endings itself, as its documentation requires.
with open(base_path+translateset_fname,'r',newline='') as translatesets_fh:
    set_reader = csv.reader(translatesets_fh)
    for row in set_reader:
        if not row:
            # csv.reader yields an empty list for a blank line (e.g. a trailing empty line);
            # unpacking it into three names would raise ValueError.
            continue
        source_language_code,target_language_code,test_set_name = row
        translate_date = date.today().isoformat()
        for mtengine in mtengines_selection.value:
            # Engine classes are looked up by naming convention: a callable named
            # "<engine>translate" must exist in the notebook's global namespace
            # (presumably provided by the mteval star imports - verify).
            # This should really be an abstract base class being initialized with the mtengine string
            mt_class_initialization = mtengine+"translate"
            mt = globals()[mt_class_initialization]()
            source_lang_eng = source_language_code
            target_lang_eng = target_language_code
            if mtengine == "deepl":
                # DeepL doesn't recognize "en" as target language
                if target_lang_eng == "en":
                    target_lang_eng = "en-US"
                # Skip this engine for language pairs DeepL reports as unavailable.
                if not mt.check_langpair(source_lang_eng,target_lang_eng):
                    print("DeepL does not support language pair {}-{}".format(source_lang_eng,target_lang_eng),file=sys.stderr)
                    continue
            # The test set is read with the language codes from the CSV, not the engine-specific ones.
            source_lines, reference_lines = download_read_set(base_path,source_language_code,target_language_code,test_set_name)

            print("Test set translation {}-{} Testset: {} Date: {} MT engine: {}".format(source_lang_eng,target_lang_eng,test_set_name,translate_date,mtengine))
            # NOTE(review): the engine-specific target code (e.g. "en-US" for DeepL) is also used by
            # translate_test_set to build the output directory and file name, so DeepL output for an
            # "en" target lands in a different directory than the other engines' - confirm this is intended.
            target_lines = translate_test_set(mt,base_path,source_lines,source_lang_eng,target_lang_eng,mtengine,test_set_name,translate_date)
