In [None]:
import glob
import os
import subprocess

In [None]:
def test_model(lang_pairs:list, test_data:str, tc:str, tc_m:str, sp:str, sp_m:str, tr_m:str, out:str):
    """
    Evaluate model on wmt data
    
    Args:
        lang_pairs: language direction for testing
        test_data: root for temp dir 
        tc: path to trucaser
        tc_m: path to trucaser model
        sp: path to sentencepiece
        sp_m: path to sentencepiece model
        tr_m: path to translation model
        out: path to the output
        
    """
    cd = os.getcwd()
    data_path = os.path.abspath(test_data)
    if not os.path.exists(out):
        os.makedirs(out)
    for src_lang, tgt_lang in lang_pairs:
        pair = r"{}-{}".format(src_lang, tgt_lang) 
        tmp = os.path.join(data_path, f"tmp-{pair}")
        if not os.path.exists(tmp):
            os.makedirs(tmp)
        else:
            files = glob.glob(f'{tmp}/*')
            for f in files:
                try:
                    os.unlink(f)
                except OSError as e:
                    print("Error: %s : %s" % (f, e.strerror))
        inp_path = os.path.join(tmp,  f'{pair}.src')
        !sacrebleu -t wmt18 --language {pair} --echo src > {inp_path}
        tc_path = os.path.join(cd, tc)
        tcm_path = os.path.join(cd, tc_m)
        tc_out = os.path.join(tmp, "tc.out")
        !python {tc_path} {tcm_path} {inp_path} > {tc_out}
        !python {sp} --action split  --model {sp_m} --corpora {tc_out}
        tags = os.path.join(tmp, pair + '.tag')
        sp_name = os.path.basename(sp_m)
        sp_out = os.path.join(tmp, sp_name + '-tc.out')
        with open(sp_out, 'r') as _input, open(tags, 'w') as output:
                for line in _input:
                        l = len(line.split())
                        output.write(f"{' '.join([tgt_lang] * l)}\n")
        tr_out = os.path.join(tmp, f"{pair}.out")
        !python -m sockeye.translate --quiet --use-cpu --models {tr_m} --input-factors {tags} --input {sp_out} --output {tr_out}
        !python {sp} --action restore --corpora {tr_out} --model {sp_m}
        res_path = os.path.join(tmp, f'de-{sp_name}-{os.path.basename(tr_out)}')
        output = os.path.join(cd, out, f'wmt18-{pair}.res')
        !cat {res_path} | sacrebleu -t wmt18 -l {pair} > {output}
        print(open(output).read())

In [None]:
# Evaluate the tri-lingual model on WMT18 English -> Russian.
# Keyword arguments make it explicit which path plays which role.
test_model(
    lang_pairs=[('en', 'ru')],
    test_data='nmt/data/input',
    tc='nmt/scripts/truecaser/applytc.py',
    tc_m='nmt/models/preproc-models/tc-en',
    sp='nmt/scripts/word-pieces.py',
    sp_m='nmt/models/preproc-models/sp-enru',
    tr_m='nmt/models/trilang_0',
    out='nmt/data/input/out',
)