In [13]:
import tensorflow as tf
import pyonmttok
import os
import io

The following code was adapted from the original source available here: https://github.com/OpenNMT/OpenNMT-tf/blob/36c737d1446e475e87b71519a6e7791b22a0f919/examples/serving/python/ende_client.py#L9

In [14]:
# Directory of the exported model; the SavedModel is loaded from here and the
# tokenizer model is looked up under its "assets.extra" subdirectory.
export_dir = "opennmt"

In [15]:
# Load the parallel corpus: one tab-separated sentence pair per line, with the
# English text in the first column.
text_file = "./train.txt"

with open(text_file, encoding="utf-8") as file:
    # Iterate line by line instead of read().split("\n"): a trailing newline at
    # the end of the file then no longer yields a spurious empty final entry.
    lines = [line.rstrip("\n") for line in file]

# Skip blank lines so they do not turn into one-field "pairs" that would break
# any later access to the second column.
orginal_text_pairs = [line.split("\t") for line in lines if line.strip()]
en = [pair[0] for pair in orginal_text_pairs]

In [16]:
# Load the exported SavedModel and take its default serving signature; that
# signature is the callable invoked to run the model.
imported = tf.saved_model.load(export_dir)
translate_fn = imported.signatures["serving_default"]
# The tokenizer model file is expected inside the export's "assets.extra" folder.
# NOTE(review): mode "none" presumably disables pyonmttok's own tokenization
# rules so that only the model at sp_model_path is applied — confirm in the
# pyonmttok documentation.
sp_model_path = os.path.join(export_dir, "assets.extra", "wmtende.model")
tokenizer = pyonmttok.Tokenizer("none", sp_model_path=sp_model_path)

In [17]:
def preprocess(texts):
    """Tokenize a batch of texts and pack them into the model's input dict.

    Each text is tokenized with the module-level ``tokenizer``. Token lists
    shorter than the longest one in the batch are right-padded with empty
    strings so that all rows have the same length.

    Returns a dict with:
      "tokens": tf.string constant built from the padded token lists.
      "length": tf.int32 constant holding each text's unpadded token count.
    """
    token_rows = []
    for text in texts:
        row, _ = tokenizer.tokenize(text)
        token_rows.append(row)

    row_lengths = [len(row) for row in token_rows]
    longest = max(row_lengths, default=0)
    padded_rows = [row + [""] * (longest - len(row)) for row in token_rows]

    return {
        "tokens": tf.constant(padded_rows, dtype=tf.string),
        "length": tf.constant(row_lengths, dtype=tf.int32),
    }

In [18]:
def postprocess(outputs):
    """Convert the model's output dict into a list of detokenized strings.

    For every batch item, the entry at index 0 is taken (presumably the best
    hypothesis — confirm against the exported model's signature), truncated to
    the matching length from ``outputs["length"]``, and passed to
    ``tokenizer.detokenize``.
    """
    token_batch = outputs["tokens"].numpy()
    length_batch = outputs["length"].numpy()
    return [
        tokenizer.detokenize(hyps[0][: hyp_lengths[0]].tolist())
        for hyps, hyp_lengths in zip(token_batch, length_batch)
    ]

In [19]:
def translate(texts):
    """Translate a batch of texts: preprocess, run the model, postprocess."""
    model_inputs = preprocess(texts)
    model_outputs = translate_fn(**model_inputs)
    return postprocess(model_outputs)

In [28]:
# NOTE(review): `input` shadows the builtin input() and is not used in this
# cell; the assignment is kept unchanged in case a later cell reads it.
input = ["If a New User Isn't Being Copied to the Linked Organization"]

# Translate the first few English sentences and print them next to the
# reference German text from the corpus.
n_samples = 5
output = translate(en[:n_samples])

# zip() stops at the shorter sequence, so a corpus with fewer than n_samples
# rows prints what is available instead of raising IndexError.
for pair, translated in zip(orginal_text_pairs, output):
    reference_de = pair[1] if len(pair) > 1 else ""  # row may lack a second column
    print("EN (original):   ", pair[0])
    print("DE (original):   ", reference_de)
    print("DE (translated): ", translated, "\n")


EN (original):    breeze through those minor but important activities that inevitably crop up during the day without switching context. quickly log calls, create tasks and events, and jot down notes, all from windows that open on the current page.
DE (original):    erledigen sie diese kleineren, aber wichtigen aufgaben, die im laufe des tages unweigerlich anfallen, mit links, ohne den kontext wechseln zu müssen. protokollieren sie anrufe in blitzgeschwindigkeit, erstellen sie aufgaben und ereignisse und machen sie sich notizen in fenstern, die auf der aktuellen seite geöffnet werden.
DE (translated):  breeze durch die kleinen, aber wichtigen Aktivitäten, die unweigerlich während des Tages auftauchen, ohne den Kontext zu wechseln. 

EN (original):    a visualization is commonly a chart or graph, such as a bar chart, donut chart, timeline, or heat map. it can also be data in tabular form, such as a comparison table or pivot table. every visualization has an underlying query, which is how