###  Created by Luis Alejandro (alejand@umich.edu)

In [1]:
import tensorflow as tf
import numpy as np
from utils import unicode_to_ascii
from dataset import DatasetBuilder
from attention_translation import preprocess
from attention_translation import Translator
import os
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all up front.
# NOTE(review): this is set after `import tensorflow`; it presumably still takes effect
# because the variable is read when the GPU is first initialized — confirm, or move it
# above the tensorflow import.
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"

In [2]:
# List the GPU devices TensorFlow can see (an empty list means none were detected)
tf.config.experimental.list_physical_devices("GPU")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Build the train/test datasets from the English-Spanish sentence-pair file.
# `preprocess` is passed twice — presumably once for the source text and once for
# the target text; confirm against DatasetBuilder.
files = ["../../datasets/nlp/english-spanish.txt"]
builder = DatasetBuilder(
    files,
    preprocessors=(preprocess, preprocess),
    batch_size=64,
    max_obs=30000,
    test_obs=20,
)
train_dataset, test_dataset = builder.build()

In [4]:
# Instantiate the translation model from the source/target vocabularies built above.
# The output length is capped at the longest target sequence known to the tokenizer.
# NOTE(review): restore=True presumably reloads previously saved weights — confirm in Translator.
translator = Translator(
    builder.source_tokenizer.word_to_index,
    builder.target_tokenizer.word_to_index,
    source_embedding_size=256,
    target_embedding_size=256,
    max_output_length=builder.target_tokenizer.max_seq,
    attention_size=512,
    restore=True,
)

In [13]:
# Train for a fixed number of epochs; loss and accuracy on both datasets are reported per epoch
num_epochs = 10
translator.train(num_epochs, train_dataset, test_dataset)

Epoch 1 out of 10 complete (36.43 secs) -- Train Loss: 0.1255 -- Train Acc: 0.97 -- Test Loss: 0.7724 -- Test Acc: 0.90
Epoch 2 out of 10 complete (35.17 secs) -- Train Loss: 0.1154 -- Train Acc: 0.97 -- Test Loss: 0.7537 -- Test Acc: 0.91
Epoch 3 out of 10 complete (34.85 secs) -- Train Loss: 0.1065 -- Train Acc: 0.97 -- Test Loss: 0.7909 -- Test Acc: 0.90
Epoch 4 out of 10 complete (35.74 secs) -- Train Loss: 0.0991 -- Train Acc: 0.97 -- Test Loss: 0.8132 -- Test Acc: 0.91
Epoch 5 out of 10 complete (36.62 secs) -- Train Loss: 0.0919 -- Train Acc: 0.97 -- Test Loss: 0.7587 -- Test Acc: 0.91
Epoch 6 out of 10 complete (37.91 secs) -- Train Loss: 0.0871 -- Train Acc: 0.97 -- Test Loss: 0.7994 -- Test Acc: 0.91
Epoch 7 out of 10 complete (37.05 secs) -- Train Loss: 0.0819 -- Train Acc: 0.97 -- Test Loss: 0.8053 -- Test Acc: 0.90
Epoch 8 out of 10 complete (36.37 secs) -- Train Loss: 0.0775 -- Train Acc: 0.97 -- Test Loss: 0.8036 -- Test Acc: 0.90
Epoch 9 out of 10 complete (38.49 secs) 

In [14]:
# Encode one English sentence into the zero-padded (1, max_seq) tensor passed to translate().
# Each stage gets its own name so re-running part of the cell never feeds a value of the
# wrong kind back into an earlier step; only the final tensor is bound to `source`.
sentence = b"I'm very happy to see you."

# preprocess() output is handed straight to the splitter (no extra tf.constant wrapping needed)
source_tokens = tf.strings.split(preprocess(tf.constant(sentence)))
print(source_tokens)

source_ids = builder.source_tokenizer.encode(source_tokens.numpy())
source_max_seq = builder.source_tokenizer.max_seq

# np.pad rejects negative widths with an opaque message; fail early with a clear one instead
if len(source_ids) > source_max_seq:
    raise ValueError(
        'Sentence has %d tokens but the source tokenizer supports at most %d'
        % (len(source_ids), source_max_seq))

# Right-pad with zeros up to the tokenizer's maximum length and add a leading batch axis of 1
source = tf.constant(np.pad(source_ids, (0, source_max_seq - len(source_ids))),
                     shape=[1, source_max_seq])
print(source)

tf.Tensor([b'<start>' b'i' b'm' b'very' b'happy' b'to' b'see' b'you' b'.' b'<end>'], shape=(10,), dtype=string)
tf.Tensor([[  0  17  49 946 295 255 104 105   2   3   0]], shape=(1, 11), dtype=int32)


In [15]:
# Translate the encoded sentence and print the predicted target-language words
predicted_indices = translator.translate(source)
target_index_to_word = builder.target_tokenizer.index_to_word
print(' '.join(target_index_to_word[idx].decode() for idx in predicted_indices))

estoy contento de verte .


In [16]:
# Check translation for elements in test set
def words_from_indices(indices, skip=()):
    """Join the target-vocabulary words for `indices` into one space-separated string.

    indices: iterable of integer word indices used to look words up in
        builder.target_tokenizer.index_to_word.
    skip: indices to leave out of the result (e.g. the start/end markers).
    Returns the decoded words joined by single spaces.
    """
    index_to_word = builder.target_tokenizer.index_to_word
    return ' '.join(index_to_word[idx].decode() for idx in indices if idx not in skip)


# Look the marker indices up once instead of for every word of every sentence
target_markers = (builder.target_tokenizer.word_to_index[b'<start>'],
                  builder.target_tokenizer.word_to_index[b'<end>'])

for batch in test_dataset:
    # Loop names are distinct from the notebook-level `source` tensor so that running
    # this cell does not overwrite it (re-running the single-sentence translation cell
    # afterwards would otherwise translate the last test sentence instead).
    for test_source, test_target in zip(batch[0], batch[1]):
        # translate() is given a batch of one, hence the added leading axis
        predicted = translator.translate(tf.expand_dims(test_source, 0))
        print('Original:', words_from_indices(test_target.numpy(), skip=target_markers))
        print('Translation:', words_from_indices(predicted), end='\n\n')

Original: ellos aplaudieron .
Translation: nosotros aplaudimos .

Original: es nuevo .
Translation: es nuevo .

Original: me gusta el empleo que tengo .
Translation: me encanta mi trabajo .

Original: que guey !
Translation: que idiota !

Original: te vamos a extranar .
Translation: te extranamos .

Original: tengo mala suerte .
Translation: yo soy la cena .

Original: estoy emocionado .
Translation: estoy emocionada .

Original: quiero palomitas de maiz .
Translation: quiero palomitas .

Original: soy buena para cocinar .
Translation: soy un buen cocinero .

Original: hazlo ahorita .
Translation: hazlo ahora mismo .

Original: ayudanos .
Translation: nosotros nos preguntamos .

Original: ¿ esta tom farfullando ?
Translation: ¿ esta tom involucrado en esto ?

Original: nunca me rindo .
Translation: no me rindo nunca .

Original: pase .
Translation: pase .

Original: encargate tu .
Translation: asume el mando .

Original: quedese en casa .
Translation: quedaos en casa .

Original: es tu