# Neural Machine Translation Example

In [1]:
# Install the pinned GPU build of TensorFlow (2.0.0) and then the headliner package from PyPI.
# NOTE(review): headliner is installed without a version pin, so a later release may
# behave differently from the outputs recorded in this notebook — consider pinning it.
!pip install tensorflow-gpu==2.0.0
!pip install headliner

Collecting tensorflow-gpu==2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/25/44/47f0722aea081697143fbcf5d2aa60d1aee4aaacb5869aee2b568974777b/tensorflow_gpu-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (380.8MB)
[K     |████████████████████████████████| 380.8MB 87kB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0 (from tensorflow-gpu==2.0.0)
[?25l  Downloading https://files.pythonhosted.org/packages/95/00/5e6cdf86190a70d7382d320b2b04e4ff0f8191a37d90a422a2f8ff0705bb/tensorflow_estimator-2.0.0-py2.py3-none-any.whl (449kB)
[K     |████████████████████████████████| 450kB 41.6MB/s 
Collecting tensorboard<2.1.0,>=2.0.0 (from tensorflow-gpu==2.0.0)
[?25l  Downloading https://files.pythonhosted.org/packages/9b/a6/e8ffa4e2ddb216449d34cfcb825ebb38206bee5c4553d69e7bc8bc2c5d64/tensorboard-2.0.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 35.4MB/s 
[31mERROR: tensorflow 1.15.0rc3 has requirement tensorboard<1.16.0,>=1.15.0, but you'll have tensorbo

In [2]:
# Download the German-English sentence pairs
!wget http://www.manythings.org/anki/deu-eng.zip
!unzip deu-eng.zip

--2019-10-15 16:10:20--  http://www.manythings.org/anki/deu-eng.zip
Resolving www.manythings.org (www.manythings.org)... 104.24.108.196, 104.24.109.196, 2606:4700:30::6818:6dc4, ...
Connecting to www.manythings.org (www.manythings.org)|104.24.108.196|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7612057 (7.3M) [application/zip]
Saving to: ‘deu-eng.zip’


2019-10-15 16:10:23 (2.89 MB/s) - ‘deu-eng.zip’ saved [7612057/7612057]

Archive:  deu-eng.zip
  inflating: deu.txt                 
  inflating: _about.txt              


In [3]:
# Create the dataset but only take a subset for faster training
import io

def create_dataset(path, num_examples):
    """Read tab-separated sentence pairs from a UTF-8 text file.

    Every line is split on tab characters and only its first two fields are
    kept; any additional fields on the line are ignored.

    Args:
        path: Path of the text file, one tab-separated record per line.
        num_examples: Maximum number of lines to use, counted from the start
            of the file. ``None`` uses every line.

    Returns:
        The iterator produced by ``zip(*pairs)``: it yields one tuple per
        kept field, so for well-formed input it unpacks into two tuples
        (all first fields, all second fields).
    """
    # Read inside a context manager so the file handle is closed
    # deterministically instead of being left open until garbage collection.
    with io.open(path, encoding='UTF-8') as source_file:
        lines = source_file.read().strip().split('\n')
    # Slicing already returns a new list, so no extra copy is needed.
    word_pairs = [line.split('\t')[:2] for line in lines[:num_examples]]
    return zip(*word_pairs)

# Load only the first 30000 lines: `eng` receives the first column, `ger` the second
eng, ger = create_dataset(path='deu.txt', num_examples=30000)
# Re-pair the two columns into a list of (first, second) tuples
data = [(source, target) for source, target in zip(eng, ger)]
# Preview the first five pairs
data[:5]

[('Hi.', 'Hallo!'),
 ('Hi.', 'Grüß Gott!'),
 ('Run!', 'Lauf!'),
 ('Wow!', 'Potzdonner!'),
 ('Wow!', 'Donnerwetter!')]

In [0]:
# Split the dataset into train and test: 100 pairs are held out, the rest is used for training.
from sklearn.model_selection import train_test_split

# A fixed random_state makes the shuffle reproducible, so re-running the notebook
# trains and validates on the same examples every time.
train, test = train_test_split(data, test_size=100, random_state=42)

In [5]:
# Create headliner's SummarizerAttention model and train it on the sentence pairs
from headliner.trainer import Trainer
from headliner.model.summarizer_attention import SummarizerAttention

summarizer = SummarizerAttention(
    lstm_size=1024,
    embedding_size=256,
)

# The trainer logs the loss every `steps_to_log` batches and saves the model
# to `model_save_path` (see the training output of this cell).
trainer = Trainer(
    batch_size=64,
    steps_per_epoch=100,
    steps_to_log=20,
    max_output_len=10,
    model_save_path='/tmp/summarizer',
)

# The held-out `test` pairs are passed as validation data
trainer.train(summarizer, train, num_epochs=10, val_data=test)

training a bare model, preprocessing data to init model...
fitting tokenizers...
vocab encoder: 4710, vocab decoder: 7526
epoch 0, batch 20, logs: {'loss': 4.108179092407227}
epoch 0, batch 40, logs: {'loss': 3.623246765136719}
epoch 0, batch 60, logs: {'loss': 3.370672607421875}
epoch 0, batch 80, logs: {'loss': 3.2116676330566407}
epoch 0, batch 100, logs: {'loss': 3.091304931640625}

(input) Please sit down. 
(target) Bitte setz dich! 
(prediction) ich bin nicht . <end>


(input) We study music. 
(target) Wir studieren Musik. 
(prediction) ich ist nicht . <end>


(input) I want this one. 
(target) Ich will dieses. 
(prediction) ich bin nicht . <end>


(input) I am not a monster. 
(target) Ich bin kein Ungeheuer! 
(prediction) ich bin nicht . <end>


(input) Tom succeeded. 
(target) Tom hatte Erfolg. 
(prediction) ich ist nicht . <end>

loss_val improved from None to 2.6096150875091553, saving summarizer to /tmp/summarizer
epoch 1, batch 120, logs: {'loss': 2.5131534576416015, 'loss_

In [6]:
# Run the trained model on a single English sentence; predict() returns the
# generated output text as a string
summarizer.predict('How are you?')

'wie geht es ? <end>'