# Neural Machine Translation Example

In [1]:
# Install the notebook's dependencies from PyPI:
#   - tensorflow-gpu, pinned to release 2.0.0
#   - headliner, the package demonstrated in this notebook
# NOTE(review): headliner is installed without a version pin, so a re-run may
# pull a different release than the one that produced the outputs below --
# consider pinning it.
!pip install tensorflow-gpu==2.0.0
!pip install headliner

Collecting tensorflow-gpu==2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/25/44/47f0722aea081697143fbcf5d2aa60d1aee4aaacb5869aee2b568974777b/tensorflow_gpu-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (380.8MB)
[K     |████████████████████████████████| 380.8MB 61kB/s 
[?25hCollecting tensorboard<2.1.0,>=2.0.0 (from tensorflow-gpu==2.0.0)
[?25l  Downloading https://files.pythonhosted.org/packages/9b/a6/e8ffa4e2ddb216449d34cfcb825ebb38206bee5c4553d69e7bc8bc2c5d64/tensorboard-2.0.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 30.8MB/s 
Collecting gast==0.2.2 (from tensorflow-gpu==2.0.0)
  Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz
Collecting tensorflow-estimator<2.1.0,>=2.0.0 (from tensorflow-gpu==2.0.0)
[?25l  Downloading https://files.pythonhosted.org/packages/95/00/5e6cdf86190a70d7382d320b2b04e4ff0f8191a37d90a422a2f8ff0705bb/tensorflow_estimator

In [2]:
# Download the German-English sentence pairs and preview the first lines.
# -nc skips the download when deu-eng.zip is already present, and -o lets unzip
# overwrite a previous extraction without prompting, so the cell can be re-run
# safely (Restart & Run All) instead of creating deu-eng.zip.1 or waiting on a prompt.
!wget -nc http://www.manythings.org/anki/deu-eng.zip
!unzip -o deu-eng.zip
!head deu.txt

--2019-10-04 09:17:09--  http://www.manythings.org/anki/deu-eng.zip
Resolving www.manythings.org (www.manythings.org)... 104.24.108.196, 104.24.109.196, 2606:4700:30::6818:6cc4, ...
Connecting to www.manythings.org (www.manythings.org)|104.24.108.196|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4541707 (4.3M) [application/zip]
Saving to: ‘deu-eng.zip’


2019-10-04 09:17:10 (5.36 MB/s) - ‘deu-eng.zip’ saved [4541707/4541707]

Archive:  deu-eng.zip
  inflating: deu.txt                 
  inflating: _about.txt              
Hi.	Hallo!
Hi.	Grüß Gott!
Run!	Lauf!
Wow!	Potzdonner!
Wow!	Donnerwetter!
Fire!	Feuer!
Help!	Hilfe!
Help!	Zu Hülf!
Stop!	Stopp!
Wait!	Warte!


In [0]:
# Create the dataset but only take a subset for faster training
import io

def create_dataset(path, num_examples):
    """Read tab-separated examples from a text file and group them by column.

    Args:
        path: Path of a UTF-8 encoded text file with one example per line,
            the fields of each line separated by tab characters.
        num_examples: Maximum number of lines to use, counted from the top
            of the file (``None`` uses every line).

    Returns:
        An iterator yielding one tuple per column: the first tuple holds the
        first field of every selected line, the second tuple the second
        field, and so on. Because ``zip`` is used, the number of tuples is
        bounded by the line with the fewest fields.
    """
    # Use a context manager so the file handle is closed as soon as the text
    # has been read, rather than whenever the garbage collector gets to it.
    with io.open(path, encoding='UTF-8') as f:
        lines = f.read().strip().split('\n')
    word_pairs = [line.split('\t') for line in lines[:num_examples]]
    # Transpose rows of fields into per-column tuples.
    return zip(*word_pairs)

# Load the first 30000 sentence pairs (English column first, German second),
# then store each example as a (German, English) tuple.
eng, ger = create_dataset('deu.txt', 30000)
data = [(german, english) for german, english in zip(ger, eng)]

In [0]:
# Split the dataset into train and test
from sklearn.model_selection import train_test_split

# Hold out 100 pairs for validation. The fixed random_state makes the shuffled
# split identical on every run, so results are reproducible.
train, test = train_test_split(data, test_size=100, random_state=42)

In [5]:
# Define the model and train it
from headliner.trainer import Trainer
from headliner.model.summarizer_attention import SummarizerAttention

# Model to be trained.
summarizer = SummarizerAttention(embedding_size=256, lstm_size=1024)

# Trainer settings gathered in one place so they are easy to tweak.
trainer_settings = dict(
    batch_size=64,
    steps_per_epoch=100,
    steps_to_log=20,
    max_output_len=10,
    model_save_path='/tmp/summarizer',
)
trainer = Trainer(**trainer_settings)

# Train for 10 epochs, passing the held-out pairs as validation data.
trainer.train(summarizer, train, num_epochs=10, val_data=test)

training a bare model, initializing preprocessing...
vocab encoder: 7551, vocab decoder: 4715, start training loop...
epoch 0, batch 20, logs: {'loss': 3.1557514667510986}
epoch 0, batch 40, logs: {'loss': 2.48339581489563}
epoch 0, batch 60, logs: {'loss': 2.7589330673217773}
epoch 0, batch 80, logs: {'loss': 2.0260400772094727}
epoch 0, batch 100, logs: {'loss': 2.1709651947021484}

(input) Warum bist du zu spät gekommen? 
(target) Why were you late? 
(prediction) you you ? <end>


(input) Benehmt euch nicht daneben! 
(target) Don't misbehave. 
(prediction) you is . <end>


(input) Ich kann auf dich warten. 
(target) I can wait for you. 
(prediction) i i have have . <end>


(input) Tom starb an Krebs. 
(target) Tom died of cancer. 
(prediction) tom is . <end>


(input) Mache bitte mein Bett zurecht! 
(target) Please make my bed. 
(prediction) i have . <end>

loss_val improved from None to 2.345742702484131, saving summarizer to /tmp/summarizer
epoch 1, batch 120, logs: {'loss': 1.932

In [6]:
# Try the trained model on a sample German sentence; the value of the last
# expression is displayed as the cell output.
sample_sentence = 'Hallo mir geht es gut.'
summarizer.predict(sample_sentence)

"hi , i'm fine . <end>"