# Neural Machine Translation Example

In [1]:
# Install the package via PyPI
!pip install headliner

Collecting headliner
  Downloading https://files.pythonhosted.org/packages/ed/4b/9d97ce8ac3738b669c40143e2a32424a6c18d4e488856e78a8c8d9518ed9/headliner-0.0.5-py3-none-any.whl
Collecting nltk==3.4.5 (from headliner)
[?25l  Downloading https://files.pythonhosted.org/packages/f6/1d/d925cfb4f324ede997f6d47bea4d9babba51b49e87a767c170b77005889d/nltk-3.4.5.zip (1.5MB)
[K     |████████████████████████████████| 1.5MB 7.7MB/s 
[?25hCollecting pyyaml==5.1.2 (from headliner)
[?25l  Downloading https://files.pythonhosted.org/packages/e3/e8/b3212641ee2718d556df0f23f78de8303f068fe29cdaa7a91018849582fe/PyYAML-5.1.2.tar.gz (265kB)
[K     |████████████████████████████████| 266kB 37.0MB/s 
Collecting tensorflow==2.0.0 (from headliner)
[?25l  Downloading https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (86.3MB)
[K     |████████████████████████████████| 86.3MB 1.2MB/s 
Collecting tensorboar

In [2]:
# Download the German-English sentence pairs
!wget http://www.manythings.org/anki/deu-eng.zip
!unzip deu-eng.zip
!head deu.txt

--2019-10-01 14:57:17--  http://www.manythings.org/anki/deu-eng.zip
Resolving www.manythings.org (www.manythings.org)... 104.24.108.196, 104.24.109.196, 2606:4700:30::6818:6dc4, ...
Connecting to www.manythings.org (www.manythings.org)|104.24.108.196|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4541707 (4.3M) [application/zip]
Saving to: ‘deu-eng.zip’


2019-10-01 14:57:19 (2.44 MB/s) - ‘deu-eng.zip’ saved [4541707/4541707]

Archive:  deu-eng.zip
  inflating: deu.txt                 
  inflating: _about.txt              
Hi.	Hallo!
Hi.	Grüß Gott!
Run!	Lauf!
Wow!	Potzdonner!
Wow!	Donnerwetter!
Fire!	Feuer!
Help!	Hilfe!
Help!	Zu Hülf!
Stop!	Stopp!
Wait!	Warte!


In [0]:
# Create the dataset
import io

def create_dataset(path, num_examples=None):
    """Read tab-separated examples from a text file and group them by column.

    Args:
        path: Path of a UTF-8 encoded text file with one example per line,
            whose columns are separated by tab characters.
        num_examples: Number of leading lines to use. ``None`` (the default)
            uses every line of the file.

    Returns:
        A ``zip`` iterator yielding one tuple per column, each holding that
        column's value for every selected line (e.g. all first-column
        sentences, then all second-column sentences). Because ``zip`` stops
        at the shortest input, the number of columns equals that of the
        shortest selected line.
    """
    # The context manager closes the file handle as soon as the text is read,
    # instead of leaving it open until garbage collection.
    with io.open(path, encoding='UTF-8') as corpus_file:
        lines = corpus_file.read().strip().split('\n')
    word_pairs = [line.split('\t') for line in lines[:num_examples]]
    # Transpose rows of columns into columns of rows.
    return zip(*word_pairs)

# Only the first 500 lines of the corpus are used.
eng, ger = create_dataset('deu.txt', 500)
# Pair each German sentence with its English counterpart, German first.
data = [(german, english) for german, english in zip(ger, eng)]

In [0]:
# Split the dataset into train and test
from sklearn.model_selection import train_test_split

# Hold out 10% of the pairs for validation. A fixed random_state makes the shuffle,
# and therefore the split, identical on every run of the notebook.
train, test = train_test_split(data, test_size=0.1, random_state=42)

In [5]:
# Define the model and train it
from headliner.trainer import Trainer
from headliner.model.summarizer_attention import SummarizerAttention

# Attention-based model with the sizes chosen for this example.
summarizer = SummarizerAttention(
    lstm_size=64,
    embedding_size=24,
)

# Training configuration; the model is saved under /tmp/summarizer.
trainer = Trainer(
    batch_size=32,
    steps_per_epoch=100,
    steps_to_log=20,
    model_save_path='/tmp/summarizer',
)

# Train on the training pairs, using the held-out pairs as validation data.
trainer.train(
    summarizer,
    train,
    num_epochs=10,
    val_data=test,
)

training a bare model, initializing preprocessing...
vocab encoder: 383, vocab decoder: 203, start training loop...
finished iterating over dataset, total batches: 14
epoch 0, batch 20, logs: {'loss': 4.0605316162109375}
finished iterating over dataset, total batches: 28
epoch 0, batch 40, logs: {'loss': 3.1279962062835693}
finished iterating over dataset, total batches: 42
finished iterating over dataset, total batches: 56
epoch 0, batch 60, logs: {'loss': 2.5084290504455566}
finished iterating over dataset, total batches: 70
epoch 0, batch 80, logs: {'loss': 2.934246778488159}
finished iterating over dataset, total batches: 84
finished iterating over dataset, total batches: 98
epoch 0, batch 100, logs: {'loss': 2.871776580810547}

(input) Ich bin auf. 
(target) I'm up. 
(prediction) . . <end>


(input) Ich bin neu. 
(target) I'm new. 
(prediction) . . <end>


(input) Rettet Tom! 
(target) Save Tom. 
(prediction) . . <end>


(input) Ich benutze es. 
(target) I use it. 
(prediction) . 

In [6]:
# Run the trained model on a single sentence; the cell displays the returned string.
# NOTE(review): the training pairs are (German, English), so the model presumably
# expects a German input sentence here — confirm.
summarizer.predict('Hi.')

'come on . <end>'