### Simple Neural Machine Translator

Adaptation from the PyTorch Tutorial: 

**Author:** `Matthew Inkawhich <https://github.com/MatthewInkawhich>`_

In [1]:
%matplotlib inline

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math


USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")

In [3]:
save_dir = os.path.join("data", "save")

In [4]:
# Default word tokens
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token
UNK_token = 3 # Unknown token, no Key Error thrown

class Voc:
    def __init__(self, name):
        self.name = name
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS", UNK_token:"UNK"}
        self.num_words = 4  # Count SOS, EOS, PAD

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1

    # Remove words below a certain count threshold
    def trim(self, min_count):
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = []

        for k, v in self.word2count.items():
            if v >= min_count:
                keep_words.append(k)

        print('keep_words {} / {} = {:.4f}'.format(
            len(keep_words), len(self.word2index), len(keep_words) / len(self.word2index)
        ))

        # Reinitialize dictionaries
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS",UNK_token:"UNK"}
        self.num_words = 4 # Count default tokens

        for word in keep_words:
            self.addWord(word)

In [6]:
# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s


######################################################################
# To read the data file we will split the file into lines, and then split
# lines into pairs. The files are all English → Other Language, so if we
# want to translate from Other Language → English I added the ``reverse``
# flag to reverse the pairs.
#

def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Voc(lang2)
        output_lang = Voc(lang1)
    else:
        input_lang = Voc(lang1)
        output_lang = Voc(lang2)

    return input_lang, output_lang, pairs


######################################################################
# Since there are a *lot* of example sentences and we want to train
# something quickly, we'll trim the data set to only relatively short and
# simple sentences. Here the maximum length is 10 words (that includes
# ending punctuation) and we're filtering to sentences that translate to
# the form "I am" or "He is" etc. (accounting for apostrophes replaced
# earlier).
#

MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


"""def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)"""

def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]


######################################################################
# The full process for preparing the data is:
#
# -  Read text file and split into lines, split lines into pairs
# -  Normalize text, filter by length and content
# -  Make word lists from sentences in pairs
#

def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.num_words)
    print(output_lang.name, output_lang.num_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'deu', True)
print(random.choice(pairs))

Reading lines...
Read 176692 sentence pairs
Trimmed to 132536 sentence pairs
Counting words...
Counted words:
deu 24307
eng 12416
['ich bin im letzten monat umgezogen .', 'i moved last month .']


In [8]:
def indexesFromSentence(voc, sentence):
    return [voc.word2index.get(word, 3) for word in sentence.split(' ')] + [EOS_token]


def zeroPadding(l, fillvalue=PAD_token):
    return list(itertools.zip_longest(*l, fillvalue=fillvalue))

def binaryMatrix(l, value=PAD_token):
    m = []
    for i, seq in enumerate(l):
        m.append([])
        for token in seq:
            if token == PAD_token:
                m[i].append(0)
            else:
                m[i].append(1)
    return m

# Returns padded input sequence tensor and lengths
def inputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    padVar = torch.LongTensor(padList)
    return padVar, lengths

# Returns padded target sequence tensor, padding mask, and max target length
def outputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    mask = binaryMatrix(padList)
    mask = torch.ByteTensor(mask)
    padVar = torch.LongTensor(padList)
    return padVar, mask, max_target_len

# Returns all items for a given batch of pairs
def batch2TrainData(src_voc, tar_voc, pair_batch):
    print(type(pair_batch))
    print(pair_batch[0])
    pair_batch.sort(key=lambda x: len(x[0].split(" ")), reverse=True)
    input_batch, output_batch = [], []
    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
    inp, lengths = inputVar(input_batch, src_voc)
    output, mask, max_target_len = outputVar(output_batch, tar_voc)
    return inp, lengths, output, mask, max_target_len

In [9]:
# Example for validation
small_batch_size = 5
batches = batch2TrainData(input_lang, output_lang, [random.choice(pairs) for _ in range(small_batch_size)])
input_variable, lengths, target_variable, mask, max_target_len = batches
print("Sentence 1:", pairs[0][1])
print("Test indexesFromSentence:", indexesFromSentence(output_lang, pairs[0][1]))
print("Sentence 2:", pairs[0][0])
print("Test indexesFromSentence:", indexesFromSentence(input_lang, pairs[0][0]))


<class 'list'>
['tom ist doppelt so alt wie maria .', 'tom is twice mary s age .']
Sentence 1: hi .
Test indexesFromSentence: [4, 5, 2]
Sentence 2: hallo !
Test indexesFromSentence: [4, 5, 2]


In [10]:
print("input_variable:\n", input_variable)
print("lengths:\n", lengths)

input_variable:
 tensor([[   25,    87,   167,    39,    70],
        [ 1201,   160,   371,  1261,   350],
        [  892,  6159,    87,    72,   449],
        [21280,   450,    72,  5748,  5139],
        [  915,    62,    30,    19,    19],
        [   20,   234, 11889,     2,     2],
        [ 1532,  1026,  5867,     0,     0],
        [  439,    19,    19,     0,     0],
        [   19,     2,     2,     0,     0],
        [    2,     0,     0,     0,     0]])
lengths:
 tensor([10,  9,  9,  6,  6])


In [12]:
for elem in input_variable:
    print(elem)
    for token in elem:
        #print(token)
        print("token:", token.item(), "transl:", input_lang.index2word.get(token.item(), "UNK"))

tensor([ 25,  87, 167,  39,  70])
token: 25 transl: es
token: 87 transl: tom
token: 167 transl: wir
token: 39 transl: er
token: 70 transl: das
tensor([1201,  160,  371, 1261,  350])
token: 1201 transl: kamen
token: 160 transl: ist
token: 371 transl: konnen
token: 1261 transl: konnte
token: 350 transl: sind
tensor([ 892, 6159,   87,   72,  449])
token: 892 transl: mehr
token: 6159 transl: doppelt
token: 87 transl: tom
token: 72 transl: nicht
token: 449 transl: meine
tensor([21280,   450,    72,  5748,  5139])
token: 21280 transl: zuschauer
token: 450 transl: so
token: 72 transl: nicht
token: 5748 transl: weitergehen
token: 5139 transl: eltern
tensor([915,  62,  30,  19,  19])
token: 915 transl: als
token: 62 transl: alt
token: 30 transl: zum
token: 19 transl: .
token: 19 transl: .
tensor([   20,   234, 11889,     2,     2])
token: 20 transl: ich
token: 234 transl: wie
token: 11889 transl: rucktritt
token: 2 transl: EOS
token: 2 transl: EOS
tensor([1532, 1026, 5867,    0,    0])
token: 1

In [15]:
for elem in input_variable.transpose(1,0):
    print(elem)
    for token in elem:
        #print(token)
        print("token:", token.item(), "transl:", input_lang.index2word.get(token.item(), "UNK"))

tensor([   25,  1201,   892, 21280,   915,    20,  1532,   439,    19,     2])
token: 25 transl: es
token: 1201 transl: kamen
token: 892 transl: mehr
token: 21280 transl: zuschauer
token: 915 transl: als
token: 20 transl: ich
token: 1532 transl: erwartet
token: 439 transl: hatte
token: 19 transl: .
token: 2 transl: EOS
tensor([  87,  160, 6159,  450,   62,  234, 1026,   19,    2,    0])
token: 87 transl: tom
token: 160 transl: ist
token: 6159 transl: doppelt
token: 450 transl: so
token: 62 transl: alt
token: 234 transl: wie
token: 1026 transl: maria
token: 19 transl: .
token: 2 transl: EOS
token: 0 transl: PAD
tensor([  167,   371,    87,    72,    30, 11889,  5867,    19,     2,     0])
token: 167 transl: wir
token: 371 transl: konnen
token: 87 transl: tom
token: 72 transl: nicht
token: 30 transl: zum
token: 11889 transl: rucktritt
token: 5867 transl: zwingen
token: 19 transl: .
token: 2 transl: EOS
token: 0 transl: PAD
tensor([  39, 1261,   72, 5748,   19,    2,    0,    0,    0,    

In [11]:
print("target_variable:\n", target_variable)
print("mask:\n", mask)
print("max_target_len:\n", max_target_len)

target_variable:
 tensor([[  432,    47,   178,    27,   458],
        [ 1022,   213,   118,   794,   333],
        [  601,  1867,   101,   384,   430],
        [11102,   587,  4126,   252,  3304],
        [ 2551,    75,    47,  1045,     5],
        [   16,   552,   209,  5369,     2],
        [  229,     5,   123,     5,     0],
        [  882,     2,     5,     2,     0],
        [    5,     0,     2,     0,     0],
        [    2,     0,     0,     0,     0]])
mask:
 tensor([[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 0],
        [1, 1, 1, 1, 0],
        [1, 0, 1, 0, 0],
        [1, 0, 0, 0, 0]], dtype=torch.uint8)
max_target_len:
 10


In [30]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1, dropout=0.1, bidirectional=True):
        super(EncoderRNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout
        self.bidirectional = bidirectional
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=self.dropout, bidirectional=True)
        
    def forward(self, input_seqs, input_lengths, hidden=None):
        # Note: we run this all at once (over multiple batches of multiple sequences)
        embedded = self.embedding(input_seqs)
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        outputs, hidden = self.gru(packed, hidden)
        outputs, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(outputs) # unpack (back to padded)
        if self.bidirectional: outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:] # Sum bidirectional outputs
        return outputs, hidden

In [33]:
class VanillaDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(VanillaDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        #Define layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        
        self.out = nn.Linear(hidden_size, output_size)
        #self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_step, last_hidden):
        embedded = self.embedding(input_step)
        embedded = self.embedding_dropout(embedded)
        print(embedded.shape)
        # Forward through unidirectional GRU
        output, hidden = self.gru(embedded, last_hidden)
        #output, hidden = self.gru(output, hidden)#
        
        #Squeeze first dimension
        output = output.squeeze(0)
        #Prediction
        output = self.out(output)
        output = F.softmax(output, dim=1)
        # Return output and final hidden state
        return output, hidden

In [12]:
def maskNLLLoss(inp, target, mask):
    #print(inp.shape)
   # print(target.shape)
    #print(mask.shape)
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    loss = crossEntropy.masked_select(mask).mean()
    loss = loss.to(device)
    return loss, nTotal.item()

In [34]:
#Visualization of the training
samll_batch_size = 5
print(random.choice(pairs))
batches = batch2TrainData(input_lang, output_lang, [random.choice(pairs) for _ in range(small_batch_size)])

input_var, lengths, tar_var, mask, max_tar_len = batches

print("Input var shape:", input_var.shape)
print("Lengths:", lengths.shape)
print("Tar var shape:", tar_var.shape)
print("Mask shape:", mask.shape)
print("Max tar len:", max_tar_len)

['kommen sie zuruck nach hause .', 'come back home .']
<class 'list'>
['ich halte viel von diesem buch .', 'i have a high opinion of this book .']
Input var shape: torch.Size([9, 5])
Lengths: torch.Size([5])
Tar var shape: torch.Size([10, 5])
Mask shape: torch.Size([10, 5])
Max tar len: 10


In [35]:
device = "cpu"

In [36]:
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
att_model = "dot"
src_emb = nn.Embedding(input_lang.num_words, hidden_size)
trg_emb = nn.Embedding(output_lang.num_words, hidden_size)

#Define the decoder and decoder
encoder = EncoderRNN(input_size=input_lang.num_words,hidden_size=hidden_size, n_layers=encoder_n_layers, dropout=dropout)
decoder = VanillaDecoderRNN(hidden_size=hidden_size,output_size= output_lang.num_words, n_layers=decoder_n_layers,dropout= dropout)
encoder = encoder.to(device)
decoder = decoder.to(device)

# Dropout layers in train mode
encoder.train()
decoder.train()

#Initialize optimizers
encoder_optim = optim.Adam(encoder.parameters(), lr=0.0001)
decoder_optim = optim.Adam(decoder.parameters(), lr=0.0001) 

encoder_optim.zero_grad()
decoder_optim.zero_grad()

input_var = input_var.to(device)
lengths = lengths.to(device)
tar_var = tar_var.to(device)
mask = mask.to(device)

loss = 0
print_losses = []
n_totals = 0

encoder_outs, enc_hidden = encoder(input_var, lengths)
print("Encoder output shapes:", encoder_outs.shape)
print("Last encoder hidden state:", enc_hidden.shape)

Encoder output shapes: torch.Size([9, 5, 500])
Last encoder hidden state: torch.Size([4, 5, 500])


In [37]:
#Decoder
decoder_input = torch.LongTensor([[SOS_token for _ in range(small_batch_size)]])
decoder_input = decoder_input.to(device)

print("INitial decoder input shape:", decoder_input.shape)
print(decoder_input)
print(decoder.n_layers)
decoder_hidden = enc_hidden[:decoder.n_layers]
print(decoder_hidden.shape)

INitial decoder input shape: torch.Size([1, 5])
tensor([[1, 1, 1, 1, 1]])
2
torch.Size([2, 5, 500])


In [18]:
print("Visualize timesteps in the RNN:\n")

for t in range(max_tar_len):
    print("*"*20, "Start computation for time step %s" %t, "*"*20)
    decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outs)
    print("Decoder output shape:", decoder_output.shape)
    print("Decoder hidden shape:", decoder_hidden.shape)
    #Teacher forcing: Next input is the current label
    decoder_input = tar_var[t].view(1,-1)
    print("Target variable before reshaping (shape):", tar_var[t].shape)
    print("Target variable before reshaping:", tar_var[t])
    print("Target variable after reshaping (new decoder input):", decoder_input.shape)
    
    #Calculate and accumulate loss
    print("The mask at current timestep:", mask[t])
    print("Mask shape:", mask[t].shape)
    
    
    mask_loss, nTotal = maskNLLLoss(decoder_output, tar_var[t], mask[t])
    
    print("Mask loss:", mask_loss)
    print("Total:", nTotal)
    
    loss+=mask_loss
    
    print_losses.append(mask_loss.item()*nTotal)
    print(print_losses)
    n_totals +=nTotal
    print(n_totals)
    
    encoder_optim.step()
    decoder_optim.step()
    returned_loss = sum(print_losses)/n_totals
    
    print("Returned loss", returned_loss)
    print("*"*20, "Done with this timestep!","*"*20 )
    print("\n")


Visualize timesteps in the RNN:

******************** Start computation for time step 0 ********************
Decoder output shape: torch.Size([5, 12416])
Decoder hidden shape: torch.Size([2, 5, 500])
Target variable before reshaping (shape): torch.Size([5])
Target variable before reshaping: tensor([ 16,  16,  27, 912, 634], device='cuda:0')
Target variable after reshaping (new decoder input): torch.Size([1, 5])
The mask at current timestep: tensor([1, 1, 1, 1, 1], device='cuda:0', dtype=torch.uint8)
Mask shape: torch.Size([5])
Mask loss: tensor(9.4501, device='cuda:0', grad_fn=<MeanBackward1>)
Total: 5
[47.25053310394287]
5
Returned loss 9.450106620788574
******************** Done with this timestep! ********************


******************** Start computation for time step 1 ********************
Decoder output shape: torch.Size([5, 12416])
Decoder hidden shape: torch.Size([2, 5, 500])
Target variable before reshaping (shape): torch.Size([5])
Target variable before reshaping: tensor([

In [19]:
def train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, src_embedding, trg_embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):

    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Determine if we are using teacher forcing this iteration
    rand = random.random()
    #print(rand)
    use_teacher_forcing = True if rand < teacher_forcing_ratio else False

    # Forward batch of sequences through decoder one time step at a time
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal

    # Perform backpropatation
    loss.backward()

    # Clip gradients: gradients are modified in place
    _ = torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = torch.nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses) / n_totals

In [20]:
def trainIters(model_name, src_voc, tar_voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, src_embedding, trg_embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,print_every, save_every, clip, corpus_name, loadFilename):
    # Load batches for each iteration
    training_batches = [batch2TrainData(src_voc, tar_voc, [random.choice(pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]

    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        # Extract fields from batch
        input_variable, lengths, target_variable, mask, max_target_len = training_batch

        # Run a training iteration with batch
        loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                     decoder, src_emb, trg_emb, encoder_optimizer, decoder_optimizer, batch_size, clip)
        print_loss += loss

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg))
            print_loss = 0

        # Save checkpoint
        if (iteration % save_every == 0):
            directory = os.path.join(save_dir, model_name, corpus_name, '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'src_dict': src_voc.__dict__,
                'tar_dict': tar_voc.__dict__,
                'src_embedding': src_embedding.state_dict(),
                'trg_embedding': trg_embedding.state_dict()
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))



In [21]:
class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores

In [22]:
def evaluate(encoder, decoder, searcher, src_voc, trg_voc, sentence, max_length=MAX_LENGTH):
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(src_voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    # Decode sentence with searcher
    tokens, scores = searcher(input_batch, lengths, max_length)
    # indexes -> words
    decoded_words = [trg_voc.index2word[token.item()] for token in tokens]
    return decoded_words


def evaluateInput(encoder, decoder, searcher,  src_voc, trg_voc):
    input_sentence = ''
    while(1):
        try:
            # Get input sentence
            input_sentence = input('> ')
            # Check if it is quit case
            if input_sentence == 'q' or input_sentence == 'quit': break
            # Normalize sentence
            input_sentence = normalizeString(input_sentence)
            # Evaluate sentence
            output_words = evaluate(encoder, decoder, searcher, src_voc, trg_voc, input_sentence)
            # Format and print response sentence
            output_words[:] = [x for x in output_words if not (x == 'EOS' or x == 'PAD')]
            print('Translation:', ' '.join(output_words))

        except KeyError:
            print("Error: Encountered unknown word.")



In [23]:
# Configure models
model_name = 'cb_model'
attn_model = 'dot'
#attn_model = 'general'
#attn_model = 'concat'
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64

# Set checkpoint to load from; set to None if starting from scratch
loadFilename = None
checkpoint_iter = 4000
#loadFilename = os.path.join(save_dir, model_name, corpus_name,
#                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
#                            '{}_checkpoint.tar'.format(checkpoint_iter))


# Load model if a loadFilename is provided
if loadFilename:
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename)
    # If loading a model trained on GPU to CPU
    #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']


print('Building encoder and decoder ...')
# Initialize word embeddings
src_emb = nn.Embedding(input_lang.num_words, hidden_size)
trg_emb = nn.Embedding(output_lang.num_words, hidden_size)

if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, src_emb, encoder_n_layers, dropout)
#decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
decoder = VanillaDecoderRNN(trg_emb, hidden_size, output_lang.num_words, decoder_n_layers, dropout)

if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')
print(encoder)
print(decoder)

Building encoder and decoder ...
Models built and ready to go!
EncoderRNN(
  (embedding): Embedding(24307, 500)
  (gru): GRU(500, 500, num_layers=2, dropout=0.1)
)
VanillaDecoderRNN(
  (embedding): Embedding(12416, 500)
  (embedding_dropout): Dropout(p=0.1)
  (gru): GRU(500, 500, num_layers=2, dropout=0.1)
  (out): Linear(in_features=500, out_features=12416, bias=True)
)


In [24]:
# Configure training/optimization
clip = 30.0
teacher_forcing_ratio = 0.3
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 5000
print_every = 100
save_every = 500

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations
print("Starting Training!")
trainIters(model_name, input_lang, output_lang, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer,
           src_emb, trg_emb, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,
           print_every, save_every, clip, "eng-deu.txt", loadFilename)

Building optimizers ...
Starting Training!
<class 'list'>
['schau dir diese da an .', 'look at these .']
<class 'list'>
['tom sagt er wolle sich hauslich niederlassen .', 'tom says he wants to settle down .']
<class 'list'>
['tom hat alles auf eine karte gesetzt .', 'tom put all his eggs in one basket .']
<class 'list'>
['wir bezweifeln gar nicht was du sagst .', 'we don t question what you say .']
<class 'list'>
['tom hatte keine ahnung wie sich maria fuhlte .', 'tom had no idea how mary was feeling .']
<class 'list'>
['tom scheint dumm zu sein .', 'tom seems stupid .']
<class 'list'>
['tom ist erschossen worden .', 'tom has been shot .']
<class 'list'>
['ich werde dich vermissen .', 'i will miss you .']
<class 'list'>
['die tiere starben eins nach dem anderen .', 'one after another the animals died .']
<class 'list'>
['ich werde morgen bei ihm vorbeischauen .', 'i will call on him tomorrow .']
<class 'list'>
['tom ist angewidert .', 'tom is sickened .']
<class 'list'>
['das ist es wa

<class 'list'>
['gestern habe ich ein interessantes buch gelesen .', 'i read an interesting book yesterday .']
<class 'list'>
['ich will eine neue kuche .', 'i want a new kitchen .']
<class 'list'>
['tom fing an zu schwitzen .', 'tom began to perspire .']
<class 'list'>
['tom mochte maria nie sonderlich leiden .', 'tom never liked mary very much .']
<class 'list'>
['sind alle zufrieden ?', 'is everybody happy ?']
<class 'list'>
['ich habe etwas geld dabei .', 'i have some money with me .']
<class 'list'>
['gib dem tom einen kuss !', 'give tom a kiss .']
<class 'list'>
['sie sind autark .', 'they re self sufficient .']
<class 'list'>
['was ist dein lieblingsvogel ?', 'what is your favorite bird ?']
<class 'list'>
['du musst zu der party gehen .', 'you have to go to the party .']
<class 'list'>
['spielen wir dieses spiel nochmal .', 'let s play this game again .']
<class 'list'>
['hier lebt niemand mehr .', 'no one lives here anymore .']
<class 'list'>
['hast du auf tom geschossen ?', 'd

<class 'list'>
['die baume haben begonnen zu knospen .', 'the trees are beginning to bud .']
<class 'list'>
['warum hast du keinen kaffee gekauft ?', 'why didn t you buy any coffee ?']
<class 'list'>
['niemand zwingt dich zu bleiben .', 'nobody s forcing you to stay .']
<class 'list'>
['tom sieht nicht sehr glucklich aus .', 'tom doesn t look too happy .']
<class 'list'>
['danke fur deine eingehende erklarung .', 'thank you for your thorough explanation .']
<class 'list'>
['vielleicht sollten wir mal bei tom vorbeifahren .', 'maybe we should stop by and see tom .']
<class 'list'>
['du magst es nicht oder ?', 'you don t like it do you ?']
<class 'list'>
['wir haben nur eine chance .', 'we have only one chance .']
<class 'list'>
['ich hatte es beinahe vergessen .', 'i almost forgot it .']
<class 'list'>
['wir sahen wolken uber dem berg .', 'we saw clouds above the mountain .']
<class 'list'>
['wir brauchen einen sieg .', 'we need a victory .']
<class 'list'>
['ich mochte abnehmen .', 'i 

<class 'list'>
['tom ist sehr ehrgeizig .', 'tom is very ambitious .']
<class 'list'>
['ich bin ein junge .', 'i am a boy .']
<class 'list'>
['hor mal das kannst du doch !', 'come on you can do it .']
<class 'list'>
['wer hat dir gesagt dass ich kanadier sei ?', 'who told you i was canadian ?']
<class 'list'>
['wessen haus ist das ?', 'whose house is this ?']
<class 'list'>
['tom ist glucklich damit .', 'tom is happy with it .']
<class 'list'>
['diese kopfhorer funktionieren nicht .', 'those earphones don t work .']
<class 'list'>
['ich glaube ich komme allein damit zurecht .', 'i think i can handle this one alone .']
<class 'list'>
['sie alle haben angst vor tom .', 'they re all scared of tom .']
<class 'list'>
['das ware ne dufte sache finde ich .', 'i think that would be cool .']
<class 'list'>
['dieser zug ist auf dem weg nach boston .', 'this train is bound for boston .']
<class 'list'>
['tom ist ein sanger .', 'tom is a singer .']
<class 'list'>
['was haltst du von diesem plan ?'

<class 'list'>
['die leute wussten dass er sie nicht mochte .', 'the people knew he did not like them .']
<class 'list'>
['halt dich am gelander fest .', 'hold on to the handrail .']
<class 'list'>
['du bist es der nicht versteht .', 'it s you who doesn t understand .']
<class 'list'>
['wir sind aus australien zuruck .', 'we re back from australia .']
<class 'list'>
['konntest du das in verstandlichem deutsch formulieren ?', 'could you say that in plain english ?']
<class 'list'>
['im grunde bin ich mit euch einer meinung .', 'basically i agree with your opinion .']
<class 'list'>
['fleckfieber ist eine ansteckende krankheit .', 'typhus is a contagious disease .']
<class 'list'>
['er sagte er wei es nicht .', 'he said that he didn t know .']
<class 'list'>
['nahre die erkaltung hungere das fieber aber aus !', 'feed a cold and starve a fever .']
<class 'list'>
['das habe ich nie bezweifelt .', 'i never doubted that .']
<class 'list'>
['ich mochte nicht dahin zuruck .', 'i don t want to 

<class 'list'>
['hausaufgaben zu machen ist extrem langweilig .', 'doing homework is extremely boring .']
<class 'list'>
['sie durfen tun was immer sie wollen .', 'you may do whatever you want to .']
<class 'list'>
['ich dachte du warst aus geschaftlichen grunden gekommen .', 'i thought you had come on business .']
<class 'list'>
['er erfullte seine pflicht .', 'he did his duty .']
<class 'list'>
['das geht mir auf die nerven .', 'it gets on my nerves .']
<class 'list'>
['worauf horen wir ?', 'what are we listening for ?']
<class 'list'>
['er fuhr mit dem auto nach boston .', 'he went to boston by car .']
<class 'list'>
['du solltest aus deinen fehlern lernen .', 'you should learn from your mistakes .']
<class 'list'>
['setzen wir uns in den schatten !', 'let s go and sit in the shade .']
<class 'list'>
['reich mir bitte den wein heruber .', 'pass me the wine please .']
<class 'list'>
['ich war fur drei jahre im gefangnis .', 'i was in prison for three years .']
<class 'list'>
['tom wi

<class 'list'>
['maria kommt ganz nach ihrer mutter .', 'mary really takes after her mother .']
<class 'list'>
['er lacht nie uber meine witze .', 'he never laughs at my jokes .']
<class 'list'>
['unsere mannschaft ist noch unbesiegt .', 'our team is still undefeated .']
<class 'list'>
['er ist sehr einsam .', 'he s very lonely .']
<class 'list'>
['dieser hut hat mich zehn dollar gekostet .', 'this hat cost me .']
<class 'list'>
['sie stellte mir eine frage .', 'she asked me a question .']
<class 'list'>
['tom spielt klavier .', 'tom is playing piano .']
<class 'list'>
['tom hat keine ahnung was er tun soll .', 'tom has no idea what to do .']
<class 'list'>
['ich mach ihnen eine tasse tee .', 'i ll fix you a cup of tea .']
<class 'list'>
['wir mussen uns an die regeln halten .', 'we must observe the rules .']
<class 'list'>
['ich bin praktisch bereits erwachsen .', 'i m practically an adult already .']
<class 'list'>
['gib ihm eine pause !', 'give him a break !']
<class 'list'>
['ich k

<class 'list'>
['ich habe es genossen .', 'i was enjoying it .']
<class 'list'>
['tom hatte beinahe vergessen die tur abzuschlie en .', 'tom almost forgot to lock the door .']
<class 'list'>
['das programm fangt in zehn minuten an .', 'the show starts in ten minutes .']
<class 'list'>
['sie kennen sie nicht .', 'you don t know them .']
<class 'list'>
['tom hat kein telefon .', 'tom doesn t have a telephone .']
<class 'list'>
['bist du am sonntag nachmittag beschaftigt ?', 'are you busy on sunday afternoon ?']
<class 'list'>
['es ist effizienter das so zu machen .', 'doing it this way is more efficient .']
<class 'list'>
['tom hat mir gestern geschrieben .', 'tom wrote to me yesterday .']
<class 'list'>
['aus welchem teil von kanada kommt ihr ?', 'what part of canada are you from ?']
<class 'list'>
['tom sieht maria an .', 'tom is looking at mary .']
<class 'list'>
['du brauchst einen spielhebel .', 'you need a joystick .']
<class 'list'>
['tom starb an unterkuhlung .', 'tom died of hyp

<class 'list'>
['wir sprechen kaum noch miteinander .', 'we hardly even talk anymore .']
<class 'list'>
['wer ist dein lehrer ?', 'who is your teacher ?']
<class 'list'>
['er bestritt den mann zu kennen .', 'he denied that he knew that man .']
<class 'list'>
['tom hat mir nicht gesagt das zu tun .', 'tom didn t tell me to do that .']
<class 'list'>
['ich bringe es euch morgen .', 'i ll bring it to you tomorrow .']
<class 'list'>
['das ware reiner selbstmord .', 'that would be suicidal .']
<class 'list'>
['tom und maria wohnen jetzt in boston .', 'tom and mary now live in boston .']
<class 'list'>
['tom meldete sich .', 'tom came forward .']
<class 'list'>
['hat tom ein fuhrungszeugnis ?', 'does tom have a criminal record ?']
<class 'list'>
['ich werde ein bad nehmen .', 'i m going to take a bath .']
<class 'list'>
['warte bis sechs .', 'wait till six .']
<class 'list'>
['ich glaube du sitzt auf meinem platz .', 'i think you re in my seat .']
<class 'list'>
['sage bitte meiner frau nich

<class 'list'>
['sie ist genauso beschaftigt wie tom .', 'she s as busy as tom .']
<class 'list'>
['bitte schauen sie sich dieses bild an .', 'please take a look at this picture .']
<class 'list'>
['lass uns gleich an den strand gehen .', 'let s go straight to the beach .']
<class 'list'>
['was wirst du dieses wochenende tun ?', 'what ll you do this weekend ?']
<class 'list'>
['ist das zu viel gefragt ?', 'is that too much to ask ?']
<class 'list'>
['ich mochte heimgehen .', 'i want to go home .']
<class 'list'>
['schon dich wiederzusehen !', 'nice to see you again .']
<class 'list'>
['an deiner stelle wurde ich das kaufen .', 'if i were you i would buy it .']
<class 'list'>
['maria trug ein schlichtes wei es kleid .', 'mary wore a simple white dress .']
<class 'list'>
['sie fuhren abwechselnd das auto .', 'they took turns driving the car .']
<class 'list'>
['tom brauchte unterstutzung .', 'tom needed assistance .']
<class 'list'>
['mir wird der platz im kasten zu wenig .', 'i m runnin

<class 'list'>
['mein vater lauscht klassischer musik .', 'my father listens to classical music .']
<class 'list'>
['die nachricht verbreitete sich wie ein lauffeuer .', 'the news spread like wildfire .']
<class 'list'>
['was ist zwei plus zwei ?', 'what is two plus two ?']
<class 'list'>
['wir mussen ihn warnen .', 'we have to warn him .']
<class 'list'>
['tom hat nicht gelogen .', 'tom wasn t lying .']
<class 'list'>
['es ist ein gutes geschaft .', 'it s a good deal .']
<class 'list'>
['das schiff wird einen zwischenhalt in cadiz machen .', 'the ship will make a stop at cadiz .']
<class 'list'>
['sie sind sehr arrogant .', 'you are very arrogant .']
<class 'list'>
['ich lie ihn meine armbanduhr reparieren .', 'i had him fix my watch .']
<class 'list'>
['er lebt dort alleine .', 'he lives there alone .']
<class 'list'>
['die gehoren tom .', 'these are tom s .']
<class 'list'>
['er ist ein gelehrter mann .', 'he is a learned man .']
<class 'list'>
['warum wurdest du so wutend ?', 'why 

<class 'list'>
['er tauschte vor arzt zu sein .', 'he made believe he was a doctor .']
<class 'list'>
['wir haben reichlich wenig zeit .', 'we have precious little time .']
<class 'list'>
['ich werde bis vier uhr warten .', 'i ll wait until four o clock .']
<class 'list'>
['er bog um die ecke .', 'he turned the corner .']
<class 'list'>
['diese faktoren sind wichtig .', 'those factors are important .']
<class 'list'>
['verunreinigtes trinkwasser kann krankheiten verursachen .', 'dirty drinking water can cause sickness .']
<class 'list'>
['er braucht nicht zu arbeiten .', 'he doesn t need to work .']
<class 'list'>
['ihr konnt auf ihn zahlen .', 'you can rely upon him .']
<class 'list'>
['es mangelt ihm an erfahrung .', 'he is lacking in experience .']
<class 'list'>
['sie ist eine ausgezeichnete studentin .', 'she is an excellent student .']
<class 'list'>
['wie viele sprachen sprechen sie ?', 'how many languages do you speak ?']
<class 'list'>
['wie geht es der familie ?', 'how is the

Iteration: 100; Percent complete: 2.0%; Average loss: 5.4817
Iteration: 200; Percent complete: 4.0%; Average loss: 4.6582
Iteration: 300; Percent complete: 6.0%; Average loss: 4.4096
Iteration: 400; Percent complete: 8.0%; Average loss: 4.2949
Iteration: 500; Percent complete: 10.0%; Average loss: 4.1314
Iteration: 600; Percent complete: 12.0%; Average loss: 4.0345
Iteration: 700; Percent complete: 14.0%; Average loss: 3.9506
Iteration: 800; Percent complete: 16.0%; Average loss: 3.8717
Iteration: 900; Percent complete: 18.0%; Average loss: 3.8009
Iteration: 1000; Percent complete: 20.0%; Average loss: 3.7021
Iteration: 1100; Percent complete: 22.0%; Average loss: 3.6301
Iteration: 1200; Percent complete: 24.0%; Average loss: 3.6322
Iteration: 1300; Percent complete: 26.0%; Average loss: 3.5272
Iteration: 1400; Percent complete: 28.0%; Average loss: 3.4254
Iteration: 1500; Percent complete: 30.0%; Average loss: 3.3856
Iteration: 1600; Percent complete: 32.0%; Average loss: 3.3921
Itera

In [25]:
# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)



In [26]:
# Begin chatting (uncomment and run the following line to begin)
evaluateInput(encoder, decoder, searcher, input_lang, output_lang)

> how are you
Translation: my is is in . .
> Hello
Translation: have is a .
> hi
Translation: do it .
> Krass
Translation: have is a .
> Hallo wie heißt du?
Translation: what old do you think ?
> Wie alt bist du?
Translation: how old are you ?
> q
