In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to the local helper modules imported
# below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [99]:
import random
import time
from utils import *
from models import *
from attention import *

import torch
import torch.optim as optim
from torch.utils import data
from sklearn.model_selection import train_test_split
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Download the file
# (get_file is not defined in this notebook, so it comes from one of the
# star-imports; it is called without arguments.)
get_file()
# Path handed to load_dataset in the next cell — presumably the location where
# get_file leaves the corpus; TODO confirm against get_file.
path_to_file = 'spa-eng/spa.txt'



In [4]:
# Size of the dataset to load; try experimenting with smaller values.
num_examples = 118960
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(
    path_to_file, num_examples
)

# max_length of the target tensors first, then of the input tensors.
max_length_targ = max_length(target_tensor)
max_length_inp = max_length(input_tensor)

The number of line in the dataset is 118964


In [5]:
# Creating training and validation sets using a 90-10 split (test_size=0.1).
# NOTE: no random_state is passed, so the rows that land in each set differ
# from run to run.
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.1)

# Show length
print(len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val))

107064 107064 11896 11896


In [6]:
#inp_lang.index_word

In [7]:
# Show the index -> word mapping for the first training pair, input side first.
print("Input Language; index to word mapping")
convert(inp_lang, input_tensor_train[0])
print()
print("Target Language; index to word mapping")
convert(targ_lang, target_tensor_train[0])

Input Language; index to word mapping
2 ----> <start>
15 ----> es
11 ----> la
820 ----> voz
6 ----> de
23 ----> una
2009 ----> anciana
4 ----> .
3 ----> <end>

Target Language; index to word mapping
2 ----> <start>
15 ----> it
16 ----> s
6 ----> the
924 ----> voice
19 ----> of
74 ----> an
154 ----> old
426 ----> woman
4 ----> .
3 ----> <end>


In [8]:
# Sizes handed to the DataLoader and to the Encoder/Decoder constructors below.
BATCH_SIZE = 64
embedding_dim = 256
units = 1024

# Number of full batches in the training set (floor division).
steps_per_epoch = len(input_tensor_train) // BATCH_SIZE

# One more than the number of entries in each language's word_index.
vocab_inp_size = len(inp_lang.word_index) + 1
vocab_tar_size = len(targ_lang.word_index) + 1

In [9]:
# Transform the training arrays to int64 torch tensors directly.
# torch.Tensor(...) would first build a float32 tensor and only then cast to
# long: an extra copy, and float32 cannot represent integers above 2**24
# exactly. torch.tensor(..., dtype=torch.long) still copies the data, so the
# tensors do not share memory with the source arrays.
tensor_x = torch.tensor(input_tensor_train, dtype=torch.long)
tensor_y = torch.tensor(target_tensor_train, dtype=torch.long)
# Create the dataset: row i pairs input sequence i with target sequence i.
my_dataset = data.TensorDataset(tensor_x, tensor_y)
# Create the dataloader: shuffled mini-batches; drop_last=True discards the
# final incomplete batch so every batch has exactly BATCH_SIZE rows.
my_dataloader = data.DataLoader(my_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=True,
                                drop_last=True,
                                num_workers=4)

In [10]:
# Pull a single (input, target) batch from a fresh iterator over the
# DataLoader; it is used below to check the shapes produced by each model part.
example_input_batch, example_target_batch = next(iter(my_dataloader))

In [11]:
# Display the shape of the sample input batch.
example_input_batch.shape

torch.Size([64, 42])

In [12]:
# Throwaway encoder used only to check output shapes on one batch. It is never
# moved with .to(device); the training setup cell builds its own encoder.
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

# sample input
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
# NOTE(review): the recorded output of this cell shows a 3-D hidden state,
# torch.Size([1, 64, 1024]), so the "(batch size, units)" label below leaves
# out the leading dimension.
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))

Encoder output shape: (batch size, sequence length, units) torch.Size([64, 42, 1024])
Encoder Hidden state shape: (batch size, units) torch.Size([1, 64, 1024])


In [13]:
# Shape check for the attention layer on the sample encoder outputs.
# NOTE(review): 10 and 1024 are passed positionally; their meaning is defined
# by BahdanauAttention's constructor (not visible here). 1024 matches `units`
# — TODO confirm and replace the literal if that is the intent.
attention_layer = BahdanauAttention(10, 1024)
attention_result, attention_weights = attention_layer(sample_hidden, sample_output)

print("Attention result shape: (batch size, units) {}".format(attention_result.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))

Attention result shape: (batch size, units) torch.Size([64, 1024])
Attention weights shape: (batch_size, sequence_length, 1) torch.Size([64, 42, 1])


In [14]:
# Throwaway decoder used only to check the output shape of one decoding step.
# NOTE(review): the trailing 1024 matches `units`; its meaning is defined by
# Decoder's constructor (not visible here) — TODO confirm.
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE, 1024)

# The decoder input is a (BATCH_SIZE, 1) tensor of random integers in [1, 20),
# standing in for one token id per batch row.
sample_decoder_output, _, _ = decoder(torch.randint(1, 20, (BATCH_SIZE, 1)),
                                      sample_hidden, sample_output)

print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

Decoder output shape: (batch_size, vocab size) torch.Size([64, 12930])


In [15]:
# Fresh encoder/decoder for training, moved to the selected device. These
# rebind the names used by the shape-check cells above.
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE).to(device)
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE, 1024).to(device)

# One Adam optimizer per network, with Adam's default hyper-parameters.
encoder_optimizer = optim.Adam(encoder.parameters())
decoder_optimizer = optim.Adam(decoder.parameters())

# Reach the loss through the explicitly imported `torch` package: a bare `nn`
# is only in scope if one of the star-imports happens to export it.
# NLLLoss expects log-probabilities as input, so the decoder presumably ends
# in a log-softmax — TODO confirm in the Decoder implementation.
criterion = torch.nn.NLLLoss()

In [16]:
# Train the model
EPOCHS = 10
# File that receives the periodic checkpoint; it is overwritten on every save.
CHECKPOINT_PATH = 'seq2seq_checkpoint.pt'

for epoch in range(EPOCHS):
    start = time.time()

    # Running sum of the per-batch losses returned by train_step.
    total_loss = 0

    for (batch, (inp, targ)) in enumerate(my_dataloader):
        # Move the batch to the same device as the models.
        inp, targ = inp.to(device), targ.to(device)
        batch_loss = train_step(inp, targ, encoder, decoder,
                                encoder_optimizer, decoder_optimizer,
                                criterion, device, BATCH_SIZE, targ_lang)

        total_loss += batch_loss

        # Progress line every 100 batches.
        if batch % 100 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, batch, batch_loss))

    # saving (checkpoint) the model every 2 epochs
    # The model and optimizer state dicts are stored together so an interrupted
    # run can be resumed instead of restarted from scratch.
    if (epoch + 1) % 2 == 0:
        torch.save({
            'epoch': epoch + 1,
            'encoder_state_dict': encoder.state_dict(),
            'decoder_state_dict': decoder.state_dict(),
            'encoder_optimizer_state_dict': encoder_optimizer.state_dict(),
            'decoder_optimizer_state_dict': decoder_optimizer.state_dict(),
        }, CHECKPOINT_PATH)

    # Mean loss over the epoch; the DataLoader drops the last partial batch, so
    # it yields steps_per_epoch batches.
    print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / steps_per_epoch))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))


Epoch 1 Batch 0 Loss 9.2308
Epoch 1 Batch 100 Loss 1.3001
Epoch 1 Batch 200 Loss 0.9794
Epoch 1 Batch 300 Loss 1.2874
Epoch 1 Batch 400 Loss 0.9178
Epoch 1 Batch 500 Loss 1.3672
Epoch 1 Batch 600 Loss 0.7364
Epoch 1 Batch 700 Loss 1.5136
Epoch 1 Batch 800 Loss 0.7674
Epoch 1 Batch 900 Loss 1.3587
Epoch 1 Batch 1000 Loss 0.6570
Epoch 1 Batch 1100 Loss 0.7186
Epoch 1 Batch 1200 Loss 0.6857
Epoch 1 Batch 1300 Loss 0.7897
Epoch 1 Batch 1400 Loss 0.7597
Epoch 1 Batch 1500 Loss 0.5298
Epoch 1 Batch 1600 Loss 0.9037
Epoch 1 Loss 0.9999
Time taken for 1 epoch 1102.848022222519 sec

Epoch 2 Batch 0 Loss 0.7875
Epoch 2 Batch 100 Loss 0.7621
Epoch 2 Batch 200 Loss 0.5236
Epoch 2 Batch 300 Loss 0.6147
Epoch 2 Batch 400 Loss 0.5021
Epoch 2 Batch 500 Loss 0.4828
Epoch 2 Batch 600 Loss 0.7860
Epoch 2 Batch 700 Loss 0.6613
Epoch 2 Batch 800 Loss 0.9216
Epoch 2 Batch 900 Loss 1.0046
Epoch 2 Batch 1000 Loss 0.7857
Epoch 2 Batch 1100 Loss 0.7995
Epoch 2 Batch 1200 Loss 0.6462
Epoch 2 Batch 1300 Loss 0.50

In [87]:
# Translate with beam_width=10 and alpha=0.1.
translate(
    u'trata de averiguarlo .',
    max_length_targ,
    max_length_inp,
    encoder,
    decoder,
    inp_lang,
    targ_lang,
    device,
    beam_width=10,
    alpha=0.1,
)

[2, 260, 7, 440, 4, 3]
[2, 260, 7, 440, 4, 3]
Input: <start> trata de averiguarlo . <end>
Predicted translation: <start> try to change . <end> 


In [68]:
# Same sentence with beam search switched off (beam_search=False).
translate(
    u'trata de averiguarlo .',
    max_length_targ,
    max_length_inp,
    encoder,
    decoder,
    inp_lang,
    targ_lang,
    device,
    beam_search=False,
)

Input: <start> trata de averiguarlo . <end>
Predicted translation: try to change . <end> 


In [97]:
# Translate with beam_width=10 and alpha=0.2.
translate(
    u'¿ todavia estan en casa ?',
    max_length_targ,
    max_length_inp,
    encoder,
    decoder,
    inp_lang,
    targ_lang,
    device,
    beam_width=10,
    alpha=0.2,
)

Input: <start> ¿ todavia estan en casa ? <end>
Predicted translation: <start> are you home home ? <end> 


In [96]:
# Same question with beam search switched off (beam_search=False).
# The input string keeps its trailing space.
translate(
    u'¿ todavia estan en casa ? ',
    max_length_targ,
    max_length_inp,
    encoder,
    decoder,
    inp_lang,
    targ_lang,
    device,
    beam_search=False,
)

Input: <start> ¿ todavia estan en casa ? <end>
Predicted translation: are you home home ? <end> 


In [23]:
# No decoding options are passed, so translate's own defaults apply.
translate(
    u'esta es mi vida .',
    max_length_targ,
    max_length_inp,
    encoder,
    decoder,
    inp_lang,
    targ_lang,
    device,
)

Input: <start> esta es mi vida . <end>
Predicted translation: <start> this is my life . 


In [101]:
# Translate with beam_width=10 and alpha=0.2.
translate(
    u'esta es mi vida .',
    max_length_targ,
    max_length_inp,
    encoder,
    decoder,
    inp_lang,
    targ_lang,
    device,
    beam_width=10,
    alpha=0.2,
)

Input: <start> esta es mi vida . <end>
Predicted translation: <start> this is my life . <end> 


In [24]:
# Same sentence with beam search switched off (beam_search=False).
translate(
    u'esta es mi vida .',
    max_length_targ,
    max_length_inp,
    encoder,
    decoder,
    inp_lang,
    targ_lang,
    device,
    beam_search=False,
)

Input: <start> esta es mi vida . <end>
Predicted translation: this is my life . <end> 


In [35]:
# Translate with beam search switched off (beam_search=False).
translate(
    u'hace mucho frio aqui .',
    max_length_targ,
    max_length_inp,
    encoder,
    decoder,
    inp_lang,
    targ_lang,
    device,
    beam_search=False,
)

 c ici
Input: <start> hace mucho frio aqui . <end>
Predicted translation: it weather very cold here . <end> 


In [100]:
%%time
# Translate with beam_width=3 and alpha=0.3; the %%time cell magic on the
# line above reports how long this call takes.
translate(
    u'hace mucho frio aqui .',
    max_length_targ,
    max_length_inp,
    encoder,
    decoder,
    inp_lang,
    targ_lang,
    device,
    beam_width=3,
    alpha=0.3,
)

Input: <start> hace mucho frio aqui . <end>
Predicted translation: <start> it weather very cold here . <end> 
CPU times: user 166 ms, sys: 11.8 ms, total: 178 ms
Wall time: 178 ms


# FIN