In [1]:
from nltk.translate.bleu_score import sentence_bleu
from torch.utils.data import TensorDataset
from dataset import Dataset
import torch
import io
import unicodedata
import re
from train import Trainer
from functools import partial
from models import *
import torch
import utils
import math

In [2]:
# Build the training dataset; its vocabularies are what the test sentences get indexed with.
corpus_files = ('./data/train.lc.norm.tok.en', './data/train.lc.norm.tok.fr')
embedding_files = ('./data/wiki.multi.en.vec', './data/wiki.multi.fr.vec')
pair_files = ('./data/full_en_fr.npy', './data/full_fr_en.npy')

ds = Dataset(
    corp_paths=corpus_files,
    emb_paths=embedding_files,
    pairs_paths=pair_files,
    max_length=20,
    test_size=0.1,
)
print('finish loading dataset')

Files already exit.
finish loading dataset


In [3]:
# load test set and preprocess

def unicode_to_ascii(s):
    """Return ``s`` with combining marks removed.

    The string is decomposed with NFD normalization and every character whose
    Unicode category is 'Mn' (nonspacing mark) is dropped, so an accented
    letter is reduced to its base letter. Characters without a decomposition
    are left as they are.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = []
    for ch in decomposed:
        if unicodedata.category(ch) == 'Mn':
            continue
        kept.append(ch)
    return ''.join(kept)

def normalize_string(s):
    """Clean a raw sentence into lowercase ASCII letters plus '.', '!' and '?'.

    The input is lowercased and trimmed, accents are removed with
    ``unicode_to_ascii``, a space is inserted before each '.', '!' or '?',
    every run of other non-letter characters collapses to a single space,
    and the result is trimmed again.
    """
    cleaned = unicode_to_ascii(s.lower().strip())
    # Order matters: punctuation is spaced out first, then everything else is collapsed.
    substitutions = (
        (r'([.!?])', r' \1'),
        (r'[^a-zA-Z.!?]+', r' '),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

src_test_path = './data/val.lc.norm.tok.en'
tgt_test_path = './data/val.lc.norm.tok.fr'


def _read_normalized_lines(path):
    """Read ``path`` as UTF-8 (undecodable bytes are ignored) and normalize every line."""
    with io.open(path, 'r', encoding='utf-8', newline='\n', errors='ignore') as handle:
        return [normalize_string(line) for line in handle]


src_sents = _read_normalized_lines(src_test_path)
tgt_sents = _read_normalized_lines(tgt_test_path)

# The two files are treated as line-aligned, so they must have the same number of lines.
assert len(src_sents) == len(tgt_sents)

# Convert each normalized sentence to vocabulary indices using the training vocabularies.
src_sentences = [ds.vocabs['src'].get_indices(text, language='src', pad=0) for text in src_sents]
tgt_sentences = [ds.vocabs['tgt'].get_indices(text, language='tgt', pad=0) for text in tgt_sents]

# Keep a pair only when both index sequences have at most this many entries
# (the same value that is passed as max_length when the training Dataset is built).
max_tokens = 20
kept = [
    k for k, (src_ids, tgt_ids) in enumerate(zip(src_sentences, tgt_sentences))
    if len(src_ids) <= max_tokens and len(tgt_ids) <= max_tokens
]

# Filter the raw strings and the index sequences with the same positions so they stay aligned.
src_sents = [src_sents[k] for k in kept]
tgt_sents = [tgt_sents[k] for k in kept]
src_sentences = [src_sentences[k] for k in kept]
tgt_sentences = [tgt_sentences[k] for k in kept]

# print(src_sents[10])
# print(tgt_sents[10])
# max_lenth = max([len(s) for s in tgt_sentences])
# print(max_lenth)

In [4]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 50

# Both checkpoints share this prefix and differ only in the direction suffix.
model_prefix = './saved_models/en-fr/' + str(num_epochs) + '/' + str(num_epochs) + '_word_to_word_1_' + str(num_epochs - 1)

# map_location remaps stored tensors onto `device` while unpickling, so a checkpoint that
# was saved from a CUDA run can still be loaded when only the CPU is available.
# NOTE: torch.load unpickles arbitrary objects; only load checkpoints you trust.
src2tgt = torch.load(model_prefix + '.src2tgt.pt', map_location=device).to(device)
tgt2src = torch.load(model_prefix + '.tgt2src.pt', map_location=device).to(device)

# Put both modules in inference mode so dropout is disabled while translating.
# (Harmless if model.evaluate already does this.)
src2tgt.eval()
tgt2src.eval()

print(src2tgt)
print(tgt2src)

Seq2Seq(
  (encoder): EncoderRNN(
    (embedding): Embedding(
      (embedding): Embedding(11244, 300)
    )
    (rnn): LSTM(300, 150, num_layers=3, dropout=0.3, bidirectional=True)
  )
  (decoder): DecoderRNN(
    (embedding): Embedding(
      (embedding): Embedding(11581, 300)
    )
    (attn): Linear(in_features=600, out_features=20, bias=False)
    (attn_sm): Softmax(dim=1)
    (attn_out): Linear(in_features=600, out_features=300, bias=False)
    (attn_out_relu): ReLU()
    (rnn): LSTM(300, 300, num_layers=3, dropout=0.3)
    (generator): Generator(
      (out): Linear(in_features=300, out_features=11581, bias=True)
      (sm): LogSoftmax()
    )
  )
)
Seq2Seq(
  (encoder): EncoderRNN(
    (embedding): Embedding(
      (embedding): Embedding(11581, 300)
    )
    (rnn): LSTM(300, 150, num_layers=3, dropout=0.3, bidirectional=True)
  )
  (decoder): DecoderRNN(
    (embedding): Embedding(
      (embedding): Embedding(11244, 300)
    )
    (attn): Linear(in_features=600, out_features=

In [5]:
def predict(model, batch, length, l1='src', l2='tgt', n_iters=None):
    """Translate one padded batch with ``model`` and return the rendered predictions.

    Args:
        model: object exposing ``evaluate(batch, length, sos_index, eos_index, n_iters=...)``.
        batch: tensor of input indices; moved to ``device`` before the call.
        length: tensor of sequence lengths; moved to ``device`` before the call.
        l1: language key used to look up the SOS index.
        l2: language key used to look up the EOS index and to render the output.
        n_iters: forwarded unchanged to ``model.evaluate``.

    Returns:
        The result of ``ds.visualize_batch`` for the predicted indices. Callers in this
        notebook extend a list with it and treat each element as a sentence string.
    """
    # NOTE(review): SOS is looked up for l1 but EOS for l2 — confirm this asymmetry is intended.
    sos_index = ds.get_sos_index(l1)
    eos_index = ds.get_eos_index(l2)
    # Inference only: disable autograd so no graph is recorded for the forward pass.
    with torch.no_grad():
        log_probs = model.evaluate(batch.to(device), length.to(device), sos_index, eos_index, n_iters=n_iters)
        token_indices = utils.log_probs2indices(log_probs)
    return ds.visualize_batch(token_indices, l2)
    

In [6]:
# generate the translations of the test set
pad_index = {}
pad_index['src'] = ds.get_pad_index('src')
pad_index['tgt'] = ds.get_pad_index('tgt')
# src_sentences

s_length, s_padded_batches = utils.pad_monolingual_batch(src_sentences, pad_index['src'])
s_data = TensorDataset(torch.tensor(s_length, dtype=torch.long),torch.tensor(s_padded_batches, dtype=torch.long))
s_data_iter = torch.utils.data.DataLoader(s_data, batch_size=32, shuffle=False, pin_memory=True)
src_predictions = []
for i, batch in enumerate(s_data_iter):
    length = batch[0]
    b = batch[1].transpose(0, 1)
    src_predictions += predict(src2tgt, b, length, n_iters=20)
    
t_length, t_padded_batches = utils.pad_monolingual_batch(tgt_sentences, pad_index['tgt'])
t_data = TensorDataset(torch.tensor(t_length, dtype=torch.long),torch.tensor(t_padded_batches, dtype=torch.long))
t_data_iter = torch.utils.data.DataLoader(t_data, batch_size=32, shuffle=False, pin_memory=True)
tgt_predictions = []
for i, batch in enumerate(t_data_iter):
    length = batch[0]
    b = batch[1].transpose(0, 1)
    tgt_predictions += predict(tgt2src, b, length, l1='tgt', l2='src', n_iters=20)

In [7]:
# compare src_to_tgt
# Tokenize on whitespace; each hypothesis is scored against a single reference translation.
predictions = [sentence.strip().split() for sentence in src_predictions]
references = [[sentence.strip().split()] for sentence in tgt_sents]

# Scores are paired by position, so both lists must describe the same sentences.
assert len(predictions) == len(references), 'predictions and references are misaligned'
assert predictions, 'no predictions to score'

# weights=(0.5, 0.5) restricts BLEU to unigrams and bigrams. No smoothing is applied, so
# NLTK warns for any hypothesis that shares no bigram with its reference.
bleu_scores = [sentence_bleu(refs, hyp, weights=(0.5, 0.5)) for refs, hyp in zip(references, predictions)]
print(sum(bleu_scores) / len(bleu_scores))

0.2759952747081595


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [8]:
# compare tgt_to_src
# Tokenize on whitespace; each hypothesis is scored against a single reference translation.
predictions = [sentence.strip().split() for sentence in tgt_predictions]
references = [[sentence.strip().split()] for sentence in src_sents]

# Scores are paired by position, so both lists must describe the same sentences.
assert len(predictions) == len(references), 'predictions and references are misaligned'
assert predictions, 'no predictions to score'

# weights=(0.5, 0.5) restricts BLEU to unigrams and bigrams; no smoothing is applied.
bleu_scores = [sentence_bleu(refs, hyp, weights=(0.5, 0.5)) for refs, hyp in zip(references, predictions)]
print(sum(bleu_scores) / len(bleu_scores))

0.3126098050650645


In [9]:
# Show one reference/hypothesis pair from whichever direction was scored last.
sample_index = 50
reference = references[sample_index]
candidate = predictions[sample_index]
for tokens in (reference, candidate):
    print(tokens)

[['a', 'man', 'in', 'a', 'black', 'wetsuit', 'is', 'surfing', 'on', 'a', 'wave', '.']]
['a', 'man', 'in', 'a', 'wetsuit', 'surfing', 'a', 'wave', '.']
