In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import io
import unicodedata
import string
import re
import random
import os
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from torch.utils.data import Dataset
from torch.optim import lr_scheduler
import itertools
import glob
plt.switch_backend('agg')
import matplotlib.ticker as ticker
from sacrebleu import corpus_bleu
import sacrebleu

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
import pdb

In [3]:
batch_size = 32
words_to_load = 100000
SOS_token = 0
EOS_token = 1
PAD_token = 2
UNK_token = 3
LR_RATE = 0.001
MAX_LENGTH = 100
hidden_size = 128
teacher_forcing_ratio = 0.5

__Preprocess Data__

In [4]:
class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS", 2: "<pad>", 3: "<unk>"}
        self.n_words = 4  # Count SOS, EOS, pad and unk

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [5]:
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

In [6]:
def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [7]:
def readLangs(dataset, lang1, lang2):
    chinese = os.getcwd()+'/iwslt-zh-en/{}.tok.{}'.format(dataset, lang1)
    english = os.getcwd()+'/iwslt-zh-en/{}.tok.{}'.format(dataset, lang2)

    chinese_lines = open(chinese, encoding='utf-8').read().strip().split('\n')
    english_lines = open(english, encoding='utf-8').read().strip().split('\n')
    length = len(chinese_lines)

    pairs = [[chinese_lines[i], normalizeString(english_lines[i])] for i in range(length)]
    pairs = filterPairs(pairs)
    
    input_lang = Lang(lang1)
    output_lang = Lang(lang2)

    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])

    return input_lang, output_lang, pairs

In [8]:
train_input_lang, train_output_lang, train_pairs = readLangs('train', 'zh', 'en')
val_input_lang, val_output_lang, val_pairs = readLangs('dev', 'zh', 'en')
test_input_lang, test_output_lang, test_pairs = readLangs('test', 'zh', 'en')

In [46]:
train_input_lang.index2word[399]

'你们'

In [122]:
len(train_pairs)  # 6621 batch

211869

In [123]:
len(val_pairs)

1250

In [124]:
len(test_pairs)

1395

__Data Loader__

In [9]:
class NMTDataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """

    def __init__(self, input_lang, output_lang, pairs):
        """
        @param data_list_1: list of sentence 1 tokens 
        @param data_list_2: list of sentence 2 tokens
        @param target_list: list of review targets 

        """
        self.input_lang = input_lang
        self.output_lang = output_lang
        self.pairs = pairs

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        input_sentence = self.pairs[key][0]
        input_indexes = [self.input_lang.word2index[word] for word in input_sentence.split(' ')]
        input_indexes.append(EOS_token)
        input_length = len(input_indexes)

        output_sentence = self.pairs[key][1]
        output_indexes = [self.output_lang.word2index[word] for word in output_sentence.split(' ')]
        output_indexes.append(EOS_token)
        output_length = len(output_indexes)
        return [input_indexes, input_length, output_indexes, output_length]

    
def NMTDataset_collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all 
    data have the same length
    """
    input_ls = []
    output_ls = []
    input_length_ls = []
    output_length_ls = []
    
    for datum in batch:
        input_length_ls.append(datum[1])
        output_length_ls.append(datum[3])
    
    #find max length in each batch
    max_input = sorted(input_length_ls)[-1]
    max_output = sorted(output_length_ls)[-1]
    
    # padding
    for datum in batch:
        padded_vec_input = np.pad(np.array(datum[0]), 
                                  pad_width=((0,max_input-datum[1])), 
                                  mode="constant", constant_values=2).tolist()
        padded_vec_output = np.pad(np.array(datum[2]), 
                                   pad_width=((0,max_output-datum[3])), 
                                   mode="constant", constant_values=2).tolist()
        input_ls.append(padded_vec_input)
        output_ls.append(padded_vec_output)
    return [torch.tensor(torch.from_numpy(np.array(input_ls)), device=device), 
            torch.tensor(input_length_ls, device=device), 
            torch.tensor(torch.from_numpy(np.array(output_ls)), device=device), 
            torch.tensor(output_length_ls, device=device)]

In [10]:
# create pytorch dataloader
train_dataset = NMTDataset(train_input_lang, train_output_lang, train_pairs)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size,
                                           collate_fn=NMTDataset_collate_func,
                                           shuffle=True)

val_dataset = NMTDataset(val_input_lang, val_output_lang, val_pairs)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         collate_fn=NMTDataset_collate_func,
                                         shuffle=True)

In [11]:
for i in train_loader:
#     print(i)
#     print(i[2])
#     for ind in i[2]:
#         for token in ind:
#             print(token)
#             print(train_output_lang.index2word[token.item()])
#     for ind in i[2]:
#         print(' '.join(train_output_lang.index2word[token.item()] for token in ind))
#     print([train_output_lang.index2word[token.item()] for ind in i[2] for token in ind])
    break

__Embedding__

In [12]:
def load_embedding(ft_path, words_to_load):
    fin = io.open(ft_path, 'r', encoding='utf-8', newline='\n', errors='ignore')

    n, d = map(int, fin.readline().split())
    vocab_size = words_to_load + 4
    embedding_dim = d

    embedding_mat = np.zeros((vocab_size, embedding_dim))
    token2id = {}
    id2token = {}
    all_tokens = ['SOS', 'EOS', '<unk>', '<pad>']

    for i, line in enumerate(fin):
        if i >= words_to_load:
            break
        s = line.rstrip().split(' ')
        embedding_mat[i+4, :] = np.asarray(s[1:])
        token2id[s[0]] = i+4
        id2token[i+4] = s[0]
        all_tokens.append(s[0])

        token2id['<pad>'] = PAD_token 
        token2id['<unk>'] = UNK_token
        token2id['SOS'] = SOS_token
        token2id['EOS'] = EOS_token
        id2token[PAD_token] = '<pad>'
        id2token[UNK_token] = '<unk>'
        id2token[SOS_token] = 'SOS'
        id2token[EOS_token] = 'EOS'
        embedding_mat[PAD_token, :] = np.zeros((1,d))
        #generate normal dist 1d array for UNK, SOS, EOS token
        embedding_mat[UNK_token, :] = np.random.normal(size=d)
        embedding_mat[SOS_token, :] = np.random.normal(size=d)
        embedding_mat[EOS_token, :] = np.random.normal(size=d)
        
    return embedding_mat, all_tokens, token2id, id2token

In [13]:
fname_zh = os.getcwd()+'/wiki.zh.vec'
fname_eng = '/'.join(os.getcwd().split('/')[:-1])+'/hw2/wiki-news-300d-1M.vec'
embedding_mat_zh, all_tokens_zh, token2id_zh, id2token_zh = load_embedding(fname_zh, words_to_load)
embedding_mat_en, all_tokens_en, token2id_en, id2token_en = load_embedding(fname_eng, words_to_load)

In [14]:
embedding_mat_zh.shape

(100004, 300)

In [15]:
embedding_mat_en.shape

(100004, 300)

__Encoder__

In [16]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        
        embed_mat = torch.from_numpy(embedding_mat_zh).float()
        n, embed_dim = embed_mat.shape
        mask = np.zeros((n,1))
        mask[0] = 1
        mask[1] = 1
        mask[2] = 1
        mask[3] = 1
        mask = torch.from_numpy(mask).float()
        self.mask_embedding = nn.Embedding.from_pretrained(mask, freeze = False)
        self.embedding = nn.Embedding.from_pretrained(embed_mat, freeze = True)
        
        self.gru = nn.GRU(embed_dim, hidden_size, batch_first=True)

    def forward(self, input, input_len, hidden):
        # Compute sorted sequence lengths
        _, idx_sort = torch.sort(input_len, dim=0, descending=True)
        _, idx_unsort = torch.sort(idx_sort, dim=0)
        
        # get embedding of characters
        embed = self.embedding(input)
        mask = self.mask_embedding(input)
        
        embedded = mask*embed + (1-mask)*embed.clone().detach()
        
        # Sort embedding and length
        embedded = embedded.index_select(0, idx_sort)
        input_len = input_len.index_select(0, idx_sort)
        
        packed_emb = nn.utils.rnn.pack_padded_sequence(embedded, input_len.cpu().numpy(), batch_first=True)
        packed_output, hidden = self.gru(packed_emb, hidden)
        output, output_lens =  nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        
        # Unsort output and last hidden unit
        output = output.index_select(0, idx_unsort)
        
        return output, hidden

    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size, device=device)

In [36]:
class EncoderCNN(nn.Module):
    def __init__(self, hidden_size, kernel_dim, batch_size):
        super(EncoderCNN, self).__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        embed_mat = torch.from_numpy(embedding_mat_zh).float()
        n, embed_dim = embed_mat.shape
        mask = np.zeros((n,1))
        mask[0] = 1
        mask[1] = 1
        mask[2] = 1
        mask[3] = 1
        mask = torch.from_numpy(mask).float()
        self.mask_embedding = nn.Embedding.from_pretrained(mask, freeze = False)
        self.embedding = nn.Embedding.from_pretrained(embed_mat, freeze = True)

        self.conv1 = nn.Conv1d(embed_dim, hidden_size*2, kernel_size=kernel_dim, padding=1)
        self.conv2 = nn.Conv1d(hidden_size*2, hidden_size*2, kernel_size=kernel_dim, padding=1)
        self.linear1 = nn.Linear(hidden_size*2, hidden_size)

    def forward(self, input):
        # get embedding of words
        embed = self.embedding(input)
        mask = self.mask_embedding(input)
        
        embedded = mask*embed + (1-mask)*embed.clone().detach()
#         # get embedding of words
#         embedded = self.embedding(inputs).float()
        
        # perform convolution 1
        hidden = self.conv1(embedded.transpose(1,2)).transpose(1,2)
        hidden = F.relu(hidden.contiguous().view(-1, hidden.size(-1))).view(batch_size, hidden.size(1), hidden.size(-1))

        # perform convolution 2
        hidden = self.conv2(hidden.transpose(1,2)).transpose(1,2)
        hidden = F.relu(hidden.contiguous().view(-1, hidden.size(-1))).view(batch_size, hidden.size(1), hidden.size(-1))

        hidden,_ = hidden.max(dim=1)
        out = self.linear1(hidden)
        out = out.view(1,out.size(0),out.size(1))
        
        return out

__Decoder Without Attention__

In [26]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        
        embed_mat = torch.from_numpy(embedding_mat_en).float()
        n, embed_dim = embed_mat.shape
        mask = np.zeros((n,1))
        mask[0] = 1
        mask[1] = 1
        mask[2] = 1
        mask[3] = 1
        mask = torch.from_numpy(mask).float()
        self.mask_embedding = nn.Embedding.from_pretrained(mask, freeze = False)
        self.embedding = nn.Embedding.from_pretrained(embed_mat, freeze = True)
        
        self.gru = nn.GRU(embed_dim, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, word_input, hidden):
        # get embedding of words
        embed = self.embedding(word_input)
        mask = self.mask_embedding(word_input)
        
        embedded = mask*embed + (1-mask)*embed.clone().detach()

        output, hidden = self.gru(embedded, hidden)
        
        # Final output layer
        output = output.squeeze(1) # B x N
        output = self.linear(output)
        output = self.softmax(output)

        return output, hidden

__Training__

In [27]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [28]:
def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [32]:
def train(input, target, input_len, target_len, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH, teach_forcing_ratio=0.5, encoder_cnn = False):
    
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    max_input_len = max(input_len)
    max_target_len = max(target_len)

    loss = 0
    
    if not encoder_cnn:
        encoder_hidden = encoder.initHidden(batch_size)
        encoder_output, encoder_hidden = encoder(input, input_len, encoder_hidden)
    else:
        encoder_hidden = encoder(input)
        
    decoder_input = torch.tensor([[SOS_token]]*batch_size, device=device)
    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(max_target_len):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target[:,di])
            decoder_input = target[:,di].unsqueeze(1)  # Teacher forcing (batch_size, 1)

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(max_target_len):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach().unsqueeze(1)  # detach from history as input
            loss += criterion(decoder_output, target[:,di])
    #         if decoder_input.item() == EOS_token:
    #             break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / float(max_target_len)

In [41]:
def trainIters(loader, encoder, decoder, n_iters, encoder_cnn, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    criterion = nn.NLLLoss()
    
    for iter in range(1, n_iters + 1):
        best_bleu = None
        save_path = os.getcwd() + '/saved_model/En-CNN-De-NoAttn.pt'
        
        for i, (input, input_len, target, target_len) in enumerate(train_loader):
            loss = train(input, target, input_len, target_len, encoder, decoder, 
                         encoder_optimizer, decoder_optimizer, criterion, 
                         max_length=MAX_LENGTH, teach_forcing_ratio=teacher_forcing_ratio, encoder_cnn = encoder_cnn)
            print_loss_total += loss
            plot_loss_total += loss
            
            
                    
            if i % print_every == 0:
                current_bleu = test(encoder, decoder, val_loader, encoder_cnn)
                if not best_bleu or current_bleu > best_bleu:
                    torch.save({
                                'epoch': iter,
                                'encoder_state_dict': encoder.state_dict(),
                                'decoder_state_dict': decoder.state_dict(),
                                'encoder_optimizer_state_dict': encoder_optimizer.state_dict(),
                                'decoder_optimizer_state_dict': decoder_optimizer.state_dict(),
                                'train_loss': loss,
                                'best_BLEU': best_bleu
                                }, save_path)
                    best_bleu = current_bleu
                
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (Epoch: %d %d%%) | Train Loss: %.4f | Best Bleu: %.4f | Current Blue: %.4f' 
                      % (timeSince(start, iter / n_iters), iter, iter / n_iters * 100, print_loss_avg, best_bleu, current_bleu))

            if i % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0
#     showPlot(plot_losses)
    

#     for iter in range(1, n_iters + 1):
#         for i, (input, input_len, target, target_len) in enumerate(loader):
#             loss = train(input, target, input_len, target_len, encoder, decoder, 
#                          encoder_optimizer, decoder_optimizer, criterion, 
#                          max_length=MAX_LENGTH, teach_forcing_ratio=teacher_forcing_ratio)
#             print_loss_total += loss
#             plot_loss_total += loss
            
#             # TO DO: ADD BLEU
#             if i % print_every == 0:
#                 print_loss_avg = print_loss_total / float(print_every)
#                 print_loss_total = 0
#                 print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
#                                              iter, iter / n_iters * 100, print_loss_avg))

#             if i % plot_every == 0:
#                 plot_loss_avg = plot_loss_total / plot_every
#                 plot_losses.append(plot_loss_avg)
#                 plot_loss_total = 0
#     showPlot(plot_losses)

In [43]:
# encoder = EncoderRNN(train_input_lang.n_words, hidden_size).to(device)
encoder = EncoderCNN(hidden_size,kernel_dim=3,batch_size=batch_size).to(device)
noattn_decoder = DecoderRNN(hidden_size, train_output_lang.n_words).to(device)
# attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
# attn_decoder1 = BahdanauAttnDecoderRNN(hidden_size, output_lang.n_words, n_layers=1, dropout_p=0.1).to(device)

#UNCOMMENT TO TRAIN THE MODEL
trainIters(train_loader, encoder, noattn_decoder, n_iters=1, encoder_cnn=True, print_every=1, plot_every=1, learning_rate=LR_RATE)
# trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

# encoder.load_state_dict(torch.load("encoder.pth"))
# attn_decoder1.load_state_dict(torch.load("attn_decoder.pth"))

0m 2s (- 0m 0s) (Epoch: 1 100%) | Train Loss: 10.8002 | Best Bleu: 0.0000 | Current Blue: 0.0000
0m 5s (- 0m 0s) (Epoch: 1 100%) | Train Loss: 10.6844 | Best Bleu: 0.0000 | Current Blue: 0.0000
0m 8s (- 0m 0s) (Epoch: 1 100%) | Train Loss: 10.8003 | Best Bleu: 0.0000 | Current Blue: 0.0000
0m 11s (- 0m 0s) (Epoch: 1 100%) | Train Loss: 10.6366 | Best Bleu: 0.0000 | Current Blue: 0.0000
0m 14s (- 0m 0s) (Epoch: 1 100%) | Train Loss: 10.7102 | Best Bleu: 0.0000 | Current Blue: 0.0000
0m 17s (- 0m 0s) (Epoch: 1 100%) | Train Loss: 10.6480 | Best Bleu: 0.0000 | Current Blue: 0.0000
0m 20s (- 0m 0s) (Epoch: 1 100%) | Train Loss: 10.5280 | Best Bleu: 0.0000 | Current Blue: 0.0000
0m 23s (- 0m 0s) (Epoch: 1 100%) | Train Loss: 10.4667 | Best Bleu: 0.0000 | Current Blue: 0.0000


KeyboardInterrupt: 

In [40]:
def evaluate(encoder, decoder, input, input_len, encoder_cnn, max_length=MAX_LENGTH):
    """
    Function that generate translation.
    First, feed the source sentence into the encoder and obtain the hidden states from encoder.
    Secondly, feed the hidden states into the decoder and unfold the outputs from the decoder.
    Lastly, for each outputs from the decoder, collect the corresponding words in the target language's vocabulary.
    And collect the attention for each output words.
    @param encoder: the encoder network
    @param decoder: the decoder network
    @param input: string, input sentence in source language to be translated
    @param max_length: the max # of words that the decoder can return
    @output decoded_words: a list of words in target language
    @output decoder_attentions: a list of vector, each of which sums up to 1.0
    """    
    # process input sentence
    with torch.no_grad():
        
        max_input_len = max(input_len)
        
        if not encoder_cnn:
            encoder_hidden = encoder.initHidden(batch_size)
            encoder_output, encoder_hidden = encoder(input, input_len, encoder_hidden)
        else:
            encoder_hidden = encoder(input)

        decoder_input = torch.tensor([[SOS_token]]*batch_size, device=device)
        # decode the context vector
        decoder_hidden = encoder_hidden # decoder starts from the last encoding sentence
        
        # output of this function
        decoded_words = []
#         decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            # for each time step, the decoder network takes two inputs: previous outputs and the previous hidden states
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoded_words.append(topi.cpu().numpy())
            decoder_input = topi.squeeze().detach().unsqueeze(1)  # detach from history as input

        return np.asarray(decoded_words).T#, decoder_attentions[:di + 1]

In [39]:
def test(encoder, decoder, data_loader, encoder_cnn):
    total_score = 0
    count = 0
    for i, (input, input_len, target, target_len) in enumerate(data_loader):
        decoded_words = evaluate(encoder, decoder, input, input_len, encoder_cnn)
        candidate_sentences = []
        for ind in range(decoded_words.shape[1]):
            sent_words = []
            for token in decoded_words[0][ind]:
                if token != EOS_token:
                    sent_words.append(train_output_lang.index2word[token])
                else:
                    break
            sent_words = ' '.join(sent_words)
#             sent_words = ' '.join([train_output_lang.index2word[token] for token in decoded_words[0][ind]])
            candidate_sentences.append(sent_words)            
#         print('candidate')
        
        reference_sentences = []
        for sent in target:
            sent_words = []
            for token in sent:
                if token.item() != EOS_token:
                    sent_words.append(train_output_lang.index2word[token.item()])
                else:
                    break
            sent_words = ' '.join(sent_words)
#             sent_words = ' '.join([train_output_lang.index2word[token.item()] for token in sent])
            reference_sentences.append(sent_words)
#         print('reference')
        
        score = corpus_bleu(candidate_sentences, [reference_sentences], smooth='floor', smooth_floor=0.0, force=False)
        
        count += 1
        total_score += score.score
        if i == 10:
            break
    return total_score / float(count)

In [117]:
score = test(encoder,noattn_decoder,train_loader)

candidate
reference
candidate
reference
candidate
reference
candidate
reference
candidate
reference
candidate
reference
candidate
reference
candidate
reference
candidate
reference
candidate
reference
candidate
reference


In [118]:
score

0.0

In [28]:
def evaluateRandomly(encoder, decoder, n=10):
    """
    Randomly select a English sentence from the dataset and try to produce its French translation.
    Note that you need a correct implementation of evaluate() in order to make this function work.
    """    
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [None]:
# torch.save(encoder1.state_dict(), "encoder.pth")
# torch.save(attn_decoder1.state_dict(), "attn_decoder.pth")

In [None]:
evaluateRandomly(encoder, noattn_decoder)

In [None]:
corpus_bleu(sys_stream, ref_streams, smooth='exp', smooth_floor=0.0, force=False, lowercase=False,
                tokenize=DEFAULT_TOKENIZER, use_effective_order=False) 

In [119]:
ref = [['this is   test']]
candidates = ['this is a test']
# score = sacrebleu.corpus_bleu(ref,candidates)
score = sacrebleu.corpus_bleu(candidates,ref)
print(score.score)

35.35533905932737
