In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
import time
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
! pip install zhon



'''Some part of this code is modified from NLP lab code'''

In [110]:
PAD_token = 0
SOS_token = 1
EOS_token = 2
UNK_token = 3



class Lang:
    ''' 
    Class to build word vaocabulary
    '''
    def __init__(self, name):
        self.name = name
        self.trimmed = False
        self.word2index = {"UNK":3}
        self.word2count = {}
        self.index2word = {0: "PAD", 1: "SOS", 2: "EOS", 3: "UNK"}
        self.n_words = 4  # Count PAD, SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
            

    def trim_sentences(self, min_count):
        if self.trimmed: 
            return
        self.trimmed = True
        
        words_keep = []
        
        for i, j in self.word2count.items():
            if j >= min_count:
                words_keep.append(i)

        print('keep_words %s / %s = %.3f' % (
            len(words_keep), len(self.word2index), len(words_keep) / len(self.word2index)
        ))


        self.word2index = {"UNK":3}
        self.word2count = {}
        self.index2word = {0: "PAD", 1: "SOS", 2: "EOS", 3: "UNK"}
        self.n_words = 4 
        
        for word in words_keep:
            self.addWord(word)

In [25]:
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString_en(s):
    '''
    Normalize function for English and Vietnamese
    '''
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"&apos", r"", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

from zhon.hanzi import punctuation
def normalizeString_zh(s):
    '''
    Normalize function for Chinese
    '''
    
    punc = '＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､\u3000、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·.'
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([。！？])", r" \1", s)
    s = re.sub(r"[0-9]", r" ", s)
    s = re.sub(r"[%s]+" %punc, r" ", s)
    return s

In [26]:
def readLangs(lang1, lang2, reverse=False):
    '''
    function to read in data file
    '''
    
    print("Reading lines...")

    # Read the file and split into lines
    lines1 = open(lang1, encoding='utf-8').\
        read().strip().split('\n')
    
    lines2 = open(lang2, encoding='utf-8').\
        read().strip().split('\n')
    
    # Split every line into pairs and normalize
    pairs = [None] * len(lines1)
    print(len(pairs))
    for i in range(len(lines1)):
        pairs[i] = [normalizeString_en(lines1[i]), normalizeString_en(lines2[i])]
        
    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        #set object
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [27]:
MAX_LENGTH = 9999


def filterPairs(pairs):
    '''
    Filter maximum length
    '''
    
    keep_pairs = []
    for pair in pairs:
        if len(pair[0]) <= MAX_LENGTH \
            and len(pair[1]) <= MAX_LENGTH:
                keep_pairs.append(pair)
    return keep_pairs    
  

In [28]:
def prepareData(lang1, lang2, reverse=False):
    
    '''
    Assemble function
    '''
    
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    #print(input_lang.word2index)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs



In [29]:
train_input_lang, train_output_lang, train_pairs = prepareData('train.tok.en', 'train.tok.vi', True)
test_input_lang, test_output_lang, test_pairs = prepareData('test.tok.en', 'test.tok.vi', True)

print(random.choice(train_pairs))

Reading lines...
133317
Read 133317 sentence pairs
Trimmed to 133317 sentence pairs
Counting words...
Counted words:
train.tok.vi 14237
train.tok.en 41271
Reading lines...
1553
Read 1553 sentence pairs
Trimmed to 1553 sentence pairs
Counting words...
Counted words:
test.tok.vi 1071
test.tok.en 3407
['usda cho phep cac thuoc khang sinh cac hormon va cac thuoc tru sau trong nguon cung thuc pham cua ta va usda tra tien cho quang cao nay trong tap chi time .', 'the usda allows these antibiotics these hormones and these pesticides in our food supply and the usda paid for this ad in time magazine .']


In [30]:
val_input_lang, val_output_lang, val_pairs = prepareData('dev.tok.en', 'dev.tok.vi', True)

Reading lines...
1268
Read 1268 sentence pairs
Trimmed to 1268 sentence pairs
Counting words...
Counted words:
dev.tok.vi 1105
dev.tok.en 3571


In [31]:
# Trim the words based on the min word count
MIN_COUNT = 2

train_input_lang.trim_sentences(MIN_COUNT)
train_output_lang.trim_sentences(MIN_COUNT)

keep_words 7844 / 14234 = 0.551
keep_words 27395 / 41268 = 0.664


In [32]:
def indexesFromSentence(lang, sentence):
    out = []
    for word in sentence.split(' '):
        if word in lang.word2index.keys():
            out.append(lang.word2index[word])
        else:
            out.append(lang.word2index['UNK'])
    return out

In [33]:
##dataloader
import numpy as np
import torch
from torch.utils.data import Dataset

PAD_IDX = 0
SOS_IDX = 1
EOS_IDX = 2
UNK_IDX = 3

class MTDataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """
    
    def __init__(self, input_lang, output_lang, pairs):
        """
        @param candiate_list: list of candidate sentence
        @param reference_list: list of reference sentence

        """
        self.pairs = pairs
        self.input_lang = input_lang
        self.output_lang = output_lang

        self.candidate_list = [indexesFromSentence(self.input_lang, pair[0]) for pair in pairs]
        self.reference_list = [indexesFromSentence(self.output_lang, pair[1]) for pair in pairs]

        assert (len(self.candidate_list) == len(self.reference_list))

    def __len__(self):
        return len(self.pairs)
        
    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        
        candidate_idx = self.candidate_list[key][:MAX_LENGTH]
        reference_idx = self.reference_list[key][:MAX_LENGTH]
        candidate_idx = [SOS_IDX] + candidate_idx + [EOS_IDX]
        reference_idx = [SOS_IDX] + reference_idx + [EOS_IDX]

        return [candidate_idx, len(candidate_idx), reference_idx, len(reference_idx)]
    

def MT_collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all 
    data have the same length
    """
    candidate_list = []
    reference_list = []
    candidate_length_list = []
    reference_length_list = []
    for datum in batch:
        candidate_length_list.append(datum[1])
        reference_length_list.append(datum[3])
    # padding
    MAX_LENGTH = [max(candidate_length_list), max(reference_length_list)]
    for datum in batch:
        padded_vec_1 = np.pad(np.array(datum[0]), 
                                pad_width=((0,MAX_LENGTH[0]-datum[1])), 
                                mode="constant", constant_values=0)
        candidate_list.append(padded_vec_1)
        
        padded_vec_2 = np.pad(np.array(datum[2]), 
                                pad_width=((0,MAX_LENGTH[1]-datum[3])), 
                                mode="constant", constant_values=0)
        reference_list.append(padded_vec_2)
    
    sorted_order = np.argsort(candidate_length_list)[::-1]
    candidate_list, candidate_length_list = np.array(candidate_list)[sorted_order], np.array(candidate_length_list)[sorted_order]
    reference_list, reference_length_list = np.array(reference_list)[sorted_order], np.array(reference_length_list)[sorted_order]
    
    return [torch.from_numpy(np.array(candidate_list)), torch.LongTensor(candidate_length_list), 
            torch.from_numpy(np.array(reference_list)), torch.LongTensor(reference_length_list)]


In [34]:
BATCH_SIZE = 32
train_dataset = MTDataset(train_input_lang, train_output_lang, train_pairs)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=MT_collate_func,
                                           shuffle=True,
                                           pin_memory = True)

val_dataset = MTDataset(train_input_lang, train_output_lang, val_pairs)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=MT_collate_func,
                                           shuffle=False,
                                           pin_memory = True)

test_dataset = MTDataset(train_input_lang, train_output_lang, test_pairs)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=MT_collate_func,
                                           shuffle=False,
                                           pin_memory = True)


In [35]:
class EncoderRNN(nn.Module):
    '''
    One layer bidurectional GRU model
    '''
    def __init__(self, input_size, embedding_size, hidden_size, num_layers=1):
        super(EncoderRNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.gru = nn.GRU(embedding_size, hidden_size, num_layers=num_layers, bidirectional=True)

    def forward(self, input, input_length, hidden=None):
         

        batch_size = input.size(0)

        embedded = self.embedding(input.transpose(0,1))
        packed = nn.utils.rnn.pack_padded_sequence(embedded, input_length) 

        output, hidden = self.gru(packed, hidden)
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(output)
        output = output[:, :, :self.hidden_size] + output[:, : ,self.hidden_size:]
        hidden = hidden[:self.num_layers, :, :] + hidden[self.num_layers:,:,:]
        
        return output, hidden
     
    def initHidden(self, batch_size):
        return torch.zeros(self.num_layers*2, batch_size, self.hidden_size, device=device)

In [36]:
class DecoderRNN(nn.Module):
    def __init__(self, output_size, hidden_size, n_layers):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        self.n_layers = n_layers

    def forward(self, input, hidden):

        
        input = input.view(1,-1)
        _, batch_size = input.size()

        
        output = self.embedding(input).view(1, batch_size, self.hidden_size)

        output = F.relu(output)

        output, hidden = self.gru(output, hidden)

        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self,batch_size):
        return Variable(torch.zeros(1, batch_size, self.hidden_size, device = device))

In [37]:
###minibatch
teacher_forcing_ratio = 1

def train(input_tensor, target_tensor, input_length, output_length, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    #encoder_hidden = encoder.initHidden()
    
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    batch_size= input_tensor.size(0)
    target_length = target_tensor.size(1)
    
    input_tensor = input_tensor.cuda()
    target_tensor = target_tensor.cuda()
    
    encoder_outputs, encoder_hidden = encoder(input_tensor, input_length, None)
    
    loss = 0

    decoder_input = torch.tensor([[SOS_IDX]*batch_size], device=device)
    
    decoder_hidden = encoder_hidden


    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    if use_teacher_forcing:
        
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            #import pdb; pdb.set_trace()
            loss += criterion(decoder_output, target_tensor[:,di])
            decoder_input = target_tensor[:,di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[:,di])


    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [38]:
###minibatch
def trainEpochs(encoder, decoder, n_epochs, print_every, plot_every, learning_rate):
    start = time.time()

    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    
    criterion = nn.NLLLoss()
    
    
    for epoch in range(n_epochs):
        for i, (candidate, length_1, reference, length_2) in enumerate(train_loader):
 
            loss = train(candidate, reference, length_1, length_2, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total += loss
   
            plot_loss_total += loss

            if i > 0 and i % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                #print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters), iter, iter / n_iters * 100, print_loss_avg))
                print('Time: {}, Epoch: [{}/{}], Step: [{}/{}], Train Loss: {}'.format(timeSince(start, i+1 / len(train_loader)), 
                    epoch+1, n_epochs, i+1, len(train_loader), print_loss_avg))
            if i >0 and i % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0
                
        print_loss_total = 0
        plot_loss_total = 0
        
        s, output_words, true_words = evaluateRandomly(encoder1, decoder1, val_loader)
        print('-------vali score---------')
        print('score {}'.format(s))
        print('---------Save trained model--------')
        torch.save(encoder1.state_dict(), "encoder_wo_attn{}.pth".format(epoch+1))
        torch.save(decoder1.state_dict(), "decoder_wo_attn{}.pth".format(epoch+1))
        
        
    showPlot(plot_losses)
    return plot_losses
    

In [72]:
import matplotlib.pyplot as plt
#plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)
    

In [40]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [41]:
hidden_size = 512
n_layers = 1
dropout = 0.1
batch_size = 32

# Configure training/optimization
#clip = 50.0
teacher_forcing_ratio = 1
learning_rate = 0.0005
#decoder_learning_ratio = 5.0
n_epochs = 10
epoch = 0
plot_every = 20
print_every = 500
evaluate_every = 1000

In [42]:
encoder1 = EncoderRNN(train_input_lang.n_words, hidden_size, num_layers=1).to(device)
decoder1 = DecoderRNN(train_output_lang.n_words, hidden_size, n_layers = 1).to(device)

In [46]:
encoder1.load_state_dict(torch.load("encoder_wo_attn1.pth"))
decoder1.load_state_dict(torch.load("decoder_wo_attn1.pth"))

In [48]:
####Start training!
#hidden_size = 300
#learning_rate = 0.0002
#number of epoch = 15
#language = Vietnamese
#n_epochs = 5
train_loss = trainEpochs(encoder1, decoder1, n_epochs, print_every, plot_every, learning_rate) 

Time: 2m 14s (- -3m 45s), Epoch: [1/10], Step: [501/4167], Train Loss: 1.3796693702717406
Time: 4m 27s (- -5m 32s), Epoch: [1/10], Step: [1001/4167], Train Loss: 1.358652635617888
Time: 6m 40s (- -7m 20s), Epoch: [1/10], Step: [1501/4167], Train Loss: 1.3595095971989277
Time: 8m 51s (- -9m 8s), Epoch: [1/10], Step: [2001/4167], Train Loss: 1.3399361855020766
Time: 11m 2s (- -12m 57s), Epoch: [1/10], Step: [2501/4167], Train Loss: 1.316713573988913
Time: 13m 13s (- -14m 46s), Epoch: [1/10], Step: [3001/4167], Train Loss: 1.3152373194315716
Time: 15m 28s (- -16m 32s), Epoch: [1/10], Step: [3501/4167], Train Loss: 1.3052257556206308
Time: 17m 44s (- -18m 16s), Epoch: [1/10], Step: [4001/4167], Train Loss: 1.2837309225181024




SOS i didn t have to go from the million people who have been a million dollars but i was going to be a million people who have been a million dollars but i was going to be a lot of people who are not
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=7.246089940093556, counts=[10273, 2971, 1069, 427], totals=[28602, 27334, 26066, 24798], precisions=[35.917068736451995, 10.869247091534353, 4.101127906084555, 1.7219130575046375], bp=1.0, sys_len=28602, ref_len=26162)
---------Save trained model--------
Time: 21m 0s (- -21m 1s), Epoch: [2/10], Step: [501/4167], Train Loss: 1.1675726089586538
Time: 23m 15s (- -24m 45s), Epoch: [2/10], Step: [1001/4167], Train Loss: 1.152335923180405
Time: 25m 27s (- -26m 33s), Epoch: [2/10], Step: [1501/4167], Train Loss: 1.144483315497384



SOS i didn t even have to tell the million deaths but i was going to have a million dollars in the united states but it was the same thing that the united states is going to be the richest of the north africa because
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=7.731039195681839, counts=[10476, 3076, 1136, 461], totals=[28157, 26889, 25621, 24353], precisions=[37.20566821749476, 11.439622150321693, 4.433862846883416, 1.892990596641071], bp=1.0, sys_len=28157, ref_len=26162)
---------Save trained model--------
Time: 39m 53s (- -40m 11s), Epoch: [3/10], Step: [501/4167], Train Loss: 1.003379777745962
Time: 42m 4s (- -43m 57s), Epoch: [3/10], Step: [1001/4167], Train Loss: 1.0237047343374581
Time: 44m 18s (- -45m 42s), Epoch: [3/10], Step: [1501/4167], Train Loss: 1.



SOS i didn t even know about million people but it was a million people who had died of poverty but i was going to be a victim of the people who had died of poverty but the death toll was not only about a
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=8.457942163945331, counts=[10541, 3121, 1183, 498], totals=[26750, 25482, 24214, 22946], precisions=[39.40560747663552, 12.247861235381839, 4.8856033699512675, 2.1703129085679422], bp=1.0, sys_len=26750, ref_len=26162)
---------Save trained model--------
Time: 58m 49s (- -59m 17s), Epoch: [4/10], Step: [501/4167], Train Loss: 0.9275924887882191
Time: 61m 6s (- -62m 57s), Epoch: [4/10], Step: [1001/4167], Train Loss: 0.9277849679770874
Time: 63m 22s (- -64m 40s), Epoch: [4/10], Step: [1501/4167], Train Loss: 0.929974954



SOS i didn t even hear that one million people but i was not afraid of the refugees of refugees but it was a very difficult thing to be in the middle of the refugees but the idea of the north atlantic ocean is going
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=8.372010628714495, counts=[10702, 3200, 1226, 519], totals=[27748, 26480, 25212, 23944], precisions=[38.56854548075537, 12.084592145015106, 4.862763763287323, 2.1675576344804544], bp=1.0, sys_len=27748, ref_len=26162)
---------Save trained model--------
Time: 77m 43s (- -78m 26s), Epoch: [5/10], Step: [501/4167], Train Loss: 0.8423107770262616
Time: 79m 56s (- -80m 8s), Epoch: [5/10], Step: [1001/4167], Train Loss: 0.8534909424377295
Time: 82m 7s (- -83m 55s), Epoch: [5/10], Step: [1501/4167], Train Loss: 0.



SOS i didn t even hear about billion dollars but it s not about billion people in the s but i think that there are a million people in africa but it s a difficult to fight for a crime of death or even refugees
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=8.001474039702265, counts=[10898, 3171, 1185, 505], totals=[28591, 27323, 26055, 24787], precisions=[38.116889930397676, 11.605606997767449, 4.5480713874496255, 2.037358292653407], bp=1.0, sys_len=28591, ref_len=26162)
---------Save trained model--------
Time: 96m 51s (- -97m 19s), Epoch: [6/10], Step: [501/4167], Train Loss: 0.7858768178369475
Time: 99m 10s (- -100m 55s), Epoch: [6/10], Step: [1001/4167], Train Loss: 0.7767923611180135
Time: 101m 20s (- -102m 43s), Epoch: [6/10], Step: [1501/4167], Train Loss: 0.



SOS i didn t have a million refugees that would have been visited but not to mention north africa s most difficult to reach a refugee but the death of the pyramid of the refugees would be the same thing but the north pole was
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=7.91853967926915, counts=[10789, 3142, 1160, 489], totals=[28385, 27117, 25849, 24581], precisions=[38.00951206623216, 11.586827451414242, 4.487601067739565, 1.9893413612139457], bp=1.0, sys_len=28385, ref_len=26162)
---------Save trained model--------
Time: 116m 2s (- -116m 11s), Epoch: [7/10], Step: [501/4167], Train Loss: 0.7262077723101394
Time: 118m 18s (- -119m 48s), Epoch: [7/10], Step: [1001/4167], Train Loss: 0.7428285961627228
Time: 120m 31s (- -121m 33s), Epoch: [7/10], Step: [1501/4167]



SOS i didn t have a situation in the north of the refugees but i was devastated for a million dollars a year but a refugee child survival was dying in the north africa .
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=8.235938408184898, counts=[10639, 3115, 1166, 505], totals=[27461, 26193, 24925, 23657], precisions=[38.74221623393176, 11.892490360019853, 4.6780341023069205, 2.1346747262966566], bp=1.0, sys_len=27461, ref_len=26162)
---------Save trained model--------
Time: 135m 0s (- -135m 16s), Epoch: [8/10], Step: [501/4167], Train Loss: 0.6826451477919455
Time: 137m 10s (- -138m 57s), Epoch: [8/10], Step: [1001/4167], Train Loss: 0.7034927001260772
Time: 139m 22s (- -140m 42s), Epoch: [8/10], Step: [1501/4167], Train Loss: 0.724800356084746
Time: 141m 35s (- -142



SOS i didn t have a million refugees in the north of new jersey but i was going to die but if i was a criminal prosecutor with the refugees of refugees who had been raped from the north atlantic ocean north africa .
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=7.837115981570091, counts=[10757, 3058, 1122, 478], totals=[28091, 26823, 25555, 24287], precisions=[38.293403581218186, 11.400663609588786, 4.39053022891802, 1.9681310989418208], bp=1.0, sys_len=28091, ref_len=26162)
---------Save trained model--------
Time: 154m 0s (- -154m 17s), Epoch: [9/10], Step: [501/4167], Train Loss: 0.6481255815513448
Time: 156m 13s (- -157m 55s), Epoch: [9/10], Step: [1001/4167], Train Loss: 0.6742613934886457
Time: 158m 24s (- -159m 42s), Epoch: [9/10], Step: [1501/4167], Train L



SOS i couldn t talk about million dollars but it s about a decade but it s difficult to reach the north of crime but it s also about the west bank as the master s good thing to do .
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=7.810051974081473, counts=[10632, 3003, 1116, 468], totals=[27815, 26547, 25279, 24011], precisions=[38.22397986697825, 11.312012656797378, 4.414731595395388, 1.949106659447753], bp=1.0, sys_len=27815, ref_len=26162)
---------Save trained model--------
Time: 172m 58s (- -173m 22s), Epoch: [10/10], Step: [501/4167], Train Loss: 0.6327316100957352
Time: 175m 10s (- -175m 0s), Epoch: [10/10], Step: [1001/4167], Train Loss: 0.6538031099728917
Time: 177m 18s (- -178m 48s), Epoch: [10/10], Step: [1501/4167], Train Loss: 0.6698027216332805
Time: 17



SOS i didn t tell me that when million refugees were coming from a refugee camp or a refugee camp but i was also a north carolina who died in the refugee camps but the north pole was about a terrible crime .
SOS i had no idea what life was going to be like as a north korean refugee but i soon learned it s not only extremely difficult it s also very dangerous since north korean refugees are considered in china as illegal migrants .
-------vali score---------
score BLEU(score=7.853069259602653, counts=[10690, 3034, 1127, 472], totals=[27893, 26625, 25357, 24089], precisions=[38.32502778474886, 11.395305164319248, 4.444532081870884, 1.9594005562704968], bp=1.0, sys_len=27893, ref_len=26162)
---------Save trained model--------


In [29]:
torch.save(encoder1.state_dict(), "encoder_wo_attn_vi_300_0.0005.pth")
torch.save(decoder1.state_dict(), "decoder_wo_attn_vi_300_0.0005.pth")

In [94]:
encoder1.load_state_dict(torch.load("encoder_wo_attn6.pth"))
decoder1.load_state_dict(torch.load("decoder_wo_attn6.pth"))

In [104]:
def mapback(reference):

    words = []
    
    for i in range(reference.size(0)):
        line = []
        for j in range(reference.size(1)):
            
            if int(reference[i,j].item()) == 1:
                pass
            
            if int(reference[i,j].item()) == 2:
                break
            else:
                line.append(train_output_lang.index2word[int(reference[i,j].item())])
        
        line.remove('SOS')
        
        line = ' '.join(line)
        words.append(line)
      
    return words
  
  
def evaluate_wo1(encoder, decoder, candidate, length_1, reference, length_2, max_length):
  
    with torch.no_grad():

        batch_size = candidate.size(0)
        candidate =  candidate.cuda()
        reference = reference.cuda()
        encoder_ouputs, encoder_hidden = encoder(candidate, length_1, None)
        decoder_input = torch.tensor([[SOS_token]*batch_size], device = device)
        decoder_hidden = encoder_hidden
        #print(encoder_hidden.size())
        batch_size = candidate.size(0)

        decoded_words = torch.ones([batch_size, max_length])

        for di in range(max_length):

            try:

                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden)
            except:

                import pdb; pdb.set_trace()

            topv, topi = decoder_output.topk(1)

            decoded_words[:,di] = topi.squeeze()



            decoder_input = topi.squeeze().detach().cuda()
            words = mapback(decoded_words) 


    return words
        

In [105]:
#! pip install sacrebleu
import sacrebleu
sacrebleu.corpus_bleu
def evaluateRandomly(encoder1, decoder1, loader):
  
    score = 0
    output_words = []
    true_words = []
    for i, (candidate, length_1, reference, length_2) in enumerate(loader):

            max_length = max(length_2).item()
            output_words += evaluate_wo1(encoder1, decoder1, candidate, length_1, reference, length_2,max_length)
            true_words += mapback(reference)
        
    score = sacrebleu.corpus_bleu(output_words,[true_words])
    
    print(output_words[0])
    print(true_words[0])
        
        
    return (score, output_words, true_words)

In [106]:
s, output_words, true_words = evaluateRandomly(encoder1, decoder1, test_loader)



how can i make her life for the world s first child with the world s bowery life span of her hometown that her mother s been told her quot how long did not have a conversation with her life on her hair and her mom quot dolly quot and she said quot mom s born with her life on her skirt and her quot hometown of baby ? quot 
how can i speak in minutes about the bonds of women over three generations about how the astonishing strength of those bonds took hold in the life of a four year old girl huddled with her young sister her mother and her grandmother for five days and nights in a small boat in the china sea more than years ago bonds that took hold in the life of that small girl and never let go that small girl now living in san francisco and speaking to you today ?


In [107]:
s

BLEU(score=7.204095684103843, counts=[10334, 2779, 988, 404], totals=[27928, 26375, 24822, 23276], precisions=[37.0022916069894, 10.53649289099526, 3.980340020949158, 1.7356934181130779], bp=1.0, sys_len=27928, ref_len=26525)

In [108]:
for i in range(len(output_words)):
    print('= {}'.format(true_words[i]))
    print('> {}'.format(output_words[i]))
    if i > 10:
        break

= how can i speak in minutes about the bonds of women over three generations about how the astonishing strength of those bonds took hold in the life of a four year old girl huddled with her young sister her mother and her grandmother for five days and nights in a small boat in the china sea more than years ago bonds that took hold in the life of that small girl and never let go that small girl now living in san francisco and speaking to you today ?
> how can i make her life for the world s first child with the world s bowery life span of her hometown that her mother s been told her quot how long did not have a conversation with her life on her hair and her mom quot dolly quot and she said quot mom s born with her life on her skirt and her quot hometown of baby ? quot 
= but i do remember the lights on the oil rig off the UNK coast and the young man who collapsed and died the journey s end too much for him and the first apple i tasted given to me by the men on the rig .
> but i remember

In [0]:
train_input_lang_zh, train_output_lang_zh, train_pairs_zh = prepareData('train.tok.en', 'train.tok.zh', True)
test_input_lang_zh, test_output_lang_zh, test_pairs_zh = prepareData('test.tok.en', 'test.tok.zh', True)

print(random.choice(train_pairs))

Collecting sacrebleu
  Downloading https://files.pythonhosted.org/packages/37/51/bffea2b666d59d77be0413d35220022040a1f308c39009e5b023bc4eb8ab/sacrebleu-1.2.12.tar.gz
Building wheels for collected packages: sacrebleu
  Running setup.py bdist_wheel for sacrebleu ... [?25l- done
[?25h  Stored in directory: /root/.cache/pip/wheels/ea/0a/7d/ddcbdcd15a04b72de1b3f78e7e754aab415aff81c423376385
Successfully built sacrebleu
Installing collected packages: sacrebleu
Successfully installed sacrebleu-1.2.12
