In [1]:
import json
import pickle
import random

import torch
from torch import nn, optim
from torch import autograd
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import torch.nn.utils.rnn as rnn_utils

import nltk
from nltk.translate.bleu_score import SmoothingFunction
from nltk.translate.bleu_score import sentence_bleu
import time

import copy

from Vocab import Vocab

import torch
# Make GPU index 0 the current CUDA device for the rest of the notebook
# (the device id is hard-coded here).
torch.cuda.set_device(0)

# Marker printed once every import above has succeeded.
print('import over')

import over


In [2]:
def batch_words2sentence(words_list):
    """Join every word list into one space-separated sentence.

    para: words_list is list[list[str]] type
    return: list[str] with one sentence per inner list, in the same order
    """
    sentences = []
    for words in words_list:
        sentences.append(' '.join(words))
    return sentences
def batch_tokens2words(tokens_list, vocab):
    """Translate token ids back into words through vocab.token2word.

    para: tokens_list is list[list] type, vocab exposes a token2word lookup
    return: words_list with the same nesting as tokens_list
    """
    words_list = []
    for tokens in tokens_list:
        words = []
        for token in tokens:
            words.append(vocab.token2word[token])
        words_list.append(words)
    return words_list

def batch_tokens_remove_eos(tokens_list, vocab):
    """Keep only the tokens that come before the first <eos> in each sequence.

    para: tokens_list is list[list] type, vocab exposes word2token['<eos>']
    return: new list[list]; each inner list stops right before the first
            <eos> token, or is a full copy when the sequence has no <eos>
    """
    def cut_at_eos(sequence):
        # Collect tokens until the end-of-sentence marker shows up.
        prefix = []
        for token in sequence:
            if token == vocab.word2token['<eos>']:
                return prefix
            prefix.append(token)
        return prefix

    return [cut_at_eos(sequence) for sequence in tokens_list]

def batch_tokens_bleu(references, candidates, smooth_epsilon=0.001):
    """Score every (reference, candidate) pair with smoothed sentence BLEU.

    para: references and candidates are list[list] type, paired by position
          smooth_epsilon is forwarded to nltk's SmoothingFunction (method1)
    return: list with one score per pair; a pair scores 0 when either side
            has fewer than 4 tokens
    """
    def score_pair(ref, candidate):
        # Pairs with a side shorter than 4 tokens are scored 0 outright.
        if len(ref) < 4 or len(candidate) < 4:
            return 0
        smoother = SmoothingFunction(epsilon=smooth_epsilon)
        return sentence_bleu([ref], candidate, smoothing_function=smoother.method1)

    return [score_pair(ref, candidate) for ref, candidate in zip(references, candidates)]

# Load the pickled vocabulary object; it is passed as the `vocab` argument to
# the helpers in this notebook, which read vocab.word2token / vocab.token2word.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open('data_set/vocab.pk', 'rb') as f:
    vocab=pickle.load(f)
    
# Smoke test of batch_tokens_bleu on a single reference/candidate pair of token ids.
batch_tokens_bleu([[1,2,3,4,5,6]], [[2,3,1,4,5]])

[0.021744100219015735]

In [3]:
def get_labels(sents, lens, vocab):
    """Build target sequences by inserting <eos> into a copy of each sentence.

    para: sents is list[list] of tokens; lens[i] is the insert position used
          for sents[i]; vocab exposes word2token['<eos>']
    return: deep copy of sents with the <eos> token inserted at index lens[i]
            of sentence i (the caller's lists are left untouched)
    """
    labels = copy.deepcopy(sents)
    for idx in range(len(labels)):
        labels[idx].insert(lens[idx], vocab.word2token['<eos>'])
    return labels

def get_inputs(sents, lens, vocab):
    """Build model inputs by prepending <sos> to a copy of each sentence.

    para: sents is list[list] of tokens; lens holds one length per sentence;
          vocab exposes word2token['<sos>']
    return: (inputs, lens_) where inputs is a deep copy of sents with <sos>
            at position 0 and lens_ is a copy of lens with every entry
            increased by one; the arguments themselves are not modified
    """
    inputs = copy.deepcopy(sents)
    lens_ = copy.deepcopy(lens)

    for idx in range(len(inputs)):
        inputs[idx].insert(0, vocab.word2token['<sos>'])
    # Every sentence grew by the one <sos> token.
    for idx, length in enumerate(lens_):
        lens_[idx] = length + 1

    return inputs, lens_

def _load_pickle(path):
    """Return the object unpickled from the binary file at `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)

# Training split: sentences and their lengths (same order in both files -- TODO confirm).
split_train_set_inputs = _load_pickle('./data_set/split_data_set/train_complex_sents.pk')
split_train_set_input_lens = _load_pickle('./data_set/split_data_set/train_complex_sent_lens.pk')
# split_pseudo_train_set_labels = _load_pickle('./data_set/split_data_set/train_pseudo_labels.pk')

# Validation split, loaded the same way.
split_valid_set_inputs = _load_pickle('./data_set/split_data_set/validation_complex_sents.pk')
split_valid_set_input_lens = _load_pickle('./data_set/split_data_set/validation_complex_sent_lens.pk')
# split_pseudo_valid_set_labels = _load_pickle('./data_set/split_data_set/validation_labels.pk')


In [4]:
# Targets are derived first: get_labels must see the sentences and lengths as
# loaded, because the following line rebinds both names to the <sos>-prefixed
# versions returned by get_inputs.
# NOTE(review): the rebinding makes this cell non-idempotent -- running it a
# second time prepends another <sos>, increments every length again, and builds
# labels from the already-prefixed inputs. Re-run the loading cell first.
split_train_set_labels = get_labels(split_train_set_inputs, split_train_set_input_lens, vocab)
split_train_set_inputs, split_train_set_input_lens = get_inputs(split_train_set_inputs, split_train_set_input_lens, vocab)
# Sanity check: number of training inputs vs. number of length entries.
print(len(split_train_set_inputs), len(split_train_set_input_lens))

# Same derivation for the validation split (same ordering constraint applies).
split_valid_set_labels = get_labels(split_valid_set_inputs, split_valid_set_input_lens, vocab)
split_valid_set_inputs, split_valid_set_input_lens = get_inputs(split_valid_set_inputs, split_valid_set_input_lens, vocab)
# Sanity check: number of validation inputs vs. number of label sequences.
print(len(split_valid_set_inputs), len(split_valid_set_labels))

989944 989944
5000 5000


In [5]:
class LanguageModel(nn.Module):
    """LSTM language model: embedding -> unidirectional LSTM -> linear layer over the vocabulary.

    forward() returns one logit vector per time step; get_loss() turns those
    logits and the target tokens into a cross-entropy loss in which the
    '<padding>' class has weight 0.
    """
    def __init__(self, use_cuda, hidden_dim, input_dim, vocab):#, pre_train_weight, is_fix_word_vector = 1):
        """Create the layers and copy pre-trained embeddings from disk.

        para: use_cuda   -- truthy to move tensors to the GPU in forward/get_loss
              hidden_dim -- LSTM hidden size and input size of the output layer
              input_dim  -- embedding size, also the LSTM input size
              vocab      -- object exposing a word2token mapping that contains '<padding>'
        """
        super(LanguageModel, self).__init__()
        
        self.use_cuda = use_cuda
        self.input_dim=input_dim
        self.hidden_dim=hidden_dim
        self.vocab = vocab
        
        self.lstm=torch.nn.LSTM(input_size=self.input_dim, 
                                hidden_size= self.hidden_dim, 
                                bidirectional=False,
                                batch_first=True
                               )
        
        #embedding
        self.embed=nn.Embedding(len(self.vocab.word2token), input_dim)
        #loading pre trained word embedding
        # NOTE(review): the path is hard-coded and its name suggests 300-d vectors;
        # copy_ below presumably requires the pickled matrix to match
        # (len(vocab.word2token), input_dim) -- confirm before changing input_dim.
        # NOTE(review): pickle.load can execute arbitrary code; trusted files only.
        with open('data_set/pre_trained_token_embedding_300d.pk', 'rb') as f:
            pre_train_word_embedding = pickle.load(f)
            
        self.embed.weight.data.copy_(torch.FloatTensor(pre_train_word_embedding))
#         self.embed.weight.requires_grad = False


        # Per-class loss weights: 1 everywhere except 0 for '<padding>', so padded
        # target positions do not contribute to the loss.
        self.weight = [1]*len(self.vocab.word2token)
        self.weight[self.vocab.word2token['<padding>']]=0
        self.cost_func = nn.CrossEntropyLoss(weight=torch.Tensor(self.weight), reduce=True)
        # Projects each LSTM output vector to one logit per vocabulary entry.
        self.fcnn=nn.Linear(in_features = self.hidden_dim, out_features = len(self.vocab.word2token))
        
    def order(self, inputs, inputs_len):    #inputs: tensor, inputs_len: 1D tensor
        """Reorder the batch by descending length.

        para: inputs     -- batch whose dim 0 indexes samples
              inputs_len -- 1D tensor with one length per sample
        return: (inputs sorted by descending length, the sorted lengths,
                 indices that map the sorted batch back to the original order)
        """
        inputs_len, sort_ids = torch.sort(inputs_len, dim=0, descending=True)
        
        if self.use_cuda:
            inputs = inputs.index_select(0, Variable(sort_ids).cuda())
        else:
            inputs = inputs.index_select(0, Variable(sort_ids))
        
        # Sorting the permutation itself yields its inverse.
        _, true_order_ids = torch.sort(sort_ids, dim=0, descending=False)
        
        return inputs, inputs_len, true_order_ids
    #
    def forward(self, inputs, inputs_len):
        """Compute next-token logits for a padded batch.

        para: inputs     -- token id tensor; assumes (batch, seq) LongTensor -- TODO confirm
              inputs_len -- 1D tensor with the true length of every row
        return: logits in the original batch order, batch-first, with a
                vocabulary-sized last dimension; the time dimension is whatever
                pad_packed_sequence produces for this batch
        """
        inputs = Variable(inputs)
        if self.use_cuda:
            inputs=inputs.cuda()
            
        # The batch is sorted by descending length before packing.
        inputs, sort_len, true_order_ids = self.order(inputs, inputs_len)

        in_vecs=self.embed(inputs)

        packed = rnn_utils.pack_padded_sequence(input=in_vecs, lengths=list(sort_len), batch_first =True)
        
        outputs, (hn,cn) = self.lstm(packed)
        # batch_first is not passed here, so the padded result is time-major.
        outputs, sent_lens = rnn_utils.pad_packed_sequence(outputs)
        
        #print('outputs size, hn size and cn size: ', outputs.size(), hn.size(), cn.size())
        outputs = outputs.transpose(0,1)  #transpose is necessary (back to batch-first)
        #print('outputs size, hn size and cn size: ', outputs.size(), hn.size(), cn.size())
        
        #warning: outputs, hn and cn have been sorted by sentence length so the order is wrong; restore it now
        if self.use_cuda:
            outputs = outputs.index_select(0, Variable(true_order_ids).cuda())
        else:
            outputs = outputs.index_select(0, Variable(true_order_ids))
        
#         hn = torch.cat((hn[0], hn[1]), dim=1)
#         cn = torch.cat((cn[0], cn[1]), dim=1)
#         #print('hn size and cn size: ', hn.size(), cn.size())
        
#         if self.use_cuda:
#             hn = hn.index_select(0, Variable(true_order_ids).cuda())
#             cn = cn.index_select(0, Variable(true_order_ids).cuda())
#         else:
#             hn = hn.index_select(0, Variable(true_order_ids))
#             cn = cn.index_select(0, Variable(true_order_ids))
        logits = self.fcnn(outputs)
        return logits
    
    def get_loss(self, logits, labels):
        """Cross-entropy between logits and target tokens.

        para: logits -- output of forward()
              labels -- target token tensor, one row per sample; may be longer
                        than the logits along dim 1
        return: the value produced by self.cost_func
        """
        labels = self._tocuda(Variable(labels))
        # Trim the labels to the number of time steps the logits cover.
        sents = logits.size(dim=1)
        labels = labels[:, :sents]
        # Flatten so every (sample, time step) becomes one classification row.
        labels = labels.contiguous().view(-1)
        logits = logits.view(-1, len(self.vocab.word2token))
#         print('logits size: ', logits.size())
        
        return self.cost_func(logits, labels)
    
    def get_sentences_ppl(self, inputs):
        """Placeholder: not implemented, currently returns None."""
        pass
        
    def _tocuda(self, var):
        """Return var moved to the GPU when use_cuda is set, otherwise var unchanged."""
        if self.use_cuda:
            return var.cuda()
        else:
            return var

In [6]:
# def get_labels(sents, lens, vocab):
#     labels = copy.deepcopy(sents)
#     for idx, sent in enumerate(labels):
#         sent.insert(lens[idx], vocab.word2token['<eos>'])
        
#     return labels

# def get_inputs(sents, lens, vocab):
#     inputs = copy.deepcopy(sents)
#     lens_=copy.deepcopy(lens)
    
#     for sent in inputs:
#         sent.insert(0, vocab.word2token['<sos>'])
#     for idx in range(len(lens_)):
#         lens_[idx]+=1
    
#     return inputs, lens_

# id1=random.randint(0, len(split_train_set_inputs)-1)
# id2=random.randint(0, len(split_train_set_inputs)-1)
# sents=[split_train_set_inputs[id1], split_train_set_inputs[id2]]
# lens=[split_train_set_input_lens[id1], split_train_set_input_lens[id2]]
# print(sents, lens)
# labels = get_labels(sents, lens, vocab)
# inputs, lens = get_inputs(sents, lens, vocab)
# print(inputs, lens)
# print(labels)

In [7]:
# use_cuda = 1
# hidden_dim = 256
# input_dim = 100
# sample_num = 11

# language_model = LanguageModel(use_cuda = use_cuda, input_dim = input_dim, hidden_dim = hidden_dim, vocab = vocab)
# lm_optimizer = optim.Adam(filter(lambda p: p.requires_grad, language_model.parameters()), lr=0.005)

# if use_cuda:
#     language_model = language_model.cuda()
    
# a = time.time()

# lm_optimizer.zero_grad()#clear
# outputs = language_model.forward(torch.LongTensor(split_train_set_inputs[0:sample_num]), 
#                                              torch.LongTensor(split_train_set_input_lens[0:sample_num])
#                                               )
# loss = language_model.get_loss(outputs, torch.LongTensor(split_train_set_labels[:sample_num]))
# loss.backward()#retain_graph=True)
# lm_optimizer.step()

# print('language_model: loss is %4.7f'%loss.data[0])

# print(time.time()-a)

In [8]:
# Hyper-parameters for the training run below.
use_cuda = 1
hidden_dim = 2048
input_dim = 300
lr=0.005
batch_size=100
# Dividing by 1 uses the whole training set; a larger divisor trains on a
# leading fraction only.
split_train_set_size=int(len(split_train_set_inputs)/1)
epochs=10000
# BLEU is not computed during language-model training; -1 placeholders are
# only written into the progress log lines.
train_bleu_mean=-1
train_bleu_max=-1

language_model = LanguageModel(use_cuda = use_cuda, input_dim = input_dim, hidden_dim = hidden_dim, vocab = vocab)
if use_cuda:
    language_model = language_model.cuda()

# Use the `lr` variable rather than a second literal so the optimizer and the
# learning rate recorded in checkpoint file names cannot drift apart.
lm_optimizer = optim.Adam(filter(lambda p: p.requires_grad, language_model.parameters()), lr=lr)


def model_train(epoch, batch_size, train_set_size):
    """Run one pass over the training data with periodic logging and checkpoints.

    Relies on notebook globals: language_model, lm_optimizer, the
    split_train_set_inputs / _input_lens / _labels lists, train_bleu_mean,
    train_bleu_max, hidden_dim, input_dim and lr.

    para: epoch          -- epoch number, used only in log lines and checkpoint names
          batch_size     -- number of consecutive samples per optimisation step
          train_set_size -- number of leading training samples to iterate over;
                            a trailing partial batch is skipped
    return: None
    """
    import os  # local import: only needed to create the checkpoint directory

    checkpoint_dir = './models_language_model/'
    total_batches = int(train_set_size/batch_size)
    batch_id = 0
    for start_idx in range(0, train_set_size-batch_size+1, batch_size):
        batch_id+=1
        end_idx = start_idx + batch_size

        # One optimisation step on the current batch.
        lm_optimizer.zero_grad()
        outputs = language_model.forward(torch.LongTensor(split_train_set_inputs[start_idx:end_idx]),
                                         torch.LongTensor(split_train_set_input_lens[start_idx:end_idx]))
        loss = language_model.get_loss(outputs, torch.LongTensor(split_train_set_labels[start_idx:end_idx]))
        loss.backward()
        lm_optimizer.step()

        # Every 50 batches (1, 51, 101, ...) report the loss on a random training window.
        if batch_id%50==1:
            language_model.eval()

            # The window is batch_size long, so its start is bounded by
            # batch_size as well (the bound used to come from an unrelated
            # sample_num=5, which let windows run past train_set_size).
            rand_idx = random.randint(0, max(0, train_set_size-batch_size))
            rand_end = rand_idx+batch_size
            outputs = language_model.forward(torch.LongTensor(split_train_set_inputs[rand_idx:rand_end]),
                                             torch.LongTensor(split_train_set_input_lens[rand_idx:rand_end]))
            loss = language_model.get_loss(outputs, torch.LongTensor(split_train_set_labels[rand_idx:rand_end]))
            # NOTE(review): `.data[0]` is the legacy way to read a scalar loss;
            # newer PyTorch releases expect `loss.item()` -- confirm the installed version.
            loss_value = loss.data[0]

            info_stamp = 'split_loss={:2.9f}-train_bleu_mean={:2.9f}-train_bleu_max={:2.9f}-batch_size={:n}-epoch={:n}-batch_id=({:n}/{:n})'.format(
                              loss_value, train_bleu_mean, train_bleu_max, batch_size, epoch, batch_id, total_batches)
            print(info_stamp)

            # Checkpoint on batch 1, 50001, ... of every epoch.
            if batch_id%50000==1:
                valid_bleu=-1  # no validation BLEU is computed for the language model
                info_stamp = 'info=[{:s}]-loss={:2.9f}-bleu={:1.4f}-hidden_dim={:n}-input_dim={:n}-epoch={:n}-batch_size={:n}-batch_id=[{:n}-[of]-{:n}]-lr={:1.4f}'.format(
                              'language_model', loss_value, valid_bleu, hidden_dim, input_dim, epoch, batch_size, batch_id, total_batches, lr)
                time_stamp = time.strftime('time-[%Y-%m-%d-%H-%M-%S]-', time.localtime())
                # Create the target directory up front so a missing folder
                # cannot abort a long training run at save time.
                os.makedirs(checkpoint_dir, exist_ok=True)
                torch.save(language_model.state_dict(), ''.join([checkpoint_dir, time_stamp, info_stamp]))

            language_model.train()

            
# Start the wall-clock timer here: the summary line below reads start_time,
# which was never assigned before and raised NameError once training finished.
start_time = time.time()

for epoch in range(epochs):
    model_train(epoch, batch_size, split_train_set_size)

print('running time: %.2f mins'%((time.time()-start_time)/60))

split_loss=8.822279930-train_bleu_mean=-1.000000000-train_bleu_max=-1.000000000-batch_size=100-epoch=0-batch_id=(1/9899)
split_loss=11.204010963-train_bleu_mean=-1.000000000-train_bleu_max=-1.000000000-batch_size=100-epoch=0-batch_id=(51/9899)
split_loss=7.924545288-train_bleu_mean=-1.000000000-train_bleu_max=-1.000000000-batch_size=100-epoch=0-batch_id=(101/9899)
split_loss=8.429313660-train_bleu_mean=-1.000000000-train_bleu_max=-1.000000000-batch_size=100-epoch=0-batch_id=(151/9899)
split_loss=8.397708893-train_bleu_mean=-1.000000000-train_bleu_max=-1.000000000-batch_size=100-epoch=0-batch_id=(201/9899)
split_loss=8.559820175-train_bleu_mean=-1.000000000-train_bleu_max=-1.000000000-batch_size=100-epoch=0-batch_id=(251/9899)
split_loss=7.150449753-train_bleu_mean=-1.000000000-train_bleu_max=-1.000000000-batch_size=100-epoch=0-batch_id=(301/9899)
split_loss=7.790458202-train_bleu_mean=-1.000000000-train_bleu_max=-1.000000000-batch_size=100-epoch=0-batch_id=(351/9899)
split_loss=10.7768

KeyboardInterrupt: 

In [9]:
# NOTE(review): `stop` is not defined in any visible cell, so this line raises
# NameError -- presumably a deliberate barrier that halts "Run All" before the
# cells below; confirm, and consider an explicit `raise` with a message.
stop

NameError: name 'stop' is not defined

In [None]:
# Settings for the checkpoint restored below; the checkpoint file names record
# hidden_dim=512 and input_dim=300, which is what is configured here.
use_cuda = 1
hidden_dim = 512
input_dim = 300
lr=0.005
batch_size=100
split_train_set_size=int(len(split_train_set_inputs)/1)
epochs=10000
train_bleu_mean=-1
train_bleu_max=-1

language_model = LanguageModel(use_cuda = use_cuda, input_dim = input_dim, hidden_dim = hidden_dim, vocab = vocab)

# Saved checkpoints in order of training progress (epoch 5, 12, 21). They used
# to be assigned to model_path one after another, so only the last one ever
# took effect; the list makes that choice explicit.
checkpoint_paths = [
    './models_language_model/time-[2019-02-26-01-34-36]-info=[language_model]-loss=4.287574768-bleu=-1.0000-hidden_dim=512-input_dim=300-epoch=5-batch_size=100-batch_id=[1-[of]-9899]-lr=0.0050',
    './models_language_model/time-[2019-02-26-05-54-37]-info=[language_model]-loss=4.003368378-bleu=-1.0000-hidden_dim=512-input_dim=300-epoch=12-batch_size=100-batch_id=[1-[of]-9899]-lr=0.0050',
    './models_language_model/time-[2019-02-26-11-27-36]-info=[language_model]-loss=3.414012194-bleu=-1.0000-hidden_dim=512-input_dim=300-epoch=21-batch_size=100-batch_id=[1-[of]-9899]-lr=0.0050',
]
model_path = checkpoint_paths[-1]

# Load the weights onto the CPU first; the model is moved to the GPU below
# when use_cuda is set.
pre_train = torch.load(model_path, map_location='cpu')
language_model.load_state_dict(pre_train)

if use_cuda:
    language_model = language_model.cuda()

In [None]:
def _print_token_sentences(tokens_list):
    """Cut each token list at <eos>, map the tokens to words and print one sentence per line."""
    trimmed = batch_tokens_remove_eos(tokens_list=tokens_list, vocab=vocab)
    words_list = batch_tokens2words(trimmed, vocab)
    for sent in batch_words2sentence(words_list=words_list):
        print(sent)

sample_num=2
# Draw the window start from the whole validation set. The previous bound,
# int((len-1)/100), restricted sampling to the first ~50 examples.
rand_idx=random.randint(0, max(0, len(split_valid_set_inputs)-sample_num))
sample_end=rand_idx+sample_num

language_model.eval()
logits = language_model.forward(torch.LongTensor(split_valid_set_inputs[rand_idx:sample_end]),
                                torch.LongTensor(split_valid_set_input_lens[rand_idx:sample_end]))
print(logits.size())

# Greedy prediction: most likely token at every time step.
_, predicts = logits.max(dim=2)
predicts = predicts.cpu().data.tolist()
_print_token_sentences(predicts)

print('\n')

#labels
labels=split_valid_set_labels[rand_idx:sample_end]
_print_token_sentences(labels)

language_model.train()
print('\n')

In [None]:
# NOTE(review): `k` is not defined in any visible cell; evaluating it raises
# NameError. Looks like a stray keystroke or another Run-All barrier -- confirm.
k

In [None]:
#300d
# NOTE(review): this first pair of paths is overwritten by the assignments
# a few lines below, so these two checkpoints are never loaded.
split_model_path='./models_saved/time-[2019-02-24-14-50-48]-info=[split_model]-loss=1.512540460-bleu=0.0136-hidden_dim=512-input_dim=300-epoch=48-batch_size=200-batch_id=[1-[of]-494]-lr=0.0050'
fusion_model_path='./models_saved/time-[2019-02-24-14-50-49]-info=[fusion_model]-loss=1.444324136-bleu=0.0243-hidden_dim=512-input_dim=300-epoch=48-batch_size=200-batch_id=[1-[of]-494]-lr=0.0050'

#300d
# These are the paths that actually get loaded.
split_model_path='./models_saved/time-[2019-02-24-06-03-39]-info=[split_model]-loss=1.667227983-bleu=0.0144-hidden_dim=512-input_dim=300-epoch=22-batch_size=200-batch_id=[1-[of]-494]-lr=0.0050'
fusion_model_path='./models_saved/time-[2019-02-24-06-03-39]-info=[fusion_model]-loss=1.542479157-bleu=0.0147-hidden_dim=512-input_dim=300-epoch=22-batch_size=200-batch_id=[1-[of]-494]-lr=0.0050'

#100d
# split_model_path='./models_saved/time-[2019-01-24-17-17-59]-info=[split_model]-loss=1.704468250-bleu=0.0485-hidden_dim=256-input_dim=100-epoch=9-batch_size=200-batch_id=[1-[of]-4949]-lr=0.0050'
# fusion_model_path='./models_saved/time-[2019-01-24-17-17-59]-info=[fusion_model]-loss=2.401571035-bleu=0.0334-hidden_dim=256-input_dim=100-epoch=9-batch_size=200-batch_id=[1-[of]-4949]-lr=0.0050'

# Settings for the two restored models; this rebinds the notebook-wide
# hidden_dim / batch_size / split_train_set_size values set by earlier cells.
use_cuda = 1
hidden_dim = 512
input_dim = 300
lr=0.005
batch_size=200
split_train_set_size=int(len(split_train_set_inputs)/10)
epochs=10000
train_bleu_mean=-1
train_bleu_max=-1
# NOTE(review): Seq2Seq is neither defined nor imported in the visible cells;
# this cell presumably depends on a class from another notebook/module -- confirm.
split_model = Seq2Seq(use_cuda = use_cuda, input_dim = input_dim, hidden_dim = hidden_dim, 
                          vocab = vocab, max_length = 61)

fusion_model = Seq2Seq(use_cuda = use_cuda, input_dim = input_dim, hidden_dim = hidden_dim, 
                          vocab = vocab, max_length = 51)
#pre train para
# Weights are loaded onto the CPU first, then moved to the GPU below if requested.
pre_train = torch.load(split_model_path, map_location='cpu')
split_model.load_state_dict(pre_train)
pre_train = torch.load(fusion_model_path, map_location='cpu')
fusion_model.load_state_dict(pre_train)

if use_cuda:
    split_model = split_model.cuda()
    fusion_model = fusion_model.cuda()

In [None]:
def _fusion_mean_bleu(inputs, input_lens, labels, start, count, is_train):
    """Mean sentence BLEU of fusion_model over the samples [start, start+count).

    para: inputs, input_lens, labels -- parallel per-sample lists
          start, count               -- window of samples to evaluate
          is_train                   -- forwarded to fusion_model.forward; with 1
                                        the call is unpacked as (loss, predictions),
                                        with 0 its result is used as the predictions
    return: average of batch_tokens_bleu over the window
    """
    window = slice(start, start+count)
    result = fusion_model.forward(torch.LongTensor(inputs[window]),
                                  torch.LongTensor(input_lens[window]),
                                  labels=torch.LongTensor(labels[window]),
                                  is_train=is_train, teaching_rate=1)
    # The loss returned in training mode is not needed; dropping it here also
    # avoids keeping it alive as a notebook global.
    predicts = result[1] if is_train else result

    predicts = batch_tokens_remove_eos(predicts, vocab)
    references = batch_tokens_remove_eos(labels[window], vocab)
    bleu_scores = batch_tokens_bleu(references=references, candidates=predicts, smooth_epsilon=0.001)
    return sum(bleu_scores)/len(bleu_scores)


# NOTE(review): the fusion_pseudo_* data sets and fusion_model are not created
# in the visible cells -- presumably loaded elsewhere; confirm before running.

# Training window: first with is_train=1, then with is_train=0.
rand_idx=80300
sample_num=100
for is_train in (1, 0):
    train_bleu = _fusion_mean_bleu(fusion_pseudo_train_set_inputs,
                                   fusion_pseudo_train_set_input_lens,
                                   fusion_pseudo_train_set_labels,
                                   rand_idx, sample_num, is_train)
    print(train_bleu)

# Validation window, evaluated the same two ways.
rand_idx=3000
batch_size=100
for is_train in (1, 0):
    valid_bleu = _fusion_mean_bleu(fusion_pseudo_valid_set_inputs,
                                   fusion_pseudo_valid_set_input_lens,
                                   fusion_pseudo_valid_set_labels,
                                   rand_idx, batch_size, is_train)
    print(valid_bleu)