In [1]:
import random
import time
import math
import numpy as np

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F

import preprocess
from DataLoader import DataLoader
import model

In [2]:
USE_CUDA = torch.cuda.is_available()
print(USE_CUDA)

True


## Data loading and processing

In [3]:
# twitter dataset

PAD_token = 0
SOS_token = 1
EOS_token = 2
UNK_token = 3

max_vocab_size = 20000
max_sen, min_sen = 14, 3
unk_most = 2
reverse_flag = 1  # reverse the input sequence order Sutskever et al., 2014
inverse_flag = 0  # MMI bidirection: train P(T|S) by inversing source and target

dataStat = preprocess.dataPreProcess('dataset/twitter.txt', max_vocab_size, max_sen, min_sen, unk_most, reverse_flag, inverse_flag) 
print("Number of total words:", dataStat.numOfWords)

wordCount = sorted(dataStat.word2cnt.values(), reverse=True)
print("Dictionary cover ratio:", sum(wordCount[:max_vocab_size-4]) / sum(wordCount))

Number of total words: 51805
Dictionary cover ratio: 0.9796814762021151


In [4]:
# pad or cut input and output sentence

def padding(source, maxLen):
    return np.pad(source[:maxLen],(0,max(0,maxLen-len(source))),'constant')

input_max_len, output_max_len = 15, 15

pairsNum = len(dataStat.pairsInd)
pairsLength = np.array([[len(l[0]), len(l[1])] for l in dataStat.pairsInd])
upperLength = np.concatenate((np.ones((pairsNum,1), dtype=int)*input_max_len, 
                              np.ones((pairsNum,1), dtype=int)*output_max_len), axis=1)
pairsLength = np.minimum(pairsLength,upperLength)
pairsAligned = np.array([np.concatenate((padding(l[0], input_max_len), 
                                              padding(l[1], output_max_len))) for l in dataStat.pairsInd])

In [5]:
train_type = 'resume'

ratios = [0.90, 0.05, 0.05]
pairsNumTrain, pairsNumDeve = int(ratios[0]*pairsNum), int(ratios[1]*pairsNum)
pairsNumTest = pairsNum - pairsNumTrain - pairsNumDeve

if train_type=='restart':
    deve_idxes = np.random.choice(pairsNum, pairsNumDeve, replace=False)
    test_idxes = np.random.choice(list(set(np.arange(pairsNum)).difference(set(deve_idxes))), pairsNumTest, replace=False)
    train_idxes = np.array(list(set(np.arange(pairsNum)).difference(set(deve_idxes)).difference(set(test_idxes))))
    np.save("parameter/pairsIdxesTriple.npy", (train_idxes, deve_idxes, test_idxes))
else:
    train_idxes, deve_idxes, test_idxes = np.load( "parameter/pairsIdxesTriple.npy" )

pairsAlignedTrain, pairsAlignedDeve, pairsAlignedTest = pairsAligned[train_idxes], pairsAligned[deve_idxes], pairsAligned[test_idxes]
pairsLengthTrain, pairsLengthDeve, pairsLengthTest = pairsLength[train_idxes], pairsLength[deve_idxes], pairsLength[test_idxes]
    
print("Total training pairs: ",pairsNumTrain)
print("Total develop pairs: ",pairsNumDeve)
print("Total test pairs: ",pairsNumTest)

Total training pairs:  104010
Total develop pairs:  5778
Total test pairs:  5779


## Construct Seq2Seq model

In [6]:
# load forward and backward model

# environment setup
vocab_size = min(dataStat.numOfWords, max_vocab_size)
hidden_size = 1000

embedding = nn.Embedding(vocab_size, hidden_size)
rnnEncoder = model.Encoder(embedding, vocab_size,20,hidden_size, n_layers=2, bidirectional=False, variable_lengths=True)
rnnDecoder = model.Decoder(embedding, vocab_size,1,hidden_size, n_layers=2)

embedding_IN = nn.Embedding(vocab_size, hidden_size)
rnnEncoder_IN = model.Encoder(embedding_IN, vocab_size,20,hidden_size, n_layers=2, bidirectional=False, variable_lengths=True)
rnnDecoder_IN = model.Decoder(embedding_IN, vocab_size,1,hidden_size, n_layers=2)

if train_type.lower()=='restart': pass
elif train_type.lower()=='resume':
    para_name = 'twitter_0518_35'
    embedding.load_state_dict(torch.load('parameter/embeding_'+para_name+'.pt'))
    rnnEncoder.load_state_dict(torch.load('parameter/encoder_'+para_name+'.pt'))
    rnnDecoder.load_state_dict(torch.load('parameter/decoder_'+para_name+'.pt'))
    para_name = '1_twitter_0518_40'
    embedding_IN.load_state_dict(torch.load('parameter/embeding_'+para_name+'.pt'))
    rnnEncoder_IN.load_state_dict(torch.load('parameter/encoder_'+para_name+'.pt'))
    rnnDecoder_IN.load_state_dict(torch.load('parameter/decoder_'+para_name+'.pt'))
    
else: print("Please enter valid training type !")

if USE_CUDA:
    rnnEncoder.cuda()
    rnnDecoder.cuda()
    rnnEncoder_IN.cuda()
    rnnDecoder_IN.cuda()

criterion = nn.NLLLoss(size_average=True)

In [7]:
learning_rate = 0.0002
optimizer_encoder = optim.Adam(rnnEncoder.parameters(), learning_rate)
optimizer_decoder = optim.Adam(rnnDecoder.parameters(), learning_rate)

# initialize dataloader
batch_size = 32
trainLoader = DataLoader(pairsAlignedTrain, pairsLengthTrain, input_max_len, output_max_len)
trainLoader.reset(batch_size)

deveLoader = DataLoader(pairsAlignedDeve, pairsLengthDeve, input_max_len, output_max_len)
deveLoader.reset(1)

testLoader = DataLoader(pairsAlignedTest, pairsLengthTest, input_max_len, output_max_len)
testLoader.reset(1)

print("iteration per epoch:", int(pairsNumTrain/batch_size))


iteration per epoch: 3250


In [8]:
def geneMask(outputs_record, lengths):
    batch_size = lengths.size(0)
    # prepare
    comp = torch.arange(output_max_len).view(-1,1)
    if USE_CUDA: comp = comp.cuda()
    comp_ex = comp.repeat(1,vocab_size).repeat(batch_size,1,1)
    # generate
    l_ex = lengths[:,1].view(batch_size,-1).repeat(1,vocab_size).view(batch_size,1,-1)
    if USE_CUDA: l_ex = l_ex.type(torch.cuda.FloatTensor)
    else: l_ex = l_ex.type(torch.FloatTensor)
    mask = comp_ex < l_ex
    if USE_CUDA: mask = mask.type(torch.cuda.FloatTensor)
    else: mask = mask.type(torch.FloatTensor)
    return torch.mul(mask, outputs_record)

## Train

In [9]:
def oneEpoch():

    running_loss = 0

    for batch_ind in range(int(pairsNum/batch_size)+1):
    
        # prepare mini-batch data
        try:
            inputs, targets, lengths = trainLoader.getMiniBatch()
        except Exception as e:
            # print('GG...')
            break
        else:
            # print('Good!')
            # Zero gradients of both optimizers
            optimizer_encoder.zero_grad()
            optimizer_decoder.zero_grad()

            # encoding and decoding
            inputs, targets = Variable(inputs), Variable(targets)
            hid_init = rnnEncoder.init_hidden(batch_size)
            out_enc, hid_enc = rnnEncoder.forward(inputs,lengths[:,0],hid_init)
            
            # teacher forcing
            hid_dec = hid_enc#[:rnnDecoder.n_layers]

            # SOS_token
            in_dec = Variable(torch.LongTensor([dataStat.word2ind['SOS']] * batch_size))
            if USE_CUDA: in_dec = in_dec.cuda()
            outputs_record, hid_dec = rnnDecoder.forward(in_dec.view(batch_size,-1),hid_dec)
            # continue
            for i in range(output_max_len-1):
                out_dec, hid_dec = rnnDecoder.forward(targets[:,i].view(batch_size,-1),hid_dec)
                outputs_record = torch.cat((outputs_record, out_dec), 1)

            outputs_mask = geneMask(outputs_record, lengths)
            loss = criterion(torch.transpose(outputs_mask,1,2), targets)
            # print(loss)
            
            loss.backward()
            optimizer_encoder.step()
            optimizer_decoder.step()
            
            running_loss += float(loss)
            
            if (batch_ind+1)%1000 == 0:
                print("iteration", batch_ind+1, " ---- running loss:", running_loss/batch_ind)
            
    # print('running_loss:',running_loss)
    return running_loss/batch_ind

In [10]:
def savePara(epoch):
    para_name = 'twitter_0518_'+str(epoch)
    torch.save(embedding.state_dict(),'parameter/embeding_'+para_name+'.pt')
    torch.save(rnnEncoder.state_dict(),'parameter/encoder_'+para_name+'.pt')
    torch.save(rnnDecoder.state_dict(),'parameter/decoder_'+para_name+'.pt')

In [21]:
print("Begin training...")
print(time.asctime( time.localtime(time.time()) ))

for i in range(10):
    trainLoader.reset(batch_size)
    loss = oneEpoch()
    if (i+1)%1==0:
        print('Epoch:', i+1, '\tLoss:',loss)
        print(time.asctime( time.localtime(time.time()) ))
    if (i+1)%5==0:
        savePara(i+1+30)

Begin training...
Fri May 18 13:48:28 2018
iteration 1000  ---- running loss: 0.7764374853015781
iteration 2000  ---- running loss: 0.7936298665313377
iteration 3000  ---- running loss: 0.8099324350636894
Epoch: 1 	Loss: 0.8131477450590867
Fri May 18 13:53:55 2018
iteration 1000  ---- running loss: 0.7073472350447982
iteration 2000  ---- running loss: 0.7228136096315542
iteration 3000  ---- running loss: 0.7357599485632021
Epoch: 2 	Loss: 0.7386495542526245
Fri May 18 13:59:20 2018
iteration 1000  ---- running loss: 0.6406484229547961
iteration 2000  ---- running loss: 0.6553890522030844
iteration 3000  ---- running loss: 0.6696273117869963
Epoch: 3 	Loss: 0.6734025310736436
Fri May 18 14:04:46 2018
iteration 1000  ---- running loss: 0.5867972803545428
iteration 2000  ---- running loss: 0.597785772011243
iteration 3000  ---- running loss: 0.6119136117962688
Epoch: 4 	Loss: 0.615221876676266
Fri May 18 14:10:12 2018
iteration 1000  ---- running loss: 0.5322195835657664
iteration 2000  -

## Compute posterior probability: P(S|T)

In [12]:
# compute MMI-bidi P(S|T)
def postProb(inputs, targets, lengths):
    
    batch_size = lengths.size(0)
    # encoding and decoding
    hid_init = rnnEncoder_IN.init_hidden(batch_size)
    out_enc, hid_enc = rnnEncoder_IN.forward(inputs,lengths[:,0],hid_init)

    # teacher forcing
    hid_dec = hid_enc#[:rnnDecoder.n_layers]

    # SOS_token
    in_dec = Variable(torch.LongTensor([dataStat.word2ind['SOS']] * batch_size))
    if USE_CUDA: in_dec = in_dec.cuda()
    outputs_record, hid_dec = rnnDecoder_IN.forward(in_dec.view(batch_size,-1),hid_dec)
    # continue
    for i in range(output_max_len-1):
        out_dec, hid_dec = rnnDecoder_IN.forward(targets[:,i].view(batch_size,-1),hid_dec)
        outputs_record = torch.cat((outputs_record, out_dec), 1)
    
    pp = []
    for b in range(batch_size):
        pp.append( sum([outputs_record[b][i][int(targets[b][i])] for i in range(lengths[b][1])]) )
    return pp

    
# trainLoader.reset(2)
# inputs, targets, lengths = trainLoader.getMiniBatch()
# print('Good!')
# prob = postProb(inputs, targets, lengths)

In [13]:
def interChange(inputs, targets, lengths):
    input_len, output_len = len(inputs[0]), len(targets[0])
    np_inputs, np_targets, np_lengths = inputs.cpu().numpy(), targets.cpu().numpy(), lengths.cpu().numpy()
    for i in range(len(np_inputs)):
        np_inputs[i] = padding(np.append(np_inputs[i][:np_lengths[i][0]-1][::-1],np.array([EOS_token])), input_len)
        np_targets[i] = padding(np.append(np_targets[i][:np_lengths[i][1]-1][::-1],np.array([EOS_token])), output_len)
        np_lengths[i] = np_lengths[i][::-1]
    inputs_tensor = torch.cuda.LongTensor(np_inputs) if USE_CUDA else torch.LongTensor(np_inputs)
    targets_tensor = torch.cuda.LongTensor(np_targets) if USE_CUDA else torch.LongTensor(np_targets)
    lengths_tensor = torch.cuda.LongTensor(np_lengths) if USE_CUDA else torch.LongTensor(np_lengths)
    return targets_tensor, inputs_tensor, lengths_tensor

## Evalute by BLEU and distinct

In [14]:
# translate into natural language

def showResult(ind_seq, reverse=False):
    token_list = []
    for i in ind_seq:
        if i == dataStat.word2ind['EOS']: break
        token_list.append(dataStat.ind2word[i])
    return ' '.join(token_list[::-1]) if reverse else  ' '.join(token_list)

In [15]:
class Sentence:
    def __init__(self, decoder_hidden, last_idx=SOS_token, sentence_idxes=[], sentence_scores=[]):
        if(len(sentence_idxes) != len(sentence_scores)):
            raise ValueError("length of indexes and scores should be the same")
        self.decoder_hidden = decoder_hidden
        self.last_idx = last_idx
        self.sentence_idxes =  sentence_idxes
        self.sentence_scores = sentence_scores

    def getScore(self, mode='sum', gamma=-0.2):
        if len(self.sentence_scores) == 0:
            return torch.cuda.FloatTensor(-999) if USE_CUDA else torch.FloatTensor(-999)
        if mode=='avg':
            res = sum(self.sentence_scores) / len(self.sentence_scores)
        else:
            res = sum(self.sentence_scores) + gamma*len(self.sentence_scores)
        return res

    def addTopk(self, topi, topv, decoder_hidden, beam_size, voc):
        terminates, sentences = [], []
        
        topi, topv = topi[0], topv[0]  # get data out of batch
        
        for i in range(beam_size):
            if topi[0][i] == EOS_token:
                terminates.append(([int(idx) for idx in self.sentence_idxes] + [EOS_token],
                                   self.getScore())) # tuple(word_list, score_float)
                continue
            idxes = self.sentence_idxes[:] # pass by value
            scores = self.sentence_scores[:] # pass by value
            idxes.append(topi[0][i])
            scores.append(topv[0][i])
            sentences.append(Sentence(decoder_hidden, topi[0][i], idxes, scores))
        return terminates, sentences

    def toWordScore(self, voc):
        words = []
        for i in range(len(self.sentence_idxes)):
            if self.sentence_idxes[i] == EOS_token:
                words.append(EOS_token)
            else:
                words.append(int(self.sentence_idxes[i]))
        if self.sentence_idxes[-1] != EOS_token:
            words.append(EOS_token)
        return (words, self.getScore())


In [16]:
def topOneDecode(decoder, decoder_hidden, stat, max_length=output_max_len):
    
    decoder_input = torch.LongTensor([SOS_token]).view(1,-1)
    if USE_CUDA: 
        decoder_input = decoder_input.cuda()
        decoder_hidden = decoder_hidden.cuda()

    decoded_words = []

    for di in range(max_length):

        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        ni = topi[0][0]
        if ni == EOS_token:
            decoded_words.append(ni.item())
            break
        else:
            decoded_words.append(ni.item())

        decoder_input = torch.LongTensor([[ni]])
        if USE_CUDA: decoder_input = decoder_input.cuda()

    return decoded_words

In [17]:
def beamDecode(decoder, decoder_hidden, voc, beam_size, max_length=output_max_len):
    terminal_sentences, prev_top_sentences, next_top_sentences = [], [], []
    prev_top_sentences.append(Sentence(decoder_hidden))
    for _ in range(max_length-1):
        for sentence in prev_top_sentences:
            decoder_input = torch.LongTensor([[sentence.last_idx]])
            if USE_CUDA: decoder_input = decoder_input.cuda()
            decoder_output, decoder_hidden = decoder(decoder_input, sentence.decoder_hidden)
            topv, topi = decoder_output.topk(beam_size)
            term, top = sentence.addTopk(topi, topv, decoder_hidden, beam_size, voc)
            terminal_sentences.extend(term)
            next_top_sentences.extend(top)
            
        next_top_sentences.sort(key=lambda s: s.getScore(), reverse=True)
        prev_top_sentences = next_top_sentences[:beam_size]
        next_top_sentences = []

    terminal_sentences += [sentence.toWordScore(voc) for sentence in prev_top_sentences]
    terminal_sentences.sort(key=lambda x: x[1], reverse=True)

    n = min(len(terminal_sentences), 128)  # N-best list
    return terminal_sentences[:n]

In [18]:
def generate(input, length, encoder, decoder, beam_size, lamda, verbose=False):
    # encoding and decoding
    hid_init = encoder.init_hidden(batch_size = 1)
    out_enc, hid_enc = encoder.forward(input.view(1,-1),length.view(1),hid_init)
    hid_dec = hid_enc#[:rnnDecoder.n_layers]
    
    if beam_size==1:
        path = topOneDecode(decoder, hid_dec, dataStat, max_length=15)  # return path in list
        return path
    else:
        path_beam = beamDecode(decoder, hid_dec, dataStat, beam_size)  # return list of tuples: (path, score)
        path_beam.sort(key=lambda x:len(x[0]),reverse=True)
        
        # apply MMI-bidi
        batch_size = len(path_beam)
        targets_np = np.array([padding(path, output_max_len) for path,_ in path_beam])
        lengths_np = np.array([[int(length), len(path)] for path,_ in path_beam])
        
        inputs_tensor = input.view(1,-1).repeat(batch_size,1)
        targets_tensor = torch.cuda.LongTensor(targets_np) if USE_CUDA else torch.LongTensor(targets_np)
        lengths_tensor = torch.cuda.LongTensor(lengths_np) if USE_CUDA else torch.LongTensor(lengths_np)
        
        inputs_flip, targets_flip, lengths_flip = interChange(inputs_tensor, targets_tensor, lengths_tensor)
        post_score = postProb(inputs_flip, targets_flip, lengths_flip)
        path_beam = [(path_beam[i][0],(1-lamda)*path_beam[i][1]+lamda*post_score[i]) for i in range(len(path_beam))]
        path_beam.sort(key=lambda x:x[1],reverse=True)
        
        if verbose:
            for p in path_beam: print(float(p[1]), '\t', showResult(p[0]))
        return path_beam[0][0]


In [19]:
def evaluateSample(encoder, decoder, beam_size=5, lamda=0.0, verbose=True, myQuery=''):
    
    if myQuery == '':
        print("Blank Input")
        return -1
    
    # feed in customized tokens
    sample_query = myQuery.lower()
    sample_query_ind, _ = preprocess.encodePair(dataStat, (sample_query,'.'),reverse=True)
    sample_query_tensor = torch.LongTensor([padding(sample_query_ind, input_max_len)])
    sample_query_length = torch.LongTensor([len(sample_query_ind)])
    if USE_CUDA: input, length, target = sample_query_tensor.cuda(), sample_query_length.cuda(), None
    
    trace = generate(input, length, encoder, decoder, beam_size, lamda, verbose=True)
    if verbose:
        print("Message:\t", showResult(input.data[0].cpu().numpy(), reverse=True))
        print("Response:\t", showResult(trace))
        if target is not None:
            print("Teaching:\t", showResult(target.data[0].cpu().numpy()))
    return 0


In [20]:
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import SmoothingFunction
chencherry = SmoothingFunction()

def evaluateCorpus(encoder, decoder, beam_size=1, lamda=0.0, loader=testLoader, display=False):
    
    loader.reset(1)
    data_length = loader.dataLength
    responses = []
    total_score = 0
    sample_length = min(2000, data_length)
    
    for i in range(sample_length):
        inputs, targets, lengths = loader.getMiniBatch()
        input, length, target = inputs, lengths[0][0], targets

        trace = generate(input, length, encoder, decoder, beam_size, lamda)
        responses.append(trace)
                
        length_ref = lengths[0][1]
        references = [[target.data[0].tolist()[:int(length_ref)]]]
        candidates = [trace]
        score = corpus_bleu(references, candidates, smoothing_function=chencherry.method1)
        total_score += score
        
        if display and (i+1)%int(sample_length/10)==0: print("complete",int(100*(i+1)/sample_length),"%")
        
    return total_score/i, responses

In [21]:
# distinct evaluation

import nltk

def distinctEval(all_paths):

    response_ugm = set([])
    response_bgm = set([])        
    response_len = sum([len(p) for p in all_paths])

    for path in all_paths:
        for u in path:
            response_ugm.add(u)
        for b in list(nltk.bigrams(path)):
            response_bgm.add(b)

    print("total length of response:", response_len)
    print("distinct unigrams:", len(response_ugm)/response_len)
    print("distinct bigrams:", len(response_bgm)/response_len)

In [56]:
evaluateSample(rnnEncoder, rnnDecoder, 5, 0.5, True, 'seriously one of the best shows ! ! !')

-28.299510955810547 	 love it ! can t wait for the next one . .
-28.70806121826172 	 love it ! can t wait for the next one with new UNK .
-28.76465606689453 	 love it ! can t wait for the next one .
-29.720705032348633 	 love it ! can t wait for the next one . . .
-30.940441131591797 	 love it ! can t wait for the next one with their message .
-33.03714370727539 	 love it ! can t wait to hear your UNK UNK
-33.1236686706543 	 agreed . it s over there !
-33.641258239746094 	 love it ! can t wait for your UNK at !
-34.11371994018555 	 love it ! can t wait for the next one with new message .
-34.264404296875 	 love it ! can t wait for the next one with new UNK
-34.78605651855469 	 love it ! can t wait
-34.79853057861328 	 agreed . it s over there
-34.92169952392578 	 agreed . it s over there .
-36.388023376464844 	 can t wait for it
-37.20137023925781 	 love it ! can t wait to hear your UNK UNK reporting
-37.800838470458984 	 can t wait for it well
-38.074317932128906 	 love it ! can t wai

0

In [61]:
evaluateSample(rnnEncoder, rnnDecoder, 5, 0.5, True, 'thank you for your retweet')

-11.428078651428223 	 of course love it
-13.738347053527832 	 of course i love you
-15.266695022583008 	 of course boo
-16.15329933166504 	 of course i love you !
-16.477169036865234 	 now of it i m glad it was an look .
-16.86753273010254 	 of course love UNK
-16.93350601196289 	 of course love it !
-17.79831886291504 	 now of it i m like it can look it up it
-17.813858032226562 	 of course your welcome
-17.907814025878906 	 of course i love you ! !
-17.964780807495117 	 now of it i m like it it can you look it up it
-18.291221618652344 	 now of it i m like it it can you look at it ?
-18.43475914001465 	 now of it i m like it it can you look it up
-18.67047119140625 	 now of it i m like it it can you look it up this
-19.173110961914062 	 of course your welcome !
-19.234413146972656 	 of course love UNK i m UNK of course
-19.29511260986328 	 now of it i m like it it can you look at it again
-19.543359756469727 	 now of it i m glad it was an look !
-19.93739128112793 	 now of it i m gla

0

## Hyper-parameter tuning

In [25]:
# hyper-prrameter tuning
lamda_list = [0,0.2,0.4,0.6]  # gamma = 0
score_list = [[],[],[]]

for lamda in lamda_list:
    print(time.asctime( time.localtime(time.time()) ))
    print('lambda:\t', lamda)
    score_train,_ = evaluateCorpus(rnnEncoder, rnnDecoder,5, lamda, loader=trainLoader, display=0)
    score_deve,paths_deve = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=deveLoader, display=0)
    score_test,paths_test = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=testLoader, display=0)
    score_list[0].append(score_train)
    score_list[1].append(score_deve)
    score_list[2].append(score_test)
    print(score_train, score_deve, score_test)
    distinctEval(paths_deve)
    distinctEval(paths_test)

# score_list

Sun May 20 10:26:13 2018
lambda:	 0
0.5362942283961266 0.03836904777509308 0.03646343053341346
total length of response: 10962
distinct unigrams: 0.12324393358876118
distinct bigrams: 0.4154351395730706
total length of response: 10856
distinct unigrams: 0.12186809137803979
distinct bigrams: 0.4106484893146647
Sun May 20 10:43:18 2018
lambda:	 0.2
0.8723004851154688 0.03933534291751297 0.039010927313406565
total length of response: 13268
distinct unigrams: 0.10604461863129334
distinct bigrams: 0.40284895990352726
total length of response: 13255
distinct unigrams: 0.10818559034326669
distinct bigrams: 0.4055827989437948
Sun May 20 11:00:21 2018
lambda:	 0.4
0.8546722297900166 0.039246627489394934 0.03635357186474585
total length of response: 14644
distinct unigrams: 0.09990439770554493
distinct bigrams: 0.4164162797049986
total length of response: 14685
distinct unigrams: 0.10160027238678924
distinct bigrams: 0.40360912495743956
Sun May 20 11:16:40 2018
lambda:	 0.6
0.8686487922366823 0.

In [25]:
# hyper-prrameter tuning
lamda_list = [0.1,0.3]  # gamma = 0.0
score_list = [[],[],[]]

for lamda in lamda_list:
    print(time.asctime( time.localtime(time.time()) ))
    print('lambda:\t', lamda)
    score_train,_ = evaluateCorpus(rnnEncoder, rnnDecoder,5, lamda, loader=trainLoader, display=0)
    score_deve,paths_deve = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=deveLoader, display=0)
    score_test,paths_test = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=testLoader, display=0)
    score_list[0].append(score_train)
    score_list[1].append(score_deve)
    score_list[2].append(score_test)
    print(score_train, score_deve, score_test)
    distinctEval(paths_deve)
    distinctEval(paths_test)

# score_list

Sun May 20 16:52:18 2018
lambda:	 0.1
0.852224614722001 0.040576123123919484 0.03989233536631668
total length of response: 12413
distinct unigrams: 0.10859582695561106
distinct bigrams: 0.40248126963667125
total length of response: 12237
distinct unigrams: 0.10942224401405573
distinct bigrams: 0.4067990520552423
Sun May 20 17:10:19 2018
lambda:	 0.3
0.8579391756165962 0.03616957871798036 0.03882489577941158
total length of response: 14538
distinct unigrams: 0.1008391800797909
distinct bigrams: 0.4087907552620718
total length of response: 14715
distinct unigrams: 0.09901461094121644
distinct bigrams: 0.40706761807679237


In [28]:
# hyper-prrameter tuning
lamda_list = [0,0.2,0.4] # gamma = -0.2
score_list = [[],[],[]]

for lamda in lamda_list:
    print(time.asctime( time.localtime(time.time()) ))
    print('lambda:\t', lamda)
    score_train,_ = evaluateCorpus(rnnEncoder, rnnDecoder,5, lamda, loader=trainLoader, display=0)
    score_deve,paths_deve = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=deveLoader, display=0)
    score_test,paths_test = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=testLoader, display=0)
    score_list[0].append(score_train)
    score_list[1].append(score_deve)
    score_list[2].append(score_test)
    print(score_train, score_deve, score_test)
    distinctEval(paths_deve)
    distinctEval(paths_test)

# score_list

Sun May 20 14:08:33 2018
lambda:	 0
0.5180059415845227 0.03863957783386995 0.03834643839072519
total length of response: 10468
distinct unigrams: 0.12227741688956821
distinct bigrams: 0.40513947267863964
total length of response: 10329
distinct unigrams: 0.1140478265078904
distinct bigrams: 0.3913253945202827
Sun May 20 14:26:32 2018
lambda:	 0.2
0.846535550333517 0.04082734135081596 0.04208997728009065
total length of response: 12281
distinct unigrams: 0.1079716635453139
distinct bigrams: 0.40070026870775993
total length of response: 12237
distinct unigrams: 0.11048459589768735
distinct bigrams: 0.40786140393887393
Sun May 20 14:44:39 2018
lambda:	 0.4
0.8461584355910389 0.0388449260833696 0.0401199617052644
total length of response: 13552
distinct unigrams: 0.1016086186540732
distinct bigrams: 0.4042207792207792
total length of response: 13798
distinct unigrams: 0.09856500942165532
distinct bigrams: 0.3992607624293376


In [27]:
# hyper-prrameter tuning
lamda_list = [0,0.1,0.3] # gamma = -0.4
score_list = [[],[],[]]

for lamda in lamda_list:
    print(time.asctime( time.localtime(time.time()) ))
    print('lambda:\t', lamda)
    score_train,_ = evaluateCorpus(rnnEncoder, rnnDecoder,5, lamda, loader=trainLoader, display=0)
    score_deve,paths_deve = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=deveLoader, display=0)
    score_test,paths_test = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=testLoader, display=0)
    score_list[0].append(score_train)
    score_list[1].append(score_deve)
    score_list[2].append(score_test)
    print(score_train, score_deve, score_test)
    distinctEval(paths_deve)
    distinctEval(paths_test)

# score_list

Sun May 20 19:07:30 2018
lambda:	 0
0.4812283438774192 0.03668723277002475 0.03664434406796168
total length of response: 10175
distinct unigrams: 0.11921375921375921
distinct bigrams: 0.4048157248157248
total length of response: 10121
distinct unigrams: 0.11915818595000494
distinct bigrams: 0.3964035174389882
Sun May 20 19:24:11 2018
lambda:	 0.1
0.7663262347786298 0.04041385453447268 0.040433159341322335
total length of response: 10946
distinct unigrams: 0.10871551251598757
distinct bigrams: 0.3868079663804129
total length of response: 11000
distinct unigrams: 0.10718181818181818
distinct bigrams: 0.38736363636363635
Sun May 20 19:40:52 2018
lambda:	 0.3
0.8408575209564628 0.04122847407251038 0.04197858564967113
total length of response: 13069
distinct unigrams: 0.10306832963501415
distinct bigrams: 0.40477465758665543
total length of response: 13112
distinct unigrams: 0.10295912141549725
distinct bigrams: 0.4044386821232459


In [25]:
# hyper-prrameter tuning
lamda_list = [0.2,0.4] # gamma = -0.4
score_list = [[],[],[]]

for lamda in lamda_list:
    print(time.asctime( time.localtime(time.time()) ))
    print('lambda:\t', lamda)
    score_train,_ = evaluateCorpus(rnnEncoder, rnnDecoder,5, lamda, loader=trainLoader, display=0)
    score_deve,paths_deve = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=deveLoader, display=0)
    score_test,paths_test = evaluateCorpus(rnnEncoder, rnnDecoder, 5, lamda, loader=testLoader, display=0)
    score_list[0].append(score_train)
    score_list[1].append(score_deve)
    score_list[2].append(score_test)
    print(score_train, score_deve, score_test)
    distinctEval(paths_deve)
    distinctEval(paths_test)

# score_list

Sun May 20 21:06:41 2018
lambda:	 0.2
0.8291092543628275 0.04064549432381752 0.04014092327730664
total length of response: 12028
distinct unigrams: 0.10583638177585633
distinct bigrams: 0.3937479215164616
total length of response: 12051
distinct unigrams: 0.10970043979752718
distinct bigrams: 0.4005476723923326
Sun May 20 21:23:43 2018
lambda:	 0.4
0.8664403552459563 0.036505805755905216 0.04140470640171562
total length of response: 14277
distinct unigrams: 0.098550115570498
distinct bigrams: 0.40526721299992996
total length of response: 14288
distinct unigrams: 0.10029395296752519
distinct bigrams: 0.41097424412094063


In [None]:
# hyper-prrameter tuning
lamda_list = [0,0.1,0.2,0.4] # gamma = 0.4, N-best list = 128
score_list = [[],[],[]]

for lamda in lamda_list:
    print(time.asctime( time.localtime(time.time()) ))
    print('lambda:\t', lamda)
    score_train,_ = evaluateCorpus(rnnEncoder, rnnDecoder,10, lamda, loader=trainLoader, display=0)
    score_deve,paths_deve = evaluateCorpus(rnnEncoder, rnnDecoder, 10, lamda, loader=deveLoader, display=0)
    score_test,paths_test = evaluateCorpus(rnnEncoder, rnnDecoder, 10, lamda, loader=testLoader, display=0)
    score_list[0].append(score_train)
    score_list[1].append(score_deve)
    score_list[2].append(score_test)
    print(score_train, score_deve, score_test)
    distinctEval(paths_deve)
    distinctEval(paths_test)

# score_list

Mon May 21 00:27:26 2018
lambda:	 0
0.8748437645375782 0.03542231429482811 0.041545325818199495
total length of response: 15681
distinct unigrams: 0.11453351189337414
distinct bigrams: 0.41617243798227155
total length of response: 15711
distinct unigrams: 0.11297816816243396
distinct bigrams: 0.41696900260963654
Mon May 21 01:39:39 2018
lambda:	 0.1
0.886314003481408 0.03865351265139616 0.03854682356666949
total length of response: 16250
distinct unigrams: 0.10504615384615384
distinct bigrams: 0.4083076923076923
total length of response: 15929
distinct unigrams: 0.10408688555464875
distinct bigrams: 0.416347542218595
Mon May 21 02:51:34 2018
lambda:	 0.2
