In [9]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.init as weight_init
import numpy as np
from time import time
import data
import sys
from metrics import Metrics
import random
import matplotlib.pyplot as plt
%matplotlib inline
# Device every tensor is moved to by gData/gVar. Prefer the first CUDA device but
# fall back to the CPU so the notebook still runs on machines without a GPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


In [16]:
def config_HRED():
    """Return a fresh dictionary holding every hyper-parameter of the experiment.

    The settings are grouped into data, model and training sections and then
    merged into one flat dictionary (insertion order: data, model, training).
    """
    data_args = {
        'maxlen': 30,    # maximum utterance length
        'diaglen': 10,   # how many utterances are kept in the context window
    }

    model_args = {
        'emb_size': 200,       # size of word embeddings
        'n_hidden': 300,       # number of hidden units per layer
        'n_layers': 1,         # number of layers
        'noise_radius': 0.2,   # stdev of noise for autoencoder (regularizer)
        'z_size': 200,         # dimension of z (300 performs worse)
        'lambda_gp': 10,       # gradient penalty lambda hyperparameter
        'temp': 1.0,           # softmax temperature (lower --> more discrete)
        'dropout': 0.5,        # dropout applied to layers (0 = no dropout)
    }

    train_args = {
        'batch_size': 32,
        'epochs': 100,         # maximum number of epochs
        'min_epochs': 2,       # minimum number of epochs to train for
        'n_iters_d': 5,        # number of discriminator iterations in training
        'lr_ae': 1.0,          # autoencoder learning rate
        'lr_gan_g': 5e-05,     # generator learning rate
        'lr_gan_d': 1e-05,     # critic/discriminator learning rate
        'beta1': 0.9,          # beta1 for adam
        'clip': 1.0,           # gradient clipping, max norm
        'gan_clamp': 0.01,     # WGAN clamp (do not clamp when the gradient penalty is applied)
    }

    conf = {}
    for section in (data_args, model_args, train_args):
        conf.update(section)
    return conf

config = config_HRED()

# Seed every random number generator the notebook draws from. Seeding only
# Python's `random` leaves the NumPy and torch generators (weight
# initialisation, latent noise, multinomial sampling) unseeded.
SEED = 1111
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [11]:
def gData(data):
    """Return `data` placed on DEVICE.

    A NumPy array is first wrapped with `torch.from_numpy`; anything else is
    expected to already expose `.to(...)` (e.g. a tensor).
    """
    if isinstance(data, np.ndarray):
        return torch.from_numpy(data).to(DEVICE)
    return data.to(DEVICE)
def gVar(data):
    """Alias of `gData`: return `data` as a tensor on DEVICE."""
    on_device = gData(data)
    return on_device
def print_flush(data, args=None):
    """Print `data` (followed by `args` when given) and flush stdout immediately.

    Args:
        data: first object to print.
        args: optional second object printed after `data`; omitted when None.
    """
    # Identity check, not `== None`: equality is element-wise for NumPy arrays
    # and its result cannot be used as a condition.
    if args is None:
        print(data)
    else:
        print(data, args)
    sys.stdout.flush()
    
def indexes2sent(indexes, vocab, eos_tok, ignore_tok=0):
    """Turn token indexes back into space-joined sentences.

    Args:
        indexes: numpy array holding one sentence (1-D) or a batch of them.
        vocab: mapping whose inverse (value -> key) gives the token text of an index.
        eos_tok: index that ends a sentence; its token is kept in the output.
        ignore_tok: index that is skipped wherever it occurs.

    Returns:
        For a 1-D input a `(sentence, length)` pair; otherwise a pair of lists
        `(sentences, lengths)` with one entry per row. `length` counts the
        emitted tokens, the end-of-sentence token included.
    """
    id2word = {idx: word for word, idx in vocab.items()}

    def decode_row(row):
        words = []
        for tok in row:
            if tok == ignore_tok:
                continue
            words.append(id2word[tok])
            if tok == eos_tok:
                break
        return ' '.join(words), len(words)

    if indexes.ndim == 1:
        return decode_row(indexes)

    # Batch of sentences: decode each row and split the pairs into two lists.
    decoded = [decode_row(row) for row in indexes]
    sentences = [text for text, _ in decoded]
    lens = [count for _, count in decoded]
    return sentences, lens

In [23]:
class Encoder(nn.Module):
    """GRU encoder that maps a batch of sequences to one vector per sequence.

    Args:
        embedder: optional embedding module applied to the inputs first; pass
            None when the inputs are already feature vectors.
        input_size: feature size fed to the GRU.
        hidden_size: GRU hidden size per direction.
        bidirectional: whether the GRU runs in both directions (must be a bool).
        n_layers: number of stacked GRU layers.
        noise_radius: stored on the module; not used by `forward`.
    """

    def __init__(self, embedder, input_size, hidden_size, bidirectional, n_layers, noise_radius=0.2):
        super(Encoder, self).__init__()
        assert isinstance(bidirectional, bool)
        self.hidden_size = hidden_size
        self.noise_radius = noise_radius
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.embedding = embedder
        self.rnn = nn.GRU(input_size, hidden_size, n_layers, batch_first=True, bidirectional=bidirectional)
        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise every GRU weight matrix (biases are left alone)."""
        for w in self.rnn.parameters():
            if w.dim() > 1:
                weight_init.orthogonal_(w)

    def store_grad_norm(self, grad):
        """Record the mean row-wise L2 norm of `grad` in `self.grad_norm` and return `grad` unchanged."""
        norm = torch.norm(grad, 2, 1)
        self.grad_norm = norm.detach().mean()
        return grad

    def forward(self, inputs, input_lens=None, noise=False):
        """Encode `inputs`.

        Args:
            inputs: batch-first input; passed through `self.embedding` when one
                is set, after which it must be 3-D (batch, seq_len, features).
            input_lens: optional 1-D tensor of sequence lengths. When given the
                batch is sorted, packed and restored to the caller's order.
            noise: accepted for interface compatibility; currently ignored.

        Returns:
            (enc, hids): `enc` is the last layer's final hidden state with the
            directions concatenated, shape (batch, num_directions*hidden_size);
            `hids` is the GRU output for every time step.
        """
        if self.embedding is not None:
            inputs = self.embedding(inputs)

        batch_size, seq_len, emb_size = inputs.size()
        num_directions = 2 if self.bidirectional else 1
        # Allocate the initial state next to the inputs (same device and dtype)
        # so the module does not depend on a notebook-level device global.
        init_hidden = inputs.new_zeros(self.n_layers * num_directions, batch_size, self.hidden_size)

        if input_lens is not None:
            # pack_padded_sequence wants the batch ordered by decreasing length.
            input_lens_sorted, indices = input_lens.sort(descending=True)
            indices = indices.to(inputs.device)
            inputs_sorted = inputs.index_select(0, indices)
            inputs = pack_padded_sequence(inputs_sorted, input_lens_sorted.tolist(), batch_first=True)

        hids, h_n = self.rnn(inputs, init_hidden)

        if input_lens is not None:
            # Undo the length sort so rows line up with the caller's batch again.
            _, inv_indices = indices.sort()
            hids, _ = pad_packed_sequence(hids, batch_first=True)
            hids = hids.index_select(0, inv_indices)
            h_n = h_n.index_select(1, inv_indices)

        # Keep only the top layer, then lay the directions side by side per sample.
        h_n = h_n.view(self.n_layers, num_directions, batch_size, self.hidden_size)
        h_n = h_n[-1]
        enc = h_n.transpose(1, 0).contiguous().view(batch_size, -1)
        return enc, hids
    
class ContextEncoder(nn.Module):
    """Encodes a dialogue context (a sequence of utterances) into one vector.

    Every utterance is encoded by `utt_encoder`, a 2-way one-hot of its floor
    id is appended, and a single-layer GRU runs over the utterance sequence.

    Args:
        utt_encoder: module called as `utt_encoder(utts, utt_lens)`; the first
            element of its return value is used as the utterance encoding.
        input_size: size of one utterance encoding plus the 2 floor features.
        hidden_size: hidden size of the context GRU.
        n_layers: stored on the module; the GRU itself always has one layer.
        noise_radius: stored on the module; not used by `forward`.
    """

    def __init__(self, utt_encoder, input_size, hidden_size, n_layers=1, noise_radius=0.2):
        super(ContextEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.noise_radius = noise_radius
        self.n_layers = n_layers
        self.utt_encoder = utt_encoder
        self.rnn = nn.GRU(input_size, hidden_size, batch_first=True)
        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the GRU gate weight matrices."""
        for w in self.rnn.parameters():
            if w.dim() > 1:
                weight_init.orthogonal_(w)

    def store_grad_norm(self, grad):
        """Record the mean row-wise L2 norm of `grad` in `self.grad_norm` and return `grad` unchanged."""
        norm = torch.norm(grad, 2, 1)
        self.grad_norm = norm.detach().mean()
        return grad

    def forward(self, context, context_lens, utt_lens, floors, noise=False):
        """Encode a batch of contexts.

        Args:
            context: 3-D tensor (batch, max_context_len, max_utt_len).
            context_lens: 1-D tensor with the number of utterances per context.
            utt_lens: lengths of the individual utterances; flattened to 1-D here.
            floors: integer index tensor with values in {0, 1}, one per
                utterance, used as scatter indices for the one-hot.
            noise: accepted for interface compatibility; currently ignored.

        Returns:
            Tensor (batch, hidden_size): the GRU's final hidden state per context.
        """
        batch_size, max_context_len, max_utt_len = context.size()
        # reshape (not view) so a non-contiguous slice of a larger tensor also works.
        utts = context.reshape(-1, max_utt_len)
        utt_lens = utt_lens.reshape(-1)
        utt_encs, _ = self.utt_encoder(utts, utt_lens)
        utt_encs = utt_encs.view(batch_size, max_context_len, -1)

        # One-hot floor indicator, allocated next to the encodings instead of
        # on a notebook-level device global.
        floor_one_hot = utt_encs.new_zeros(floors.numel(), 2)
        floor_one_hot.scatter_(1, floors.reshape(-1, 1), 1)
        floor_one_hot = floor_one_hot.view(-1, max_context_len, 2)
        utt_floor_encs = torch.cat([utt_encs, floor_one_hot], 2)

        # pack_padded_sequence wants the batch ordered by decreasing length.
        context_lens_sorted, indices = context_lens.sort(descending=True)
        indices = indices.to(utt_floor_encs.device)
        utt_floor_encs = utt_floor_encs.index_select(0, indices)
        packed = pack_padded_sequence(utt_floor_encs, context_lens_sorted.tolist(), batch_first=True)

        init_hidden = utt_encs.new_zeros(1, batch_size, self.hidden_size)
        _, h_n = self.rnn(packed, init_hidden)

        # Undo the length sort so rows line up with the caller's batch again.
        _, inv_indices = indices.sort()
        h_n = h_n.index_select(1, inv_indices)
        enc = h_n.transpose(1, 0).contiguous().view(batch_size, -1)
        return enc
class Variation(nn.Module):
    """Maps a conditioning vector to a reparameterised Gaussian sample.

    A two-layer MLP (Linear -> BatchNorm -> Tanh, twice) feeds two linear
    heads producing `mu` and `logsigma`; the sample is
    `mu + epsilon * exp(0.5 * logsigma)` with standard-normal `epsilon`.

    Args:
        input_size: size of the conditioning vector.
        z_size: size of the hidden layers and of the sample.
    """

    def __init__(self, input_size, z_size):
        super(Variation, self).__init__()
        self.input_size = input_size
        self.z_size = z_size
        self.fc = nn.Sequential(
            nn.Linear(input_size, z_size),
            nn.BatchNorm1d(z_size, eps=1e-05, momentum=0.1),
            nn.Tanh(),
            nn.Linear(z_size, z_size),
            nn.BatchNorm1d(z_size, eps=1e-05, momentum=0.1),
            nn.Tanh(),
        )
        self.context_to_mu = nn.Linear(z_size, z_size)
        self.context_to_logsigma = nn.Linear(z_size, z_size)

        self.fc.apply(self.init_weights)
        self.init_weights(self.context_to_mu)
        self.init_weights(self.context_to_logsigma)

    def init_weights(self, m):
        """Initialise a Linear layer: weights uniform in [-0.02, 0.02], bias zero."""
        if isinstance(m, nn.Linear):
            m.weight.data.uniform_(-0.02, 0.02)
            m.bias.data.fill_(0)

    def forward(self, context):
        """Sample from the Gaussian conditioned on `context`.

        Args:
            context: 2-D tensor (batch, input_size).

        Returns:
            (z, mu, logsigma), each of shape (batch, z_size).
        """
        batch_size, _ = context.size()
        context = self.fc(context)
        mu = self.context_to_mu(context)
        logsigma = self.context_to_logsigma(context)
        # The 0.5 factor means `logsigma` is used as a log-variance here.
        std = torch.exp(0.5 * logsigma)

        # Draw the noise directly on mu's device/dtype rather than on the CPU
        # followed by a move to a notebook-level device global.
        epsilon = torch.randn(batch_size, self.z_size, dtype=mu.dtype, device=mu.device)
        z = epsilon * std + mu
        return z, mu, logsigma
    
class Decoder(nn.Module):
    """Single-layer GRU decoder with a linear projection onto the vocabulary.

    Args:
        embedder: optional embedding module applied to token ids.
        input_size: feature size fed to the GRU at every step.
        hidden_size: GRU hidden size (also the size of the initial state).
        vocab_size: number of output classes.
        n_layers: stored on the module; the GRU itself always has one layer.
    """

    def __init__(self, embedder, input_size, hidden_size, vocab_size, n_layers=1):
        super(Decoder, self).__init__()
        self.n_layers = n_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding = embedder
        self.rnn = nn.GRU(input_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, vocab_size)
        self.init_weights()

    def init_weights(self):
        """Orthogonal GRU weight matrices; output layer uniform in [-0.1, 0.1] with zero bias."""
        initrange = 0.1
        for w in self.rnn.parameters():
            if w.dim() > 1:
                weight_init.orthogonal_(w)
        self.out.weight.data.uniform_(-initrange, initrange)
        self.out.bias.data.fill_(0)

    def _step_input(self, tokens, context):
        """Embed `tokens` (batch, steps) and append `context` to every step when given."""
        step = self.embedding(tokens) if self.embedding is not None else tokens
        if context is not None:
            step = torch.cat([step, context.unsqueeze(1)], 2)
        return step

    def forward(self, init_hidden, context=None, inputs=None, lens=None):
        """Teacher-forced decoding.

        Args:
            init_hidden: tensor (batch, hidden_size) used as the initial GRU state.
            context: optional tensor (batch, ctx_size) concatenated to every
                input step.
            inputs: required 2-D tensor (batch, maxlen) of input tokens, despite
                the None default.
            lens: accepted but not used.

        Returns:
            Tensor (batch, maxlen, vocab_size) of unnormalised scores.
        """
        batch_size, maxlen = inputs.size()
        if self.embedding is not None:
            inputs = self.embedding(inputs)
        if context is not None:
            repeated_context = context.unsqueeze(1).repeat(1, maxlen, 1)
            inputs = torch.cat([inputs, repeated_context], 2)
        hids, h_n = self.rnn(inputs, init_hidden.unsqueeze(0))
        # Flatten the time axis before the linear projection over the vocabulary.
        decoded = self.out(hids.contiguous().view(-1, self.hidden_size))
        decoded = decoded.view(batch_size, maxlen, self.vocab_size)
        return decoded

    def sampling(self, init_hidden, context, maxlen, SOS_tok, EOS_tok, mode='greedy'):
        """Generate `maxlen` tokens per sample, feeding each prediction back in.

        Args:
            init_hidden: tensor (batch, hidden_size) used as the initial GRU state.
            context: optional tensor (batch, ctx_size) appended to every step input.
            maxlen: number of decoding steps; decoding does not stop early.
            SOS_tok: token id fed at the first step.
            EOS_tok: token id that terminates a sample when computing lengths.
            mode: 'greedy' (arg-max) or 'sample' (multinomial over the softmax).

        Returns:
            (decoded_words, sample_lens): integer numpy arrays of shape
            (batch, maxlen) and (batch,). `sample_lens[i]` counts the tokens
            before the first EOS_tok in row i (maxlen if there is none).

        Raises:
            ValueError: if `mode` is neither 'greedy' nor 'sample'.
        """
        if mode not in ('greedy', 'sample'):
            raise ValueError("mode must be 'greedy' or 'sample', got %r" % (mode,))

        batch_size = init_hidden.size(0)
        # Builtin int as dtype: the `np.int` alias no longer exists in current NumPy.
        decoded_words = np.zeros((batch_size, maxlen), dtype=int)
        sample_lens = np.zeros(batch_size, dtype=int)

        # The outputs are numpy arrays, so no autograd graph is ever needed.
        with torch.no_grad():
            sos = torch.full((batch_size, 1), SOS_tok, dtype=torch.long, device=init_hidden.device)
            decoder_input = self._step_input(sos, context)
            decoder_hidden = init_hidden.unsqueeze(0).contiguous()
            for di in range(maxlen):
                decoder_output, decoder_hidden = self.rnn(decoder_input, decoder_hidden)
                logits = self.out(decoder_output)[:, -1]
                if mode == 'greedy':
                    topi = logits.max(1, keepdim=True)[1]
                else:
                    topi = torch.multinomial(F.softmax(logits, dim=1), 1)
                decoder_input = self._step_input(topi, context)
                # squeeze(1) keeps the batch axis even when batch_size == 1.
                decoded_words[:, di] = topi.squeeze(1).cpu().numpy()

        for i in range(batch_size):
            eos_positions = np.flatnonzero(decoded_words[i] == EOS_tok)
            sample_lens[i] = eos_positions[0] if eos_positions.size else maxlen
        return decoded_words, sample_lens

In [24]:
# Gradient seeds passed to `Tensor.backward(...)` by the adversarial training
# steps: `one` keeps the sign of the quantity being differentiated, `minus_one`
# flips it. Both are one-element float tensors living on DEVICE.
one = gData(torch.FloatTensor([1]))
minus_one = -one
class DialogWAE(nn.Module):
    """Conditional dialogue model trained with three alternating objectives.

    Components (all built in `__init__`):
      * `utt_encoder` / `context_encoder`: encode the response and the context.
      * `prior_net` + `prior_generator`: latent code from the context alone.
      * `post_net` + `post_generator`: latent code from response and context.
      * `decoder`: generates the response from `[z, c]`.
      * `discriminator`: critic scoring `[z, c]` pairs.

    `train_AE` optimises the reconstruction loss, `train_D` the critic (with a
    gradient penalty) and `train_G` the two latent-code generators.

    Args:
        config: dictionary providing 'maxlen', 'clip', 'lambda_gp', 'temp',
            'emb_size', 'n_hidden', 'n_layers', 'noise_radius', 'z_size',
            'lr_ae', 'lr_gan_g' and 'lr_gan_d'.
        vocab_size: number of tokens in the vocabulary.
        PAD_token: padding index given to the embedding layer.
    """

    def __init__(self, config, vocab_size, PAD_token=0):
        super(DialogWAE, self).__init__()
        self.vocab_size = vocab_size
        self.maxlen = config['maxlen']
        self.clip = config['clip']
        self.lambda_gp = config['lambda_gp']
        self.temp = config['temp']

        self.embedder = nn.Embedding(vocab_size, config['emb_size'], padding_idx=PAD_token)
        self.utt_encoder = Encoder(self.embedder, config['emb_size'], config['n_hidden'],
                                   True, config['n_layers'], config['noise_radius'])
        # Context GRU input = bidirectional utterance encoding (2*n_hidden) + 2 floor features.
        self.context_encoder = ContextEncoder(self.utt_encoder, config['n_hidden']*2+2, config['n_hidden'], 1, config['noise_radius'])
        self.prior_net = Variation(config['n_hidden'], config['z_size'])  # p(e|c)
        self.post_net = Variation(config['n_hidden']*3, config['z_size'])  # q(e|c,x)

        self.post_generator = nn.Sequential(
            nn.Linear(config['z_size'], config['z_size']),
            nn.BatchNorm1d(config['z_size'], eps=1e-05, momentum=0.1),
            nn.ReLU(),
            nn.Linear(config['z_size'], config['z_size']),
            nn.BatchNorm1d(config['z_size'], eps=1e-05, momentum=0.1),
            nn.ReLU(),
            nn.Linear(config['z_size'], config['z_size'])
        )
        self.post_generator.apply(self.init_weights)

        self.prior_generator = nn.Sequential(
            nn.Linear(config['z_size'], config['z_size']),
            nn.BatchNorm1d(config['z_size'], eps=1e-05, momentum=0.1),
            nn.ReLU(),
            nn.Linear(config['z_size'], config['z_size']),
            nn.BatchNorm1d(config['z_size'], eps=1e-05, momentum=0.1),
            nn.ReLU(),
            nn.Linear(config['z_size'], config['z_size'])
        )
        self.prior_generator.apply(self.init_weights)

        # The decoder's initial hidden state is [z, c], hence n_hidden + z_size.
        self.decoder = Decoder(self.embedder, config['emb_size'], config['n_hidden']+config['z_size'],
                               vocab_size, n_layers=1)

        self.discriminator = nn.Sequential(
            nn.Linear(config['n_hidden']+config['z_size'], config['n_hidden']*2),
            nn.BatchNorm1d(config['n_hidden']*2, eps=1e-05, momentum=0.1),
            nn.LeakyReLU(0.2),
            nn.Linear(config['n_hidden']*2, config['n_hidden']*2),
            nn.BatchNorm1d(config['n_hidden']*2, eps=1e-05, momentum=0.1),
            nn.LeakyReLU(0.2),
            nn.Linear(config['n_hidden']*2, 1),
        )
        self.discriminator.apply(self.init_weights)

        # NOTE(review): the shared embedder is reachable through both
        # context_encoder and decoder, so its weight appears twice in this
        # parameter list. Left as is to preserve training behaviour; confirm
        # whether the duplicate is intended.
        self.optimizer_AE = optim.SGD(list(self.context_encoder.parameters())
                                      +list(self.post_net.parameters())
                                      +list(self.post_generator.parameters())
                                      +list(self.decoder.parameters()),lr=config['lr_ae'])
        self.optimizer_G = optim.RMSprop(list(self.post_net.parameters())
                                      +list(self.post_generator.parameters())
                                      +list(self.prior_net.parameters())
                                      +list(self.prior_generator.parameters()), lr=config['lr_gan_g'])
        self.optimizer_D = optim.RMSprop(self.discriminator.parameters(), lr=config['lr_gan_d'])

        self.lr_scheduler_AE = optim.lr_scheduler.StepLR(self.optimizer_AE, step_size = 10, gamma=0.6)

        self.criterion_ce = nn.CrossEntropyLoss()

    def init_weights(self, m):
        """Initialise a Linear layer: weights uniform in [-0.02, 0.02], bias zero."""
        if isinstance(m, nn.Linear):
            m.weight.data.uniform_(-0.02, 0.02)
            m.bias.data.fill_(0)

    def sample_code_post(self, x, c):
        """Latent code from the response encoding `x` and context encoding `c`."""
        e, _, _ = self.post_net(torch.cat((x, c),1))
        z = self.post_generator(e)
        return z

    def sample_code_prior(self, c):
        """Latent code from the context encoding `c` alone."""
        e, _, _ = self.prior_net(c)
        z = self.prior_generator(e)
        return z

    def _reconstruction_loss(self, output, response):
        """Cross-entropy between decoder scores and `response[:, 1:]`, skipping targets equal to 0."""
        flattened_output = output.view(-1, self.vocab_size)
        dec_target = response[:,1:].contiguous().view(-1)
        mask = dec_target.gt(0)  # [(batch_sz*seq_len)]
        masked_target = dec_target.masked_select(mask)
        output_mask = mask.unsqueeze(1).expand(mask.size(0), self.vocab_size)  # [(batch_sz*seq_len) x n_tokens]
        masked_output = flattened_output.masked_select(output_mask).view(-1, self.vocab_size)
        return self.criterion_ce(masked_output/self.temp, masked_target)

    def train_AE(self, context, context_lens, utt_lens, floors, response, res_lens):
        """One reconstruction step on context encoder, posterior networks and decoder.

        Returns:
            [('train_loss_AE', float)]
        """
        self.context_encoder.train()
        self.decoder.train()
        c = self.context_encoder(context, context_lens, utt_lens, floors)
        # The response is encoded without its first token and decoded from all
        # but its last one (teacher forcing).
        x,_ = self.utt_encoder(response[:,1:], res_lens-1)
        z = self.sample_code_post(x, c)
        output = self.decoder(torch.cat((z, c),1), None, response[:,:-1], (res_lens-1))

        self.optimizer_AE.zero_grad()
        loss = self._reconstruction_loss(output, response)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(list(self.context_encoder.parameters())+list(self.decoder.parameters()), self.clip)
        self.optimizer_AE.step()

        return [('train_loss_AE', loss.item())]

    def train_G(self, context, context_lens, utt_lens, floors, response, res_lens):
        """One step on the prior/posterior code generators against the frozen critic.

        Minimises `critic(prior) - critic(posterior)`.

        Returns:
            [('train_loss_G', float)]
        """
        self.context_encoder.eval()
        self.optimizer_G.zero_grad()

        for p in self.discriminator.parameters():
            p.requires_grad = False
        try:
            # The encoders are not updated here, so skip building their graph.
            with torch.no_grad():
                c = self.context_encoder(context, context_lens, utt_lens, floors)
                x,_ = self.utt_encoder(response[:,1:], res_lens-1)

            # ----------------- posterior samples ---------------------------
            z_post = self.sample_code_post(x, c)
            errG_post = torch.mean(self.discriminator(torch.cat((z_post, c),1)))
            # Negate the scalar instead of passing a shape-[1] gradient seed,
            # which current PyTorch rejects for a 0-dim loss.
            (-errG_post).backward()

            # ----------------- prior samples ---------------------------
            prior_z = self.sample_code_prior(c)
            errG_prior = torch.mean(self.discriminator(torch.cat((prior_z, c),1)))
            errG_prior.backward()

            self.optimizer_G.step()
        finally:
            # Always unfreeze the critic, even if the step above raised.
            for p in self.discriminator.parameters():
                p.requires_grad = True

        costG = errG_prior - errG_post
        return [('train_loss_G', costG.item())]

    def train_D(self, context, context_lens, utt_lens, floors, response, res_lens):
        """One critic step: `critic(posterior) - critic(prior)` plus a gradient penalty.

        Returns:
            [('train_loss_D', float)]
        """
        self.context_encoder.eval()
        self.discriminator.train()

        self.optimizer_D.zero_grad()

        batch_size = context.size(0)

        # Only the critic is updated, so nothing upstream needs a graph.
        with torch.no_grad():
            c = self.context_encoder(context, context_lens, utt_lens, floors)
            x,_ = self.utt_encoder(response[:,1:], res_lens-1)
            post_z = self.sample_code_post(x, c)
            prior_z = self.sample_code_prior(c)

        errD_post = torch.mean(self.discriminator(torch.cat((post_z, c),1)))
        errD_post.backward()

        errD_prior = torch.mean(self.discriminator(torch.cat((prior_z, c),1)))
        (-errD_prior).backward()

        # Gradient penalty on random interpolations between prior and posterior codes.
        alpha = torch.rand(batch_size, 1, dtype=prior_z.dtype, device=prior_z.device)
        alpha = alpha.expand(prior_z.size())
        interpolates = (alpha * prior_z + ((1 - alpha) * post_z)).requires_grad_(True)
        d_input = torch.cat((interpolates, c),1)
        # NOTE(review): the critic output is averaged over the batch before
        # differentiating, so each row of `gradients` carries a 1/batch_size
        # factor. Kept unchanged to preserve training behaviour; confirm this
        # is the intended penalty.
        disc_interpolates = torch.mean(self.discriminator(d_input))
        gradients = torch.autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                        grad_outputs=torch.ones_like(disc_interpolates),
                                        create_graph=True, retain_graph=True, only_inputs=True)[0]
        gradient_penalty = ((gradients.contiguous().view(gradients.size(0),-1).norm(2,dim=1)-1)**2).mean()*self.lambda_gp
        gradient_penalty.backward()

        self.optimizer_D.step()
        costD = -(errD_prior - errD_post) + gradient_penalty
        return [('train_loss_D', costD.item())]

    def valid(self, context, context_lens, utt_lens, floors, response, res_lens):
        """Compute the reconstruction, generator and critic losses without updating anything.

        Returns:
            [('valid_loss_AE', float), ('valid_loss_G', float), ('valid_loss_D', float)]
        """
        self.context_encoder.eval()
        self.discriminator.eval()
        self.decoder.eval()
        # NOTE(review): post_net, prior_net and the two generators are not
        # switched to eval mode here; their BatchNorm layers use whatever mode
        # the caller left them in.

        with torch.no_grad():
            c = self.context_encoder(context, context_lens, utt_lens, floors)
            x,_ = self.utt_encoder(response[:,1:], res_lens-1)
            post_z = self.sample_code_post(x, c)
            prior_z = self.sample_code_prior(c)
            errD_post = torch.mean(self.discriminator(torch.cat((post_z, c),1)))
            errD_prior = torch.mean(self.discriminator(torch.cat((prior_z, c),1)))
            costD = -(errD_prior - errD_post)
            costG = -costD

            output = self.decoder(torch.cat((post_z, c),1), None, response[:,:-1], (res_lens-1))
            lossAE = self._reconstruction_loss(output, response)
        return [('valid_loss_AE', lossAE.item()),('valid_loss_G', costG.item()), ('valid_loss_D', costD.item())]

    def sample(self, context, context_lens, utt_lens, floors, repeat, SOS_tok, EOS_tok):
        """Greedily decode a response for each context using the prior code.

        `repeat` is accepted but not used.

        Returns:
            (sample_words, sample_lens) as produced by `Decoder.sampling`.
        """
        self.prior_net.eval()
        self.prior_generator.eval()
        self.context_encoder.eval()
        self.decoder.eval()

        with torch.no_grad():
            c = self.context_encoder(context, context_lens, utt_lens, floors)
            prior_z = self.sample_code_prior(c)
            sample_words, sample_lens = self.decoder.sampling(torch.cat((prior_z, c),1),
                                                              None, self.maxlen, SOS_tok, EOS_tok, "greedy")
        return sample_words, sample_lens

    def adjust_lr(self):
        """Advance the step-decay schedule of the autoencoder optimizer."""
        self.lr_scheduler_AE.step()

In [19]:
# Load the SWDA corpus with the GloVe Twitter vectors and build one data loader per split.
corpus = data.SWDACorpus('../datasets/SWDA/', wordvec_path='../datasets/'+'glove.twitter.27B.200d.txt', wordvec_dim=config['emb_size'])
dials, metas = corpus.get_dialogs(), corpus.get_metas()
# Careful: the notebook names are swapped relative to the corpus attributes
# (`vocab` holds corpus.ivocab and `ivocab` holds corpus.vocab).
vocab, ivocab = corpus.ivocab, corpus.vocab
n_tokens = len(ivocab)
train_dial, valid_dial, test_dial = [dials.get(split) for split in ("train", "valid", "test")]
train_meta, valid_meta, test_meta = [metas.get(split) for split in ("train", "valid", "test")]
train_loader = data.SWDADataLoader("Train", train_dial, train_meta, config['maxlen'])
valid_loader = data.SWDADataLoader("Valid", valid_dial, valid_meta, config['maxlen'])
test_loader = data.SWDADataLoader("Test", test_dial, test_meta, config['maxlen'])


Max utt len 96, mean utt len 14.69
Max utt len 75, mean utt len 15.06
Max utt len 74, mean utt len 15.39
Load corpus with train size 3, valid size 3, test size 3 raw vocab size 24497 vocab size 10000 at cut_off 4 OOV rate 0.008035
<d> index 143
<sil> index -1
67 topics in train data
['statement-non-opinion', 'acknowledge_(backchannel)', 'statement-opinion', 'abandoned_or_turn-exit/uninterpretable', 'yes-no-question', 'agree/accept', 'appreciation', 'wh-question', 'backchannel_in_question_form', 'yes_answers', 'conventional-closing', 'response_acknowledgement', 'open-question', 'no_answers', 'affirmative_non-yes_answers', 'declarative_yes-no-question', 'summarize/reformulate', 'other', 'action-directive', 'rhetorical-questions', 'conventional-opening', 'collaborative_completion', 'signal-non-understanding', 'or-clause', 'hold_before_answer/agreement', 'quotation', 'negative_non-no_answers', 'self-talk', 'apology', 'dispreferred_answers', 'offers,_options_commits', 'other_answers', 'reje

In [6]:
# Load the DailyDial corpus with the GloVe Twitter vectors and build one data loader per split.
# Every name assigned here is also assigned by the SWDA loading cell; whichever cell runs last wins.
corpus = data.DailyDialCorpus('../datasets/DailyDial/', wordvec_path='../datasets/'+'glove.twitter.27B.200d.txt', wordvec_dim=config['emb_size'])
dials, metas = corpus.get_dialogs(), corpus.get_metas()
# Careful: the notebook names are swapped relative to the corpus attributes
# (`vocab` holds corpus.ivocab and `ivocab` holds corpus.vocab).
vocab, ivocab = corpus.ivocab, corpus.vocab
n_tokens = len(ivocab)
train_dial, valid_dial, test_dial = [dials.get(split) for split in ("train", "valid", "test")]
train_meta, valid_meta, test_meta = [metas.get(split) for split in ("train", "valid", "test")]
train_loader = data.DailyDialDataLoader("Train", train_dial, train_meta, config['maxlen'])
valid_loader = data.DailyDialDataLoader("Valid", valid_dial, valid_meta, config['maxlen'])
test_loader = data.DailyDialDataLoader("Test", test_dial, test_meta, config['maxlen'])


Max utt len 296, mean utt len 16.48
Max utt len 174, mean utt len 16.37
Max utt len 214, mean utt len 16.68
Load corpus with train size 2, valid size 2, test size 2 raw vocab size 17716 vocab size 10000 at cut_off 2 OOV rate 0.006757
<d> index 21
<sil> index -1
word2vec cannot cover 0.032194 vocab
Done loading corpus
Max len 36 and min len 3 and avg len 8.840439
Max len 32 and min len 3 and avg len 9.069000
Max len 27 and min len 3 and avg len 8.740000


In [7]:
# Uses the same dataset size as dialog_doublegan.
def valid_small(valid_loader):
    """Return the mean per-batch loss of the global `model` over a capped validation pass.

    Batches are pulled from `valid_loader.next_batch()` until it returns None
    or the running counter (advanced by 20 per batch) reaches 1500.

    NOTE(review): this relies on the notebook globals `model` and `criterion`
    and on `model(...)` returning `(target, outputs)`; neither global is
    defined in the cells visible here. Confirm before use.

    Args:
        valid_loader: loader exposing `next_batch()`.

    Returns:
        The average of `criterion(outputs, target)` over the evaluated
        batches, or NaN when no batch was evaluated.
    """
    model.eval()
    total_loss = 0.0
    total_valid_batch = 0
    valid_count = 0
    with torch.no_grad():
        while True:
            batch = valid_loader.next_batch()
            if batch is None or total_valid_batch >= 1500: # end of epoch
                break
            total_valid_batch += 20
            valid_count += 1
            context, context_lens, utt_lens, floors,_,_,_,response,res_lens,_ = batch
            context, utt_lens = context[:,:,1:], utt_lens-1 # remove the sos token in the context and reduce the context length
            context, context_lens, utt_lens, floors, response, res_lens\
                    = gVar(context), gVar(context_lens), gVar(utt_lens), gData(floors), gVar(response), gVar(res_lens)
            target, outputs = model(context, context_lens, utt_lens, floors, response, res_lens)
            loss_batch = criterion(outputs, target)
            total_loss += float(loss_batch.item())
    # An empty loader would otherwise divide by zero.
    if valid_count == 0:
        return float('nan')
    return total_loss / valid_count
    
def sample(context, context_lens, utt_lens, floors, repeat, SOS_tok, EOS_tok):
    """Greedily decode a response for each context with the global `model`.

    The context encoding is used directly as the decoder's initial hidden
    state; `repeat` is accepted but not used.

    NOTE(review): relies on the notebook globals `model` and `config`.

    Returns:
        (sample_words, sample_lens) as produced by `model.decoder.sampling`.
    """
    model.eval()
    context_code = model.context_encoder(context, context_lens, utt_lens, floors)
    words, lengths = model.decoder.sampling(
        context_code, None, config['maxlen'], SOS_tok, EOS_tok, "greedy"
    )
    return words, lengths

def evaluate(model, metrics, test_loader, vocab, ivocab, f_eval, repeat):
    """Generate a response for every test dialogue and report automatic metrics.

    The loader is re-initialised with batch size 1 (no shuffling). For each
    dialogue the last few context utterances, the reference response and the
    generated response(s) are written to ``f_eval``; every 300th dialogue is
    also echoed to stdout. BLEU and bag-of-words scores are averaged over the
    dialogues, while the distinct-n scores are computed once over the first
    generated response of every dialogue.

    Relies on the notebook-level ``config``, ``indexes2sent``, ``gVar`` and
    ``gData``. The model is switched to eval mode and left in that mode.

    Args:
        model: object exposing ``eval()`` and
            ``sample(context, context_lens, utt_lens, floors, repeat, sos, eos)``.
        metrics: object exposing ``sim_bleu``, ``sim_bleu1_4``, ``sim_bow`` and
            ``div_distinct``.
        test_loader: loader exposing ``epoch_init`` and ``next_batch``.
        vocab: mapping from token to index; must contain ``"<s>"`` and ``"</s>"``.
        ivocab: unused; kept so existing callers keep working.
        f_eval: writable text file object; it is not closed here.
        repeat: forwarded unchanged to ``model.sample``.

    Returns:
        tuple: ``(recall_bleu, bow_extrema, bow_avg, bow_greedy, inter_dist1,
        inter_dist2, avg_len)``.
    """
    recall_bleus, bleu1_4s = [], []
    bows_extrema, bows_avg, bows_greedy = [], [], []
    avg_lens = []
    # First generated response of every dialogue, kept for the distinct-n scores.
    all_generated_sentences = []
    all_generated_lens = []

    sos_tok, eos_tok = vocab["<s>"], vocab["</s>"]
    # Same inference setup as valid_small/sample: eval mode, no autograd graph.
    model.eval()
    test_loader.epoch_init(1, config['diaglen'], 1, shuffle=False)
    begin_time = time()
    batch_idx = 0
    while True:
        batch = test_loader.next_batch()
        if batch is None:
            break
        batch_idx += 1
        context, context_lens, utt_lens, floors, _, _, _, response, res_lens, _ = batch
        # Remove the SOS token from the context and shorten the lengths to match.
        context, utt_lens = context[:, :, 1:], utt_lens - 1

        # Log at most the last five context utterances of the dialogue.
        f_eval.write("Batch %d \n" % (batch_idx))
        start = np.maximum(0, context_lens[0]-5)
        for t_id in range(start, context.shape[1], 1):
            # indexes2sent returns a (sentence, length) pair; only the text is
            # logged (previously the whole pair was formatted into the file).
            context_str, _ = indexes2sent(context[0, t_id], vocab, eos_tok, 0)
            f_eval.write("Context %d-%d: %s\n" % (t_id, floors[0, t_id], context_str))

        # Log the reference response.
        ref_str, _ = indexes2sent(response[0], vocab, eos_tok, sos_tok)
        ref_tokens = ref_str.split(' ')
        f_eval.write("Target >> %s\n" % (ref_str.replace(" ' ", "'")))

        context, context_lens, utt_lens, floors = gVar(context), gVar(context_lens), gVar(utt_lens), gData(floors)
        with torch.no_grad():
            sample_words, sample_lens = model.sample(
                context, context_lens, utt_lens, floors, repeat, sos_tok, eos_tok)
        # Store the generated reply so diversity can be computed over the whole set.
        all_generated_sentences.append(sample_words[0].tolist())
        all_generated_lens.append(sample_lens[0].tolist())

        # sample_words is presumably a numpy array of shape [repeat x seq_len] -- TODO confirm
        pred_sents, _ = indexes2sent(sample_words, vocab, eos_tok, 0)
        if batch_idx % 300 == 0:
            print('true response: ', ref_str)
            print('generate response: ', pred_sents[0])
        pred_tokens = [sent.split(' ') for sent in pred_sents]
        for pred_sent in pred_sents:
            f_eval.write("Generate >> %s\n" % (pred_sent.replace(" ' ", "'")))

        max_bleu, _ = metrics.sim_bleu(pred_tokens, ref_tokens)
        recall_bleus.append(max_bleu)
        bleu1_4s.append(metrics.sim_bleu1_4(pred_tokens[0], ref_tokens))
        bow_extrema, bow_avg, bow_greedy = metrics.sim_bow(sample_words, sample_lens, response[:,1:], res_lens-2)
        bows_extrema.append(bow_extrema)
        bows_avg.append(bow_avg)
        bows_greedy.append(bow_greedy)
        avg_lens.append(np.mean(sample_lens))
        f_eval.write("\n")

    recall_bleu = float(np.mean(recall_bleus))
    bleu1_4 = np.mean(bleu1_4s, 0)
    bow_extrema = float(np.mean(bows_extrema))
    bow_avg = float(np.mean(bows_avg))
    bow_greedy = float(np.mean(bows_greedy))
    avg_len = float(np.mean(avg_lens))
    # Only the inter-dialogue distinct scores are reported; the intra scores
    # returned alongside them are ignored.
    _, _, inter_dist1, inter_dist2 = metrics.div_distinct(
        np.array(all_generated_sentences), np.array(all_generated_lens))

    report = "BLEU1 %f, BLEU2 %f, BLEU3 %f, BLEU4 %f, inter_dist1 %f, inter_dist2 %f avg_len %f" % (bleu1_4[0], bleu1_4[1], bleu1_4[2], bleu1_4[3], inter_dist1, inter_dist2, avg_len)
    f_eval.write(report + "\n")
    print(report)
    print(' time: %.1f s'%(time()-begin_time))
    print("Done testing")
    return recall_bleu, bow_extrema, bow_avg, bow_greedy, inter_dist1, inter_dist2, avg_len

In [22]:
# Training driver: build the model, then for every epoch alternate
# autoencoder / generator / critic updates and, after a warm-up period,
# evaluate on the test set.
metrics = Metrics(corpus.word2vec)
model = DialogWAE(config, n_tokens)
if corpus.word2vec is not None:
    print("Loaded word2vec")
    model.embedder.weight.data.copy_(torch.from_numpy(corpus.word2vec))
    # Zero embedding row 0 (presumably the padding index -- TODO confirm).
    model.embedder.weight.data[0].fill_(0)
model.to(DEVICE)
model.zero_grad()

print_every = 50       # log running losses every this many outer iterations
eval_after_epoch = 20  # test-set evaluation only runs for epochs beyond this one
# Placeholders for best-checkpoint tracking; validation-based checkpointing and
# early stopping are currently disabled, so nothing in this cell updates them.
best_state = None
max_metric = 0


def _prepare_batch(batch):
    """Unpack a loader batch, drop the context SOS token and move it to DEVICE.

    Returns the six inputs in the order expected by ``model.train_AE``,
    ``model.train_G`` and ``model.train_D``.
    """
    context, context_lens, utt_lens, floors, _, _, _, response, res_lens, _ = batch
    # Remove the SOS token from the context and shorten the lengths to match.
    context, utt_lens = context[:, :, 1:], utt_lens - 1
    return (gVar(context), gVar(context_lens), gVar(utt_lens),
            gData(floors), gVar(response), gVar(res_lens))


for epoch in range(config['epochs']):
    print('Epoch: ', epoch+1)
    train_loader.epoch_init(config['batch_size'], config['diaglen'], 1, shuffle=True)
    loss_ae = 0.0
    loss_g = 0.0
    loss_d = 0.0
    batch_count = 0
    batch_begin_time = time()
    while True:
        # Evaluation may have left the model in eval mode.
        model.train()
        batch = train_loader.next_batch()
        if batch is None:  # end of epoch
            break
        batch_inputs = _prepare_batch(batch)
        # NOTE(review): each train_* call appears to return a list of
        # (name, value) pairs whose first value is the loss -- confirm.
        loss_1 = model.train_AE(*batch_inputs)
        loss_ae += float(loss_1[0][1])
        loss_2 = model.train_G(*batch_inputs)
        loss_g += float(loss_2[0][1])

        # Train the discriminator/critic n_iters_d times; the first step reuses
        # the current batch and every later step draws a fresh one.
        for i in range(config['n_iters_d']):
            loss_3 = model.train_D(*batch_inputs)
            if i == 0:
                loss_d += float(loss_3[0][1])
            if i == config['n_iters_d']-1:
                break
            batch = train_loader.next_batch()
            if batch is None:  # end of epoch
                break
            batch_inputs = _prepare_batch(batch)

        batch_count += 1
        if batch_count % print_every == 0:
            print_flush('[%d %d] loss_ae: %.6f loss_g: %.6f loss_d: %.6f time: %.1f s' %
                  (epoch + 1, batch_count, np.exp(loss_ae / print_every), loss_g / print_every,
                    loss_d / print_every, time() - batch_begin_time))
            loss_ae = 0.0
            loss_g = 0.0
            loss_d = 0.0
            batch_begin_time = time()

    print_flush("Evaluating....")
    if (epoch+1) > eval_after_epoch:
        eval_path = "../result/{}/{}/epoch{}.txt".format('DialogWAE', 'SWDA', epoch)
        # The context manager flushes and closes the report file even if
        # evaluation raises; it was previously left open.
        with open(eval_path, "w") as f_eval:
            recall_bleu, bow_extrema, bow_avg, bow_greedy, inter_dist1, inter_dist2, avg_len\
                = evaluate(model, metrics, test_loader, vocab, ivocab, f_eval, repeat=1)
    model.adjust_lr()

Loaded word2vec
Epoch:  1
Train begins with 6398 batches with 12 left over samples
[1 50] loss_ae: 772.952671 loss_g: 0.003260 loss_d: 9.441190 time: 7.1 s
[1 100] loss_ae: 281.586226 loss_g: 0.007951 loss_d: 9.201698 time: 6.9 s
[1 150] loss_ae: 176.866460 loss_g: 0.021754 loss_d: 9.092923 time: 7.0 s
[1 200] loss_ae: 146.034011 loss_g: 0.029456 loss_d: 8.941715 time: 7.0 s
[1 250] loss_ae: 127.633686 loss_g: 0.038266 loss_d: 8.804258 time: 7.0 s
[1 300] loss_ae: 117.296023 loss_g: 0.084724 loss_d: 8.665235 time: 7.0 s
[1 350] loss_ae: 103.915196 loss_g: 0.089025 loss_d: 8.589442 time: 7.0 s
[1 400] loss_ae: 91.235519 loss_g: 0.078870 loss_d: 8.492210 time: 7.0 s
[1 450] loss_ae: 86.707661 loss_g: 0.069349 loss_d: 8.387099 time: 7.0 s
[1 500] loss_ae: 84.711306 loss_g: 0.085879 loss_d: 8.203546 time: 7.0 s
[1 550] loss_ae: 84.355394 loss_g: 0.084642 loss_d: 8.040713 time: 7.0 s
[1 600] loss_ae: 76.815586 loss_g: 0.099949 loss_d: 7.835270 time: 7.0 s
[1 650] loss_ae: 75.378659 loss_g: 

[5 350] loss_ae: 37.859651 loss_g: 6.218454 loss_d: -3.364293 time: 6.8 s
[5 400] loss_ae: 36.183868 loss_g: 6.302505 loss_d: -3.560900 time: 6.7 s
[5 450] loss_ae: 33.951994 loss_g: 6.429477 loss_d: -3.783187 time: 6.7 s
[5 500] loss_ae: 37.703691 loss_g: 6.518563 loss_d: -3.748051 time: 6.7 s
[5 550] loss_ae: 35.514817 loss_g: 6.578220 loss_d: -3.797254 time: 6.7 s
[5 600] loss_ae: 36.086232 loss_g: 6.717169 loss_d: -3.815160 time: 6.8 s
[5 650] loss_ae: 38.782208 loss_g: 7.205209 loss_d: -4.488389 time: 6.8 s
[5 700] loss_ae: 37.765013 loss_g: 7.306391 loss_d: -4.605716 time: 6.8 s
[5 750] loss_ae: 33.293509 loss_g: 6.983238 loss_d: -4.192238 time: 6.8 s
[5 800] loss_ae: 33.331662 loss_g: 7.339334 loss_d: -4.687312 time: 6.7 s
[5 850] loss_ae: 35.962166 loss_g: 7.493131 loss_d: -4.819886 time: 6.8 s
[5 900] loss_ae: 35.991774 loss_g: 7.853860 loss_d: -5.241279 time: 6.7 s
[5 950] loss_ae: 35.134504 loss_g: 8.025310 loss_d: -5.515351 time: 6.7 s
[5 1000] loss_ae: 32.883906 loss_g: 8.

[9 550] loss_ae: 27.186585 loss_g: 30.330113 loss_d: -28.816940 time: 9.7 s
[9 600] loss_ae: 28.296203 loss_g: 31.404423 loss_d: -29.884428 time: 9.6 s
[9 650] loss_ae: 28.992795 loss_g: 32.532437 loss_d: -31.093220 time: 9.7 s
[9 700] loss_ae: 25.947943 loss_g: 31.456332 loss_d: -30.074452 time: 9.8 s
[9 750] loss_ae: 26.714921 loss_g: 31.091417 loss_d: -29.636735 time: 9.7 s
[9 800] loss_ae: 24.871557 loss_g: 32.259931 loss_d: -30.757568 time: 9.7 s
[9 850] loss_ae: 28.846435 loss_g: 32.680481 loss_d: -31.104310 time: 9.8 s
[9 900] loss_ae: 27.489551 loss_g: 33.479888 loss_d: -31.993742 time: 9.6 s
[9 950] loss_ae: 25.767852 loss_g: 33.775568 loss_d: -32.205791 time: 9.6 s
[9 1000] loss_ae: 25.624436 loss_g: 32.484541 loss_d: -30.946149 time: 9.6 s
[9 1050] loss_ae: 26.808944 loss_g: 31.962697 loss_d: -30.184419 time: 9.7 s
[9 1100] loss_ae: 27.484942 loss_g: 32.816767 loss_d: -31.329340 time: 9.7 s
[9 1150] loss_ae: 29.139065 loss_g: 34.913741 loss_d: -33.506772 time: 9.7 s
[9 1200]

[13 650] loss_ae: 20.147793 loss_g: 39.559189 loss_d: -35.983067 time: 9.8 s
[13 700] loss_ae: 21.377983 loss_g: 38.643849 loss_d: -34.895344 time: 9.7 s
[13 750] loss_ae: 22.262758 loss_g: 34.967984 loss_d: -31.698883 time: 9.7 s
[13 800] loss_ae: 20.861565 loss_g: 37.047480 loss_d: -33.475615 time: 9.7 s
[13 850] loss_ae: 22.427043 loss_g: 39.427452 loss_d: -34.851948 time: 9.8 s
[13 900] loss_ae: 24.014749 loss_g: 34.902353 loss_d: -30.447578 time: 9.7 s
[13 950] loss_ae: 21.324488 loss_g: 33.862068 loss_d: -29.388565 time: 9.7 s
[13 1000] loss_ae: 20.321437 loss_g: 34.198361 loss_d: -29.761123 time: 9.5 s
[13 1050] loss_ae: 21.351447 loss_g: 34.231571 loss_d: -31.460623 time: 9.8 s
[13 1100] loss_ae: 23.056334 loss_g: 37.965926 loss_d: -34.086987 time: 9.6 s
[13 1150] loss_ae: 22.303441 loss_g: 32.989465 loss_d: -28.432210 time: 9.7 s
[13 1200] loss_ae: 21.254090 loss_g: 33.655690 loss_d: -29.491164 time: 9.6 s
[13 1250] loss_ae: 22.074044 loss_g: 35.200301 loss_d: -30.936599 time:

[17 950] loss_ae: 18.575243 loss_g: 37.636724 loss_d: -32.320322 time: 8.4 s
[17 1000] loss_ae: 17.308367 loss_g: 33.547353 loss_d: -29.296290 time: 8.1 s
[17 1050] loss_ae: 18.388342 loss_g: 36.622879 loss_d: -33.363054 time: 8.4 s
[17 1100] loss_ae: 18.355875 loss_g: 35.853897 loss_d: -30.095099 time: 8.4 s
[17 1150] loss_ae: 18.858322 loss_g: 34.428349 loss_d: -30.245642 time: 8.3 s
[17 1200] loss_ae: 21.042025 loss_g: 36.157712 loss_d: -33.849894 time: 8.2 s
[17 1250] loss_ae: 17.914418 loss_g: 35.508517 loss_d: -31.905805 time: 8.3 s
Evaluating....
Epoch:  18
Train begins with 6398 batches with 12 left over samples
[18 50] loss_ae: 20.126635 loss_g: 35.524317 loss_d: -31.672435 time: 15.5 s
[18 100] loss_ae: 20.588881 loss_g: 34.070028 loss_d: -29.479850 time: 20.8 s
[18 150] loss_ae: 18.720538 loss_g: 34.022009 loss_d: -31.305369 time: 21.0 s
[18 200] loss_ae: 19.336663 loss_g: 35.493099 loss_d: -32.130773 time: 20.7 s
[18 250] loss_ae: 19.965893 loss_g: 37.532155 loss_d: -32.129

[21 1000] loss_ae: 17.847564 loss_g: 40.273684 loss_d: -33.863154 time: 11.7 s
[21 1050] loss_ae: 19.222470 loss_g: 45.819096 loss_d: -39.444961 time: 11.9 s
[21 1100] loss_ae: 17.659443 loss_g: 38.446475 loss_d: -32.723279 time: 11.7 s
[21 1150] loss_ae: 18.935319 loss_g: 43.383489 loss_d: -39.712220 time: 11.8 s
[21 1200] loss_ae: 18.331022 loss_g: 42.060851 loss_d: -38.113651 time: 12.0 s
[21 1250] loss_ae: 16.932001 loss_g: 43.906909 loss_d: -39.653926 time: 11.7 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  there ' s a lot of people who ' ve been in the past couple of years and they ' re all grown up and </s>
true response:  well i have two cats </s>
generate response:  yeah </s>
true response:  yeah </s>
generate response:  do you think that ' s the best way to go to the movies and the </s>
true 

[23 100] loss_ae: 16.347742 loss_g: 41.656369 loss_d: -36.345287 time: 11.9 s
[23 150] loss_ae: 16.496165 loss_g: 45.731339 loss_d: -39.214445 time: 11.8 s
[23 200] loss_ae: 16.712152 loss_g: 43.293928 loss_d: -38.145871 time: 11.7 s
[23 250] loss_ae: 15.576974 loss_g: 38.719015 loss_d: -36.966442 time: 11.8 s
[23 300] loss_ae: 16.776312 loss_g: 44.333146 loss_d: -38.556081 time: 11.9 s
[23 350] loss_ae: 16.802333 loss_g: 43.260780 loss_d: -36.988033 time: 11.7 s
[23 400] loss_ae: 16.874404 loss_g: 39.533107 loss_d: -32.992594 time: 11.7 s
[23 450] loss_ae: 17.790325 loss_g: 38.940751 loss_d: -36.524079 time: 11.8 s
[23 500] loss_ae: 16.916476 loss_g: 37.040757 loss_d: -33.396891 time: 11.8 s
[23 550] loss_ae: 16.114051 loss_g: 41.441024 loss_d: -35.887354 time: 11.9 s
[23 600] loss_ae: 17.167469 loss_g: 41.164264 loss_d: -38.557910 time: 11.6 s
[23 650] loss_ae: 16.034474 loss_g: 44.030381 loss_d: -38.581377 time: 12.0 s
[23 700] loss_ae: 15.767735 loss_g: 44.336854 loss_d: -40.305299

true response:  uh - huh </s>
generate response:  and i ' m i ' m a i ' m a i ' m a i ' m a i ' m a i ' m a </s>
true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  i don ' t know i just i just i just love it i love it i love it i love it </s>
true response:  uh - huh </s>
generate response:  oh </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  and </s>
true response:  but they </s>
generate response:  oh yeah that ' s what they ' re doing is they ' re </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  with some of the things that you can do is you can ' t even get a little bit of a little bit of a </s>
true response:  and </s>
generate respons

[26 1100] loss_ae: 15.954533 loss_g: 46.110864 loss_d: -41.220070 time: 11.7 s
[26 1150] loss_ae: 15.281787 loss_g: 54.024956 loss_d: -48.768736 time: 11.8 s
[26 1200] loss_ae: 15.793415 loss_g: 47.190856 loss_d: -40.651883 time: 12.0 s
[26 1250] loss_ae: 18.749435 loss_g: 53.990301 loss_d: -45.938214 time: 11.6 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  do you think that ' s the most important thing to do with the idea of having a family reunion that ' s a good idea </s>
true response:  well i have two cats </s>
generate response:  yeah </s>
true response:  yeah </s>
generate response:  uh - huh </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>
generate response:  and but you know the guy who ' s the one who ' 

[28 50] loss_ae: 16.715142 loss_g: 47.754024 loss_d: -39.335933 time: 11.9 s
[28 100] loss_ae: 14.990292 loss_g: 53.663593 loss_d: -49.795186 time: 11.7 s
[28 150] loss_ae: 14.135208 loss_g: 55.472925 loss_d: -48.524078 time: 12.0 s
[28 200] loss_ae: 16.764632 loss_g: 54.909294 loss_d: -46.861897 time: 12.0 s
[28 250] loss_ae: 16.466419 loss_g: 47.914995 loss_d: -41.882195 time: 11.8 s
[28 300] loss_ae: 15.170659 loss_g: 48.991169 loss_d: -45.397120 time: 11.9 s
[28 350] loss_ae: 16.410373 loss_g: 52.057193 loss_d: -45.704337 time: 11.7 s
[28 400] loss_ae: 17.192936 loss_g: 54.001135 loss_d: -48.032908 time: 12.1 s
[28 450] loss_ae: 15.972217 loss_g: 53.668984 loss_d: -47.990660 time: 11.9 s
[28 500] loss_ae: 15.152699 loss_g: 49.934871 loss_d: -43.338299 time: 11.7 s
[28 550] loss_ae: 15.890078 loss_g: 56.351540 loss_d: -51.909413 time: 11.9 s
[28 600] loss_ae: 14.326134 loss_g: 52.114688 loss_d: -45.404619 time: 12.0 s
[28 650] loss_ae: 17.834261 loss_g: 58.039721 loss_d: -48.441588 

true response:  uh - huh </s>
generate response:  </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  have been in a situation where they were in a nursing home and </s>
true response:  but they </s>
generate response:  yeah </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  oh you know like that </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  you know if you if you ' re a kid you ' re a lot more than you ' re in a you know you ' re a </s>
true response:  and </s>
generate response:  do it all right do you have a favorite team or </s>
true response:  yeah i ' ve got a well my roses are on the west side of the house i asked my neighbors what they wanted to see outside their </s>
generate response:  they do have a lot of good stuff there ' s a lot of good pla

[31 1200] loss_ae: 14.499870 loss_g: 54.068565 loss_d: -43.466257 time: 11.7 s
[31 1250] loss_ae: 17.580406 loss_g: 55.534192 loss_d: -49.338411 time: 11.6 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  and they don ' t want to go to school and i don ' t know if they ' re going to be in the middle of the </s>
true response:  well i have two cats </s>
generate response:  not really a good idea i like to do that i like to do that </s>
true response:  yeah </s>
generate response:  right and i </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>
generate response:  and i don ' t i don ' t i don ' t i don ' t have a i don ' t have a i don ' t have
true response:  something like that </s>
generate response:  huh </s>
true re

[33 50] loss_ae: 15.087180 loss_g: 52.778548 loss_d: -43.413756 time: 10.4 s
[33 100] loss_ae: 15.104458 loss_g: 57.144391 loss_d: -49.095902 time: 10.3 s
[33 150] loss_ae: 17.438085 loss_g: 65.095002 loss_d: -52.025290 time: 10.5 s
[33 200] loss_ae: 14.973750 loss_g: 61.671786 loss_d: -49.643110 time: 10.4 s
[33 250] loss_ae: 14.259963 loss_g: 61.164263 loss_d: -51.121805 time: 10.6 s
[33 300] loss_ae: 14.093850 loss_g: 58.772030 loss_d: -50.939692 time: 10.6 s
[33 350] loss_ae: 14.555262 loss_g: 58.549418 loss_d: -50.716444 time: 10.3 s
[33 400] loss_ae: 13.789466 loss_g: 65.281822 loss_d: -57.247269 time: 10.4 s
[33 450] loss_ae: 13.405034 loss_g: 58.386706 loss_d: -50.352901 time: 10.6 s
[33 500] loss_ae: 13.638375 loss_g: 57.809190 loss_d: -50.439020 time: 10.6 s
[33 550] loss_ae: 14.029018 loss_g: 54.037604 loss_d: -43.396455 time: 10.6 s
[33 600] loss_ae: 14.999777 loss_g: 63.479599 loss_d: -53.019400 time: 10.4 s
[33 650] loss_ae: 16.249062 loss_g: 60.284534 loss_d: -50.624787 

true response:  but they </s>
generate response:  yeah </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  but there is a lot of that </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  and we have a we have a we have a we have a we have a <unk> we have a <unk> we have a <unk> </s>
true response:  and </s>
generate response:  yeah </s>
true response:  yeah i ' ve got a well my roses are on the west side of the house i asked my neighbors what they wanted to see outside their </s>
generate response:  i have i have a sister that lives in a nursing home and i have a sister that lives in a nursing home </s>
true response:  um - hum </s>
generate response:  trying to do something to the wrong thing that i think that i think that the people that i think that ' s a good idea </s>
BLEU1 0.459248, BLEU2 0.372249, BLEU3 0.313621, BLEU4 0.251779, inte

true response:  yeah </s>
generate response:  and they </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>
generate response:  you can you can get a <unk> <unk> you know you can you can you can you can you can </s>
true response:  something like that </s>
generate response:  has been a <unk> of <unk> and they have a lot of <unk> and they have a lot of <unk> and </s>
true response:  yeah that ' s fun </s>
generate response:  yeah </s>
true response:  oh </s>
generate response:  you know and we have a we have a we have a we have a we have a <unk> a <unk> a <unk> <unk> </s>
true response:  lay on those beaches over there </s>
generate response:  it ' ll do that but you know you can ' t really get it on a little bit of a <unk> or something like that </s>
true response:  uh - huh </s>
generate response:  uh - huh </s>
true response:  they before we got her everybody said oh they ' re just like a cat you k

[38 750] loss_ae: 12.531853 loss_g: 66.618644 loss_d: -56.023406 time: 8.1 s
[38 800] loss_ae: 14.257061 loss_g: 67.287107 loss_d: -52.885913 time: 8.2 s
[38 850] loss_ae: 14.524394 loss_g: 70.956589 loss_d: -61.854369 time: 8.2 s
[38 900] loss_ae: 14.110746 loss_g: 67.181309 loss_d: -52.647604 time: 8.1 s
[38 950] loss_ae: 13.267765 loss_g: 64.166645 loss_d: -53.292514 time: 8.3 s
[38 1000] loss_ae: 14.224413 loss_g: 64.629637 loss_d: -52.875391 time: 8.1 s
[38 1050] loss_ae: 13.973133 loss_g: 71.807679 loss_d: -57.223994 time: 8.1 s
[38 1100] loss_ae: 12.566089 loss_g: 63.054411 loss_d: -50.376710 time: 8.2 s
[38 1150] loss_ae: 14.395494 loss_g: 72.347236 loss_d: -58.313389 time: 8.2 s
[38 1200] loss_ae: 15.890694 loss_g: 75.810319 loss_d: -59.664959 time: 8.2 s
[38 1250] loss_ae: 15.534064 loss_g: 72.259208 loss_d: -54.209964 time: 8.1 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate cour

true response:  um - hum </s>
generate response:  um - hum </s>
BLEU1 0.442836, BLEU2 0.358711, BLEU3 0.302301, BLEU4 0.242885, inter_dist1 0.006852, inter_dist2 0.041511 avg_len 15.416895
 time: 144.1 s
Done testing
Epoch:  40
Train begins with 6398 batches with 12 left over samples
[40 50] loss_ae: 14.115519 loss_g: 73.345139 loss_d: -62.360778 time: 8.2 s
[40 100] loss_ae: 14.183557 loss_g: 81.769256 loss_d: -68.878419 time: 8.1 s
[40 150] loss_ae: 14.893810 loss_g: 72.059781 loss_d: -55.476762 time: 8.1 s
[40 200] loss_ae: 13.619443 loss_g: 64.338232 loss_d: -50.754033 time: 8.1 s
[40 250] loss_ae: 15.063963 loss_g: 75.102769 loss_d: -57.882945 time: 8.1 s
[40 300] loss_ae: 15.663721 loss_g: 73.852754 loss_d: -61.669175 time: 8.2 s
[40 350] loss_ae: 13.473955 loss_g: 73.479257 loss_d: -62.510004 time: 8.0 s
[40 400] loss_ae: 13.610918 loss_g: 70.765913 loss_d: -62.472615 time: 8.3 s
[40 450] loss_ae: 13.212073 loss_g: 75.257348 loss_d: -57.889950 time: 8.1 s
[40 500] loss_ae: 14.74

true response:  uh - huh </s>
generate response:  right </s>
true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  you know they have to have a <unk> they have a they have a <unk> you know they have a they have a <unk> you have to have a </s>
true response:  uh - huh </s>
generate response:  but but </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  yeah </s>
true response:  but they </s>
generate response:  and i ' ve i ' ve had to have a <unk> <unk> <unk> </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  um - hum i do </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  they have to have to have to have a you have to have a have a you have to have a have a you have to have a have a
true response:

[43 1150] loss_ae: 11.303818 loss_g: 70.866063 loss_d: -60.696419 time: 8.2 s
[43 1200] loss_ae: 12.802812 loss_g: 69.402908 loss_d: -53.274745 time: 8.2 s
[43 1250] loss_ae: 12.084300 loss_g: 63.054377 loss_d: -52.976743 time: 8.1 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  you know they just don ' t know what ' s going on in the world and they ' re not going to be able to </s>
true response:  well i have two cats </s>
generate response:  and they have they have a they have a <unk> <unk> they have a <unk> they have a <unk> </s>
true response:  yeah </s>
generate response:  don don don don ' t they don ' t really do anything like that either </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>
generate response:  we

[45 150] loss_ae: 13.264503 loss_g: 69.318129 loss_d: -55.219090 time: 8.2 s
[45 200] loss_ae: 11.572850 loss_g: 71.265170 loss_d: -58.761178 time: 8.2 s
[45 250] loss_ae: 12.365019 loss_g: 75.066510 loss_d: -57.947377 time: 8.1 s
[45 300] loss_ae: 14.626301 loss_g: 77.675261 loss_d: -62.770832 time: 8.3 s
[45 350] loss_ae: 15.023743 loss_g: 74.126505 loss_d: -57.686898 time: 8.1 s
[45 400] loss_ae: 13.372075 loss_g: 76.184355 loss_d: -64.123051 time: 8.2 s
[45 450] loss_ae: 15.212722 loss_g: 81.684488 loss_d: -66.254872 time: 8.3 s
[45 500] loss_ae: 14.968741 loss_g: 62.515265 loss_d: -52.146907 time: 8.2 s
[45 550] loss_ae: 13.380012 loss_g: 66.144416 loss_d: -52.880338 time: 8.1 s
[45 600] loss_ae: 13.765258 loss_g: 70.493287 loss_d: -58.919594 time: 8.2 s
[45 650] loss_ae: 13.844587 loss_g: 71.334291 loss_d: -59.161275 time: 8.1 s
[45 700] loss_ae: 13.179829 loss_g: 73.632174 loss_d: -56.947465 time: 8.2 s
[45 750] loss_ae: 14.250845 loss_g: 75.113453 loss_d: -67.107167 time: 8.2 s

true response:  but they </s>
generate response:  you know in fact he ' s a he ' s a <unk> and he ' s a real good guy he ' s a real good </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  you know it ' s not really expensive </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  oh </s>
true response:  and </s>
generate response:  it really doesn ' t seem to be any good to go to a place where you ' re in a small town </s>
true response:  yeah i ' ve got a well my roses are on the west side of the house i asked my neighbors what they wanted to see outside their </s>
generate response:  oh are you </s>
true response:  um - hum </s>
generate response:  uh - huh </s>
BLEU1 0.442634, BLEU2 0.358468, BLEU3 0.302032, BLEU4 0.242476, inter_dist1 0.007436, inter_dist2 0.046731 avg_len 15.335340
 time: 145.6 s
Done testing
Epoch:  47
Train begins with

true response:  something like that </s>
generate response:  and </s>
true response:  yeah that ' s fun </s>
generate response:  if they if they if they don ' t do that they ' ll do that they ' ll do that they ' ll do that they ' ll </s>
true response:  oh </s>
generate response:  yeah </s>
true response:  lay on those beaches over there </s>
generate response:  to you know to go to you know to go to a you know a <unk> or a <unk> or something like that </s>
true response:  uh - huh </s>
generate response:  uh - huh </s>
true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  i don ' t know i don ' t know if you ' ve ever heard of it but they ' re not going to be good </s>
true response:  uh - huh </s>
generate response:  but they really are and it </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  you know like you say you have to have a you have 

[50 900] loss_ae: 14.192778 loss_g: 72.534483 loss_d: -59.234739 time: 8.1 s
[50 950] loss_ae: 12.405971 loss_g: 83.628374 loss_d: -68.843308 time: 8.2 s
[50 1000] loss_ae: 13.799872 loss_g: 80.137710 loss_d: -67.054188 time: 8.2 s
[50 1050] loss_ae: 12.006888 loss_g: 88.816797 loss_d: -72.749808 time: 8.2 s
[50 1100] loss_ae: 15.091746 loss_g: 85.541025 loss_d: -66.427308 time: 8.3 s
[50 1150] loss_ae: 13.384642 loss_g: 78.558480 loss_d: -68.254283 time: 8.1 s
[50 1200] loss_ae: 12.056616 loss_g: 81.226975 loss_d: -62.200268 time: 8.3 s
[50 1250] loss_ae: 12.585144 loss_g: 77.659974 loss_d: -64.676538 time: 8.3 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  have been up to a university of texas and i ' m in the university of texas and i ' m in the university of </s>
true response:  well i have two cats

true response:  um - hum </s>
generate response:  um - hum </s>
BLEU1 0.447572, BLEU2 0.362907, BLEU3 0.306056, BLEU4 0.245864, inter_dist1 0.007187, inter_dist2 0.045483 avg_len 15.738186
 time: 144.8 s
Done testing
Epoch:  52
Train begins with 6398 batches with 12 left over samples
[52 50] loss_ae: 14.095118 loss_g: 99.914553 loss_d: -77.452876 time: 8.2 s
[52 100] loss_ae: 13.280411 loss_g: 81.249697 loss_d: -69.552970 time: 8.2 s
[52 150] loss_ae: 11.475578 loss_g: 82.947726 loss_d: -65.522666 time: 8.1 s
[52 200] loss_ae: 13.339310 loss_g: 78.157746 loss_d: -63.331024 time: 8.2 s
[52 250] loss_ae: 12.024824 loss_g: 76.857448 loss_d: -67.727753 time: 8.3 s
[52 300] loss_ae: 13.295323 loss_g: 84.269418 loss_d: -68.299925 time: 8.1 s
[52 350] loss_ae: 12.981093 loss_g: 80.989807 loss_d: -63.951020 time: 8.2 s
[52 400] loss_ae: 12.884617 loss_g: 88.877777 loss_d: -68.491301 time: 8.1 s
[52 450] loss_ae: 11.835911 loss_g: 86.743360 loss_d: -69.990732 time: 8.2 s
[52 500] loss_ae: 12.93

true response:  lay on those beaches over there </s>
generate response:  you know you can you can you can have a you have a you have a you have a you have a you have a </s>
true response:  uh - huh </s>
generate response:  um - hum </s>
true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  you do it </s>
true response:  uh - huh </s>
generate response:  right </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  to be <unk> to be honest with you i ' m not sure that we ' re not going to do it but i think </s>
true response:  but they </s>
generate response:  if you if you don ' t like it you can ' t </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  um - hum </s>
true response:  you ' d think i ' d have a lot time but </s>
generate res

[55 1100] loss_ae: 13.647359 loss_g: 76.044138 loss_d: -58.373056 time: 8.2 s
[55 1150] loss_ae: 14.747439 loss_g: 88.668172 loss_d: -72.303770 time: 8.1 s
[55 1200] loss_ae: 13.302446 loss_g: 77.852413 loss_d: -61.663929 time: 8.1 s
[55 1250] loss_ae: 13.973475 loss_g: 61.796522 loss_d: -49.698402 time: 8.2 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  they do that there ' s no way they can do that they can do that they can do that they can do that they can do that </s>
true response:  well i have two cats </s>
generate response:  there are a lot of people that are in the in the in the </s>
true response:  yeah </s>
generate response:  oh yeah you ' re right there </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>


[57 50] loss_ae: 12.223073 loss_g: 79.790001 loss_d: -62.076813 time: 8.2 s
[57 100] loss_ae: 11.457471 loss_g: 75.727042 loss_d: -55.834300 time: 8.2 s
[57 150] loss_ae: 12.895528 loss_g: 69.229018 loss_d: -46.253623 time: 8.2 s
[57 200] loss_ae: 11.770202 loss_g: 87.299253 loss_d: -71.904751 time: 8.2 s
[57 250] loss_ae: 12.928410 loss_g: 75.082807 loss_d: -56.548256 time: 8.1 s
[57 300] loss_ae: 10.846453 loss_g: 73.676974 loss_d: -59.564926 time: 8.2 s
[57 350] loss_ae: 11.555902 loss_g: 67.789981 loss_d: -61.378743 time: 8.1 s
[57 400] loss_ae: 13.858761 loss_g: 77.773745 loss_d: -66.424566 time: 8.1 s
[57 450] loss_ae: 13.860472 loss_g: 83.544292 loss_d: -61.742488 time: 8.2 s
[57 500] loss_ae: 13.333420 loss_g: 78.807991 loss_d: -69.921781 time: 8.1 s
[57 550] loss_ae: 12.168274 loss_g: 76.900504 loss_d: -59.951826 time: 8.0 s
[57 600] loss_ae: 14.801496 loss_g: 73.985469 loss_d: -63.641567 time: 8.1 s
[57 650] loss_ae: 13.518975 loss_g: 77.123277 loss_d: -64.433099 time: 8.3 s


true response:  uh - huh </s>
generate response:  with those two hundred and fifty dollars a year and a half a month to a </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  they have like in the past and they ' ve been there for about twenty years and they ' ve been there for about twenty </s>
true response:  but they </s>
generate response:  even though it ' s just a matter of fact i ' m not sure what the what the what the what the </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  yeah </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  just to get to the point where you ' re going to be in the same position and you ' re going to have </s>
true response:  and </s>
generate response:  and it is the </s>
true response:  yeah i ' ve got a well my roses are on t

[60 1250] loss_ae: 12.081774 loss_g: 78.154992 loss_d: -60.600941 time: 8.1 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  not but it doesn ' t do it but it ' s not it ' s not it ' s not it ' s not a very good </s>
true response:  well i have two cats </s>
generate response:  we have to have to have a we have a we have a we have a we have a <unk> a </s>
true response:  yeah </s>
generate response:  uh - huh </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>
generate response:  you know you have to have a you have a you have a you have a <unk> you have </s>
true response:  something like that </s>
generate response:  uh - huh i ' ve heard of them i ' ve heard of them i ' ve </s>
true response:  yeah that ' s fun </s>


[62 250] loss_ae: 12.051957 loss_g: 70.859047 loss_d: -53.304384 time: 8.2 s
[62 300] loss_ae: 11.251465 loss_g: 69.772708 loss_d: -55.981528 time: 8.2 s
[62 350] loss_ae: 11.992822 loss_g: 67.894737 loss_d: -57.569265 time: 8.3 s
[62 400] loss_ae: 13.053799 loss_g: 71.124266 loss_d: -70.531081 time: 8.2 s
[62 450] loss_ae: 11.619828 loss_g: 70.544205 loss_d: -59.296688 time: 8.2 s
[62 500] loss_ae: 13.439958 loss_g: 70.728218 loss_d: -56.198434 time: 8.2 s
[62 550] loss_ae: 11.276034 loss_g: 78.389010 loss_d: -57.740609 time: 8.3 s
[62 600] loss_ae: 13.195666 loss_g: 72.123023 loss_d: -55.883611 time: 8.3 s
[62 650] loss_ae: 14.200044 loss_g: 74.670223 loss_d: -63.546931 time: 8.3 s
[62 700] loss_ae: 13.135586 loss_g: 65.070586 loss_d: -52.474919 time: 8.1 s
[62 750] loss_ae: 12.069705 loss_g: 73.214118 loss_d: -66.098064 time: 8.2 s
[62 800] loss_ae: 11.623887 loss_g: 74.244401 loss_d: -60.576637 time: 8.1 s
[62 850] loss_ae: 13.888992 loss_g: 87.184770 loss_d: -74.288508 time: 8.1 s

true response:  uh - huh </s>
generate response:  uh - huh they have they have you have you have you have you have you have you have you have you have you have you have you </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  with you know we ' ve got a lot of people that are in the in the in the in the in the in the in the </s>
true response:  but they </s>
generate response:  you know </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  um - huh </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  and we have a lot of people that have a lot of kids in the family and we ' re </s>
true response:  and </s>
generate response:  oh it ' s not too bad i mean you know the </s>
true response:  yeah i ' ve got a well my roses are on the west side of the house i asked my ne

[65 1150] loss_ae: 11.924389 loss_g: 68.105197 loss_d: -53.056934 time: 8.1 s
[65 1200] loss_ae: 13.485151 loss_g: 77.659790 loss_d: -62.825544 time: 8.2 s
[65 1250] loss_ae: 13.002628 loss_g: 77.823783 loss_d: -59.326178 time: 8.2 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  but it ' s not it ' s not it ' s not really it ' s not really it ' s not really a big <unk> </s>
true response:  well i have two cats </s>
generate response:  right but what ' s </s>
true response:  yeah </s>
generate response:  oh do you really </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>
generate response:  and they are trying to get a gun and they ' re not going to be able to do it you know they ' re </s>
true response:  something lik

[67 50] loss_ae: 14.896230 loss_g: 74.727844 loss_d: -62.691887 time: 8.2 s
[67 100] loss_ae: 15.641166 loss_g: 88.205675 loss_d: -69.641473 time: 8.1 s
[67 150] loss_ae: 12.584902 loss_g: 70.644036 loss_d: -57.236436 time: 8.2 s
[67 200] loss_ae: 13.074956 loss_g: 69.481959 loss_d: -55.986193 time: 8.1 s
[67 250] loss_ae: 12.481743 loss_g: 71.794504 loss_d: -58.184425 time: 8.1 s
[67 300] loss_ae: 12.939551 loss_g: 64.198185 loss_d: -55.908227 time: 8.3 s
[67 350] loss_ae: 11.268201 loss_g: 68.411702 loss_d: -56.809882 time: 8.2 s
[67 400] loss_ae: 12.236228 loss_g: 71.022211 loss_d: -66.633551 time: 8.2 s
[67 450] loss_ae: 12.446792 loss_g: 67.618921 loss_d: -52.356744 time: 8.2 s
[67 500] loss_ae: 13.663639 loss_g: 64.998892 loss_d: -47.411358 time: 8.1 s
[67 550] loss_ae: 12.118422 loss_g: 81.588069 loss_d: -66.486620 time: 8.2 s
[67 600] loss_ae: 12.865875 loss_g: 70.897570 loss_d: -55.036958 time: 8.2 s
[67 650] loss_ae: 13.366248 loss_g: 78.219786 loss_d: -57.737946 time: 8.2 s


true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  and that ' s what i ' ve seen and i ' ve heard of it i ' ve heard of it that ' s </s>
true response:  uh - huh </s>
generate response:  there ' s they don ' t do they do they do they do they do they do they do they do they do they do it they do they
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  they have they have they have you have you have you have you have you have you have you have you have you have you have you have you have
true response:  but they </s>
generate response:  uh - huh no they have </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  yeah </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  you know when you do get to work you ' 

[70 1150] loss_ae: 12.464300 loss_g: 78.396345 loss_d: -66.578576 time: 7.1 s
[70 1200] loss_ae: 12.712506 loss_g: 75.273353 loss_d: -63.164849 time: 7.4 s
[70 1250] loss_ae: 13.552194 loss_g: 69.668520 loss_d: -59.320515 time: 7.2 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  and they are they are very <unk> and they ' re very <unk> and they ' re very <unk> and they ' re very <unk> </s>
true response:  well i have two cats </s>
generate response:  you know </s>
true response:  yeah </s>
generate response:  they have they have they have you have you have you have you have you have you have you have you </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>
generate response:  and not even though they ' re not going to b

[72 100] loss_ae: 13.105793 loss_g: 77.959961 loss_d: -60.964555 time: 8.3 s
[72 150] loss_ae: 11.784356 loss_g: 69.335640 loss_d: -65.145461 time: 8.3 s
[72 200] loss_ae: 12.134576 loss_g: 83.982528 loss_d: -69.355308 time: 8.2 s
[72 250] loss_ae: 11.064911 loss_g: 79.223944 loss_d: -63.170125 time: 8.2 s
[72 300] loss_ae: 12.106904 loss_g: 73.172250 loss_d: -53.511844 time: 8.1 s
[72 350] loss_ae: 12.777995 loss_g: 80.928006 loss_d: -65.296925 time: 8.3 s
[72 400] loss_ae: 11.363484 loss_g: 63.738096 loss_d: -53.094576 time: 8.3 s
[72 450] loss_ae: 12.418220 loss_g: 73.848645 loss_d: -56.098549 time: 8.2 s
[72 500] loss_ae: 13.406568 loss_g: 71.299121 loss_d: -57.746575 time: 8.3 s
[72 550] loss_ae: 12.379763 loss_g: 75.457959 loss_d: -63.515713 time: 8.1 s
[72 600] loss_ae: 10.829616 loss_g: 68.635186 loss_d: -55.017099 time: 8.2 s
[72 650] loss_ae: 13.077646 loss_g: 72.349377 loss_d: -64.510824 time: 8.1 s
[72 700] loss_ae: 12.456655 loss_g: 53.010050 loss_d: -54.080053 time: 8.1 s

true response:  uh - huh </s>
generate response:  uh - huh and they </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  but they do have to do it and they ' ll do it you know they do it they do they do it but they do </s>
true response:  but they </s>
generate response:  they ' re not even <unk> </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  uh - huh they do they do a lot of that in the summer </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  oh </s>
true response:  and </s>
generate response:  you know when they when they ' re in the process of the <unk> and the <unk> and the <unk> and the <unk> and the <unk> and </s>
true response:  yeah i ' ve got a well my roses are on the west side of the house i asked my neighbors what they wanted to see outside the

[75 1100] loss_ae: 12.668672 loss_g: 60.790519 loss_d: -56.247068 time: 8.0 s
[75 1150] loss_ae: 11.636281 loss_g: 76.275342 loss_d: -60.539616 time: 8.2 s
[75 1200] loss_ae: 12.033682 loss_g: 63.963143 loss_d: -46.745766 time: 8.3 s
[75 1250] loss_ae: 11.934740 loss_g: 61.057071 loss_d: -46.652404 time: 8.3 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  and the other thing is he ' s a he ' s a <unk> and he ' s a very <unk> <unk> and he ' s </s>
true response:  well i have two cats </s>
generate response:  no they are they are they are they are they are they are they are they are they are they are they are they are they are they are they
true response:  yeah </s>
generate response:  when they do have a little bit of a <unk> that ' s kind of a <unk> thing and i think that ' s a good idea </s>
true respon

BLEU1 0.437237, BLEU2 0.357669, BLEU3 0.302210, BLEU4 0.242822, inter_dist1 0.006417, inter_dist2 0.040303 avg_len 16.318737
 time: 146.2 s
Done testing
Epoch:  77
Train begins with 6398 batches with 12 left over samples
[77 50] loss_ae: 13.621265 loss_g: 71.286364 loss_d: -60.201575 time: 8.1 s
[77 100] loss_ae: 13.786707 loss_g: 58.637111 loss_d: -45.160833 time: 8.3 s
[77 150] loss_ae: 13.636718 loss_g: 56.161177 loss_d: -45.776324 time: 8.1 s
[77 200] loss_ae: 12.978549 loss_g: 59.406504 loss_d: -51.797628 time: 8.4 s
[77 250] loss_ae: 12.872517 loss_g: 64.067453 loss_d: -48.757173 time: 8.1 s
[77 300] loss_ae: 11.937484 loss_g: 61.005506 loss_d: -59.199236 time: 8.3 s
[77 350] loss_ae: 12.816769 loss_g: 77.702070 loss_d: -61.540579 time: 8.1 s
[77 400] loss_ae: 13.355735 loss_g: 58.189241 loss_d: -47.580624 time: 8.3 s
[77 450] loss_ae: 15.762558 loss_g: 59.456516 loss_d: -47.837111 time: 8.1 s
[77 500] loss_ae: 11.519977 loss_g: 71.758527 loss_d: -60.206928 time: 8.3 s
[77 550] l

true response:  uh - huh </s>
generate response:  uh - huh </s>
true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  and they have they have they have you have you have you have you have you have you have you have you have you have you have you have you
true response:  uh - huh </s>
generate response:  um - hum </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  are not real good about it but it ' s not it ' s not that it ' s not that we don ' t have </s>
true response:  but they </s>
generate response:  or if they ' re not going to do it or do they just </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  uh - huh </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  and </s>
true response:  and </s>


[80 1150] loss_ae: 11.785789 loss_g: 63.819946 loss_d: -43.778493 time: 8.4 s
[80 1200] loss_ae: 12.655370 loss_g: 59.025192 loss_d: -43.095632 time: 8.1 s
[80 1250] loss_ae: 15.028719 loss_g: 54.557121 loss_d: -41.527669 time: 8.2 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  have been out of school for two years and he ' s been there for a long time and he was a little bit </s>
true response:  well i have two cats </s>
generate response:  have been <unk> for about ten years and have a </s>
true response:  yeah </s>
generate response:  right </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>
generate response:  you can you can you can you can you can have <unk> you can you can you can you can you can have a <unk> y

[82 50] loss_ae: 13.548893 loss_g: 67.156386 loss_d: -55.775310 time: 8.1 s
[82 100] loss_ae: 11.885604 loss_g: 58.889361 loss_d: -44.256339 time: 8.3 s
[82 150] loss_ae: 13.677795 loss_g: 56.947170 loss_d: -41.550723 time: 8.3 s
[82 200] loss_ae: 12.465453 loss_g: 60.742114 loss_d: -48.050526 time: 8.3 s
[82 250] loss_ae: 13.588757 loss_g: 60.690032 loss_d: -42.241471 time: 8.2 s
[82 300] loss_ae: 13.548787 loss_g: 56.969866 loss_d: -44.877773 time: 8.3 s
[82 350] loss_ae: 12.995280 loss_g: 50.936880 loss_d: -38.901051 time: 8.2 s
[82 400] loss_ae: 10.754140 loss_g: 51.626108 loss_d: -40.840349 time: 8.4 s
[82 450] loss_ae: 12.603563 loss_g: 58.945602 loss_d: -45.674091 time: 8.3 s
[82 500] loss_ae: 12.489509 loss_g: 69.248028 loss_d: -51.847644 time: 8.2 s
[82 550] loss_ae: 13.502976 loss_g: 64.469981 loss_d: -46.662682 time: 8.3 s
[82 600] loss_ae: 11.240563 loss_g: 54.385782 loss_d: -44.170971 time: 8.2 s
[82 650] loss_ae: 12.969768 loss_g: 65.676757 loss_d: -52.101055 time: 8.2 s


true response:  oh </s>
generate response:  oh </s>
true response:  lay on those beaches over there </s>
generate response:  oh they are </s>
true response:  uh - huh </s>
generate response:  uh - huh </s>
true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  but they do have to do that i mean i ' ve never been there for a while and i ' ve been there </s>
true response:  uh - huh </s>
generate response:  um - hum </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  and you can get there ' s a lot of people that are in the in the in the in the community and they ' re </s>
true response:  but they </s>
generate response:  it doesn ' t seem to do anything to do with it but they don ' t do it they don ' t do anything to </s>
true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick 

[85 1050] loss_ae: 12.734057 loss_g: 53.015645 loss_d: -44.383823 time: 8.3 s
[85 1100] loss_ae: 12.507245 loss_g: 53.289615 loss_d: -43.987784 time: 8.1 s
[85 1150] loss_ae: 12.557768 loss_g: 68.004674 loss_d: -49.234351 time: 8.3 s
[85 1200] loss_ae: 12.447689 loss_g: 53.511334 loss_d: -47.564232 time: 8.1 s
[85 1250] loss_ae: 12.152066 loss_g: 57.938966 loss_d: -49.139457 time: 8.3 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate response:  and they are very young so they ' re not in the in the in the in the in the in the in the home </s>
true response:  well i have two cats </s>
generate response:  you can do you do you do you have any kind of a garden or a or a pet or a <unk> </s>
true response:  yeah </s>
generate response:  they really i think they </s>
true response:  <unk> of people ' s heart strings and ma

true response:  um - hum </s>
generate response:  and i ' ve heard of her i have a friend who ' s a nurse </s>
BLEU1 0.423745, BLEU2 0.346878, BLEU3 0.294325, BLEU4 0.237146, inter_dist1 0.006520, inter_dist2 0.042179 avg_len 16.230432
 time: 145.1 s
Done testing
Epoch:  87
Train begins with 6398 batches with 12 left over samples
[87 50] loss_ae: 13.300895 loss_g: 51.790411 loss_d: -49.556982 time: 8.2 s
[87 100] loss_ae: 12.003135 loss_g: 65.460683 loss_d: -53.411483 time: 8.2 s
[87 150] loss_ae: 12.352210 loss_g: 54.056063 loss_d: -40.861381 time: 8.2 s
[87 200] loss_ae: 14.034116 loss_g: 58.461886 loss_d: -48.570011 time: 8.3 s
[87 250] loss_ae: 12.385987 loss_g: 41.477191 loss_d: -33.148976 time: 8.1 s
[87 300] loss_ae: 12.855967 loss_g: 47.866682 loss_d: -41.227888 time: 8.3 s
[87 350] loss_ae: 11.621316 loss_g: 55.645201 loss_d: -44.659942 time: 8.2 s
[87 400] loss_ae: 14.186161 loss_g: 55.312325 loss_d: -50.865803 time: 8.3 s
[87 450] loss_ae: 12.971264 loss_g: 46.206063 loss_d:

true response:  yeah that ' s fun </s>
generate response:  yeah you don ' t </s>
true response:  oh </s>
generate response:  they are just really good </s>
true response:  lay on those beaches over there </s>
generate response:  you have to have you have you have you have you have you have you have you have you have you have you have you have you have you have
true response:  uh - huh </s>
generate response:  uh - huh and you have to say </s>
true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  right now we ' ve got to go to the <unk> ' s not too bad but it ' s not a good movie but </s>
true response:  uh - huh </s>
generate response:  but it ' s still been nice </s>
true response:  we sort of like our roots and like to get back and have family reunions and all that </s>
generate response:  you know with <unk> and they have they have a <unk> they have a <unk> <unk> you have to have </s>
true response:  but they </s>
genera

[90 800] loss_ae: 13.082747 loss_g: 59.785046 loss_d: -49.617427 time: 8.3 s
[90 850] loss_ae: 13.715729 loss_g: 58.853496 loss_d: -50.389934 time: 8.2 s
[90 900] loss_ae: 14.287871 loss_g: 58.075284 loss_d: -56.153450 time: 8.3 s
[90 950] loss_ae: 12.904881 loss_g: 51.283081 loss_d: -34.622133 time: 8.1 s
[90 1000] loss_ae: 12.996835 loss_g: 53.605338 loss_d: -40.897828 time: 8.3 s
[90 1050] loss_ae: 13.337683 loss_g: 47.648267 loss_d: -50.086796 time: 8.2 s
[90 1100] loss_ae: 12.029847 loss_g: 53.550934 loss_d: -36.111537 time: 8.2 s
[90 1150] loss_ae: 12.712329 loss_g: 45.661450 loss_d: -44.266403 time: 8.2 s
[90 1200] loss_ae: 13.664434 loss_g: 49.796669 loss_d: -37.036212 time: 8.3 s
[90 1250] loss_ae: 12.670691 loss_g: 50.846089 loss_d: -36.054270 time: 8.3 s
Evaluating....
Test begins with 5481 batches with 0 left over samples
true response:  and at this particular point you know taking appropriate course work but really nothing that ' s in out of the ordinary </s>
generate resp

true response:  you ' d think i ' d have a lot time but </s>
generate response:  you know just to have to have a have you have you have you have you have you have you have you have you </s>
true response:  and </s>
generate response:  yeah it ' ll do it for a while and then they ' ll get it back again </s>
true response:  yeah i ' ve got a well my roses are on the west side of the house i asked my neighbors what they wanted to see outside their </s>
generate response:  there ' s always going to be a big place </s>
true response:  um - hum </s>
generate response:  um - hum </s>
BLEU1 0.428025, BLEU2 0.349636, BLEU3 0.295135, BLEU4 0.236885, inter_dist1 0.006654, inter_dist2 0.043220 avg_len 15.985222
 time: 146.0 s
Done testing
Epoch:  92
Train begins with 6398 batches with 12 left over samples
[92 50] loss_ae: 12.023200 loss_g: 54.132193 loss_d: -39.256814 time: 8.3 s
[92 100] loss_ae: 12.749739 loss_g: 67.571648 loss_d: -50.091847 time: 8.2 s
[92 150] loss_ae: 13.205615 loss_g: 53.116

true response:  yeah </s>
generate response:  yeah </s>
true response:  <unk> of people ' s heart strings and making them cry and feel like they ' re doing somebody good by giving them </s>
generate response:  you know with <unk> you can have a <unk> you know you have to have a <unk> you know </s>
true response:  something like that </s>
generate response:  yeah what ' s interesting is that you ' ve got to have a lot of <unk> and stuff like that and i ' m not sure </s>
true response:  yeah that ' s fun </s>
generate response:  uh - huh </s>
true response:  oh </s>
generate response:  oh yeah </s>
true response:  lay on those beaches over there </s>
generate response:  and to get they have to have you know a <unk> or a <unk> or something like that you know i mean i ' ve heard </s>
true response:  uh - huh </s>
generate response:  um - hum </s>
true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  or they are not sure how lon

[95 550] loss_ae: 11.922449 loss_g: 44.677827 loss_d: -36.697819 time: 8.2 s
[95 600] loss_ae: 12.879628 loss_g: 51.886022 loss_d: -39.424097 time: 8.2 s
[95 650] loss_ae: 13.483893 loss_g: 44.629009 loss_d: -32.939882 time: 8.3 s
[95 700] loss_ae: 14.304925 loss_g: 42.675052 loss_d: -28.517915 time: 8.3 s
[95 750] loss_ae: 13.490675 loss_g: 36.880632 loss_d: -27.012731 time: 8.3 s
[95 800] loss_ae: 13.237393 loss_g: 38.154242 loss_d: -34.201610 time: 8.3 s
[95 850] loss_ae: 12.780832 loss_g: 43.941451 loss_d: -39.361835 time: 8.3 s
[95 900] loss_ae: 13.804437 loss_g: 41.996913 loss_d: -33.601454 time: 8.2 s
[95 950] loss_ae: 13.599225 loss_g: 52.486835 loss_d: -40.792955 time: 8.3 s
[95 1000] loss_ae: 14.553553 loss_g: 48.572036 loss_d: -38.068780 time: 8.2 s
[95 1050] loss_ae: 14.294220 loss_g: 47.348842 loss_d: -35.610943 time: 8.4 s
[95 1100] loss_ae: 14.020874 loss_g: 47.286647 loss_d: -35.619220 time: 8.1 s
[95 1150] loss_ae: 14.100567 loss_g: 45.994283 loss_d: -35.708619 time: 8

true response:  yeah well see that ' s the reason that we couldn ' t make really make them at first we were going to get a pick up truck </s>
generate response:  um - hum </s>
true response:  you ' d think i ' d have a lot time but </s>
generate response:  you have to have a you have to have a you have a you have a you have a </s>
true response:  and </s>
generate response:  they are they are they are they are they are they are they are you know a lot of times they are they are they are they are </s>
true response:  yeah i ' ve got a well my roses are on the west side of the house i asked my neighbors what they wanted to see outside their </s>
generate response:  yeah you can do you can ' t get anything like that </s>
true response:  um - hum </s>
generate response:  um - hum </s>
BLEU1 0.417832, BLEU2 0.342708, BLEU3 0.290364, BLEU4 0.233675, inter_dist1 0.006277, inter_dist2 0.039266 avg_len 16.421273
 time: 145.5 s
Done testing
Epoch:  97
Train begins with 6398 batches with 12 left 

true response:  something like that </s>
generate response:  they are they are they are they are they are they are they are they are they are they are they are they are they are they are they are
true response:  yeah that ' s fun </s>
generate response:  and you can always get you can get them out of there and you can ' t get them in the house and </s>
true response:  oh </s>
generate response:  if they do not do they do they do they do they do they do they do it outside of the house and they do that they </s>
true response:  lay on those beaches over there </s>
generate response:  and have been having to have a lot of <unk> and i have a lot of <unk> and i have a lot of <unk> and i have </s>
true response:  uh - huh </s>
generate response:  uh - huh </s>
true response:  they before we got her everybody said oh they ' re just like a cat you know </s>
generate response:  they are they are they are they are they are they are they are they are they are they are they are they are they are t

[100 450] loss_ae: 15.087776 loss_g: 45.159494 loss_d: -34.218481 time: 8.5 s
[100 500] loss_ae: 14.233923 loss_g: 43.753378 loss_d: -45.501710 time: 8.4 s
[100 550] loss_ae: 13.344636 loss_g: 51.137384 loss_d: -37.833727 time: 8.4 s
[100 600] loss_ae: 14.683267 loss_g: 42.967670 loss_d: -32.429834 time: 8.3 s
[100 650] loss_ae: 13.145862 loss_g: 40.106173 loss_d: -34.415643 time: 8.4 s
[100 700] loss_ae: 13.071185 loss_g: 41.093087 loss_d: -31.705087 time: 8.2 s
[100 750] loss_ae: 12.726765 loss_g: 46.139163 loss_d: -36.457487 time: 8.4 s
[100 800] loss_ae: 13.525625 loss_g: 39.800800 loss_d: -26.833157 time: 8.2 s
[100 850] loss_ae: 13.825599 loss_g: 43.555780 loss_d: -27.418980 time: 8.5 s
[100 900] loss_ae: 14.398101 loss_g: 42.999491 loss_d: -26.528606 time: 8.2 s
[100 950] loss_ae: 13.477579 loss_g: 37.928825 loss_d: -31.887104 time: 8.4 s
[100 1000] loss_ae: 13.719309 loss_g: 34.090738 loss_d: -34.493382 time: 8.3 s
[100 1050] loss_ae: 12.965043 loss_g: 42.403228 loss_d: -36.693