In [50]:
!pip install contractions
import contractions



In [51]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import operator
import codecs
from io import open
import itertools
import math
import pickle
import numpy as np
from queue import PriorityQueue


# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Load and Preprocess Data

## Class Voc

Preprocess the text data (tokenize, build the vocabulary, then batch and pad the sentences) to prepare it for training.

In [0]:
# Default word tokens
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token
# Emotion categories and the two-way mapping between their names and ids.
num_emotion = 5
emo2idx = {'neutral':0,'joy':1,'sadness':2,'fear':3,'anger':4}
idx2emo = {0:'neutral',1:'joy',2:'sadness',3:'fear',4:'anger'}
# Sentences are truncated to this many tokens when added to the vocabulary.
MAX_LENGTH = 20


class Voc:
    """Vocabulary plus the text-to-tensor pipeline used for training.

    The class tokenizes sentences, assigns an integer index to every new
    token, optionally keeps a matrix of pre-trained vectors aligned with those
    indices, and turns [post, response] pairs into padded index tensors.

    Args:
        name: free-form label stored on the instance.
        version: embedding flavour. "pre-trained-word2vec-100d" and
            "pre-trained-word2vec-300d" load GloVe vectors through torchnlp,
            "pre-trained-subword-100d" loads BPEmb; any other value keeps a
            plain index vocabulary without pre-trained vectors.
        normalize: stored as ``self.normalize``. NOTE(review): no method reads
            this attribute; normalization is controlled by the ``normalize``
            argument of ``tokenizer``/``addSentence``/``indexesFromSentence``.
    """

    # Versions whose vectors are looked up in ``self.glove``.
    _GLOVE_VERSIONS = ("pre-trained-word2vec-100d", "pre-trained-word2vec-300d")
    _SUBWORD_VERSION = "pre-trained-subword-100d"
    # Initial number of rows of the GloVe weight matrix (grown on demand).
    _INITIAL_ROWS = 10000

    def __init__(self, name, version, normalize=False):
        self.name = name
        self.version = version
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count SOS, EOS, PAD
        self.normalize = normalize
        if version in self._GLOVE_VERSIONS:
            from torchnlp.word_to_vector import GloVe
            if version == "pre-trained-word2vec-300d":
                self.dim = 300
                self.glove = GloVe()
            else:
                self.dim = 100
                self.glove = GloVe(name='6B', dim=self.dim)
            self.weights_matrix = np.zeros((self._INITIAL_ROWS, self.dim))
            # The special tokens reuse the GloVe vectors of the strings
            # "0", "1" and "2" (the stringified token ids).
            self.weights_matrix[0] = self.glove[str(PAD_token)]
            self.weights_matrix[1] = self.glove[str(SOS_token)]
            self.weights_matrix[2] = self.glove[str(EOS_token)]
        elif version == self._SUBWORD_VERSION:
            from bpemb import BPEmb
            self.dim = 100
            self.bpemb = BPEmb(lang="en", dim=self.dim)
            self.index2word = {PAD_token: self.bpemb.decode_ids([PAD_token]),
                               SOS_token: self.bpemb.decode_ids([SOS_token]),
                               EOS_token: self.bpemb.decode_ids([EOS_token])}
            self.weights_matrix = self.bpemb.vectors.copy()

    # Load the data to go through the pipeline
    def loadData(self, filepath, small=False):
        """Read tab-separated "post<TAB>response" lines from ``filepath``.

        Every sentence is added to the vocabulary. Lines without a tab, and
        pairs where either side is empty after processing, are skipped.

        Args:
            filepath: path of the UTF-8 text file to read.
            small: when true only the first 1000 lines are used.

        Returns:
            A list of ``[post, response]`` string pairs.
        """
        print("Start preparing data ...")
        normalized_pairs = []
        # Read from the argument (not a module-level path) and close the file.
        with open(filepath, encoding='utf-8') as f:
            lines = f.read().strip().split('\n')
        if small:
            lines = lines[:1000]
        for i, line in enumerate(lines):
            pair = line.split('\t')
            if len(pair) < 2:
                # Malformed line: there is no response column to read.
                continue
            post = self.addSentence(pair[0].strip())
            response = self.addSentence(pair[1].strip())
            if post and response:
                normalized_pairs.append([post, response])
                if i % 10000 == 0:
                    print('Loading {}th data pairs'.format(i))
                    print(post, response)
        return normalized_pairs

    def _loadPickleSplit(self, path, label, report_every, limit=None):
        """Load one pickled split and return ``(pairs, emotions)``.

        The file must contain two consecutive pickled objects: the list of
        [post, response] pairs and the matching list of emotion labels.
        Pairs whose post has fewer than 5 tokens or whose response has fewer
        than 3 tokens are dropped (their words are still counted in the
        vocabulary, because the sentences are added before filtering).
        """
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load pickles from a trusted source.
        with open(path, 'rb') as f:
            data, emo_set = pickle.load(f), pickle.load(f)
        pairs, emos = [], []
        for i, line in enumerate(data):
            # Stop on the index itself so the cut-off still applies when the
            # pair at ``limit`` happens to be filtered out below.
            if limit is not None and i > limit:
                break
            post = self.addSentence(line[0].strip())
            response = self.addSentence(line[1].strip())
            if len(post.split(' ')) < 5 or len(response.split(' ')) < 3:
                continue
            pairs.append([post, response])
            emos.append(emo_set[i])
            if i % report_every == 0:
                print('loading {} th pair from {} dataset'.format(i, label), post, response)
        return pairs, emos

    def loadDataFromPickle(self, filepath, small=False):
        """Load ``train.pickle``, ``val.pickle`` and ``test.pickle``.

        Args:
            filepath: directory that contains the three pickle files.
            small: when true only the training pairs with index <= 10000 are
                read; the validation and test splits are always read fully.

        Returns:
            ``(train_set, train_emo, val_set, val_emo, test_set, test_emo)``.
        """
        print("Start preparing data ...")
        train_set, train_emo = self._loadPickleSplit(
            filepath + '/train.pickle', 'training', 10000, 10000 if small else None)
        val_set, val_emo = self._loadPickleSplit(
            filepath + '/val.pickle', 'validation', 2000)
        test_set, test_emo = self._loadPickleSplit(
            filepath + '/test.pickle', 'test', 2000)
        return train_set, train_emo, val_set, val_emo, test_set, test_emo

    # Batch [post, response] pair to required training format
    def batchSent2VecData(self, idxs, pairs, emos=None):
        """Build the tensors for one batch.

        Args:
            idxs: indices into ``pairs`` (and ``emos``) forming the batch.
            pairs: list of ``[post, response]`` sentences.
            emos: optional list of ``[post_emotion, response_emotion]`` names
                parallel to ``pairs``.

        Returns:
            ``(inp, lengths, output, mask, max_target_len, emo_in, emo_out)``.
            The batch is ordered by decreasing input length; the emotion
            tensors follow the same order and are empty when ``emos`` is None.
        """
        # Sort the indices (not just the pairs) so the emotion labels stay
        # aligned with their sentences. The key is the encoded input length,
        # i.e. exactly the length later reported by inputVar.
        order = sorted(idxs,
                       key=lambda i: len(self.indexesFromSentence(pairs[i][0])),
                       reverse=True)
        input_batch = [pairs[i][0] for i in order]
        output_batch = [pairs[i][1] for i in order]
        emo_in, emo_out = [], []
        if emos is not None:
            emo_in = [emo2idx[emos[i][0]] for i in order]
            emo_out = [emo2idx[emos[i][1]] for i in order]
        inp, lengths = self.inputVar(input_batch)
        output, mask, max_target_len = self.outputVar(output_batch)

        return inp, lengths, output, mask, max_target_len, torch.LongTensor(emo_in), torch.LongTensor(emo_out)

    # Helper methods to facilitate the functions above
    def tokenizer(self, s, normalize=False):
        """Split ``s`` into tokens.

        With ``normalize`` the text is lower-cased, stripped of combining
        marks, has contractions expanded, has a set of punctuation characters
        replaced by spaces and has repeated "!", "," and "?" collapsed.
        The subword version returns BPEmb pieces; every other version splits
        on single spaces and drops empty tokens.
        """
        if normalize:
            s = ''.join(c for c in unicodedata.normalize('NFD', s.lower().strip())
                        if unicodedata.category(c) != 'Mn')
            s = contractions.fix(s)
            s = re.sub(r"[\*\"“”\n\\…\+\-\/\=\(\)‘•:\[\]\|’\;]", " ", s)
            s = re.sub(r"[ ]+", " ", s)
            s = re.sub(r"\!+", "!", s)
            s = re.sub(r"\,+", ",", s)
            s = re.sub(r"\?+", "?", s)
        if self.version == self._SUBWORD_VERSION:
            return self.bpemb.encode(s)
        # Empty tokens (from repeated spaces or an empty string) never get a
        # vocabulary index, so they must not be produced here either.
        return [word for word in s.strip().split(' ') if word]

    def addSentence(self, sentence, normalize=False):
        """Tokenize ``sentence``, keep at most MAX_LENGTH tokens, register
        them in the vocabulary and return them joined by single spaces."""
        words = self.tokenizer(sentence, normalize)[:MAX_LENGTH]
        for word in words:
            self.addWord(word)
        return ' '.join(words)

    def _storePretrainedVector(self, index, word):
        """Copy the GloVe vector of ``word`` into row ``index``, growing the
        weight matrix when the vocabulary outgrows it."""
        rows = self.weights_matrix.shape[0]
        if index >= rows:
            extra = np.zeros((max(rows, index + 1 - rows), self.dim))
            self.weights_matrix = np.vstack([self.weights_matrix, extra])
        self.weights_matrix[index] = self.glove[word]

    def addWord(self, word):
        """Register one token: count it and, if it is new, give it the next
        free index. Empty tokens are ignored."""
        if not word:
            return
        if word in self.word2index:
            self.word2count[word] += 1
            return
        index = self.num_words
        self.word2index[word] = index
        self.word2count[word] = 1
        self.index2word[index] = word
        self.num_words += 1
        if self.version in self._GLOVE_VERSIONS:
            # The vector must live in the row of the word's own index (the
            # row *before* num_words was incremented).
            self._storePretrainedVector(index, word)

    def getWordIndex(self, word):
        """Return the index of ``word``; raises KeyError for unknown words."""
        return self.word2index[word]

    def indexesFromSentence(self, sentence, normalize=False):
        """Encode ``sentence`` as a list of indices terminated by EOS_token."""
        if self.version == self._SUBWORD_VERSION:
            return self.bpemb.encode_ids(sentence) + [EOS_token]
        return [self.getWordIndex(word) for word in self.tokenizer(sentence, normalize)] + [EOS_token]

    def binaryMatrix(self, l, value=PAD_token):
        """Return a matrix shaped like ``l`` holding 0 where the entry equals
        ``value`` (the padding id by default) and 1 everywhere else."""
        return [[0 if token == value else 1 for token in seq] for seq in l]

    def zeroPadding(self, l, fillvalue=PAD_token):
        """Transpose a list of index lists, padding short ones with
        ``fillvalue``; the result has one tuple per time step."""
        return list(itertools.zip_longest(*l, fillvalue=fillvalue))

    def inputVar(self, l):
        """Return the padded input tensor (one row per time step) and the
        int64 tensor of unpadded lengths (EOS included)."""
        indexes_batch = [self.indexesFromSentence(sentence) for sentence in l]
        lengths = torch.tensor([len(indexes) for indexes in indexes_batch], dtype=torch.int64)

        padList = self.zeroPadding(indexes_batch)
        padVar = torch.LongTensor(padList)
        return padVar, lengths

    # Returns padded target sequence tensor, padding mask, and max target length
    def outputVar(self, l):
        """Return the padded target tensor, its padding mask (1 = real token,
        0 = padding) as a ByteTensor, and the longest target length."""
        indexes_batch = [self.indexesFromSentence(sentence) for sentence in l]
        max_target_len = max([len(indexes) for indexes in indexes_batch])
        padList = self.zeroPadding(indexes_batch)
        mask = self.binaryMatrix(padList)
        mask = torch.ByteTensor(mask)
        padVar = torch.LongTensor(padList)
        return padVar, mask, max_target_len

# Model

## Encoder, Decoder, Attn

In [0]:
class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder over embedded input tokens.

    Args:
        embedding: ``nn.Embedding`` shared with the rest of the model; its
            embedding dimension is used as the GRU hidden size.
        n_layers: number of stacked GRU layers.
        dropout: dropout between GRU layers (ignored when ``n_layers == 1``).
    """

    def __init__(self, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = embedding.weight.size()[1]
        self.embedding = embedding

        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        #   because our input size is a word embedding with number of features == hidden_size
        self.gru = nn.GRU(self.hidden_size, self.hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        """Encode a padded batch.

        Args:
            input_seq: LongTensor of token ids, time-major (the GRU is not
                created with ``batch_first``).
            input_lengths: unpadded length of every sequence, in decreasing
                order. May live on any device.
            hidden: optional initial GRU hidden state.

        Returns:
            ``(outputs, hidden)`` where ``outputs`` is the sum of the forward
            and backward GRU outputs and ``hidden`` is the final GRU state.
        """
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # pack_padded_sequence requires the lengths on the CPU, while callers
        # move them to the training device together with the other tensors.
        if torch.is_tensor(input_lengths):
            input_lengths = input_lengths.cpu()
        # Pack padded batch of sequences for RNN module
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
        # Return output and final hidden state
        return outputs, hidden
# Luong attention layer

class Attn(torch.nn.Module):
    """Attention scorer offering the 'dot', 'general' and 'concat' variants.

    Args:
        method: one of 'dot', 'general' or 'concat'; anything else raises
            ValueError.
        hidden_size: feature size of the decoder and encoder states.
    """

    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size = hidden_size
        # Only the 'general' and 'concat' variants own trainable parameters.
        if self.method == 'general':
            self.attn = torch.nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = torch.nn.Linear(self.hidden_size * 2, hidden_size)
            self.v = torch.nn.Parameter(torch.randn(hidden_size, dtype=torch.float))

    def dot_score(self, hidden, encoder_output):
        """Plain dot product between the decoder state and encoder outputs."""
        return (hidden * encoder_output).sum(dim=2)

    def general_score(self, hidden, encoder_output):
        """Dot product after a linear projection of the encoder outputs."""
        projected = self.attn(encoder_output)
        return (hidden * projected).sum(dim=2)

    def concat_score(self, hidden, encoder_output):
        """Score from a tanh layer over the concatenated states, weighted by ``v``."""
        steps = encoder_output.size(0)
        stacked = torch.cat((hidden.expand(steps, -1, -1), encoder_output), 2)
        energy = torch.tanh(self.attn(stacked))
        return (self.v * energy).sum(dim=2)

    def forward(self, hidden, encoder_outputs):
        """Return softmax-normalised attention weights with a singleton
        dimension inserted at position 1."""
        # Pick the scoring function that matches the configured method
        if self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)

        # Swap the two dimensions of the score matrix, then normalise each row
        weights = F.softmax(attn_energies.t(), dim=1)
        return weights.unsqueeze(1)
      
class AttnDecoderRNN(nn.Module):
    """Single-step attention decoder with optional emotion conditioning.

    Args:
        attn_model: attention method name passed to ``Attn``.
        embedding: word ``nn.Embedding``; its shape fixes the hidden size
            (embedding dimension) and the output size (vocabulary size).
        emotion_embedding: ``nn.Embedding`` for emotion ids.
        n_layers: number of GRU layers.
        dropout: dropout applied to the word embedding and between GRU layers.
        use_emb: mix the emotion embedding into the GRU input.
        use_imemory: together with ``use_emb``, keep a gated emotion state
            that is read before and written after every GRU step.
        use_ememory: stored on the instance; not used by ``forward``.
    """

    def __init__(self, attn_model, embedding, emotion_embedding, n_layers=1, dropout=0.1,
                 use_emb=False, use_imemory=False, use_ememory=False):
        super(AttnDecoderRNN, self).__init__()
        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = embedding.weight.size()[1]
        self.output_size = embedding.weight.size()[0]
        self.n_layers = n_layers
        self.dropout = dropout
        self.use_emb = use_emb
        self.use_imemory = use_imemory
        self.use_ememory = use_ememory

        # Define layers
        self.embedding = embedding
        self.emotion_embedding = emotion_embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.concat_1 = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.concat_3 = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

        # DIY layers
        # The read gate sees the word embedding plus the hidden state of every layer.
        self.read_linear = nn.Linear(self.hidden_size * (self.n_layers + 1), self.hidden_size)
        self.write_linear = nn.Linear(self.hidden_size, self.hidden_size)
        self.gru_concat = nn.Linear(self.hidden_size * 2, self.hidden_size)

        self.attn = Attn(attn_model, self.hidden_size)

    def _attend_and_predict(self, rnn_output, encoder_outputs):
        """Attend over the encoder outputs and return vocabulary probabilities."""
        # Calculate attention weights from the current GRU output
        attn_weights = self.attn(rnn_output, encoder_outputs)
        # Multiply attention weights to encoder outputs to get new "weighted sum" context vector
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))
        # Concatenate weighted context vector and GRU output using Luong eq. 5
        concat_input = torch.cat((rnn_output.squeeze(0), context.squeeze(1)), 1)
        concat_output = torch.tanh(self.concat_3(concat_input))
        # Predict next word using Luong eq. 6
        return F.softmax(self.out(concat_output), dim=1)

    def forward(self, input_step, emotion, last_hidden, encoder_outputs):
        """Decode one time step.

        Args:
            input_step: LongTensor of the current input token ids.
            emotion: emotion ids, or (internal-memory mode only) the emotion
                state returned by the previous step.
            last_hidden: previous GRU hidden state, one slice per layer.
            encoder_outputs: encoder outputs to attend over.

        Returns:
            ``(output, hidden, emotion)``: softmax probabilities over the
            vocabulary, the new GRU hidden state, and the emotion value to
            feed into the next step (the updated state in internal-memory
            mode, otherwise the ``emotion`` argument unchanged).
        """
        # Note: we run this one step (word) at a time
        # Get embedding of current input word
        embedded = self.embedding_dropout(self.embedding(input_step))

        if self.use_emb and self.use_imemory:
            # On the first step ``emotion`` holds ids and must be embedded;
            # on later steps it already has the shape of ``embedded``.
            if emotion.size() != embedded.size():
                emotion = self.emotion_embedding(emotion).unsqueeze(dim=0)

            # Bring the batch dimension to the front before flattening the
            # layers, so each row only contains that batch item's own hidden
            # states (a plain reshape mixes batch items when n_layers > 1).
            batch_size = last_hidden.size(1)
            flat_hidden = last_hidden.transpose(0, 1).reshape(1, batch_size, -1)

            read_gate = torch.sigmoid(self.read_linear(torch.cat([embedded, flat_hidden], dim=2)))
            mem_read = torch.mul(emotion, read_gate)

            gru_input = self.gru_concat(torch.cat([embedded, mem_read], dim=2))
            rnn_output, hidden = self.gru(gru_input, last_hidden)

            write_gate = torch.sigmoid(self.write_linear(rnn_output))
            emotion = torch.mul(write_gate, mem_read)
        else:
            if self.use_emb:
                emotion_embedded = self.emotion_embedding(emotion).unsqueeze(dim=0)
                embedded = self.concat_1(torch.cat([embedded, emotion_embedded], dim=2))
            # Forward through unidirectional GRU
            rnn_output, hidden = self.gru(embedded, last_hidden)

        output = self._attend_and_predict(rnn_output, encoder_outputs)
        # Return output and final hidden state
        return output, hidden, emotion

## Loss Function, Train, TrainIter Methods

In [0]:
def maskNLLLoss(inp,emotion,target,mask,decoder):
    """Masked negative log-likelihood for one decoding step.

    Args:
        inp: probabilities over the vocabulary, one row per batch item.
        emotion: emotion state returned by the decoder; only read when
            ``decoder.use_imemory`` is true, where its last dimension is
            treated as the feature dimension.
        target: target token id of every batch item.
        mask: 1/True where the target is a real token, 0/False for padding
            (uint8 or bool).
        decoder: object exposing the boolean attribute ``use_imemory``.

    Returns:
        ``(loss, n_total)``: a scalar tensor on the device of ``inp`` and the
        number of unmasked targets. The loss is the mean NLL over unmasked
        targets; with ``use_imemory`` the mean L2 norm of the emotion state at
        the *masked* positions is added. A term that has nothing to average
        (or evaluates to NaN) contributes zero.
    """
    def _mean_or_zero(values):
        # Always return a tensor, so callers can use .item()/.backward() even
        # when nothing was selected.
        if values.numel() == 0:
            return values.new_zeros(())
        mean = values.mean()
        return values.new_zeros(()) if torch.isnan(mean) else mean

    # masked_select expects a boolean mask, and only bool supports ``~``.
    mask = mask.bool()
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    loss = _mean_or_zero(crossEntropy.masked_select(mask))
    if decoder.use_imemory:
        # Flatten to one row per batch item; unlike squeeze() this keeps the
        # batch dimension when the batch size is 1.
        emo_norm = torch.norm(emotion.reshape(-1, emotion.size(-1)), dim=1)
        loss = loss + _mean_or_zero(emo_norm.masked_select(~mask))
    return loss, nTotal.item()

def train(input_variable,input_emotion,lengths,target_variable,mask,max_target_len,encoder,decoder,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):
    """Run one optimisation step on a single batch.

    The batch is encoded, then decoded one time step at a time. With
    probability ``teacher_forcing_ratio`` (module-level) the ground-truth
    token is fed back as the next decoder input; otherwise the decoder's own
    top prediction is used. Gradients are clipped to ``clip`` before both
    optimizers step.

    Returns:
        The loss averaged over all unmasked target tokens of the batch.
    """
    # Start from clean gradients
    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.zero_grad()

    # Move the batch to the configured device
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Encode the whole input batch
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Every sentence starts decoding from the SOS token
    decoder_input = torch.LongTensor([[SOS_token] * batch_size]).to(device)
    input_emotion = input_emotion.to(device)

    # The decoder starts from the first n_layers slices of the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Decide once per batch whether to use teacher forcing
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    print_losses = []
    n_totals = 0
    for t in range(max_target_len):
        decoder_output, decoder_hidden, input_emotion = decoder(
            decoder_input, input_emotion, decoder_hidden, encoder_outputs
        )
        if use_teacher_forcing:
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
        else:
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]]).to(device)
        # Calculate and accumulate loss
        mask_loss, nTotal = maskNLLLoss(decoder_output, input_emotion, target_variable[t], mask[t], decoder)
        loss += mask_loss
        print_losses.append(mask_loss.item() * nTotal)
        n_totals += nTotal

    # Backpropagate through the accumulated loss
    loss.backward()

    # Clip gradients: gradients are modified in place
    for module in (encoder, decoder):
        torch.nn.utils.clip_grad_norm_(module.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses) / n_totals

def trainIters(voc,train_set,train_emo, dev_set,dev_emo, encoder,decoder,encoder_optimizer, 
               decoder_optimizer,embedding,emo_embedding,encoder_n_layers,decoder_n_layers, 
               n_iteration,batch_size,print_every,save_every,clip,loadFilename=None):
    """Run the training loop for ``n_iteration`` iterations.

    All training and dev batches are sampled (with replacement) and converted
    to tensors up front, one batch per iteration. Every ``print_every``
    iterations the average loss and perplexity are printed; every
    ``save_every`` iterations a dev batch is processed and a checkpoint is
    written to ``os.path.join(PATH, MODEL_NAME)``.

    ``encoder_n_layers`` and ``decoder_n_layers`` are accepted but not used in
    this function. ``PATH`` and ``MODEL_NAME`` are read from module scope.
    """

    # Load batches for each iteration
#     training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
#                       for _ in range(n_iteration)]
    training_batches = [voc.batchSent2VecData([random.choice(np.arange(len(train_set))) for _ in range(batch_size)],train_set,train_emo)
                      for _ in range(n_iteration)]
#     dev_batches = [batch2TrainData(voc, [random.choice(dev_pairs) for _ in range(batch_size)])
#                       for _ in range(n_iteration)]
    dev_batches = [voc.batchSent2VecData([random.choice(np.arange(len(dev_set))) for _ in range(batch_size)],dev_set,dev_emo)
                      for _ in range(n_iteration)]
    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    if loadFilename:
        # NOTE(review): `checkpoint` is not defined in this function; it must
        # already exist at module level when resuming, otherwise this raises
        # NameError.
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        # Extract fields from batch
        input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = training_batch
        # Run a training iteration with batch
        # The response emotion (emo_out) is what conditions the decoder.
        loss = train(input_variable,emo_out,lengths,target_variable,mask, max_target_len,encoder,
                     decoder, encoder_optimizer, decoder_optimizer, batch_size, clip)


        print_loss += loss

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print_perplexity = np.exp(print_loss_avg)
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}; Average perplexity: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg,print_perplexity))
            print_loss = 0

        # Save checkpoint
        if (iteration % save_every == 0):
            dev_batch = dev_batches[iteration-1]
            input_variable2, lengths2, target_variable2, mask2, max_target_len2,emo_in2,emo_out2 = dev_batch
            # NOTE(review): the dev loss is computed by calling train(), which
            # also backpropagates and steps both optimizers, so the model is
            # updated on the dev batch. Confirm whether that is intended.
            dev_loss = train(input_variable2,emo_out2, lengths2, target_variable2, mask2, max_target_len2, encoder,
                     decoder,encoder_optimizer, decoder_optimizer,batch_size, clip)
            dev_perplexity = np.exp(dev_loss)
            
            print("Iteration: {}; Dev loss: {:.4f}; Dev perplexity: {:.4f}".format(iteration,dev_loss,dev_perplexity))
            directory = os.path.join(PATH, MODEL_NAME)
            if not os.path.exists(directory):
                os.makedirs(directory)
            # The checkpoint stores the training loss of this iteration (not
            # the dev loss) together with all model/optimizer state.
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict(),
                'emo_embedding':emo_embedding.state_dict()
            }, os.path.join(directory,'{}_{}.tar'.format(iteration, 'checkpoint')))

## Searcher

In [0]:
class BeamSearchNode(object):
    """One hypothesis step in the beam search, linked to its predecessor."""

    def __init__(self, hiddenstate,hiddenemotion,previousNode,wordId,logProb,length,past,debug=False):
        """Store the state of a partial hypothesis.

        :param hiddenstate: decoder hidden state after producing this node
        :param hiddenemotion: emotion value returned by the decoder for this node
        :param previousNode: parent node, or None for the root
        :param wordId: token id held by this node
        :param logProb: accumulated score of the path ending here
        :param length: number of nodes on the path, this one included
        :param past: token ids of the nodes preceding this one
        :param debug: when true, the node prints itself on creation
        """
        self.h, self.e = hiddenstate, hiddenemotion
        self.prevNode = previousNode
        self.wordid = wordId
        self.logp = logProb
        self.leng = length
        self.past = past
        self.debug = debug
        if self.debug:
            self.print_node()

    def __lt__(self, other):
        # Nodes are ordered by their length-normalised score.
        return other.eval() > self.eval()

    def eval(self, alpha=1.0):
        """Return the accumulated score divided by (length - 1), plus a
        reward term that is currently always zero."""
        reward = 0
        # The small epsilon keeps the root node (length 1) from dividing by zero.
        normaliser = float(self.leng - 1 + 1e-6)
        return self.logp / normaliser + alpha * reward

    def print_node(self):
        """Print this node's word, score and history using the global ``voc``."""
        history = [voc.index2word[idx] for idx in self.past]
        current = voc.index2word[self.wordid.item()]
        print("Printing Node {}, loss = {}, eval = {}, len = {},past = {}".format(current,self.logp,self.eval(),self.leng,history))

In [0]:
# Beam-search settings read by Searcher.
beam_width = 10  # number of candidate tokens expanded from every node
max_qsize = 1000  # beam search gives up once its queue counter exceeds this
min_sentence_length = 7  # sentence breakers are rejected on shorter hypotheses
similar_word_len = 2  # NOTE(review): not referenced in the visible code
# Tokens that count as ending a sentence ('.' is listed twice).
sent_breaker = ['.','.','!','?',':','EOS']
class Searcher(nn.Module):
    """Decode a response with greedy search (``k == 1``) or beam search.

    Args:
        encoder: encoder module called as ``encoder(input_seq, input_length)``.
        decoder: decoder module called one step at a time; it must expose
            ``n_layers``.
        k: 1 selects greedy decoding, any other value selects beam search.
        debug: when true, beam-search nodes print themselves on creation.
    """

    def __init__(self, encoder,decoder,k=1,debug=False):
        super(Searcher, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.k = k
        self.debug = debug

    def forward(self, input_seq,target_emotion,input_length,max_length=MAX_LENGTH):
        """Encode ``input_seq`` and decode a response for ``target_emotion``.

        Returns:
            A list of decoded sequences: a single 1-D tensor of token ids for
            greedy search, or the list produced by ``beam_decode``.
        """
        # Forward input through encoder model
        encoder_output, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the
        # decoder (use this searcher's own decoder, not a module-level one)
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        decoder_emotion = torch.LongTensor([target_emotion]).to(device)

        if self.k != 1:
            return self.beam_decode(decoder_emotion, decoder_hidden, encoder_output, self.debug)

        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden, decoder_emotion = self.decoder(
                decoder_input, decoder_emotion, decoder_hidden, encoder_output)
            # Obtain most likely word token
            _, decoder_input = torch.max(decoder_output, dim=1)
            # Record token
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        return [all_tokens]

    def eval_node(self,decoded_t,n):
        """Return False when ``decoded_t`` is a sentence breaker and the
        hypothesis ending at node ``n`` is still shorter than
        ``min_sentence_length``; True otherwise. Uses the global ``voc``."""
        word = voc.index2word[decoded_t.item()]
        return not (word in sent_breaker and n.leng < min_sentence_length)

    def beam_decode(self,decoder_emotion,decoder_hidden,encoder_output,debug=False):
        """Best-first beam search over decoder steps.

        Args:
            decoder_emotion: initial emotion input of the decoder.
            decoder_hidden: initial decoder hidden state.
            encoder_output: encoder outputs to attend over.
            debug: forwarded to every created ``BeamSearchNode``.

        Returns:
            A list of utterances, each a list of token-id tensors from the
            SOS node to the last node, for hypotheses of at least
            ``min_sentence_length`` nodes.
        """
        endnodes = []
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token

        node = BeamSearchNode(decoder_hidden, decoder_emotion, None, decoder_input, 0, 1,[],debug)
        nodes = PriorityQueue()
        nodes.put((-node.eval(), node))
        qsize = 1
        # start beam search
        while True:
            # give up when decoding takes too long
            if qsize > max_qsize or nodes.empty():
                break
            # fetch the best node
            score, n = nodes.get()
            if n.wordid.item() == EOS_token and n.prevNode is not None:
                endnodes.append((score, n))
                # NOTE(review): `topk` is not defined in the visible part of
                # the file; confirm it exists at module level.
                # if we reached maximum # of sentences required
                if len(endnodes) >= topk:
                    break
                # A finished hypothesis must not be expanded any further.
                continue
            # decode one step using decoder, resuming from the state stored on
            # this node (hidden state and emotion both belong to the path)
            decoder_output, decoder_hidden, decoder_emotion = self.decoder(
                n.wordid, n.e, n.h, encoder_output)
            # NOTE(review): the decoder visible in this file returns softmax
            # probabilities, so the accumulated "logp" is a sum of
            # probabilities rather than log-probabilities.
            log_prob, indexes = torch.topk(decoder_output, beam_width)

            nextnodes = []
            for new_k in range(beam_width):
                decoded_t = indexes[0][new_k].view(1, -1)
                if not self.eval_node(decoded_t, n):
                    continue
                log_p = log_prob[0][new_k].item()
                past = n.past + [n.wordid.item()]
                node = BeamSearchNode(decoder_hidden, decoder_emotion, n, decoded_t,
                                      n.logp + log_p, n.leng + 1, past, debug)
                nextnodes.append((-node.eval(), node))
            for entry in nextnodes:
                nodes.put(entry)
            qsize += len(nextnodes) - 1

        # choose nbest paths, back trace them
        if len(endnodes) == 0:
            endnodes = [nodes.get() for _ in range(min(100,nodes.qsize()))]
        utterances = []
        for score, n in sorted(endnodes, key=operator.itemgetter(0)):
            if n.leng < min_sentence_length:
                continue
            utterance = [n.wordid]
            while n.prevNode is not None:
                n = n.prevNode
                utterance.append(n.wordid)
            utterances.append(utterance[::-1])

        return utterances


### Searcher test

In [0]:
# Switch both networks to evaluation mode (disables dropout) before decoding.
encoder.eval()
decoder.eval()
my_searcher = Searcher(encoder,decoder,2)

# Encode one probe sentence as a batch of size one.
input_sentence = 'my name is '
indexes_batch = [voc.indexesFromSentence(input_sentence,normalize=False)]
lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
# Build the index tensor here (one row per time step) instead of relying on
# an `input_batch` left over from an earlier run.
input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
input_batch = input_batch.to(device)
lengths = lengths.to(device)

In [182]:
# Greedy decoding (k=1) of the probe sentence with emotion id 1.
my_searcher = Searcher(encoder,decoder,1)
tmp = my_searcher(input_batch,1,lengths)
# Show at most ten decoded sequences, hiding the special tokens.
for tokens in tmp[:10]:
    decoded_words = [
        word
        for word in (voc.index2word[token.item()] for token in tokens)
        if word not in ('EOS', 'PAD', 'SOS')
    ]
    print('{}: '.format(idx2emo[1]), ' '.join(decoded_words))

joy:  i m not a lot . you have to be a lot . you have to be


## Evaluation

In [0]:
def evaluateInput(searcher,voc,num_output=1,max_length=MAX_LENGTH):
    """Interactive loop: read a sentence and print a reply for every emotion.

    Typing ``q`` or ``quit`` ends the loop. A sentence containing a word that
    is missing from the vocabulary prints an error and the loop continues.

    Args:
        searcher: callable used as
            ``searcher(input_batch, emotion_id, lengths, max_length=...)``
            returning a list of decoded token sequences.
        voc: vocabulary providing ``indexesFromSentence`` and ``index2word``.
        num_output: maximum number of replies printed per emotion.
        max_length: decoding length limit forwarded to ``searcher``.
    """
    while True:
        try:
            input_sentence = input('> ')
            if input_sentence in ('q', 'quit'):
                break

            # Encode the sentence as a batch of size one (one row per time step).
            indexes_batch = [voc.indexesFromSentence(input_sentence,normalize=False)]
            lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
            input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
            input_batch = input_batch.to(device)
            lengths = lengths.to(device)
            for e in range(num_emotion):
                # Use the searcher passed in, and label the output with the
                # emotion that was actually requested.
                candidates = searcher(input_batch, e, lengths, max_length=max_length)
                for tokens in candidates[:num_output]:
                    decoded_words = [voc.index2word[token.item()] for token in tokens]
                    decoded_words = [x for x in decoded_words if x not in ('EOS', 'PAD', 'SOS')]
                    print('{}: '.format(idx2emo[e]), ' '.join(decoded_words))

        except KeyError:
            print("Error: Encountered unknown word.")

# Run Model

## Configuration

In [0]:
# Configure models
# MODEL_NAME names the checkpoint sub-directory below PATH; corpus names the
# data directory below PATH.
MODEL_NAME = 'use_emb_imem'
PATH = 'drive/My Drive/ECM'
corpus = 'FullData'

# Emotion-conditioning switches (AttnDecoderRNN takes use_emb / use_imemory /
# use_ememory arguments).
use_embedding =  True
use_imemory = True
use_ememory = False

# Embedding flavour handed to Voc; the commented alternatives select the
# pre-trained GloVe or BPEmb vectors.
embedding_version = 'self-trained'
# embedding_version = 'pre-trained-word2vec-100d'
# embedding_version = 'pre-trained-word2vec-300d'
# embedding_version = 'pre-trained-subword-100d'

# Attention scoring method (one of 'dot', 'general', 'concat').
attn_model = 'dot'
#attn_model = 'general'
#attn_model = 'concat'


encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64

# Set checkpoint to load from; set to None if starting from scratch
# NOTE(review): loadFilename points at iteration 900 while checkpoint_iter is
# 4000 — confirm which checkpoint is meant.
loadFilename = "drive/My Drive/ECM/use_emb_imem/900_checkpoint.tar"
checkpoint_iter = 4000
#loadFilename = os.path.join(save_dir, model_name, corpus_name,
#                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
#                            '{}_checkpoint.tar'.format(checkpoint_iter))
# loadFilename = "drive/My Drive/ECM/1000_checkpoint.tar"

## Load Data

In [63]:
# Build the vocabulary object and load the pickled train/validation/test
# sentence sets together with their emotion labels.
datafile = os.path.join(PATH,corpus)
voc = Voc(MODEL_NAME, embedding_version)
train_set,train_emo,val_set,val_emo,test_set,test_emo = voc.loadDataFromPickle(datafile)

# Alternative (disabled): load the Cornell movie corpus from a formatted text file
# path = 'drive/My Drive/ECM'
# corpus = "CornellMovie"
# filename = "formatted_movie_lines.txt"
# datafile = os.path.join(path,corpus,filename)
# voc = Voc(MODEL_NAME,corpus)
# pairs = voc.loadData(datafile)

Start preparing data ...
loading 10000 th pair from training dataset you get in trouble ? maybe . can t really tell yet .
loading 20000 th pair from training dataset is your back still killing you ? i didn t think you d come today .
loading 30000 th pair from training dataset ray something s here . where are you pete ?
loading 40000 th pair from training dataset thomas could you do me a favor ? what s up ?
loading 50000 th pair from training dataset good . i want to discuss with you the mode of payment for the construction of the power plant under well i trust that your presence will hasten the settlement of payment terms .
loading 60000 th pair from training dataset well where are you going ? i am going over here .
loading 70000 th pair from training dataset that word again ! i don t even know what it means . . . it s time you found out . i love you . i love you . you re a beautiful and
loading 80000 th pair from training dataset mr . smith i m wondering whether you have found a way o

## Load Model

In [64]:
def load_emb(emb_version,loadFilename,hidden_size=500):
    """Create the word-embedding layer for the requested embedding version.

    A randomly initialised ``voc.num_words x hidden_size`` layer is the
    default. For the pre-trained word2vec versions it is replaced by a layer
    holding the first ``voc.num_words`` rows of ``voc.weights_matrix``; for
    ``'bpemb'`` by a layer holding the whole matrix. When ``loadFilename`` is
    truthy the weights are finally overwritten from the notebook-level
    ``embedding_sd`` state dict.

    Args:
        emb_version: embedding version string.
        loadFilename: checkpoint path; only its truthiness is used here.
        hidden_size: embedding width of the default layer.

    Returns:
        The ``nn.Embedding`` layer.
    """
    def _from_matrix(matrix):
        # Size a layer after the matrix and copy the matrix in as its weight
        rows, cols = matrix.shape
        layer = nn.Embedding(rows, cols)
        layer.load_state_dict({'weight': torch.Tensor(matrix)})
        return layer

    embedding = nn.Embedding(voc.num_words, hidden_size)
    word2vec_versions = ('pre-trained-word2vec-100d', 'pre-trained-word2vec-300d')
    if emb_version in word2vec_versions:
        embedding = _from_matrix(voc.weights_matrix[:voc.num_words])
    elif emb_version == 'bpemb':
        embedding = _from_matrix(voc.weights_matrix)

    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    return embedding


# Load model if a loadFilename is provided
if loadFilename:
    # map_location remaps tensors that were saved on a GPU onto the device of
    # this session, so the checkpoint also loads on a CPU-only runtime.
    checkpoint = torch.load(loadFilename, map_location=device)
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    # Restore the vocabulary state that was saved with the checkpoint
    voc.__dict__ = checkpoint['voc_dict']


print('Building encoder and decoder ...')

# Initialize word embeddings and emotion embeddings, or load in previous ones.
# load_emb reads the embedding_sd defined above when loadFilename is set.
embedding = load_emb(embedding_version,loadFilename)
# One vector per emotion, with the same width as the word embeddings
emo_embedding = nn.Embedding(num_emotion,embedding.weight.size()[1])

# Initialize encoder & decoder models
encoder = EncoderRNN(embedding,encoder_n_layers,dropout)
decoder = AttnDecoderRNN(attn_model, embedding, emo_embedding, decoder_n_layers,
                         dropout,use_emb=use_embedding, use_imemory=use_imemory, use_ememory=use_ememory)

if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

Building encoder and decoder ...
Models built and ready to go!


## Run Model

In [166]:
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0  # decoder learning rate = learning_rate * this ratio
n_iteration = 1000
print_every = 5
save_every = 100
# NOTE(review): clearing loadFilename here means the optimizer states read from
# the checkpoint are never restored by the `if loadFilename:` block below --
# confirm this is intended when resuming training.
loadFilename = None

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations
print("Starting Training!")
# Disabled variant: train on the first 5000 training / 500 validation samples only
# trainIters(voc, train_set[:5000], train_emo[:5000],val_set[:500],val_emo[:500], encoder, decoder, encoder_optimizer, 
#            decoder_optimizer,embedding, emo_embedding, encoder_n_layers, decoder_n_layers, n_iteration, batch_size,
#            print_every, save_every, clip)
trainIters(voc, train_set,train_emo,val_set,val_emo, encoder, decoder, encoder_optimizer, 
           decoder_optimizer,embedding, emo_embedding, encoder_n_layers, decoder_n_layers, n_iteration, batch_size,
           print_every, save_every, clip)

Building optimizers ...
Starting Training!
Initializing ...
Training...
Iteration: 5; Percent complete: 0.5%; Average loss: 10.5367; Average perplexity: 37674.8678
Iteration: 10; Percent complete: 1.0%; Average loss: 8.5017; Average perplexity: 4923.1735
Iteration: 15; Percent complete: 1.5%; Average loss: 6.9234; Average perplexity: 1015.8085
Iteration: 20; Percent complete: 2.0%; Average loss: 6.2465; Average perplexity: 516.1860
Iteration: 25; Percent complete: 2.5%; Average loss: 6.0005; Average perplexity: 403.6250
Iteration: 30; Percent complete: 3.0%; Average loss: 6.1390; Average perplexity: 463.5930
Iteration: 35; Percent complete: 3.5%; Average loss: 6.0850; Average perplexity: 439.2072
Iteration: 40; Percent complete: 4.0%; Average loss: 6.2416; Average perplexity: 513.7022
Iteration: 45; Percent complete: 4.5%; Average loss: 6.1131; Average perplexity: 451.7244
Iteration: 50; Percent complete: 5.0%; Average loss: 5.9897; Average perplexity: 399.2981
Iteration: 55; Percent c

## Evaluate

In [17]:
# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
# searcher = GreedySearchDecoder(encoder, decoder)
searcher = MyTopKDecoder(encoder, decoder,3)

# Begin chatting. evaluateInput takes (searcher, voc, num_output, max_length);
# the encoder and decoder are reached through the searcher, so they are not
# passed separately (passing them raised a TypeError with this signature).
evaluateInput(searcher, voc, num_output=3)

> hello


KeyboardInterrupt: ignored

## Save Model

In [0]:
# from google.colab import drive
# drive.mount('/content/gdrive')
# !zip -r cb_model.zip cb_model
# !cp cb_model.zip 'gdrive/My Drive/'

# content/ecm_model_pretrained2/DailyDialogue/2-2_300/1400_checkpoint.tar
# content/ecm_model_pretrained2/DailyDialogue/2-2_300/2000_checkpoint.tar
# content/ecm_model_withimemory_embfix/DailyDialogue/2-2_300/2000_checkpoint.tar
# content/ecm_model_withimemory/DailyDialogue/2-2_100/2000_checkpoint.tar
# ! cp ecm_model_withimemory/DailyDialogue/2-2_300/2000_checkpoint.tar 
# content/ecm_model_withimemory_bpemb/DailyDialogue/2-2_100/5000_checkpoint.tar
# content/ecm_model_imemory_bpemb_notfix/DailyDialogue/2-2_100/5000_checkpoint.tar
# content/ecm_model_imemory_bpemb_fix/CornellMovie/2-2_100/10000_checkpoint.tar
# content/1000_checkpoint.tar
# ! mv ecm_model_imemory_bpemb_fix/CornellMovie/2-2_100/10000_checkpoint.tar ecm_imemory_bpemb_nofix_10000_cornell_checkpoint.tar
! cp 1000_checkpoint.tar 'drive/My Drive/ECM'

# Debug

In [0]:
# Load the two objects pickled back-to-back in train.pickle: the training
# sentences first, then their emotion labels.
# NOTE: pickle.load must only be used on trusted files.
filepath = 'drive/My Drive/ECM/FullData'
# The context manager closes the file handle even if unpickling fails
with open(filepath+'/train.pickle', 'rb') as f:
    train_data = pickle.load(f)
    train_emo_set = pickle.load(f)

In [76]:
# Allocates a 10-element float tensor without initialising it; the displayed
# values are whatever happened to be in memory.
torch.FloatTensor(10)

tensor([2.1851e+17, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00])

In [81]:
# Ten float samples drawn from a standard normal distribution
torch.randn(10,dtype=torch.float)

tensor([-0.9379, -1.5944,  0.0552,  1.4294, -0.1484,  0.9212, -0.6980,  0.8830,
         0.5407,  2.8342])

## TrainIter

In [0]:
# Build one debugging batch from 64 validation indexes drawn at random
# (with replacement) and unpack the seven values batchSent2VecData returns.
batches = voc.batchSent2VecData([random.choice(np.arange(len(val_set))) for _ in range(64)],val_set,val_emo)
input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = batches
# Disabled: run a single training step on this batch
# loss = train(input_variable, emo_out, lengths, target_variable, mask, max_target_len, encoder,
#                      decoder, encoder_optimizer, decoder_optimizer, batch_size, clip)


## Train

In [149]:
# Manual walk-through of one training step, used to track down NaN losses.
input_variable = input_variable.to(device)
lengths = lengths.to(device)
target_variable = target_variable.to(device)
mask = mask.to(device)


# Encode the whole input batch
encoder_outputs, encoder_hidden = encoder(input_variable, lengths)


# First decoder input: one SOS token per sentence in the batch
decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
decoder_input = decoder_input.to(device)
input_emotion = emo_out.to(device)
# Initial decoder hidden state: first decoder.n_layers slices of the encoder's final hidden state
decoder_hidden = encoder_hidden[:decoder.n_layers]


# A single decoder step
decoder_output, decoder_hidden,input_emotion = decoder(decoder_input,input_emotion,decoder_hidden, encoder_outputs)


# Highest-scoring word of each sentence becomes the next decoder input
_, topi = decoder_output.topk(1)
decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
decoder_input = decoder_input.to(device)
loss = 0
# NOTE(review): the decoder is not stepped inside this loop, so the same
# decoder_output is scored against every target position t.
for t in range(max_target_len):
    mask_loss,nTotal = maskNLLLoss(decoder_output,input_emotion,target_variable[t],mask[t],decoder)
    loss += mask_loss
loss.backward()

encounter error loss is NAN
encounter error loss is NAN
encounter error loss is NAN
encounter error loss is NAN


In [154]:
# Evaluate the masked loss for the first target position only
t = 0
maskNLLLoss(decoder_output,input_emotion,target_variable[t],mask[t],decoder)


encounter error loss is NAN


(tensor(10.7799, grad_fn=<MeanBackward1>), 64)

In [53]:
# Inspect the word-embedding weights (the cell output shows rows of NaN);
# the disabled lines inspect the encoder results and the emotion embeddings.
# print(encoder_outputs, encoder_hidden)
# emo_embedding.weight
embedding.weight

Parameter containing:
tensor([[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
        [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
        [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
        ...,
        [ 1.2485, -0.8687, -0.4576,  ..., -0.6730,  1.0463,  2.1569],
        [-0.2561,  0.4457, -0.3998,  ..., -1.6912,  0.3333,  0.2043],
        [ 1.1599,  0.6201, -0.2096,  ..., -0.9545, -0.9864, -0.2484]],
       requires_grad=True)

In [25]:
# Print the sizes of the values returned by one decoder step
print(decoder_output.size(),decoder_hidden.size(),input_emotion.size())

torch.Size([64, 47651]) torch.Size([2, 64, 500]) torch.Size([1, 64, 500])


## Encoder

In [1]:
# Size of the emotion embeddings for emo_out after adding a leading dimension
# embedding(input_variable).size()
emo_embedding(emo_out).unsqueeze(dim=0).size()

NameError: ignored

In [15]:
# Run only the embedding lookup of the encoder; the remaining encoder steps
# are kept below, disabled, for reference.
print(input_variable.shape)
embedded = embedding(input_variable)
#         # Pack padded batch of sequences for RNN module
#         packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
#         # Forward pass through GRU
#         outputs, hidden = self.gru(packed, hidden)
#         # Unpack padding
#         outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
#         # Sum bidirectional GRU outputs
#         outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:]

torch.Size([65, 64])


RuntimeError: ignored

In [46]:
# Print the largest word index in the batch, e.g. to compare it with the
# number of rows of the embedding layer.
# embedding2 = nn.Embedding(voc.num_words, 500)
# embedding2(input_variable)
# print(embedding,input_variable.size())
# voc.num_words
print(torch.max(input_variable))

tensor(47479)


In [0]:





class EncoderRNN(nn.Module):
    """Bidirectional multi-layer GRU encoder on top of a word-embedding layer.

    The hidden size is taken from the embedding width, so the GRU input size
    and hidden size are equal.

    Args:
        embedding: ``nn.Embedding`` layer used to embed the input indexes.
        n_layers: number of stacked GRU layers.
        dropout: dropout between GRU layers; forced to 0 when ``n_layers == 1``.
    """

    def __init__(self, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = embedding.weight.size()[1]
        self.embedding = embedding

        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        #   because our input size is a word embedding with number of features == hidden_size
        self.gru = nn.GRU(self.hidden_size, self.hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        """Encode a padded batch of index sequences.

        Args:
            input_seq: word indexes laid out as (max_length, batch).
            input_lengths: true length of every sequence, sorted in
                decreasing order (tensor or list of ints).
            hidden: optional initial GRU hidden state.

        Returns:
            ``(outputs, hidden)`` where ``outputs`` is the sum of the forward
            and backward GRU outputs, (max_length, batch, hidden_size), and
            ``hidden`` is the final GRU hidden state.
        """
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # pack_padded_sequence needs the lengths on the CPU, even when the
        # batch itself lives on the GPU
        if torch.is_tensor(input_lengths):
            input_lengths = input_lengths.cpu()
        # Pack padded batch of sequences for RNN module
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
        # Return output and final hidden state
        return outputs, hidden

## Decoder

In [38]:
# Build one batch with the emotion-aware Voc pipeline (seven values) and one
# with batch2TrainData (five values) so the two can be compared.
# NOTE(review): pairs, voc2 and pairs2 are not defined by the active code of
# this cell, which matches the NameError in its output.
batches = voc.batchSent2VecData([random.choice(np.arange(len(pairs))) for _ in range(64)],pairs)
input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = batches

# Define path to new file
# datafile = "drive/My Drive/ECM/CornellMovie/formatted_movie_lines.txt"
# voc2, pairs2 = loadPrepareData(datafile)
batches2 = batch2TrainData(voc2, [random.choice(pairs2) for _ in range(64)])
input_variable2, lengths2, target_variable2, mask2, max_target_len2 = batches2

# print("input_variable:", input_variable)
# print("lengths:", lengths2)
# print("target_variable:", target_variable)
# print("mask:", mask)
# print("max_target_len:", max_target_len)
# input_variable2 = input_variable2.to(device)
# lengths2 = lengths2.to(device)
# hidden=None
# embedded2 = embedding(input_variable2)
# torch.as_tensor(lengths2, dtype=torch.int64)
# packed2 = torch.nn.utils.rnn.pack_padded_sequence(embedded2, lengths2)

NameError: ignored

In [0]:
# Compare the sequence lengths produced by the two batching pipelines
print(lengths2)
print(lengths)
# Disabled: pack the embedded batch with the first pipeline's lengths
# packed2 = torch.nn.utils.rnn.pack_padded_sequence(embedded, lengths)

tensor([20, 20, 19, 19, 17, 17, 17, 17, 17, 16, 16, 15, 13, 13, 13, 12, 12, 11,
        11, 11, 11, 11, 10, 10, 10, 10,  9,  9,  9,  9,  9,  8,  8,  8,  8,  8,
         8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,
         4,  4,  4,  4,  4,  3,  3,  3,  3,  3])
tensor([22, 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 18,
        18, 17, 16, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 10, 10, 10,
        10,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,  7,  7,  7,  7,  7,  6,  6,
         6,  6,  5,  5,  5,  5,  4,  4,  3,  3])


In [0]:
# Scratch copy of one decoding step. The statements were pasted with the
# indentation of the function they came from, which made the cell an
# IndentationError; they are now at top level.
# NOTE(review): t, loss, print_losses and n_totals must already exist in the
# session, and decoder / maskNLLLoss are called here without the emotion
# arguments used elsewhere in this notebook -- confirm before running.
encoder_outputs, encoder_hidden = encoder(input_variable, lengths)
# Create initial decoder input (start with SOS tokens for each sentence)
decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
decoder_input = decoder_input.to(device)

# Set initial decoder hidden state to the encoder's final hidden state
decoder_hidden = encoder_hidden[:decoder.n_layers]
decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
# No teacher forcing: next input is decoder's own current output
_, topi = decoder_output.topk(1)
decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
decoder_input = decoder_input.to(device)
# Calculate and accumulate loss
mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
loss += mask_loss
print_losses.append(mask_loss.item() * nTotal)
n_totals += nTotal

In [0]:

# Draw small_batch_size random pairs and unpack the five values batch2TrainData returns
batches = batch2TrainData(voc, [random.choice(pairs) for _ in range(small_batch_size)])
input_variable, lengths, target_variable, mask, max_target_len = batches

In [0]:
# Build a batch of 64 randomly drawn pairs with the emotion-aware pipeline
pair_batch = voc.batchSent2VecData([random.choice(np.arange(len(pairs))) for _ in range(64)],pairs)

# Unpack the batch that was just built; the cell previously unpacked the stale
# `batches` variable left over from an earlier cell.
input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = pair_batch
# Disabled: run a single training step on this batch
# loss = train(input_variable,lengths, target_variable, mask, max_target_len,encoder,
#                       decoder,embedding,encoder_optimizer, decoder_optimizer, batch_size, clip)


In [0]:
# Encode the current debugging batch
encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

In [0]:
# One teacher-forced decoder step; t must already be set in the session.
decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
# Teacher forcing: next input is current target
decoder_input = target_variable[t].view(1, -1)
# Calculate and accumulate loss
mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])

torch.Size([22, 64, 500])

In [0]:
# Re-create the encoder's forward pass by hand; only the embedding lookup is
# active, the packing / GRU steps are disabled below.
input_variable = input_variable.to(device)
lengths = lengths.to(device)
# encoder_outputs, encoder_hidden = encoder(input_variable, lengths)
# Stand-alone 2-layer bidirectional GRU; hidden_size must already be defined
gru = nn.GRU(hidden_size, hidden_size,2,dropout=0, bidirectional=True)
hidden=None
embedded = embedding(input_variable)
# NOTE(review): the converted tensor is discarded; lengths itself is unchanged
torch.as_tensor(lengths, dtype=torch.int64)
# packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, lengths)
# packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
# outputs, hidden = gru(packed, hidden)
# outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
# outputs = outputs[:, :, :hidden_size] + outputs[:, : ,hidden_size:]
embedded.size()

torch.Size([22, 64, 500])

## Loss

In [162]:

# Step-by-step version of the masked loss for target position 1.
# mask_loss,nTotal = maskNLLLoss(decoder_output,input_emotion,target_variable[t],mask[t],decoder)
# (inp,emotion,target,mask,decoder):
target = target_variable[1]
nTotal = mask[1].sum()
# Negative log of the value the decoder assigned to each target word
crossEntropy = -torch.log(torch.gather(decoder_output, 1, target.view(-1, 1)).squeeze(1))
# Average over the positions selected by the mask
loss = crossEntropy.masked_select(mask[1]).mean()
# Norm of the emotion state per sentence, averaged over the positions NOT selected by the mask
emo_loss = torch.norm(input_emotion.squeeze(),dim=1)
emo_loss = emo_loss.masked_select(1-mask[1]).mean()
# The mean of an empty selection is NaN; treat that case as no emotion penalty
emo_loss = 0 if math.isnan(emo_loss) else emo_loss
loss+= emo_loss
loss

tensor(10.7795, grad_fn=<AddBackward0>)

In [158]:
# Inverted mask for target position 1 (the displayed result is all zeros)
1-mask[1]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.uint8)

## Debug Beam Search

In [49]:



# Stand-alone version of the body of evaluateInput for a single emotion.
# NOTE(review): input_sentence must already be defined in the session.
# def evaluateInput(encoder,decoder,searcher,voc,num_output=1,max_length=MAX_LENGTH):
max_length=MAX_LENGTH

searcher = MyTopKDecoder(encoder,decoder,3)
indexes_batch = [voc.indexesFromSentence(input_sentence,normalize=False)]
lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
# Rebuild the input batch from the sentence just indexed (as evaluateInput
# does); previously a stale input_batch from an earlier cell was reused and
# no longer matched `lengths`.
input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
input_batch = input_batch.to(device)
lengths = lengths.to(device)
e = 1
tmp = searcher(input_batch,e,lengths,3,max_length)

for i in range(min(len(tmp),3)):
    if voc.version == "bpemb":
        # Sub-word vocabulary: drop the PAD/SOS/EOS ids and let bpemb join the pieces
        padding = [0,1,2]
        words = [token.item() for token in tmp[i]]
        filtered = [w for w in words if w not in padding]
        print('{}: '.format(idx2emo[e]), voc.bpemb.decode_ids(filtered))
    else:
        decoded_words = [voc.index2word[token.item()] for token in tmp[i]]
        decoded_words[:] = [x for x in decoded_words if not (x == 'EOS' or x == 'PAD' or x == 'SOS')]
        print('{}: '.format(idx2emo[e]), ' '.join(decoded_words))

               

AttributeError: ignored

### Beam Decode

In [0]:
# Prepare the inputs of a beam search by hand: encoder results, initial
# decoder hidden state, empty result tensors and the target emotion e.
encoder_output, encoder_hidden = encoder(input_batch, lengths)
decoder_hidden = encoder_hidden[:decoder.n_layers]
all_tokens = torch.zeros([0], device=device, dtype=torch.long)
all_scores = torch.zeros([0], device=device)
decoder_emotion = torch.LongTensor([e]).to(device)
# beam_decode2(decoder, emotion, decoder_hidden, encoder_output=encoder_output,topk=3,debug=True)     


In [43]:
# Build the root search node and display its negated score (the queue priority)
node = BeamSearchNode(decoder_hidden, decoder_emotion, None, decoder_input, 0, 1)
-node.eval()

0

In [0]:
min_sentence_length = 15

def valid_node(n,wordidx):
    """Tell whether the word ``wordidx`` may extend the hypothesis ending at ``n``.

    A sentence-breaking word (any entry of ``sent_breaker``) is rejected while
    the hypothesis is shorter than ``min_sentence_length``; every other
    candidate is accepted. This is the same rule ``beam_decode2`` applies.
    The previous ``and`` form rejected every candidate until the hypothesis
    was already longer than the minimum, so no hypothesis could ever grow.

    Args:
        n: search node; only its ``leng`` attribute is read.
        wordidx: one-element tensor holding the candidate word index.

    Returns:
        True when the candidate may be added to the search queue.
    """
    word = voc.index2word[wordidx.item()]
    is_breaker = word in sent_breaker
    too_short = n.leng < min_sentence_length
    return not (is_breaker and too_short)

In [0]:
# beam_width is how many candidates are expanded at every step; topk is how
# many finished hypotheses are kept at the end.
sent_breaker = ['.','.','!','?',':','EOS']
endnodes = []
topk = 10
decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
node = BeamSearchNode(decoder_hidden, decoder_emotion, None, decoder_input, 0, 1)
nodes = PriorityQueue()
nodes.put((-node.eval(), node))
qsize = 1
while True:
        # Break condition
        if qsize > max_qsize or nodes.empty(): break

        # Fetch
        score, n = nodes.get()
        decoder_input = n.wordid
        decoder_hidden = n.h

        # If we reach end of sentence, put it on endnodes list
        if n.wordid.item() == EOS_token and n.prevNode != None:
            endnodes.append((score, n))
            # if we reached maximum # of sentences required
            if len(endnodes) >= 50:
                break

        # Decode one step
        decoder_output,decoder_hidden,decoder_emotion = decoder(decoder_input,decoder_emotion,decoder_hidden,encoder_output)
        log_prob, indexes = torch.topk(decoder_output, beam_width)

        # Extend Queue
        nextnodes = []
        for new_k in range(beam_width):
            decoded_t = indexes[0][new_k].view(1, -1)

            if valid_node(n,decoded_t):
                log_p = log_prob[0][new_k].item()
                # Every candidate gets its own copy of the parent's word
                # history. word_past was previously used without ever being
                # defined in this cell (NameError).
                word_past = n.past.copy()
                word_past.append(decoded_t.item())
                if len(word_past)>max_past_word:
                    word_past = word_past[-max_past_word:]
                node = BeamSearchNode(decoder_hidden,decoder_emotion, n,decoded_t,n.logp + log_p, n.leng + 1,word_past)
                score = -node.eval()
                nextnodes.append((score, node))
        # put them into queue
        for i in range(len(nextnodes)):
            score, nn = nextnodes[i]
            nodes.put((score, nn))
        # increase qsize
        qsize += len(nextnodes) - 1
 

In [0]:
# No hypothesis reached EOS: fall back to the best (at most 200) entries that
# are still waiting in the queue.
if not endnodes:
    n_fallback = min(200, nodes.qsize())
    endnodes = [nodes.get() for _ in range(n_fallback)]

In [45]:
# Number of hypotheses collected so far
len(endnodes)

0

In [0]:
# Walk every collected hypothesis back to the root node, best score first,
# and print the resulting sentence.
utterances = []
for score, n in sorted(endnodes, key=lambda entry: entry[0]):
    # Collect word ids from the last node back to the start node ...
    utterance = [n.wordid]
    while n.prevNode != None:
        n = n.prevNode
        utterance.append(n.wordid)
    # ... then flip them into reading order
    utterance = list(reversed(utterance))
    print_sentence(utterance)


In [0]:
def print_sentence(utterance):
    """Print the words of ``utterance`` prefixed with the current emotion name.

    ``utterance`` is a sequence of one-element tensors holding word indexes.
    The PAD/SOS/EOS markers are dropped; the emotion name is looked up with
    the notebook-level emotion index ``e``.
    """
    special_tokens = ('EOS', 'PAD', 'SOS')
    decoded_words = []
    for token in utterance:
        decoded_words.append(voc.index2word[token.item()])
    decoded_words = [w for w in decoded_words if w not in special_tokens]
    print('{}: '.format(idx2emo[e]), ' '.join(decoded_words))
      

In [205]:
beam_width = 10            # candidates expanded per decoding step
max_qsize = 1000           # give up once the queue bookkeeping exceeds this
max_past_word = 20         # length of the per-node word history
min_sentence_length = 10   # no sentence breaker / result below this length
similar_word_len = 2       # only used by the disabled repetition filter


def beam_decode2(decoder,decoder_emotion,decoder_hidden,encoder_output,topk,debug=False):
    """Best-first beam search for a single input sentence.

    Args:
        decoder: called as ``decoder(input, emotion, hidden, encoder_output)``
            and returning ``(output, hidden, emotion)``.
        decoder_emotion: initial emotion input of the decoder.
        decoder_hidden: initial decoder hidden state.
        encoder_output: encoder outputs handed to every decoder step.
        topk: the search stops after ``5 * topk`` finished hypotheses.
        debug: unused; kept for interface compatibility.

    Returns:
        List of utterances, best score first. Each utterance is the list of
        word-id tensors from the start node to the last node. Hypotheses
        shorter than ``min_sentence_length`` are dropped.
    """
    sent_breaker = ['.','.','!','?',':','EOS']
    endnodes = []
    # Strictly increasing counter used as second tuple element so that two
    # entries with equal scores never fall back to comparing node objects.
    tie_breaker = itertools.count()
    decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token

    # starting node -  hidden vector, emotion, previous node, word id, logp, length
    node = BeamSearchNode(decoder_hidden, decoder_emotion, None, decoder_input, 0, 1)
    nodes = PriorityQueue()
    nodes.put((-node.eval(), next(tie_breaker), node))
    qsize = 1

    # start beam search
    while True:
        # give up when decoding takes too long
        if qsize > max_qsize or nodes.empty():
            break

        # fetch the best node
        score, _, n = nodes.get()
        decoder_input = n.wordid
        decoder_hidden = n.h
        if n.wordid.item() == EOS_token and n.prevNode is not None:
            endnodes.append((score, n))
            # if we reached maximum # of sentences required
            if len(endnodes) >= 5*topk:
                break
            # A finished hypothesis must not be expanded past its EOS
            continue

        # decode one step using decoder
        # NOTE(review): decoder_emotion is carried over from the previously
        # expanded node rather than restored from n -- confirm this is intended.
        decoder_output,decoder_hidden,decoder_emotion = decoder(decoder_input,decoder_emotion,decoder_hidden,encoder_output)
        top_prob, indexes = torch.topk(decoder_output, beam_width)
        # The decoder output is a probability distribution (the loss takes
        # -log of it), so convert to log-probabilities before accumulating.
        log_prob = torch.log(torch.clamp(top_prob, min=1e-12))

        n_children = 0
        for new_k in range(beam_width):
            decoded_t = indexes[0][new_k].view(1, -1)
            token_id = decoded_t.item()
            # Do not let a sentence end before it reached the minimum length.
            # (The repeated-word filter of the original code was disabled.)
            if voc.index2word[token_id] in sent_breaker and n.leng < min_sentence_length:
                continue
            log_p = log_prob[0][new_k].item()
            # Each child owns its history; siblings previously shared one list
            child_past = n.past.copy()
            child_past.append(token_id)
            if len(child_past) > max_past_word:
                child_past = child_past[-max_past_word:]
            node = BeamSearchNode(decoder_hidden,decoder_emotion, n,decoded_t,n.logp + log_p, n.leng + 1,child_past)
            nodes.put((-node.eval(), next(tie_breaker), node))
            n_children += 1
        # increase qsize
        qsize += n_children - 1

    # choose nbest paths, back trace them
    if not endnodes:
        for _ in range(min(100, nodes.qsize())):
            score, _, n = nodes.get()
            endnodes.append((score, n))

    utterances = []
    for score, n in sorted(endnodes, key=operator.itemgetter(0)):
        if n.leng < min_sentence_length:
            continue
        utterance = [n.wordid]
        while n.prevNode is not None:
            n = n.prevNode
            utterance.append(n.wordid)
        utterances.append(utterance[::-1])

    return utterances

[]

In [0]:
beam_width = 10            # candidates expanded per decoding step
max_qsize = 1000           # give up once the queue bookkeeping exceeds this
min_sentence_length = 10   # beam results shorter than this are dropped
similar_word_len = 2       # not used by the active code of this cell
class Searcher(nn.Module):
    """Decode a reply for one input sentence, greedily or with beam search.

    Args:
        encoder: encoder module called as ``encoder(input_seq, input_length)``.
        decoder: decoder module called as
            ``decoder(input, emotion, hidden, encoder_output)`` and returning
            ``(output, hidden, emotion)``.
        k: ``1`` selects greedy decoding; any other value selects beam search
            that stops after ``k`` finished hypotheses.
        debug: stored on the instance; not used by the decoding code.
    """

    def __init__(self, encoder,decoder,k=1,debug=False):
        super(Searcher, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.k = k
        self.debug = debug

    def forward(self, input_seq,target_emotion,input_length,max_length=MAX_LENGTH):
        """Encode ``input_seq`` and decode a reply for ``target_emotion``.

        Returns:
            For ``k == 1`` the tuple ``(all_tokens, all_scores)`` of the
            ``max_length`` greedily chosen word ids and their scores.
            Otherwise the list of utterances produced by ``beam_decode``.
            NOTE(review): the two branches return differently shaped results;
            callers that index the result as a list of token sequences only
            get what they expect from the beam branch -- confirm.
        """
        # Forward input through encoder model
        encoder_output, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the
        # decoder (use this instance's decoder, not a notebook global)
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        decoder_emotion = torch.LongTensor([target_emotion]).to(device)

        if self.k != 1:
            return self.beam_decode(decoder_emotion,decoder_hidden,encoder_output)

        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden, decoder_emotion = self.decoder(decoder_input,decoder_emotion, decoder_hidden, encoder_output)
            # Obtain most likely word token and its score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores

    def beam_decode(self,decoder_emotion,decoder_hidden,encoder_output,debug=False):
        """Best-first beam search; stops after ``self.k`` finished hypotheses.

        Args:
            decoder_emotion: initial emotion input of the decoder.
            decoder_hidden: initial decoder hidden state.
            encoder_output: encoder outputs handed to every decoder step.
            debug: unused; kept for interface compatibility.

        Returns:
            List of utterances, best score first. Each utterance is the list
            of word-id tensors from the start node to the last node.
            Hypotheses shorter than ``min_sentence_length`` are dropped.
        """
        endnodes = []
        # Strictly increasing counter used as second tuple element so that two
        # entries with equal scores never fall back to comparing node objects.
        tie_breaker = itertools.count()
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token

        # starting node -  hidden vector, emotion, previous node, word id, logp, length
        node = BeamSearchNode(decoder_hidden, decoder_emotion, None, decoder_input, 0, 1)
        nodes = PriorityQueue()
        nodes.put((-node.eval(), next(tie_breaker), node))
        qsize = 1

        # start beam search
        while True:
            # give up when decoding takes too long
            if qsize > max_qsize or nodes.empty():
                break

            # fetch the best node
            score, _, n = nodes.get()
            decoder_input = n.wordid
            decoder_hidden = n.h
            if n.wordid.item() == EOS_token and n.prevNode is not None:
                endnodes.append((score, n))
                # Stop once k sentences are finished (the undefined global
                # `topk` was referenced here before)
                if len(endnodes) >= self.k:
                    break
                # A finished hypothesis must not be expanded past its EOS
                continue

            # decode one step using decoder
            # NOTE(review): decoder_emotion is carried over from the previously
            # expanded node rather than restored from n -- confirm intent.
            decoder_output,decoder_hidden,decoder_emotion = self.decoder(decoder_input,decoder_emotion,decoder_hidden,encoder_output)
            top_prob, indexes = torch.topk(decoder_output, beam_width)
            # The decoder output is a probability distribution (the loss takes
            # -log of it), so convert to log-probabilities before accumulating.
            log_prob = torch.log(torch.clamp(top_prob, min=1e-12))

            for new_k in range(beam_width):
                decoded_t = indexes[0][new_k].view(1, -1)
                log_p = log_prob[0][new_k].item()
                node = BeamSearchNode(decoder_hidden,decoder_emotion, n,decoded_t,n.logp + log_p, n.leng + 1)
                nodes.put((-node.eval(), next(tie_breaker), node))
            # increase qsize: beam_width nodes added, one removed
            qsize += beam_width - 1

        # choose nbest paths, back trace them
        if not endnodes:
            for _ in range(min(100, nodes.qsize())):
                score, _, n = nodes.get()
                endnodes.append((score, n))

        utterances = []
        for score, n in sorted(endnodes, key=operator.itemgetter(0)):
            if n.leng < min_sentence_length:
                continue
            utterance = [n.wordid]
            while n.prevNode is not None:
                n = n.prevNode
                utterance.append(n.wordid)
            utterances.append(utterance[::-1])

        return utterances
