In [7]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import numpy as np

import pandas as pd
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math
import time
import json
from preprocessing_dailydialogue import *
import pickle
from queue import PriorityQueue

In [8]:
# Define constant
# Default word tokens
#
torch.autograd.set_detect_anomaly(True)
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token
MAX_LENGTH = 10  # Maximum sentence length to consider
MIN_COUNT = 3    # Minimum word count threshold for trimming
save_dir = os.path.join("data", "save")
emo_dict = { 0: 'neutral', 1: 'joy', 2: 'anger', 
            3: 'sadness',4:'fear'}
emo2idx = {value:key for key,value in emo_dict.items()}


# Run this block if using daily dialogue

In [9]:
USE_CUDA

True

# get data from pickle (No preprocessing required)

In [10]:
with open('processed_train.pickle','rb') as f:
    pairs = pickle.load(f)
    pairs_emotion = pickle.load(f)
    voc = pickle.load(f)
#with open('processed_test.pickle','rb') as f:
#    pairs_t = pickle.load(f)
#    pairs_emotion_t = pickle.load(f)

In [11]:
test_batch = batch2TrainData(voc,list(range(1000)),pairs[-1000:],pairs_emotion[-1000:])
pairs = pairs[:-1000]
pairs_emotion = pairs_emotion[:-1000]
test_pairs = pairs[-1000:]
test_pairs_emotion = pairs_emotion[-1000:]


In [12]:
len(pairs),len(pairs_emotion)



(14923, 14923)

# Convert data to tensor

In [13]:
# Example for validation
small_batch_size = 5
batches = batch2TrainData(voc, [random.choice(list(range(len(test_pairs)))) for _ in range(small_batch_size)],test_pairs,test_pairs_emotion)
input_variable,input_emotion, lengths, target_variable,target_emotion, mask, max_target_len = batches
#loss = evaluate_performance(input_variable,lengths,target_variable,target_emotion,mask,max_target_len,encoder,decoder)
print("input_variable:", input_variable)
print('Input_emotion:',input_emotion)
print("lengths:", lengths)
print("target_variable:", target_variable)
print('target_emotion:',target_emotion)
print("mask:", mask)
print("max_target_len:", max_target_len)
#print('Loss:',loss)

input_variable: tensor([[ 120,  160,   52,   27,  288],
        [   8,  386,    8,  663,   15],
        [  27,  190,   54,   75, 1059],
        [ 212,  456,   55,  723,   99],
        [2397,  568,   77,   99,    2],
        [  45,   10,  667,    2,    0],
        [ 643,  298,    8,    0,    0],
        [  10,    8,    2,    0,    0],
        [   8,    2,    0,    0,    0],
        [   2,    0,    0,    0,    0]])
Input_emotion: tensor([1, 0, 0, 1, 1])
lengths: tensor([10,  9,  8,  6,  5])
target_variable: tensor([[  95,  288,   49,   27,   18],
        [  10,    8,   64, 1166,   10],
        [ 161,    2,   92, 2490, 1390],
        [   8,    0,    4,   75,    8],
        [   2,    0,   39,  723,    2],
        [   0,    0,   15,   99,    0],
        [   0,    0,    2,    2,    0]])
target_emotion: tensor([1, 0, 0, 1, 1])
mask: tensor([[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 0, 1, 1, 1],
        [1, 0, 1, 1, 1],
        [0, 0, 1, 1, 0],
        [0, 

# Encoder + Attention

In [14]:
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        #   because our input size is a word embedding with number of features == hidden_size
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # Pack padded batch of sequences for RNN module
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:]
        # Return output and final hidden state
        return outputs, hidden
# Luong attention layer
'''
class Attn(torch.nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = torch.nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = torch.nn.Linear(self.hidden_size * 2, hidden_size)
            self.v = torch.nn.Parameter(torch.FloatTensor(hidden_size))

    def dot_score(self, hidden, encoder_output):
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)

        # Transpose max_length and batch_size dimensions
        attn_energies = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)   
        '''
print('Implement attention inside ECM')

Implement attention inside ECM


# ECM: Internal memory + Attention

In [15]:
class ECMWrapper(nn.Module):
    '''
    Internal memory module
    '''
    def __init__(self,hidden_size,state_size,emo_size,num_emotion,embedding,emotion_embedding,gru,device):
        '''
        hidden_size: hidden input dimension
        state_size: state vector size (input a word so hidden size)
        emo_size: emotional embedding size (usually similar to hidden_size)
        num_emotion: number of emotion categories
        '''
        super(ECMWrapper,self).__init__()
        self.hidden_size = hidden_size
        self.state_size = state_size
        self.emo_size = emo_size
        self.num_emotion = num_emotion
        self.device = device
        # read gate dimensions (word_embedding + hidden_input + context_input)
        self.read_g = nn.Linear(self.hidden_size + self.hidden_size + self.hidden_size,self.emo_size)
        # write gate
        self.write_g = nn.Linear(self.state_size, self.emo_size)
        # GRU output input dimensions = state_last + context + emotion emb + internal memory
        self.gru = gru
        self.emotion_embedding = emotion_embedding
        self.embedding = embedding
        # attention layer
        self.attn1 = nn.Linear(self.hidden_size,self.hidden_size)
        self.attn2 = nn.Linear(self.hidden_size,self.hidden_size)
        self.concat = nn.Linear(self.hidden_size, 1)
    def forward(self,word_input,decoder_output,emotion_input,context_input,last_hidden,memory):
        '''
        Last hidden == prev_cell_state
        last word embedding = word_input
        last hidden input = h
        last_rnn_output = logits before softmax
        memory = encoder_outputs
        '''
        # get embedding of input word and emotion
        if decoder_output is None:
            decoder_output = torch.zeros(word_input.shape[1],self.hidden_size,dtype=torch.float,device = self.device)
            decoder_output = decoder_output.unsqueeze(0)
            context_input = self._compute_context(decoder_output,memory)
        last_word_embedding = self.embedding(word_input)
        read_inputs = torch.cat((last_word_embedding,decoder_output,context_input), dim = -1)
        # compute read input
        # write to emotion embedding
        emotion_input = self._read_internal_memory(read_inputs,emotion_input)
        # pass everything to GRU
        # decoder_output: logits from last rnn unit
        X = torch.cat([last_word_embedding,decoder_output, context_input, emotion_input], dim = -1)
        rnn_output, hidden = self.gru(X,last_hidden)
        # write input
        # update states
        # write to emotion embedding
        new_M_emo = self._write_internal_memory(emotion_input,rnn_output) # new emotion_input
        new_context = self._compute_context(rnn_output,memory)
        return rnn_output, hidden, new_M_emo, new_context
    def _compute_context(self,rnn_output,memory):
        '''
        Compute context
        '''
        rnn_output = rnn_output.unsqueeze(dim=-2).squeeze(0) # make shape (batch,1,hidden_size)
        memory = memory.permute(1,0,2)
        Wq = self.attn1(rnn_output)
        Wm = self.attn2(memory)
        concat = (Wq + Wm).tanh()
        e = self.concat(concat).squeeze(2)
        attn_score = torch.softmax(e,dim = 1).unsqueeze(1)
        context = torch.bmm(attn_score,memory).squeeze(1)
        return context.unsqueeze(0)
    def _read_internal_memory(self,read_inputs,emotion_input):
        """
        Read the internal memory
            emotion_input: [batch_size, emo_hidden_size]
            read_inputs: [batch_size, d] d= [last_word_embedding;decoder_output;context_input]
        Returns:
            output: [batch_size, emo__hidden_size]
        """
        read_inputs = self.read_g(read_inputs)
        M_read = torch.sigmoid(read_inputs)
        return emotion_input * M_read
    def _write_internal_memory(self,emotion_input,rnn_output):
        """
        Write the internal memory
            emotion_input: [batch_size, emo_hidden_size]
            rnn_output: [batch_size, hidden_size]
        Returns:
            output: [batch_size, emo_hidden_size]
        """
        M_write = torch.sigmoid(self.write_g(rnn_output))
        return emotion_input * M_write
    
    

# Decoder part

In [16]:
class LuongAttnDecoderRNN(nn.Module):
    def __init__(self, attn_model, embedding,emotion_embedding, hidden_size, output_size,device,ememory=None, n_layers=1, dropout=0.1,num_emotions = 7,batch_size = 64):
        super(LuongAttnDecoderRNN, self).__init__()
        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.batch_size = batch_size
        self.dropout = dropout
        self.num_emotions = num_emotions
        self.device = device
        # Define layers
        self.embedding = embedding
        # define emotion embedding
        self.emotion_embedding = emotion_embedding
        self.embedding_dropout = nn.Dropout(dropout)
        #self.emotion_embedding_dropout = nn.Dropout(dropout)
        # dimension
        self.gru = nn.GRU(hidden_size + hidden_size + hidden_size + hidden_size , hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        # using in Luong et al. attention mechanism.
        self.internal_memory = ECMWrapper(hidden_size,hidden_size,
                                          hidden_size,self.num_emotions,
                                          self.embedding,self.emotion_embedding,self.gru,device)
        # read external from outside
        self.external_memory = ememory
        # generic output linear layer
        self.generic_word_output_layer = nn.Linear(self.hidden_size,output_size)
        # emotional output linear layer 
        self.emotion_word_output_layer = nn.Linear(self.hidden_size,output_size)
        # emotional gate/ choice layer
        self.alpha_layer = nn.Linear(hidden_size,1)
        # Luong eq 5 layer
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
    def forward(self, input_step,input_step_emotion, last_hidden
                ,input_context, encoder_outputs,last_rnn_output = None):
        '''
        Decoder with external memory.
        
        '''
        if not torch.is_floating_point(input_step_emotion):
            input_step_emotion = self.emotion_embedding(input_step_emotion)
        rnn_output, hidden, new_M_emo,context = self.internal_memory(input_step,last_rnn_output,input_step_emotion,
                                                            input_context,last_hidden,encoder_outputs)
        # Concatenate weighted context vector and GRU output using Luong eq. 5
        concat_input = torch.cat((rnn_output, context), -1)
        concat_output = torch.tanh(self.concat(concat_input))
        # concat_output = rnn_output
        # this part is not using inside ECM (?)
        if self.external_memory is not None:
            # Project hidden output to distribution.
            generic_output = self.generic_word_output_layer(concat_output)
            emotion_output = self.emotion_word_output_layer(concat_output)
            generic_output = generic_output.squeeze(0)
            emotion_output = emotion_output.squeeze(0)
            # external memory gate
            g = torch.sigmoid(self.alpha_layer(rnn_output))
            output_g = torch.softmax(generic_output,dim = 1) * (1 - g)
            output_e = torch.softmax(emotion_output,dim = 1) * g
            output = output_g + output_e # output distribution
            output = output.squeeze(0)
            g = torch.cat([(1 - g),g],dim = -1) # gate distribution
            g = g.squeeze(0)
        else:
            # Predict next word using Luong eq. 6
            output = self.out(concat_output).squeeze(0)
            # generic output
            output = F.softmax(output, dim=1)
            output = output.squeeze(0)
            g = None
        # Return output and final hidden state
        return output, hidden, new_M_emo, context,concat_output,g

# NLL Loss + Internal Memory Loss

In [17]:
def maskNLLLoss_IMemory(inp, target, mask,M_emo,external_memory,alpha):
    '''
    When external memory input will be a tuple with 4 elements
    '''
    nTotal = mask.sum()
    
    # cross entropy loss
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    # internal emotional loss
    eos_mask = (target == 2) # 2 is EOS token
    eos_mask = eos_mask.type_as(M_emo)
    internal_memory_loss = torch.norm(M_emo,dim = 2) * eos_mask
    internal_memory_loss = internal_memory_loss.squeeze(0)
    # external
    # find 1,0
    if external_memory is not None:
        qt = torch.gather(external_memory.view(-1,1),0,target.view(-1,1)).type(torch.LongTensor)
        qt = qt.to(device)
        alpha_prob = torch.gather(alpha,1,qt) # if it select emotion word or generic word
        external_memory_loss = (-torch.log(alpha_prob)).reshape(-1) 
    else:
        external_memory_loss = torch.zeros(crossEntropy.shape,dtype=torch.float,device=device)
    #print(crossEntropy.masked_select(mask).mean(),internal_memory_loss.masked_select(mask).mean())
    loss = crossEntropy.masked_select(mask).mean() + external_memory_loss.mean() + internal_memory_loss.mean()
    loss = loss.to(device)
    return loss, nTotal.item(),crossEntropy.masked_select(mask).mean().item()



# Single Train

In [18]:
def compute_perplexity(loss):
    return np.exp(loss)
def train(input_variable, lengths, target_variable,target_variable_emotion,
          mask, max_target_len, encoder, decoder, embedding,emotion_embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):
    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    # num_samples in this batch
    num_samples = input_variable.shape[1]
    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    target_variable_emotion = target_variable_emotion.to(device)
    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0
    totalCrossEntropy = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(num_samples)]])
    decoder_input = decoder_input.to(device)
    
    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    
    # Set initial context value,last_rnn_output, internal_memory
    context_input = torch.zeros(num_samples,hidden_size,dtype=torch.float,device=device) #torch.FloatTensor(batch_size,hidden_size)
    # Determine if we are using teacher forcing this iteration
    if random.random() < teacher_forcing_ratio:
        use_teacher_forcing = True  
    else:
        use_teacher_forcing = False
    use_teacher_forcing = False
    # initialize value for rnn_output
    rnn_output = None
    # Forward batch of sequences through decoder one time step at a time
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden,target_variable_emotion,context_input,rnn_output,g = decoder(
                decoder_input,target_variable_emotion, decoder_hidden,
                context_input, encoder_outputs,rnn_output
            )
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal,crossEntropy = maskNLLLoss_IMemory(decoder_output, target_variable[t], mask[t],target_variable_emotion,decoder.external_memory,g)
            loss += mask_loss
            totalCrossEntropy += crossEntropy * nTotal
            print_losses.append(mask_loss.item() * nTotal) # print average loss
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden,target_variable_emotion,context_input,rnn_output,g = decoder(
                decoder_input,target_variable_emotion, decoder_hidden,
                context_input,encoder_outputs,rnn_output
            )
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            topi = topi.squeeze(0)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(num_samples)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal,crossEntropy = maskNLLLoss_IMemory(decoder_output, target_variable[t], mask[t],target_variable_emotion,decoder.external_memory,g)
            loss += mask_loss
            totalCrossEntropy += crossEntropy * nTotal
            print_losses.append(mask_loss.item() * nTotal) # print average loss
            n_totals += nTotal

    # Perform backpropatation
    loss.backward()

    # Clip gradients: gradients are modified in place
    _ = torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = torch.nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()
    #print('Total Loss {}; Cross Entropy: {}'.format(sum(print_losses) / n_totals, totalCrossEntropy / n_totals))
    return sum(print_losses) / n_totals,totalCrossEntropy / n_totals
def evaluate_performance(input_variable, lengths, target_variable,target_variable_emotion,
          mask, max_target_len, encoder, decoder):
    # test mode
    
    encoder.eval()
    decoder.eval()
    # num_samples in this batch
    num_samples = input_variable.shape[1]
    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    target_variable_emotion = target_variable_emotion.to(device)
    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0
    totalCrossEntropy = 0
    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(num_samples)]])
    decoder_input = decoder_input.to(device)
    
    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    # Set initial context value,last_rnn_output, internal_memory
    context_input = torch.zeros(num_samples,hidden_size,dtype=torch.float,device=device) #torch.FloatTensor(batch_size,hidden_size)
    # initial value for rnn output
    rnn_output = None
    # forward pass to generate all sentences
    for t in range(max_target_len):
        decoder_output, decoder_hidden,target_variable_emotion,context_input,rnn_output,g = decoder(
            decoder_input,target_variable_emotion, decoder_hidden,
            context_input,encoder_outputs,rnn_output
        )
        # No teacher forcing: next input is decoder's own current output
        _, topi = decoder_output.topk(1)
        topi = topi.squeeze(0)
        decoder_input = torch.LongTensor([[topi[i][0] for i in range(num_samples)]])
        decoder_input = decoder_input.to(device)
        # Calculate and accumulate loss
        mask_loss, nTotal,crossEntropy = maskNLLLoss_IMemory(decoder_output, target_variable[t], mask[t],target_variable_emotion,decoder.external_memory,g)
        loss += mask_loss
        totalCrossEntropy += (crossEntropy * nTotal)
        print_losses.append(mask_loss.item() * nTotal) # print average loss
        n_totals += nTotal
    # back to train mode
    encoder.train()
    decoder.train()
    return sum(print_losses) / n_totals, totalCrossEntropy / n_totals

# Train Iteration

In [19]:
def trainIters(model_name, voc, pairs,pairs_emotion, 
               encoder, decoder, encoder_optimizer,
               decoder_optimizer, embedding,emotion_embedding, 
               encoder_n_layers, decoder_n_layers, save_dir, 
               n_iteration, batch_size, print_every, save_every, 
               clip,corpus_name,external_memory,test_pairs,test_pairs_emotion):
    loadFilename=None
    # Load batches for each iteration
    #training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      #for _ in range(n_iteration)]
    print('Loading Training data ...')
    length_pairs = len(pairs)
    #training_batches = [batch2TrainData(voc, [random.choice(range(length_pairs)) for _ in range(batch_size)],
    #                                   pairs,pairs_emotion) for _ in range(n_iteration)]
    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    totalCrossEntropy = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1
    min_test_loss = 1000
    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = batch2TrainData(voc, [random.choice(range(length_pairs)) for _ in range(batch_size)],
                                       pairs,pairs_emotion)
        # to save the data that causes error
        #with open('wrong_data.pickle','rb') as f:
        #    training_batch = pickle.load(f)
        
        # Extract fields from batch
        input_variable,input_variable_emotion, lengths, target_variable,target_variable_emotion, mask, max_target_len = training_batch

        # Run a training iteration with batch
        loss,crossEntropy = train(input_variable, lengths, target_variable,target_variable_emotion,
                     mask, max_target_len, encoder,
                     decoder, embedding,emotion_embedding,
                     encoder_optimizer, decoder_optimizer, 
                     batch_size, clip)
        
        print_loss += loss
        totalCrossEntropy += crossEntropy
        # Print progress
        if iteration % print_every == 0 or iteration == 1:
            directory = os.path.join(save_dir, model_name, corpus_name, '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            if iteration == 1:
                print_loss_avg = print_loss / 1
                print_cross_entropy = totalCrossEntropy / 1
            else:
                print_loss_avg = print_loss / print_every
                print_cross_entropy = totalCrossEntropy / print_every
            if print_cross_entropy > 300:
                perplexity = compute_perplexity(300)
            else:
                perplexity = compute_perplexity(print_cross_entropy)
            output1 = "Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}; Perplexity: {:.2f}".format(iteration, iteration / n_iteration * 100, print_loss_avg,perplexity)
            print(output1)
            test_length_pairs = len(test_pairs) 
            test_batch = batch2TrainData(voc, [random.choice(range(test_length_pairs)) for _ in range(batch_size)],
                                       test_pairs,test_pairs_emotion)
            input_variable,input_emotion, lengths, target_variable,target_emotion, mask, max_target_len = test_batch
            test_loss,testCrossEntropy = evaluate_performance(input_variable,lengths, target_variable,target_emotion,mask,max_target_len,encoder,decoder)
            
            if testCrossEntropy > 300:
                perplexity = compute_perplexity(300)
            else:
                perplexity = compute_perplexity(testCrossEntropy)
            output2 = 'Loss on validation set {:.4f}; Perplexity:{:.2f}'.format(test_loss,perplexity)
            print(output2)
            with open(os.path.join(directory,'log.txt'),'a+') as f:
                f.write(output1 + '\n')
                f.write(output2 + '\n')
            print_loss = 0
            totalCrossEntropy = 0

        # Save checkpoint and only save the better perform one,
        if (iteration % save_every == 0) and (testCrossEntropy < min_test_loss):
            min_test_loss = testCrossEntropy
            print('Save the model at checkpoint {}, and test loss is {}'.format(iteration,min_test_loss))
            directory = os.path.join(save_dir, model_name, corpus_name, '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict(),
                'external_memory':external_memory
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))
            
            

# Greedy Search

In [20]:
def print_param(model):
    for name,param in model.named_parameters():
        print(param)
        print(name,param.grad)

In [21]:
class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder,num_word = None):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq,target_emotions,input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Set initial context value,last_rnn_output, internal_memory
        context_input = torch.zeros((1,hidden_size),dtype=torch.float,device=self.decoder.device)
        context_input = context_input.to(device)
        rnn_output = None
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden,target_emotions,context_input,rnn_output,g = decoder(
                decoder_input,target_emotions, decoder_hidden,
                context_input, encoder_outputs,rnn_output
            )
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores

# Beam Search

In [22]:
class BeamSearchDecoder(nn.Module):
    def __init__(self, encoder, decoder,num_word):
        super(BeamSearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.num_word = num_word

    def forward(self, input_seq,target_emotions,input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_words_order = torch.zeros((1,self.num_word),device=decoder.device,dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        all_scores_array = torch.zeros((1,self.num_word),device=decoder.device,dtype=torch.float)
        # Set initial context value,last_rnn_output, internal_memory
        context_input = torch.zeros(1,hidden_size,dtype=torch.float)
        context_input = context_input.to(decoder.device)
        rnn_output = None
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden,target_emotions,context_input,rnn_output,g = decoder(
                decoder_input,target_emotions, decoder_hidden,
                context_input, encoder_outputs,rnn_output
            )
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            decoder_input_order = torch.argsort(decoder_output,dim=1,descending=True)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            all_scores_array = torch.cat((all_scores_array,decoder_output),dim = 0)
            all_words_order = torch.cat((all_words_order,decoder_input_order), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        sequences = self.beam_search(all_scores_array,3)
        return sequences
    def beam_search(self,array,k):
        array = array.tolist()
        sequences = [[list(), 1.0]]
        # walk over each step in sequence
        for row in array:
            all_candidates = list()
            # expand each current candidate
            for i in range(len(sequences)):
                seq, score = sequences[i]
                for j in range(len(row)):
                    candidate = [seq + [j], score - np.log(row[j] + 1e-8)]
                    all_candidates.append(candidate)
            # order all candidates by score
            ordered = sorted(all_candidates, key=lambda tup:tup[1])
            # select k best
            sequences = ordered[:k]
        return sequences

In [23]:
def get_ememory(file_path, voc):
    '''
    Get external memory from file. And generate category embedding based on the
    current vocabulary

    :param file_path:
    :param voc:
    :return:
    '''
    emotion_words = [0] * voc.num_words
    count = 0
    with open(file_path, 'r') as f:
        for each in f:
            each = each.rstrip()
            if each in voc.word2index:
                count += 1
                emotion_words[voc.word2index[each]] = 1
    print('Emotion word counts:', count)
    return torch.ByteTensor(emotion_words)

In [24]:
emotion_words = get_ememory('ememory2.txt',voc)

Emotion word counts: 611


In [43]:
def get_ememory2D(file_path, num_emotions,voc):
    '''
    Get external memory from file. And generate category embedding based on the
    current vocabulary

    :param file_path:
    :param voc:
    :return:
    '''
    emo_words = pd.read_csv(file_path)
    ememory_2d = torch.zeros((num_emotions,voc.num_words),dtype = torch.long,device = device)
    count = 0
    for idx,row in emo_words.iterrows():
        word = row['word']
        category = row['emotion']
        if word in voc.word2index:
            ememory_2d[category,voc.word2index[word]] = 1
            count += 1
    print(count)
    
    
    return ememory_2d

# Build Model

In [52]:
try:
    voc
except NameError:
    voc = Voc('a',max_length=MAX_LENGTH,min_count=MIN_COUNT)
# Configure models
model_name = 'emotion_model'
corpus_name = 'ECM10_words_Ememory_concat'
attn_model = 'concat'
#attn_model = 'general'
#attn_model = 'concat'
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.2
batch_size = 64
# number of emotion
num_emotions = 5
# load external memory based vocab.
emotion_words = get_ememory2D('ememory.csv',5,voc)
# Set checkpoint to load from; set to None if starting from scratch
loadFilename = None #'data/save/emotion_model/ECM10_words_Ememory_concat/2-2_500/6000_checkpoint.tar'
checkpoint_iter = 120
training = True
if loadFilename:
    training = False
#loadFilename = os.path.join(save_dir, model_name, corpus_name,
#                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
#                            '{}_checkpoint.tar'.format(checkpoint_iter))


# Load model if a loadFilename is provided
if loadFilename:
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename)
    # If loading a model trained on GPU to CPU
    #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']
    emotion_words = checkpoint['external_memory']
    


print('Building encoder and decoder ...')
# Initialize word embeddings
if emotion_words is not None:
    emotion_words = emotion_words.to(device)

embedding = nn.Embedding(voc.num_words, hidden_size)
emotion_embedding = nn.Embedding(num_emotions, hidden_size)
emotion_embedding_static = nn.Embedding(num_emotions,hidden_size)

if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding,emotion_embedding, hidden_size, 
                              voc.num_words,device, emotion_words,decoder_n_layers, dropout,num_emotions=num_emotions,batch_size = batch_size)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

611
Building encoder and decoder ...
Models built and ready to go!


In [19]:
voc.num_words

3216

In [20]:
emotion_words.sum()

tensor(536, device='cuda:0')

# Run training

In [78]:
# Configure training/optimization
clip = 50
teacher_forcing_ratio = 0.1
learning_rate = 0.001
decoder_learning_ratio = 5.0
n_iteration = 2000000
print_every = 20
save_every = 100


# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations


print("Starting Training!")
trainIters(model_name, voc, pairs,pairs_emotion, encoder, decoder, encoder_optimizer, decoder_optimizer,
           embedding,emotion_embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,
           print_every, save_every, clip,corpus_name,emotion_words,test_pairs,test_pairs_emotion)
    
    

Building optimizers ...
Starting Training!
Loading Training data ...
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 2.4133; Perplexity: 9.48
Loss on validation set 2.1670; Perplexity:7.48
Iteration: 20; Percent complete: 0.0%; Average loss: 2.0004; Perplexity: 6.29
Loss on validation set 1.8989; Perplexity:5.42
Iteration: 40; Percent complete: 0.0%; Average loss: 2.1683; Perplexity: 7.37
Loss on validation set 1.9656; Perplexity:6.20
Iteration: 60; Percent complete: 0.0%; Average loss: 2.0946; Perplexity: 6.86
Loss on validation set 2.2294; Perplexity:7.80
Iteration: 80; Percent complete: 0.0%; Average loss: 2.1247; Perplexity: 7.12
Loss on validation set 1.7368; Perplexity:4.94
Iteration: 100; Percent complete: 0.0%; Average loss: 2.1487; Perplexity: 7.26
Loss on validation set 1.9181; Perplexity:5.52
Save the model at checkpoint 100, and test loss is 1.7076359323651085
Iteration: 120; Percent complete: 0.0%; Average loss: 2.0151; Perplexity: 6.35
Los

Loss on validation set 1.6700; Perplexity:4.56
Iteration: 1260; Percent complete: 0.1%; Average loss: 1.9236; Perplexity: 5.84
Loss on validation set 2.0961; Perplexity:7.03
Iteration: 1280; Percent complete: 0.1%; Average loss: 1.9144; Perplexity: 5.75
Loss on validation set 2.2680; Perplexity:8.25
Iteration: 1300; Percent complete: 0.1%; Average loss: 1.9638; Perplexity: 6.01
Loss on validation set 1.9216; Perplexity:5.79
Iteration: 1320; Percent complete: 0.1%; Average loss: 1.9102; Perplexity: 5.76
Loss on validation set 1.8451; Perplexity:5.25
Iteration: 1340; Percent complete: 0.1%; Average loss: 1.9067; Perplexity: 5.64
Loss on validation set 2.2209; Perplexity:7.69
Iteration: 1360; Percent complete: 0.1%; Average loss: 1.8535; Perplexity: 5.40
Loss on validation set 1.7604; Perplexity:4.80
Iteration: 1380; Percent complete: 0.1%; Average loss: 1.9194; Perplexity: 5.79
Loss on validation set 1.8640; Perplexity:5.73
Iteration: 1400; Percent complete: 0.1%; Average loss: 1.9415; P

Iteration: 2520; Percent complete: 0.1%; Average loss: 1.8441; Perplexity: 5.35
Loss on validation set 2.2119; Perplexity:7.77
Iteration: 2540; Percent complete: 0.1%; Average loss: 1.8079; Perplexity: 5.17
Loss on validation set 1.7549; Perplexity:4.92
Iteration: 2560; Percent complete: 0.1%; Average loss: 1.7978; Perplexity: 5.10
Loss on validation set 1.5862; Perplexity:4.24
Iteration: 2580; Percent complete: 0.1%; Average loss: 1.8641; Perplexity: 5.47
Loss on validation set 1.7917; Perplexity:5.17
Iteration: 2600; Percent complete: 0.1%; Average loss: 1.8556; Perplexity: 5.44
Loss on validation set 2.0529; Perplexity:6.51
Iteration: 2620; Percent complete: 0.1%; Average loss: 1.8604; Perplexity: 5.50
Loss on validation set 1.6759; Perplexity:4.50
Iteration: 2640; Percent complete: 0.1%; Average loss: 1.7616; Perplexity: 4.99
Loss on validation set 2.4243; Perplexity:9.14
Iteration: 2660; Percent complete: 0.1%; Average loss: 1.8174; Perplexity: 5.24
Loss on validation set 1.7269; 

Loss on validation set 1.7824; Perplexity:5.00
Iteration: 3820; Percent complete: 0.2%; Average loss: 1.7251; Perplexity: 4.81
Loss on validation set 1.3058; Perplexity:3.11
Iteration: 3840; Percent complete: 0.2%; Average loss: 1.7716; Perplexity: 5.03
Loss on validation set 2.1374; Perplexity:7.04
Iteration: 3860; Percent complete: 0.2%; Average loss: 1.8057; Perplexity: 5.19
Loss on validation set 1.6092; Perplexity:4.32
Iteration: 3880; Percent complete: 0.2%; Average loss: 1.7747; Perplexity: 5.06
Loss on validation set 1.7246; Perplexity:4.69
Iteration: 3900; Percent complete: 0.2%; Average loss: 1.8540; Perplexity: 5.38
Loss on validation set 1.9266; Perplexity:5.81
Iteration: 3920; Percent complete: 0.2%; Average loss: 1.7424; Perplexity: 4.86
Loss on validation set 1.9367; Perplexity:5.95
Iteration: 3940; Percent complete: 0.2%; Average loss: 1.7695; Perplexity: 5.00
Loss on validation set 2.0295; Perplexity:6.55
Iteration: 3960; Percent complete: 0.2%; Average loss: 1.8517; P

Loss on validation set 1.7340; Perplexity:4.81
Iteration: 5100; Percent complete: 0.3%; Average loss: 1.9066; Perplexity: 5.76
Loss on validation set 2.1876; Perplexity:7.61
Iteration: 5120; Percent complete: 0.3%; Average loss: 1.8743; Perplexity: 5.56
Loss on validation set 2.0688; Perplexity:6.85
Iteration: 5140; Percent complete: 0.3%; Average loss: 1.8398; Perplexity: 5.36
Loss on validation set 1.8170; Perplexity:4.99
Iteration: 5160; Percent complete: 0.3%; Average loss: 2.0432; Perplexity: 6.62
Loss on validation set 2.1096; Perplexity:7.10
Iteration: 5180; Percent complete: 0.3%; Average loss: 2.0373; Perplexity: 6.44
Loss on validation set 2.0454; Perplexity:6.52
Iteration: 5200; Percent complete: 0.3%; Average loss: 1.9735; Perplexity: 6.15
Loss on validation set 1.9853; Perplexity:6.27
Iteration: 5220; Percent complete: 0.3%; Average loss: 1.9936; Perplexity: 6.23
Loss on validation set 2.1230; Perplexity:7.15
Iteration: 5240; Percent complete: 0.3%; Average loss: 1.8847; P

Loss on validation set 1.5406; Perplexity:4.10
Iteration: 6400; Percent complete: 0.3%; Average loss: 1.8366; Perplexity: 5.36
Loss on validation set 1.5477; Perplexity:4.08
Iteration: 6420; Percent complete: 0.3%; Average loss: 1.8538; Perplexity: 5.43
Loss on validation set 1.8942; Perplexity:5.46
Iteration: 6440; Percent complete: 0.3%; Average loss: 1.7251; Perplexity: 4.78
Loss on validation set 1.7977; Perplexity:4.92
Iteration: 6460; Percent complete: 0.3%; Average loss: 1.7540; Perplexity: 4.90
Loss on validation set 2.1018; Perplexity:6.88


KeyboardInterrupt: 

In [79]:
def evaluate(encoder, decoder, searcher, voc, sentence, emotions,max_length=MAX_LENGTH,beam_search = False):
    emotions = int(emotions)
    emotions = torch.LongTensor([emotions])
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    emotions = emotions.to(device)

    # indexes -> words
    if beam_search:
        sequences = searcher(input_batch, emotions, lengths, max_length)
        decoded_words = beam_decode(sequences,voc)
    else:
        # Decode sentence with searcher
        tokens, scores = searcher(input_batch, emotions, lengths, max_length)
        decoded_words = [voc.index2word[token.item()] for token in tokens]
    return decoded_words

def beam_decode(sequences,voc):
    for each in sequences:
        for idxs in each:
            return [voc.index2word[idx] for idx in idxs[:-1]]
    
def evaluateInput(encoder, decoder, searcher, voc,emotion_dict,beam_search):
    input_sentence = ''
    while(1):
        try:
            # Get input sentence
            input_sentence = input('> ')
            for emotion in range(len(emotion_dict)):
                # Check if it is quit case
                if input_sentence == 'q' or input_sentence == 'quit': break
                # Normalize sentence
                input_sentence = normalizeString(input_sentence)
                # Evaluate sentence
                output_words = evaluate(encoder, decoder, searcher, voc, input_sentence,emotion,beam_search=beam_search)
                # Format and print response sentence
                output=[]
                for word in output_words:
                    if word == 'PAD':
                        continue
                    elif word == 'EOS':
                        break
                    else:
                        output.append(word)
                print('Bot({}):'.format(emotion_dict[emotion]), ' '.join(output))

        except KeyError:
            print("Error: Encountered unknown word.")
            

# Beam Search Decode

In [80]:
def sentenceFromIdx(idx,voc):
    return ' '.join([voc.index2word[i] for i in idx])

class BeamSearchNode(object):
    def __init__(self, hiddenstate, previousNode, decoder_input, logProb, length,emotions_emb,last_rnn_output,context_input,g):
        '''
        :param hiddenstate:
        :param previousNode:
        :param wordId:
        :param logProb:
        :param length:
        '''
        
        self.hidden_state = hiddenstate
        self.prevNode = previousNode
        self.decoder_input = decoder_input
        self.logp = logProb
        self.leng = length
        self.emotions = emotions_emb
        self.rnn_output = last_rnn_output
        self.context_input = context_input
        self.alpha = g

    def eval(self, alpha=1.0):
        reward = 0
        # Add here a function for shaping a reward
        return self.logp #/ float(self.leng - 1 + 1e-6) + alpha * reward


In [83]:
for emotions in [0,1,2,3,4]:
    diversity_penality = True
    emotions = emotions
    sentence = 'where is lily ?'
    print('Post({}):{}'.format(emo_dict[emotions],sentence))
    emotions = int(emotions)
    emotions = torch.LongTensor([emotions])
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    emotions = emotions.to(device)
    # Forward input through encoder model
    encoder_outputs, encoder_hidden = encoder(input_batch, lengths)
    # Prepare encoder's final hidden layer to be first hidden input to the decoder
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    # Initialize decoder input with SOS_token
    decoder_input = torch.ones((1,1), device=device, dtype=torch.long) * SOS_token
    # Set initial context value,last_rnn_output, internal_memory
    context_input = torch.zeros(1,hidden_size,dtype=torch.float)
    context_input = context_input.to(decoder.device)
    rnn_output = None

    node = BeamSearchNode(hiddenstate=decoder_hidden,decoder_input=decoder_input,
                           context_input=context_input,emotions_emb=emotions,
                           length=1,logProb=0,last_rnn_output = rnn_output,
                           previousNode=None,g = 0
                          )
    sent_leng = 0
    # beam search
    K = 10
    # Iteratively decode one word token at a time
    # Forward pass through decoder
    nodes = PriorityQueue(maxsize=K)
    nodes.put((0,node))
    # diversity rate
    gamma = 10
    # choice
    g_losses = []
    for i in range(10):
        #print('Decoder {} word'.format(i + 1))
        choices = []
        while not nodes.empty():
            score,node = nodes.get()
            #print('Last word at position {}'.format(node.leng))
            decoder_output, decoder_hidden,emotions,context_input,rnn_output,g = decoder(
                node.decoder_input,node.emotions, node.hidden_state,
                node.context_input,encoder_outputs,node.rnn_output
            )
            #print(g)
            # Obtain most likely word token and its softmax score
            # decoder_output = decoder_output.unsqueeze(0)
            decoder_scores, decoder_input = torch.topk(decoder_output,k= K, dim=1)
            decoder_scores = torch.log(decoder_scores)
            if diversity_penality and i >= 1:
                # apply based on rank
                penalties = torch.pow(torch.arange(0,K,dtype=torch.float,device=device),3) * gamma
                # apply penalties on the output
                decoder_scores = decoder_scores - penalties
            token_choices = [decoder_input[0,i].item() for i in range(K)] 
            token_scores = [decoder_scores[0,i].item() for i in range(K)] 
            #print(voc.index2word[token_choices[0]])
            # for each candidate token, compute loss
            for token,decoder_score in zip(token_choices,token_scores):
                next_decoder_input = torch.ones((1,1),dtype=torch.long,device=device) * token
                #current_score = score + decoder_score
                next_node = BeamSearchNode(decoder_hidden,node,next_decoder_input,
                                      decoder_score,node.leng + 1,emotions,rnn_output,context_input,g)
                #print('This is {} words'.format(next_node.leng))
                current_score = (score * node.leng - next_node.eval()) / next_node.leng
                choices.append((current_score,next_node))
        choices = sorted(choices,key=lambda x:x[0])
        # choices = choices[:K]
        for choice in choices:
            if not nodes.full():
                nodes.put(choice)

    #print(nodes.qsize())
    #print('Decode')        
    # decoder    
    sentences = []
    i = 0
    while not nodes.empty():
        #print('Decode {}:'.format(i))
        i += 1 
        sentence = []
        score,node = nodes.get()
        while(node.prevNode is not None):
            sentence.append(node.decoder_input.item())
            node = node.prevNode
        sentence = sentence[::-1]
        #print(sentence,score)
        sentences.append((score,sentence))
    #print(sentences)
    for sent in sentences[:40]:
        print(sentenceFromIdx(sent[1],voc),sent[0])  
    


Post(neutral):where is lily ?
in is is is nine . EOS EOS EOS EOS 0.29963770076442003
nine is is is nine . EOS EOS EOS EOS 0.6202949064000595
. a is is nine . EOS EOS EOS EOS 0.9306218200045723
contract a is is nine . EOS EOS EOS EOS 0.9953157637342387
a is is is nine . . EOS EOS EOS 1.0733809039282443
the is is is nine . EOS EOS EOS EOS 1.0789811324370517
molly is is is nine . EOS EOS EOS EOS 1.1003623724629341
it is is is nine . EOS EOS EOS EOS 1.107820813216015
is is is is nine . EOS EOS EOS EOS 1.1225524620280007
one is is is nine . EOS EOS EOS EOS 1.1382108643460511
Post(joy):where is lily ?
in in in in EOS EOS EOS EOS EOS EOS 0.4699942974045619
nine in in in EOS in beijing EOS EOS EOS 0.8327879758488218
the in in in EOS EOS EOS EOS EOS EOS 0.9938038217554673
per in in in EOS EOS EOS EOS EOS EOS 0.996769358882871
molly in in in EOS EOS EOS EOS EOS EOS 1.0738408171455376
no in in in EOS EOS EOS EOS EOS EOS 1.1195225625600538
. in in in EOS EOS EOS EOS EOS EOS 1.1799372652762528
it i

In [69]:
diversity_penality = True
emotions = 1
sentence = 'a promise is what a liar says to a guy .'
print('Post({}):{}'.format(emo_dict[emotions],sentence))
emotions = int(emotions)
emotions = torch.LongTensor([emotions])
### Format input sentence as a batch
# words -> indexes
indexes_batch = [indexesFromSentence(voc, sentence)]
# Create lengths tensor
lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
# Transpose dimensions of batch to match models' expectations
input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
# Use appropriate device
input_batch = input_batch.to(device)
lengths = lengths.to(device)
emotions = emotions.to(device)
# Forward input through encoder model
encoder_outputs, encoder_hidden = encoder(input_batch, lengths)
# Prepare encoder's final hidden layer to be first hidden input to the decoder
decoder_hidden = encoder_hidden[:decoder.n_layers]
# Initialize decoder input with SOS_token
decoder_input = torch.ones((1,1), device=device, dtype=torch.long) * SOS_token
# Set initial context value,last_rnn_output, internal_memory
context_input = torch.zeros(1,hidden_size,dtype=torch.float)
context_input = context_input.to(decoder.device)
rnn_output = None

node = BeamSearchNode(hiddenstate=decoder_hidden,decoder_input=decoder_input,
                       context_input=context_input,emotions_emb=emotions,
                       length=1,logProb=0,last_rnn_output = rnn_output,
                       previousNode=None,g=g
                      )
sent_leng = 0
# beam search
K = 100
# Iteratively decode one word token at a time
# Forward pass through decoder
nodes = PriorityQueue(maxsize=K)
nodes.put((0,node))
# diversity rate
gamma = 2

choices = []

Post(joy):a promise is what a liar says to a guy .


In [70]:
diversity_penality = False
score,node = nodes.get()
#print('Last word at position {}'.format(node.leng))
decoder_output, decoder_hidden,emotions,context_input,rnn_output,g = decoder(
    node.decoder_input,node.emotions, node.hidden_state,
    node.context_input,encoder_outputs,node.rnn_output
)
# Obtain most likely word token and its softmax score
#decoder_output = decoder_output.unsqueeze(0)
decoder_scores, decoder_input = torch.topk(decoder_output,k= K, dim=1)
decoder_scores = torch.log(decoder_scores)
if diversity_penality:
    # apply based on rank
    penalties = torch.arange(0,K,dtype=torch.float,device=device) * gamma
    # apply penalties on the output
    decoder_scores = decoder_scores - penalties
token_choices = [decoder_input[0,i].item() for i in range(K)] 
token_scores = [decoder_scores[0,i].item() for i in range(K)] 
# for each candidate token, compute loss
choices=[]
for token,decoder_score in zip(token_choices,token_scores):
    next_decoder_input = torch.ones((1,1),dtype=torch.long,device=device) * token
    next_node = BeamSearchNode(decoder_hidden,node,next_decoder_input,
                          decoder_score,node.leng + 1,emotions,rnn_output,context_input,g)
    #print('This is {} words'.format(next_node.leng))
    current_score = score - next_node.eval()
    choices.append((current_score,next_node))

choices = sorted(choices,key=lambda x:x[0])


In [66]:
nodes.put(choices[0])

In [None]:
scroe,node = nodes.get()

In [71]:
for each in choices:
    print(voc.index2word[each[1].decoder_input.item()],each[0])

what 0.7984911799430847
us 1.8948837518692017
that 1.9771095514297485
in 2.7743375301361084
it 3.2182345390319824
who 3.3670308589935303
okay 3.709470510482788
s 3.7436330318450928
ok 4.311063289642334
then 4.715331554412842
fine 4.866061687469482
are 4.994941234588623
you 5.15985631942749
is 5.379861831665039
can 5.507583141326904
why 5.756469249725342
will 5.835221767425537
we 6.12294340133667
all 6.2014994621276855
i 6.588305950164795
great 6.665017604827881
a 6.694298267364502
see 6.875086307525635
but 6.918249607086182
don 6.979137897491455
on 7.0314106941223145
really 7.437674045562744
tell 7.548183441162109
well 7.5800395011901855
not 7.978166103363037
um 8.058845520019531
me 8.187016487121582
good 8.230286598205566
give 8.265947341918945
and 8.355799674987793
this 8.508251190185547
friday 8.55142879486084
sounds 8.57400131225586
yeah 8.583353996276855
right 8.584287643432617
the 8.590916633605957
do 8.708405494689941
may 8.724076271057129
how 8.870601654052734
jack 8.9766426086

# Chat with bot

In [84]:
# Set dropout layers to eval mode

encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)
searcher2 = BeamSearchDecoder(encoder,decoder,voc.num_words)
# Begin chatting (uncomment and run the following line to begin)
evaluateInput(encoder, decoder, searcher, voc,emo_dict,False)

> how are you doing ?
Bot(neutral): i m am you would have happy questions
Bot(joy): i m am you would have happy questions
Bot(anger): that m bad you you have happy questions
Bot(sadness): i m am you you have happy questions
Bot(fear): i m am you would have happy questions
> where are you from ?
Bot(neutral): i m from . house
Bot(joy): i m from . house
Bot(anger): i m from . house
Bot(sadness): i m from . house
Bot(fear): i m from . house
> what s your name ?
Bot(neutral): my name is my .
Bot(joy): my name is my .
Bot(anger): my name is . .
Bot(sadness): my name is my .
Bot(fear): my name is my .
> do know jack ?
Bot(neutral): why !
Bot(joy): why !
Bot(anger): why !
Bot(sadness): why !
Bot(fear): why !
> are you serious ?
Bot(neutral): i m am i traveling i
Bot(joy): i m am i traveling i
Bot(anger): i m m i traveling i
Bot(sadness): i m am i traveling i
Bot(fear): i m am i traveling i
> here is a problem .
Bot(neutral): thanks s your know have ?
Bot(joy): thanks s your know have ?
Bot(an

KeyboardInterrupt: 