In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import numpy as np

import pandas as pd
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math
import time
import json
from preprocessing_dailydialogue import *
import pickle
from queue import PriorityQueue

In [2]:
# Define constant
# Default word tokens
#
torch.autograd.set_detect_anomaly(True)
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token
MAX_LENGTH = 10  # Maximum sentence length to consider
MIN_COUNT = 3    # Minimum word count threshold for trimming
save_dir = os.path.join("data", "save")
emo_dict = { 0: 'neutral', 1: 'joy', 2: 'anger', 
            3: 'sadness',4:'fear'}
emo2idx = {value:key for key,value in emo_dict.items()}


# Run this block if using daily dialogue

In [3]:
USE_CUDA

True

# get data from pickle (No preprocessing required)

In [4]:
with open('processed_train_large.pickle','rb') as f:
    pairs = pickle.load(f)
    pairs_emotion = pickle.load(f)
    voc = pickle.load(f)
#with open('processed_test.pickle','rb') as f:
#    pairs_t = pickle.load(f)
#    pairs_emotion_t = pickle.load(f)

In [5]:
test_batch = batch2TrainData(voc,list(range(1000)),pairs[-1000:],pairs_emotion[-1000:])
pairs = pairs[:-1000]
pairs_emotion = pairs_emotion[:-1000]
test_pairs = pairs[-1000:]
test_pairs_emotion = pairs_emotion[-1000:]


In [6]:
len(pairs),len(pairs_emotion)



(27307, 27307)

# Convert data to tensor

In [7]:
# Example for validation
small_batch_size = 5
batches = batch2TrainData(voc, [random.choice(list(range(len(pairs)))) for _ in range(small_batch_size)],pairs,pairs_emotion)
input_variable,input_emotion, lengths, target_variable,target_emotion, mask, max_target_len = batches
#loss = evaluate_performance(input_variable,lengths,target_variable,target_emotion,mask,max_target_len,encoder,decoder)
print("input_variable:", input_variable)
print('Input_emotion:',input_emotion)
print("lengths:", lengths)
print("target_variable:", target_variable)
print('target_emotion:',target_emotion)
print("mask:", mask)
print("max_target_len:", max_target_len)
#print('Loss:',loss)

input_variable: tensor([[  72,   45,   45,   64,   65],
        [  52,    7,  269,   92,  126],
        [  22,    7,  122,   69, 2081],
        [ 322,    7,   22,  223,  779],
        [  11,    3,   69, 1186,   44],
        [  63,  711, 3355,  803,    2],
        [  12,   20,   71,    7,    0],
        [  44,    7,    7,    2,    0],
        [   2,    2,    2,    0,    0]])
Input_emotion: tensor([0, 4, 4, 2, 3])
lengths: tensor([9, 9, 9, 8, 6])
target_variable: tensor([[   3,   51,    8,    3,   45],
        [ 564,  114,   64,   17,  383],
        [ 628,   28,   92,   74,    7],
        [   6, 1250,   20, 1295, 5787],
        [   7, 2996,  603,  298,    7],
        [   2,  700,    7,    7,    2],
        [   0,   44,    2,    2,    0],
        [   0,    2,    0,    0,    0]])
target_emotion: tensor([1, 4, 4, 2, 3])
mask: tensor([[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [0, 1, 1,

# Encoder + Attention

In [8]:
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        #   because our input size is a word embedding with number of features == hidden_size
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # Pack padded batch of sequences for RNN module
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:]
        # Return output and final hidden state
        return outputs, hidden

print('Implement attention inside ECM')

Implement attention inside ECM


# ECM: Internal memory + Attention

In [9]:
class ECMWrapper(nn.Module):
    '''
    Internal memory module
    '''
    def __init__(self,hidden_size,state_size,emo_size,num_emotion,embedding,emotion_embedding,gru,device):
        '''
        hidden_size: hidden input dimension
        state_size: state vector size (input a word so hidden size)
        emo_size: emotional embedding size (usually similar to hidden_size)
        num_emotion: number of emotion categories
        '''
        super(ECMWrapper,self).__init__()
        self.hidden_size = hidden_size
        self.state_size = state_size
        self.emo_size = emo_size
        self.num_emotion = num_emotion
        self.device = device
        # read gate dimensions (word_embedding + hidden_input + context_input)
        self.read_g = nn.Linear(self.hidden_size + self.hidden_size + self.hidden_size,self.emo_size)
        # write gate
        self.write_g = nn.Linear(self.state_size, self.emo_size)
        # GRU output input dimensions = state_last + context + emotion emb + internal memory
        self.gru = gru
        self.emotion_embedding = emotion_embedding
        self.embedding = embedding
        # attention layer
        self.attn1 = nn.Linear(self.hidden_size,self.hidden_size)
        self.attn2 = nn.Linear(self.hidden_size,self.hidden_size)
        self.concat = nn.Linear(self.hidden_size, 1)
    def forward(self,word_input,decoder_output,static_emotion_input,emotion_input,context_input,last_hidden,memory):
        '''
        Last hidden == prev_cell_state
        last word embedding = word_input
        last hidden input = h
        last_rnn_output = logits before softmax
        memory = encoder_outputs
        emotion_input = internal memory
        static_emotion_input = emotion embedding value
        '''
        # get embedding of input word and emotion
        if decoder_output is None:
            decoder_output = torch.zeros(word_input.shape[1],self.hidden_size,dtype=torch.float,device = self.device)
            decoder_output = decoder_output.unsqueeze(0)
            context_input = self._compute_context(decoder_output,memory)
        last_word_embedding = self.embedding(word_input)
        read_inputs = torch.cat((last_word_embedding,decoder_output,context_input), dim = -1)
        # compute read input
        # write to emotion embedding
        emotion_input = self._read_internal_memory(read_inputs,emotion_input)
        # pass everything to GRU
        # decoder_output: logits from last rnn unit
        X = torch.cat([context_input, last_word_embedding, emotion_input], dim = -1)
        rnn_output, hidden = self.gru(X,last_hidden,static_emotion_input,emotion_input)
        # write input
        # update states
        # write to emotion embedding
        new_M_emo = self._write_internal_memory(emotion_input,rnn_output) # new emotion_input
        new_context = self._compute_context(rnn_output,memory)
        return rnn_output, hidden, new_M_emo, new_context
    def _compute_context(self,rnn_output,memory):
        '''
        Compute context
        '''
        rnn_output = rnn_output.unsqueeze(dim=-2).squeeze(0) # make shape (batch,1,hidden_size)
        memory = memory.permute(1,0,2)
        Wq = self.attn1(rnn_output)
        Wm = self.attn2(memory)
        concat = (Wq + Wm).tanh()
        e = self.concat(concat).squeeze(2)
        attn_score = torch.softmax(e,dim = 1).unsqueeze(1)
        context = torch.bmm(attn_score,memory).squeeze(1)
        return context.unsqueeze(0)
    def _read_internal_memory(self,read_inputs,emotion_input):
        """
        Read the internal memory
            emotion_input: [batch_size, emo_hidden_size]
            read_inputs: [batch_size, d] d= [last_word_embedding;decoder_output;context_input]
        Returns:
            output: [batch_size, emo__hidden_size]
        """
        read_inputs = self.read_g(read_inputs)
        M_read = torch.sigmoid(read_inputs)
        return emotion_input * M_read
    def _write_internal_memory(self,emotion_input,rnn_output):
        """
        Write the internal memory
            emotion_input: [batch_size, emo_hidden_size]
            rnn_output: [batch_size, hidden_size]
        Returns:
            output: [batch_size, emo_hidden_size]
        """
        M_write = torch.sigmoid(self.write_g(rnn_output))
        return emotion_input * M_write
    
    

# GRU

In [10]:
class ECMGRU(nn.Module):
    def __init__(self,hidden_size,static_emo_size,emo_size,n_layers = 1):
        '''
        Single layer GRU.
        '''
        super(ECMGRU,self).__init__()
        # first layer of special GRU
        self.hidden_size = hidden_size
        # these three linear layer compute output from emotion/internal memory 
        self.emotion_u = nn.Linear(static_emo_size + emo_size, hidden_size)
        self.emotion_r = nn.Linear(static_emo_size + emo_size, hidden_size)
        self.emotion_c = nn.Linear(static_emo_size + emo_size, hidden_size)
        # these two are generic GRU output that takes [step_input, last_hidden]
        self.generic_u = nn.Linear(hidden_size + hidden_size + hidden_size + emo_size, hidden_size)
        self.generic_r = nn.Linear(hidden_size + hidden_size + hidden_size + emo_size, hidden_size)
        
        # gate value computation
        self.generic_c = nn.Linear(hidden_size + hidden_size + hidden_size + emo_size, hidden_size)
        self.n_layers = n_layers
        # starting from second layer, using the normal GRU
        self._cell = nn.GRU(hidden_size, hidden_size,num_layers = n_layers - 1)
    def forward(self,step_input,last_hidden,emotion,internal_memory):
        '''
        step_input: X
        last_hidden: Hidden value from GRU
        emotion: static emotion embedding vector
        internal_memory: decayed emotion embedding vector
        '''
        # compute based on the first layer
        hidden_0 = last_hidden[0].unsqueeze(dim = 0)
        internal_memory = internal_memory.squeeze(dim = 0)
        emotion_input = torch.cat([emotion,internal_memory],dim=-1)
        # compute emotion gate value
        _u = self.emotion_u(emotion_input) # update gate
        _r = self.emotion_r(emotion_input) # reset gate
        _c = self.emotion_c(emotion_input) # reset gate vector
        
        # generic GRU gate value
        X = torch.cat([step_input,hidden_0],dim = -1)
        u = self.generic_u(X)
        r = self.generic_r(X)
        
        # gate for this time stamp
        rt = torch.sigmoid(r + _r)
        ut = torch.sigmoid(u + _u)
        
        # gate vector
        Xc = torch.cat([step_input, hidden_0 * rt],dim = -1)
        ct = _c + self.generic_c(Xc)
        ct = torch.tanh(ct)
        # compute new hidden
        hidden = ut * hidden_0 + (1 - ut) * ct
        
        # if it has second layer
        if self.n_layers > 1:
            gru_hidden = last_hidden[1:] # skip the first layer of input
            gru_input = hidden
            rnn_output, gru_hidden = self._cell(gru_input, gru_hidden)
        hidden = torch.cat([hidden,gru_hidden],dim = 0)
        return rnn_output, hidden
        

# Decoder part

In [11]:
class LuongAttnDecoderRNN(nn.Module):
    def __init__(self,embedding,static_emotion_embedding,emotion_embedding, hidden_size, output_size,device,ememory=None, n_layers=1, dropout=0.1,num_emotions = 7,batch_size = 64):
        super(LuongAttnDecoderRNN, self).__init__()
        # Keep for reference
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.batch_size = batch_size
        self.dropout = dropout
        self.num_emotions = num_emotions
        self.device = device
        # Define layers
        self.embedding = embedding
        # define emotion embedding
        self.emotion_cat_embedding = static_emotion_embedding
        self.emotion_embedding = emotion_embedding # for internal memory
        self.embedding_dropout = nn.Dropout(dropout)
        #self.emotion_embedding_dropout = nn.Dropout(dropout)
        # dimension
        #self.gru = nn.GRU(hidden_size + hidden_size + hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.gru = ECMGRU(emo_size=hidden_size,hidden_size=hidden_size,static_emo_size=hidden_size,n_layers = n_layers)
        # using in Luong et al. attention mechanism.
        self.internal_memory = ECMWrapper(hidden_size,hidden_size,
                                          hidden_size,self.num_emotions,
                                          self.embedding,self.emotion_embedding,self.gru,device)
        # read external from outside
        self.external_memory = ememory
        # generic output linear layer
        self.generic_word_output_layer = nn.Linear(self.hidden_size,output_size)
        # emotional output linear layer 
        self.emotion_word_output_layer = nn.Linear(self.hidden_size,output_size)
        # emotional gate/ choice layer
        self.alpha_layer = nn.Linear(hidden_size,1)
        # Luong eq 5 layer
        self.concat_g = nn.Linear(hidden_size * 2, hidden_size)
        self.concat_e = nn.Linear(hidden_size * 2, hidden_size)
    def forward(self, input_step, input_static_emotion, input_step_emotion, last_hidden
                ,input_context, encoder_outputs,last_rnn_output = None):
        '''
        Decoder with external memory.
        
        '''
        if not torch.is_floating_point(input_step_emotion):
            input_step_emotion = self.emotion_embedding(input_step_emotion) # float number for internal memory
        input_static_emotion = self.emotion_cat_embedding(input_static_emotion)
        rnn_output, hidden, new_M_emo,context = self.internal_memory(input_step,last_rnn_output,
                                                                     input_static_emotion,
                                                                     input_step_emotion,
                                                                     input_context,last_hidden,
                                                                     encoder_outputs)
        # Concatenate weighted context vector and GRU output using Luong eq. 5
        concat_input = torch.cat((rnn_output, context), -1)
        concat_output = torch.tanh(self.concat_g(concat_input))
        # concat_output = rnn_output
        # this part is not using inside ECM (?)
        if self.external_memory is not None:
            # Project hidden output to distribution.
            generic_output = self.generic_word_output_layer(concat_output)
            emotion_output = self.emotion_word_output_layer(concat_output)
            generic_output = generic_output.squeeze(0)
            emotion_output = emotion_output.squeeze(0)
            # external memory gate
            g = torch.sigmoid(self.alpha_layer(concat_output))
            output_g = torch.softmax(generic_output,dim = 1) * (1 - g)
            output_e = torch.softmax(emotion_output,dim = 1) * g
            output = output_g + output_e # output distribution
            output = output.squeeze(0)
            g = torch.cat([(1 - g),g],dim = -1) # gate distribution
            g = g.squeeze(0)
        else:
            # Predict next word using Luong eq. 6
            output = self.out(concat_output).squeeze(0)
            # generic output
            output = F.softmax(output, dim=1)
            output = output.squeeze(0)
            g = None
        # Return output and final hidden state
        return output, hidden, new_M_emo, context,concat_output,g

In [12]:
gru = ECMGRU(emo_size=500,hidden_size=500,static_emo_size=500,n_layers=2)

In [13]:
inp = torch.zeros((1,64,1500))
last_hidden = torch.ones((2,64,500))
static_emo = torch.randn((64,500))
internal_memory = torch.randn((1,64,500))

In [14]:
rnn_output, hidden = gru(inp,last_hidden,static_emo,internal_memory)

In [15]:
rnn_output.shape,hidden.shape

(torch.Size([1, 64, 500]), torch.Size([2, 64, 500]))

# NLL Loss + Internal Memory Loss

In [16]:
def maskNLLLoss_IMemory(inp, target, mask,M_emo,external_memory,alpha):
    '''
    When external memory input will be a tuple with 4 elements
    '''
    nTotal = mask.sum()
    
    # cross entropy loss
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1) + 1e-12)
    # internal emotional loss
    eos_mask = (target == 2) # 2 is EOS token
    eos_mask = eos_mask.type_as(M_emo)
    internal_memory_loss = torch.norm(M_emo,dim = 2) * eos_mask
    internal_memory_loss = internal_memory_loss.squeeze(0)
    # external
    # find 1,0
    if external_memory is not None:
        qt = torch.gather(external_memory.view(-1,1),0,target.view(-1,1)).type(torch.LongTensor)
        qt = qt.to(device)
        alpha_prob = torch.gather(alpha,1,qt) # if it select emotion word or generic word
        external_memory_loss = (-torch.log(alpha_prob + 1e-12)).reshape(-1) 
    else:
        external_memory_loss = torch.zeros(crossEntropy.shape,dtype=torch.float,device=device)
    #print(crossEntropy.masked_select(mask).mean(),internal_memory_loss.masked_select(mask).mean())
    loss = crossEntropy.masked_select(mask).mean() + external_memory_loss.mean() + internal_memory_loss.mean()
    loss = loss.to(device)
    return loss, nTotal.item(),crossEntropy.masked_select(mask).mean().item()



# Single Train

In [17]:
def compute_perplexity(loss):
    return np.exp(loss)
def train(input_variable, lengths, target_variable,target_variable_emotion,
          mask, max_target_len, encoder, decoder, embedding,emotion_embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):
    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    # num_samples in this batch
    num_samples = input_variable.shape[1]
    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    target_variable_emotion = target_variable_emotion.to(device)
    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0
    totalCrossEntropy = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(num_samples)]])
    decoder_input = decoder_input.to(device)
    
    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    
    # Set initial context value,last_rnn_output, internal_memory
    context_input = torch.zeros(num_samples,hidden_size,dtype=torch.float,device=device) #torch.FloatTensor(batch_size,hidden_size)
    # Determine if we are using teacher forcing this iteration
    if random.random() < teacher_forcing_ratio:
        use_teacher_forcing = True  
    else:
        use_teacher_forcing = False
    # initialize value for rnn_output
    rnn_output = None
    # keep a copy of emotional category for static emotion embedding
    static_emotion = target_variable_emotion
    static_emotion = static_emotion.to(device)
    # Forward batch of sequences through decoder one time step at a time
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden,target_variable_emotion,context_input,rnn_output,g = decoder(
                decoder_input,static_emotion,target_variable_emotion, decoder_hidden,
                context_input, encoder_outputs,rnn_output
            )
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal,crossEntropy = maskNLLLoss_IMemory(decoder_output, target_variable[t], mask[t],target_variable_emotion,decoder.external_memory,g)
            loss += mask_loss
            totalCrossEntropy += crossEntropy * nTotal
            print_losses.append(mask_loss.item() * nTotal) # print average loss
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden,target_variable_emotion,context_input,rnn_output,g = decoder(
                decoder_input,static_emotion,target_variable_emotion, decoder_hidden,
                context_input,encoder_outputs,rnn_output
            )
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            topi = topi.squeeze(0)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(num_samples)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal,crossEntropy = maskNLLLoss_IMemory(decoder_output, target_variable[t], mask[t],target_variable_emotion,decoder.external_memory,g)
            loss += mask_loss
            totalCrossEntropy += crossEntropy * nTotal
            print_losses.append(mask_loss.item() * nTotal) # print average loss
            n_totals += nTotal

    # Perform backpropatation
    try:
        loss.backward()
    except Exception:
        print(input_variable)
        print(target_variable)

    # Clip gradients: gradients are modified in place
    _ = torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = torch.nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()
    #print('Total Loss {}; Cross Entropy: {}'.format(sum(print_losses) / n_totals, totalCrossEntropy / n_totals))
    return sum(print_losses) / n_totals,totalCrossEntropy / n_totals
def evaluate_performance(input_variable, lengths, target_variable,target_variable_emotion,
          mask, max_target_len, encoder, decoder):
    # test mode
    
    encoder.eval()
    decoder.eval()
    # num_samples in this batch
    num_samples = input_variable.shape[1]
    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    target_variable_emotion = target_variable_emotion.to(device)
    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0
    totalCrossEntropy = 0
    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(num_samples)]])
    decoder_input = decoder_input.to(device)
    
    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    # Set initial context value,last_rnn_output, internal_memory
    context_input = torch.zeros(num_samples,hidden_size,dtype=torch.float,device=device) #torch.FloatTensor(batch_size,hidden_size)
    # initial value for rnn output
    rnn_output = None
    # keep a copy of emotional category for static emotion embedding
    static_emotion = target_variable_emotion
    static_emotion = static_emotion.to(device)
    # forward pass to generate all sentences
    for t in range(max_target_len):
        decoder_output, decoder_hidden,target_variable_emotion,context_input,rnn_output,g = decoder(
            decoder_input,static_emotion,target_variable_emotion, decoder_hidden,
            context_input,encoder_outputs,rnn_output
        )
        # No teacher forcing: next input is decoder's own current output
        _, topi = decoder_output.topk(1)
        topi = topi.squeeze(0)
        decoder_input = torch.LongTensor([[topi[i][0] for i in range(num_samples)]])
        decoder_input = decoder_input.to(device)
        # Calculate and accumulate loss
        mask_loss, nTotal,crossEntropy = maskNLLLoss_IMemory(decoder_output, target_variable[t], mask[t],target_variable_emotion,decoder.external_memory,g)
        loss += mask_loss
        totalCrossEntropy += (crossEntropy * nTotal)
        print_losses.append(mask_loss.item() * nTotal) # print average loss
        n_totals += nTotal
    # back to train mode
    encoder.train()
    decoder.train()
    return sum(print_losses) / n_totals, totalCrossEntropy / n_totals

# Train Iteration

In [18]:
def trainIters(model_name, voc, pairs,pairs_emotion, 
               encoder, decoder, encoder_optimizer,
               decoder_optimizer, embedding,emotion_embedding, 
               encoder_n_layers, decoder_n_layers, save_dir, 
               n_iteration, batch_size, print_every, save_every, 
               clip,corpus_name,external_memory,test_pairs,test_pairs_emotion):
    loadFilename=None
    # Load batches for each iteration
    #training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      #for _ in range(n_iteration)]
    print('Loading Training data ...')
    length_pairs = len(pairs)
    #training_batches = [batch2TrainData(voc, [random.choice(range(length_pairs)) for _ in range(batch_size)],
    #                                   pairs,pairs_emotion) for _ in range(n_iteration)]
    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    totalCrossEntropy = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1
    min_test_loss = 1000
    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = batch2TrainData(voc, [random.choice(range(length_pairs)) for _ in range(batch_size)],
                                       pairs,pairs_emotion)
        # to save the data that causes error
        #with open('wrong_data.pickle','rb') as f:
        #    training_batch = pickle.load(f)
        
        # Extract fields from batch
        input_variable,input_variable_emotion, lengths, target_variable,target_variable_emotion, mask, max_target_len = training_batch

        # Run a training iteration with batch
        loss,crossEntropy = train(input_variable, lengths, target_variable,target_variable_emotion,
                     mask, max_target_len, encoder,
                     decoder, embedding,emotion_embedding,
                     encoder_optimizer, decoder_optimizer, 
                     batch_size, clip)
        
        print_loss += loss
        totalCrossEntropy += crossEntropy
        # Print progress
        if iteration % print_every == 0 or iteration == 1:
            directory = os.path.join(save_dir, model_name, corpus_name, '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            if iteration == 1:
                print_loss_avg = print_loss / 1
                print_cross_entropy = totalCrossEntropy / 1
            else:
                print_loss_avg = print_loss / print_every
                print_cross_entropy = totalCrossEntropy / print_every
            if print_cross_entropy > 300:
                perplexity = compute_perplexity(300)
            else:
                perplexity = compute_perplexity(print_cross_entropy)
            output1 = "Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}; Perplexity: {:.2f}".format(iteration, iteration / n_iteration * 100, print_loss_avg,perplexity)
            print(output1)
            test_length_pairs = len(test_pairs) 
            test_batch = batch2TrainData(voc, [idx for idx in range(1000)],
                                       test_pairs,test_pairs_emotion)
            input_variable,input_emotion, lengths, target_variable,target_emotion, mask, max_target_len = test_batch
            test_loss,testCrossEntropy = evaluate_performance(input_variable,lengths, target_variable,target_emotion,mask,max_target_len,encoder,decoder)
            
            if testCrossEntropy > 300:
                perplexity = compute_perplexity(300)
            else:
                perplexity = compute_perplexity(testCrossEntropy)
            output2 = 'Loss on validation set {:.4f}; Perplexity:{:.2f}'.format(test_loss,perplexity)
            print(output2)
            with open(os.path.join(directory,'log.txt'),'a+') as f:
                f.write(output1 + '\n')
                f.write(output2 + '\n')
            print_loss = 0
            totalCrossEntropy = 0

        # Save checkpoint and only save the better perform one,
        if (iteration % save_every == 0) and (testCrossEntropy < min_test_loss):
            min_test_loss = testCrossEntropy
            print('Save the model at checkpoint {}, and test loss is {}'.format(iteration,min_test_loss))
            directory = os.path.join(save_dir, model_name, corpus_name, '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict(),
                'external_memory':external_memory
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))
            
            

# Greedy Search

In [19]:
def print_param(model):
    for name,param in model.named_parameters():
        print(param)
        print(name,param.grad)

In [20]:
class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder,num_word = None):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq,target_emotions,input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Set initial context value,last_rnn_output, internal_memory
        context_input = torch.zeros((1,hidden_size),dtype=torch.float,device=self.decoder.device)
        context_input = context_input.to(device)
        rnn_output = None
        # keep a copy of emotional category for static emotion embedding
        static_emotion = target_emotions
        static_emotion = static_emotion.to(device)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden,target_emotions,context_input,rnn_output,g = decoder(
                decoder_input,static_emotion,target_emotions, decoder_hidden,
                context_input, encoder_outputs,rnn_output
            )
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores

# Beam Search

In [21]:
class BeamSearchDecoder(nn.Module):
    def __init__(self, encoder, decoder,num_word):
        super(BeamSearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.num_word = num_word

    def forward(self, input_seq,target_emotions,input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_words_order = torch.zeros((1,self.num_word),device=decoder.device,dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        all_scores_array = torch.zeros((1,self.num_word),device=decoder.device,dtype=torch.float)
        # Set initial context value,last_rnn_output, internal_memory
        context_input = torch.zeros(1,hidden_size,dtype=torch.float)
        context_input = context_input.to(decoder.device)
        rnn_output = None
        # keep a copy of emotional category for static emotion embedding
        static_emotion = target_emotion
        static_emotion = static_emotion.to(device)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden,target_emotions,context_input,rnn_output,g = decoder(
                decoder_input,static_emotion,target_emotions, decoder_hidden,
                context_input, encoder_outputs,rnn_output
            )
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            decoder_input_order = torch.argsort(decoder_output,dim=1,descending=True)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            all_scores_array = torch.cat((all_scores_array,decoder_output),dim = 0)
            all_words_order = torch.cat((all_words_order,decoder_input_order), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        sequences = self.beam_search(all_scores_array,3)
        return sequences
    def beam_search(self,array,k):
        array = array.tolist()
        sequences = [[list(), 1.0]]
        # walk over each step in sequence
        for row in array:
            all_candidates = list()
            # expand each current candidate
            for i in range(len(sequences)):
                seq, score = sequences[i]
                for j in range(len(row)):
                    candidate = [seq + [j], score - np.log(row[j] + 1e-8)]
                    all_candidates.append(candidate)
            # order all candidates by score
            ordered = sorted(all_candidates, key=lambda tup:tup[1])
            # select k best
            sequences = ordered[:k]
        return sequences

# Build Model

In [23]:
try:
    voc
except NameError:
    voc = Voc('a',max_length=MAX_LENGTH,min_count=MIN_COUNT)
# Configure models
model_name = 'emotion_model'
corpus_name = 'ECM10_words_GRU_Large_MINLENGTH5_Attn'
hidden_size = 500
encoder_n_layers = 4
decoder_n_layers = 4
dropout = 0.1
batch_size = 64
# number of emotion
num_emotions = 5
# load external memory based vocab.
emotion_words = get_ememory('ememory2.txt',voc)
# Set checkpoint to load from; set to None if starting from scratch
loadFilename = 'data/save/emotion_model/ECM10_words_GRU_Large_MINLENGTH5_Attn/4-4_500/100_checkpoint.tar'
checkpoint_iter = 120
training = True
if loadFilename:
    training = False
#loadFilename = os.path.join(save_dir, model_name, corpus_name,
#                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
#                            '{}_checkpoint.tar'.format(checkpoint_iter))


# Load model if a loadFilename is provided
if loadFilename:
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename)
    # If loading a model trained on GPU to CPU
    #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']
    emotion_words = checkpoint['external_memory']
    


print('Building encoder and decoder ...')
# Initialize word embeddings
if emotion_words is not None:
    emotion_words = emotion_words.to(device)

embedding = nn.Embedding(voc.num_words, hidden_size)
emotion_embedding = nn.Embedding(num_emotions, hidden_size)
emotion_embedding_static = nn.Embedding(num_emotions,hidden_size)

if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(embedding,emotion_embedding_static,emotion_embedding, hidden_size, 
                              voc.num_words,device, emotion_words,decoder_n_layers, dropout,num_emotions=num_emotions,batch_size = batch_size)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

Emotion word counts: 1198
Building encoder and decoder ...
Models built and ready to go!


In [24]:
voc.num_words

5983

In [25]:
emotion_words.sum()

tensor(1020, device='cuda:0')

# Run training

In [None]:
# Configure training/optimization
clip = 50
teacher_forcing_ratio = 0.1
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 40000
print_every = 20
save_every = 100


# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations


print("Starting Training!")
trainIters(model_name, voc, pairs,pairs_emotion, encoder, decoder, encoder_optimizer, decoder_optimizer,
           embedding,emotion_embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,
           print_every, save_every, clip,corpus_name,emotion_words,test_pairs,test_pairs_emotion)
    
    

Building optimizers ...
Starting Training!
Loading Training data ...
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 0.5512; Perplexity: 1.56
Loss on validation set 0.4326; Perplexity:1.41
Iteration: 20; Percent complete: 0.1%; Average loss: 0.4635; Perplexity: 1.46
Loss on validation set 0.4664; Perplexity:1.46
Iteration: 40; Percent complete: 0.1%; Average loss: 0.4922; Perplexity: 1.51
Loss on validation set 0.4814; Perplexity:1.48
Iteration: 60; Percent complete: 0.1%; Average loss: 0.5164; Perplexity: 1.53
Loss on validation set 0.4974; Perplexity:1.50
Iteration: 80; Percent complete: 0.2%; Average loss: 0.5106; Perplexity: 1.52
Loss on validation set 0.5023; Perplexity:1.51
Iteration: 100; Percent complete: 0.2%; Average loss: 0.5205; Perplexity: 1.55
Loss on validation set 0.4909; Perplexity:1.50
Save the model at checkpoint 100, and test loss is 0.4025090985639377
Iteration: 120; Percent complete: 0.3%; Average loss: 0.5191; Perplexity: 1.54
Los

In [26]:
def evaluate(encoder, decoder, searcher, voc, sentence, emotions,max_length=MAX_LENGTH,beam_search = False):
    emotions = int(emotions)
    emotions = torch.LongTensor([emotions])
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    emotions = emotions.to(device)

    # indexes -> words
    if beam_search:
        sequences = searcher(input_batch, emotions, lengths, max_length)
        decoded_words = beam_decode(sequences,voc)
    else:
        # Decode sentence with searcher
        tokens, scores = searcher(input_batch, emotions, lengths, max_length)
        decoded_words = [voc.index2word[token.item()] for token in tokens]
    return decoded_words

def beam_decode(sequences,voc):
    for each in sequences:
        for idxs in each:
            return [voc.index2word[idx] for idx in idxs[:-1]]
    
def evaluateInput(encoder, decoder, searcher, voc,emotion_dict,beam_search):
    input_sentence = ''
    while(1):
        try:
            # Get input sentence
            input_sentence = input('> ')
            for emotion in range(len(emotion_dict)):
                # Check if it is quit case
                if input_sentence == 'q' or input_sentence == 'quit': break
                # Normalize sentence
                input_sentence = normalizeString(input_sentence)
                # Evaluate sentence
                output_words = evaluate(encoder, decoder, searcher, voc, input_sentence,emotion,beam_search=beam_search)
                # Format and print response sentence
                output=[]
                for word in output_words:
                    if word == 'PAD':
                        continue
                    elif word == 'EOS':
                        break
                    else:
                        output.append(word)
                print('Bot({}):'.format(emotion_dict[emotion]), ' '.join(output))

        except KeyError:
            print("Error: Encountered unknown word.")
            

# Beam Search Decode

In [36]:
voc.word2index['EOS']

2

In [37]:
def sentenceFromIdx(idx,voc):
    output = []
    for num,i in enumerate(idx):
        if num > 0 and idx[num] == idx[num - 1] and i == 2:
            continue
        if voc.index2word[i] == 'EOS':
            continue
        output.append(voc.index2word[i])
        
    return ' '.join(output)

class BeamSearchNode(object):
    def __init__(self, hiddenstate, previousNode, decoder_input, 
                 logProb, length,static_emotion,emotions_emb,
                 last_rnn_output,context_input,g):
        '''
        :param hiddenstate:
        :param previousNode:
        :param wordId:
        :param logProb:
        :param length:
        '''
        
        self.hidden_state = hiddenstate
        self.prevNode = previousNode
        self.decoder_input = decoder_input
        self.logp = logProb
        self.leng = length
        self.emotions = emotions_emb
        self.rnn_output = last_rnn_output
        self.context_input = context_input
        self.alpha = g
        self.static_emotion = static_emotion
    def eval(self, alpha=1.0):
        reward = 0
        # Add here a function for shaping a reward
        return self.logp #/ float(self.leng - 1 + 1e-6) + alpha * reward


In [38]:
def beam_decode(encoder,decoder,voc,beam_size,sentence,diversity_penality,gamma,sentence_length,remove_repeated):
    result = []
    for emotions in [0,1,2,3,4]:
        #diversity_penality = False
        emotions = emotions
        #sentence = 'how are you doing ?'
        print('Post({}):{}'.format(emo_dict[emotions],sentence))
        emotions = int(emotions)
        emotions = torch.LongTensor([emotions])
        ### Format input sentence as a batch
        # words -> indexes
        indexes_batch = [indexesFromSentence(voc, sentence)]
        # Create lengths tensor
        lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
        # Transpose dimensions of batch to match models' expectations
        input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
        # Use appropriate device
        input_batch = input_batch.to(device)
        lengths = lengths.to(device)
        emotions = emotions.to(device)
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = encoder(input_batch, lengths)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones((1,1), device=device, dtype=torch.long) * SOS_token
        # Set initial context value,last_rnn_output, internal_memory
        context_input = torch.zeros(1,hidden_size,dtype=torch.float)
        context_input = context_input.to(decoder.device)
        rnn_output = None
        #
        static_emotion = emotions
        static_emotion = static_emotion.to(device)
        node = BeamSearchNode(hiddenstate=decoder_hidden,decoder_input=decoder_input,
                               context_input=context_input,static_emotion = static_emotion,
                              emotions_emb=emotions,
                               length=1,logProb=0,last_rnn_output = rnn_output,
                               previousNode=None,g = 0
                              )
        sent_leng = 0
        # beam search
        K = beam_size
        # Iteratively decode one word token at a time
        # Forward pass through decoder
        nodes = PriorityQueue(maxsize=K)
        nodes.put((0,node))
        # diversity rate
        gamma = gamma
        # choice
        g_losses = []
        for i in range(sentence_length):
            #print('Decoder {} word'.format(i + 1))
            choices = []
            while not nodes.empty():
                score,node = nodes.get()
                #print('Last word at position {}'.format(node.leng))
                if node.decoder_input.item() == 2: # decode stop when EOS is met
                     choices.append((score,node))
                     continue
                decoder_output, decoder_hidden,emotions,context_input,rnn_output,g = decoder(
                    node.decoder_input,node.static_emotion,node.emotions, node.hidden_state,
                    node.context_input,encoder_outputs,node.rnn_output
                )
                #print(g)
                # Obtain most likely word token and its softmax score
                # decoder_output = decoder_output.unsqueeze(0)
                decoder_scores, decoder_input = torch.topk(decoder_output,k= K, dim=1)
                decoder_scores = torch.log(decoder_scores)
                if diversity_penality and i >= 1:
                    # apply based on rank
                    penalties = torch.arange(0,K,dtype=torch.float,device=device) * gamma
                    # apply penalties on the output
                    decoder_scores = decoder_scores - penalties
                token_choices = [decoder_input[0,i].item() for i in range(K)] 
                token_scores = [decoder_scores[0,i].item() for i in range(K)] 
                #print(voc.index2word[token_choices[0]])
                # for each candidate token, compute loss
                for token,decoder_score in zip(token_choices,token_scores):
                    
                    next_decoder_input = torch.ones((1,1),dtype=torch.long,device=device) * token
                    #current_score = score + decoder_score
                    if token == node.decoder_input.item() and remove_repeated:
                        decoder_score = -100
                    next_node = BeamSearchNode(decoder_hidden,node,next_decoder_input,
                                          decoder_score,node.leng + 1,static_emotion,emotions,rnn_output,context_input,g)
                    #print('This is {} words'.format(next_node.leng))
                    current_score = (score * node.leng  - next_node.eval()) / next_node.leng
                    choices.append((current_score,next_node))
            choices = sorted(choices,key=lambda x:x[0])
            # choices = choices[:K]
            for choice in choices:
                if not nodes.full():
                    nodes.put(choice)

        #print(nodes.qsize())
        #print('Decode')        
        # decoder    
        sentences = []
        i = 0
        while not nodes.empty():
            #print('Decode {}:'.format(i))
            i += 1 
            sentence_ = []
            score,node = nodes.get()
            while(node.prevNode is not None):
                sentence_.append(node.decoder_input.item())
                node = node.prevNode
            sentence_ = sentence_[::-1]
            #print(sentence,score)
            sentences.append((score,sentence_))
        #print(sentences)
        for sent in sentences[:1]:
            print(sentenceFromIdx(sent[1],voc),sent[0]) 
            result.append((sentence,sentenceFromIdx(sent[1],voc),sent[0],static_emotion.item()))
    
    return result
    


In [39]:
df = pd.read_csv('../Seq2Seq_evaluation.csv')

In [40]:
df.post[0],df.response[0]

('how s it coming ?', 'it looks like it very well .')

In [41]:
df.head()

Unnamed: 0,post,response,emotion
0,how s it coming ?,it looks like it very well .,4
1,ah i can do it perfectly .,how do you do that ? ?,0
2,any other bags dr . johnson ?,not really i m just asking about it .,4
3,you think it looks nice ?,i honestly think it looks great .,1
4,oh it s the way you talk .,yes . . . .,2


In [None]:
results = []
for idx,row in df.iterrows():
    
    sentence = row['post']
    print(sentence)
    response = beam_decode(encoder,decoder,
                sentence=sentence,
                beam_size=10,
                diversity_penality=True,
                gamma = 1,sentence_length=10,
                voc = voc,remove_repeated = True)
    results += response
    

how s it coming ?
Post(neutral):how s it coming ?
not . fine . thank you . 0.5140974356068505
Post(joy):how s it coming ?
well i think so be you . 0.8547784321837955
Post(anger):how s it coming ?
well i think so tired . 0.8171372786164284
Post(sadness):how s it coming ?
well i m thank you to . 0.6943707855211364
Post(fear):how s it coming ?
do i think i m to . 1.1293233533700306
ah i can do it perfectly .
Post(neutral):ah i can do it perfectly .
what are your dead ? 0.691305839589664
Post(joy):ah i can do it perfectly .
how kind of you ? 0.8891910176192012
Post(anger):ah i can do it perfectly .
do you tell me ? 0.5877953975328377
Post(sadness):ah i can do it perfectly .
so ? tell me . 0.5739091432520321
Post(fear):ah i can do it perfectly .
because you are dead . 0.796965171716043
any other bags dr . johnson ?
Post(neutral):any other bags dr . johnson ?
yes honey of a . 1.2162847252828735
Post(joy):any other bags dr . johnson ?
yes i certainly out . 1.0880073649542672
Post(anger):any o

In [35]:
pd.DataFrame(results)

Unnamed: 0,0,1,2,3
0,how s it coming ?,well i think so too . EOS,0.728701,0
1,how s it coming ?,well . ? thank you so much . EOS,1.048622,1
2,how s it coming ?,well i m you m you . EOS,0.875476,2
3,how s it coming ?,well . thank you and you . EOS,0.804971,3
4,how s it coming ?,do i think i m to . EOS,0.904044,4
5,ah i can do it perfectly .,how are your watch ? EOS,0.782685,0
6,ah i can do it perfectly .,no wish you are . EOS,0.51611,1
7,ah i can do it perfectly .,no you are dead . EOS,0.652646,2
8,ah i can do it perfectly .,no you are so . EOS,0.412775,3
9,ah i can do it perfectly .,let s tell me . EOS,0.886995,4


In [None]:
diversity_penality = True
emotions = 2
sentence = 'how are you doing ?'
print('Post({}):{}'.format(emo_dict[emotions],sentence))
emotions = int(emotions)
emotions = torch.LongTensor([emotions])
### Format input sentence as a batch
# words -> indexes
indexes_batch = [indexesFromSentence(voc, sentence)]
# Create lengths tensor
lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
# Transpose dimensions of batch to match models' expectations
input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
# Use appropriate device
input_batch = input_batch.to(device)
lengths = lengths.to(device)
emotions = emotions.to(device)
# Forward input through encoder model
encoder_outputs, encoder_hidden = encoder(input_batch, lengths)
# Prepare encoder's final hidden layer to be first hidden input to the decoder
decoder_hidden = encoder_hidden[:decoder.n_layers]
# Initialize decoder input with SOS_token
decoder_input = torch.ones((1,1), device=device, dtype=torch.long) * SOS_token
# Set initial context value,last_rnn_output, internal_memory
context_input = torch.zeros(1,hidden_size,dtype=torch.float)
context_input = context_input.to(decoder.device)
rnn_output = None

static_emotion = emotions
static_emotion = static_emotion.to(device)
node = BeamSearchNode(hiddenstate=decoder_hidden,decoder_input=decoder_input,
                       context_input=context_input,static_emotion = static_emotion,
                      emotions_emb=emotions,
                       length=1,logProb=0,last_rnn_output = rnn_output,
                       previousNode=None,g = 0
                      )
sent_leng = 0
# beam search
K = 100
# Iteratively decode one word token at a time
# Forward pass through decoder
nodes = PriorityQueue(maxsize=K)
nodes.put((0,node))
# diversity rate
gamma = 2

choices = []

In [None]:
diversity_penality = False
score,node = nodes.get()
#print('Last word at position {}'.format(node.leng))
decoder_output, decoder_hidden,emotions,context_input,rnn_output,g = decoder(
                node.decoder_input,node.static_emotion,node.emotions, node.hidden_state,
                node.context_input,encoder_outputs,node.rnn_output
            )
# Obtain most likely word token and its softmax score
#decoder_output = decoder_output.unsqueeze(0)
decoder_scores, decoder_input = torch.topk(decoder_output,k= K, dim=1)
decoder_scores = torch.log(decoder_scores)
if diversity_penality:
    # apply based on rank
    penalties = torch.arange(0,K,dtype=torch.float,device=device) * gamma
    # apply penalties on the output
    decoder_scores = decoder_scores - penalties
token_choices = [decoder_input[0,i].item() for i in range(K)] 
token_scores = [decoder_scores[0,i].item() for i in range(K)] 
# for each candidate token, compute loss
choices=[]
for token,decoder_score in zip(token_choices,token_scores):
    next_decoder_input = torch.ones((1,1),dtype=torch.long,device=device) * token
    next_node = BeamSearchNode(decoder_hidden,node,next_decoder_input,
                              decoder_score,node.leng + 1,static_emotion,emotions,rnn_output,context_input,g)
    #print('This is {} words'.format(next_node.leng))
    current_score = score - next_node.eval()
    choices.append((current_score,next_node))

choices = sorted(choices,key=lambda x:x[0])


In [None]:
nodes.put(choices[0])

In [None]:
scroe,node = nodes.get()

In [None]:
for idx,each in enumerate(choices):
    print(idx,voc.index2word[each[1].decoder_input.item()],each[0],each[1].logp)

# Chat with bot

In [None]:
# Set dropout layers to eval mode

encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)
searcher2 = BeamSearchDecoder(encoder,decoder,voc.num_words)
# Begin chatting (uncomment and run the following line to begin)
evaluateInput(encoder, decoder, searcher, voc,emo_dict,False)