In [0]:
!pip install contractions
!pip install pytorch-nlp
!pip install bpemb

Collecting contractions
  Downloading https://files.pythonhosted.org/packages/52/52/a15f0fb338a462045c7c87a35dbaeda11738c45aa9d2f5c76ac191d6adff/contractions-0.0.17-py2.py3-none-any.whl
Installing collected packages: contractions
Successfully installed contractions-0.0.17
Collecting pytorch-nlp
[?25l  Downloading https://files.pythonhosted.org/packages/5b/3e/cb2663ea0837b04936a27c695af1947288e2189872f6e469181a94771a75/pytorch_nlp-0.4.0.post2-py3-none-any.whl (83kB)
[K    100% |████████████████████████████████| 92kB 3.5MB/s 
Installing collected packages: pytorch-nlp
Successfully installed pytorch-nlp-0.4.0.post2
Collecting bpemb
  Downloading https://files.pythonhosted.org/packages/fe/d5/229f4d1a8de7a08a34d3b205bf82f6487f3b624c665d3de58e797fba2a9f/bpemb-0.2.11-py3-none-any.whl
Collecting sentencepiece (from bpemb)
[?25l  Downloading https://files.pythonhosted.org/packages/7e/8a/0e4a10bc00a0263db8d45d0062c83892598eb58e8091f439c63926e9b107/sentencepiece-0.1.81-cp36-cp36m-manylinux1_x8

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
import numpy as np
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import contractions
import csv
import random
import json
import re
import os
import unicodedata
import operator

import codecs
from io import open
import itertools
import math
from queue import PriorityQueue
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load & Preprocess Data

## Global Variables Initialization

In [0]:
_corpus_name = "DailyDialogue"

small_batch_size = 5
# Default word tokens
MAX_LENGTH = 30  # Maximum sentence length to consider
int2emotion = ['Anger','Anger','Anger','Happiness','Sadness','Surpise','Other']
# 0: no emotion, 1: anger, 2: disgust, 3: fear, 4: happiness, 5: sadness, 6: surprise
num_emotion = 7

# 0 - 1,2,3,5
# 1 - 0,4,6
# Use self-defined embedding

## Class Voc

In [0]:
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token
# voc = Voc(_corpus_name,"word2vec") # Need to run the cell down below first

class Voc: # Word - Index Mapping
    def __init__(self, name, version):
        self.name = name
        self.version = version
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"} 
        self.num_words = 3  # Count SOS, EOS, PAD
        if version == "word2vec" or version=='word2vec_small':
            from torchnlp.word_to_vector import GloVe
            if version == "word2vec":
                self.dim = 300 
                self.glove = GloVe()
                self.weights_matrix = np.zeros((10000,self.dim))
            else:
                self.dim = 100 
                self.glove = GloVe(name='6B', dim=self.dim)
            self.weights_matrix = np.zeros((10000,self.dim))
            self.weights_matrix[0] = self.glove[str(PAD_token)]
            self.weights_matrix[1] = self.glove[str(SOS_token)]
            self.weights_matrix[2] = self.glove[str(EOS_token)]
        elif version == "bpemb":
            from bpemb import BPEmb
            self.dim = 100
            self.bpemb = BPEmb(lang="en", dim=self.dim)
            self.index2word = {PAD_token: self.bpemb.decode_ids([PAD_token]), 
                               SOS_token: self.bpemb.decode_ids([SOS_token]), 
                               EOS_token: self.bpemb.decode_ids([EOS_token])} 
            
            self.weights_matrix = self.bpemb.vectors.copy()
    
    def unicodeToAscii(self,s):
        return ''.join(
            c for c in unicodedata.normalize('NFD', s)
            if unicodedata.category(c) != 'Mn'
        )
    
    def tokenizer(self,s):
        s = self.unicodeToAscii(s.lower().strip())
        s = contractions.fix(s)
        s = re.sub(
            r"[\*\"“”\n\\…\+\-\/\=\(\)‘•:\[\]\|’\!;]", " ", 
            s)
        s = re.sub(r"[ ]+", " ", s)
        s = re.sub(r"\!+", "!", s)
        s = re.sub(r"\,+", ",", s)
        s = re.sub(r"\?+", "?", s)

        if self.version  == "bpemb":
            return self.bpemb.encode(s)
        else: 
            import spacy
            NLP = spacy.load('en')
            return [x.text for x in NLP.tokenizer(s) if x.text != " "]

    def addSentence(self, sentence):
        words = self.tokenizer(sentence) if  type(sentence) == str else sentence
        for word in words:
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
            if self.version in ["word2vec","word2vec_small"]:
                self.weights_matrix[self.num_words] = self.glove[word]
        else:
            self.word2count[word] += 1

    # Remove words below a certain count threshold
    def trim(self, min_count):
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = []

        for k, v in self.word2count.items():
            if v >= min_count:
                keep_words.append(k)

        print('keep_words {} / {} = {:.4f}'.format(
            len(keep_words), len(self.word2index), len(keep_words) / len(self.word2index)
        ))

        # Reinitialize dictionaries
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        if version == "bpemb":
            self.index2word = {PAD_token: bpemb.decode_ids([PAD_token]), 
                               SOS_token: bpemb.decode_ids([SOS_token]), 
                               EOS_token: bpemb.decode_ids([EOS_token])} 
        self.num_words = 3 # Count default tokens
        if version in ["word2vec","word2vec_small"]:
            self.weights_matrix = np.zeros((len(keep_words),self.dim))
            self.weights_matrix[0] = self.glove[PAD_token]
            self.weights_matrix[1] = self.glove[SOS_token]
            self.weights_matrix[2] = self.glove[EOS_token]                     
        for word in keep_words:
            self.addWord(word)

## Loading Functions

In [0]:
## Convert the json file to dataset of the format [post,[response,emotion],pos_emotion,res_emotion] for number of bucket
def read_data(path,voc,max_size=None):
    data_set = []
    data = json.load(open(path,'r'))
    counter = 0
    size_max = 0
    for pair in data:
        post,emo1,emo2 = pair[0]
        response,res_emo1,res_emo2 = pair[1][0]
        post_word_list = voc.tokenizer(post)
        res_word_list = voc.tokenizer(response)
        if len(post_word_list) < MAX_LENGTH and len(res_word_list) < MAX_LENGTH:
            voc.addSentence(post)
            voc.addSentence(response)
            counter += 1
            if counter % 10000 == 0:
                print("    reading data pair %d" % counter)
                print(post_word_list)
                print(res_word_list)
            data_set.append([post, response, int(emo1), int(res_emo1)])
    return data_set
## Lowercase, trim, and remove non-letter characters


def getword2index(word):
    if word not in voc.word2index:
        voc.addWord(word)
    return voc.word2index[word]
    
def indexesFromSentence(voc, sentence):
#     return [voc.word2index[word] for word in sentence.split(' ')] + [EOS_token]
    if voc.version == "bpemb":
        return voc.bpemb.encode_ids(sentence) + [EOS_token]
    return [getword2index(word) for word in voc.tokenizer(sentence)] + [EOS_token]


def zeroPadding(l, fillvalue=PAD_token):
    return list(itertools.zip_longest(*l, fillvalue=fillvalue))

def binaryMatrix(l, value=PAD_token):
    m = []
    for i, seq in enumerate(l):
        m.append([])
        for token in seq:
            if (type(token) == int and token == PAD_token) or (type(token)!=int and torch.equal(token,value)):
                m[i].append(0)
            else:
                m[i].append(1)
    return m

# Returns padded input sequence tensor and lengths
def inputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])

    padList = zeroPadding(indexes_batch)
    padVar = torch.LongTensor(padList)
    
    return padVar, lengths

# Returns padded target sequence tensor, padding mask, and max target length
def outputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    
    
    padList = zeroPadding(indexes_batch)
    mask = binaryMatrix(padList)
    mask = torch.ByteTensor(mask)
    padVar = torch.LongTensor(padList)
    
    return padVar, mask, max_target_len
  

# Returns all items for a given batch of pairs
def batch2TrainData(voc, pair_batch):
  
    pair_batch.sort(key=lambda x: len(indexesFromSentence(voc,x[0])), reverse=True)
#     pair_batch.sort(key=lambda x: len(x[0].split(" ")), reverse=True)
    input_batch, output_batch,emo_in,emo_out = [],[],[],[]
    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
        pair[2] = 0 if pair[2] in [0,1,2] else pair[2]
        pair[3] = 0 if pair[3] in [0,1,2] else pair[3]
#         pair[2] = 0 if pair[2] in [1,2,3,5] else 1
#         pair[3] = 0 if pair[2] in [1,2,3,5] else 1
        emo_in.append(pair[2])
        emo_out.append(pair[3])
    inp, lengths = inputVar(input_batch, voc)
    output, mask, max_target_len = outputVar(output_batch, voc)
    return inp, lengths, output, mask, max_target_len,torch.LongTensor(emo_in),torch.LongTensor(emo_out)


## Testing (Load and Prep Data)

In [0]:
pair_batch = [random.choice(dev_set) for _ in range(small_batch_size)]
print(pair_batch[0])
pair_batch.sort(key=lambda x: len(x[0].split(" ")), reverse=True)
print(pair_batch[0])
input_batch, output_batch,emo_in,emo_out = [],[],[],[]
pair = pair_batch[0]
input_batch.append(pair[0])
output_batch.append(pair[1])
emo_in.append(pair[2])
emo_out.append(pair[3])
pair = pair_batch[1]
input_batch.append(pair[0])
output_batch.append(pair[1])
emo_in.append(pair[2])
emo_out.append(pair[3])
pair = pair_batch[2]
input_batch.append(pair[0])
output_batch.append(pair[1])
emo_in.append(pair[2])
emo_out.append(pair[3])
pair = pair_batch[3]
input_batch.append(pair[0])
output_batch.append(pair[1])
emo_in.append(pair[2])
emo_out.append(pair[3])
pair = pair_batch[4]
input_batch.append(pair[0])
output_batch.append(pair[1])
emo_in.append(pair[2])
emo_out.append(pair[3])

#  inp, lengths = inputVar(input_batch, voc)

#  output, mask, max_target_len = outputVar(output_batch, voc)

# indexes_batch = [indexesFromSentence(voc, sentence) for sentence in input_batch]
# lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
# padList = zeroPadding(indexes_batch)
# # padVar = torch.LongTensor(padList)
# s = "we offer full tuition reimbursement for job related classes ."
# print(len(voc.tokenizer(s)))
# print(len(indexesFromSentence(voc,s)))





['i don t know . can i purchase treasury certificates here at this counter ?', 'yes of course . we have treasury notes with two three and five year terms available .', 0, 0]
['certainly . there are several kinds of music . what do you like to listen to ?', 'i like listening to the classical music .', 4, 4]


In [0]:
pair_batch = [random.choice(dev_set) for _ in range(small_batch_size)]
pair_batch

[['no thank you . that will be all for now .',
  'your document will be ready in about ten minutes .',
  0,
  0],
 ['where will we call at ?', 'dalian .', 0, 0],
 ['japanese yen .', 'ok sir . would that be fixed or current deposit ?', 2, 6],
 ['here s a twenty .', 'here s your change . thank you .', 0, 0],
 ['i don t understand why i m being arrested .', 'you have a warrant .', 0, 0]]

In [0]:
inp, lengths = inputVar(input_batch, voc)
output, mask, max_target_len = outputVar(output_batch, voc)
lengths

tensor([24, 17, 14, 10, 10])

In [0]:
# from google.colab import files
# uploaded = files.upload()
# from google.colab import files
# uploaded = files.upload()

from google.colab import drive
drive.mount('/content/drive')

train_path = "drive/My Drive/ECM/train"
dev_path = "drive/My Drive/ECM/dev"

# voc_version = "word2vec"
# voc_version = "word2vec_small"
voc_version = "bpemb"

voc = Voc(_corpus_name,voc_version) # Need to run the cell down below first
train_set = read_data(train_path,voc)
dev_set = read_data(dev_path,voc)

# Example for validation

# train_batches = batch2TrainData(voc, [random.choice(train_set) for _ in range(small_batch_size)])
# input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = train_batches

# print("input_variable:", input_variable)
# print("lengths:", lengths)
# print("target_variable:", target_variable)
# print("mask:", mask)
# print("max_target_len:", max_target_len)
# print("emo_in:",emo_in)
# print("emo_out:",emo_out)


# Example for validation
# dev_batches = batch2TrainData(voc, [random.choice(dev_set) for _ in range(small_batch_size)])
# input_variable_2, lengths_2, target_variable_2, mask_2, max_target_len_2,emo_in_2,emo_out_2 = dev_batches

# print("input_variable:", input_variable)
# print("lengths:", lengths)
# print("target_variable:", target_variable)
# print("mask:", mask)
# print("max_target_len:", max_target_len)
# print("emo_in:",emo_in)
# print("emo_out:",emo_out)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
    reading data pair 10000
 
 
['▁y', 'es', '▁i', '▁d', '▁like', '▁another', '▁cup', '▁of', '▁coffee', '▁afterwards', '▁.', '▁make', '▁it', '▁hot', '▁ple', 'ase', '▁.']
['▁you', '▁ve', '▁got', '▁it', '▁sir', '▁.']
    reading data pair 20000
 
 
['▁there', '▁are', '▁hundreds', '▁and', '▁hundreds', '▁.', '▁english', '▁is', '▁particularly', '▁rich', '▁in', '▁id', 'i', 'om', 'atic', '▁express', 'ions', '▁.']
['▁can', '▁you', '▁give', '▁us', '▁an', '▁example', '▁', '?']
    reading data pair 30000
 
 
['▁why', '▁did', '▁you', '▁lie', '▁to', '▁me', '▁in', '▁the', '▁em', 'ail', '▁', '?']
['▁i', '▁didn', '▁t', '▁lie', '▁.', '▁you', '▁just', '▁didn', '▁t', '▁ask', '▁me', '▁my', '▁real', '▁name', '▁.']
    reading data pair 10000
 
 
['▁how', '▁n', 'ice', '▁i', '▁can', '▁see', '▁you', '▁are', '▁bus', 'ying', '▁pack', 'ing', '▁.', '▁i', '▁m', '▁sor', 'ry', '▁to', '▁in

# Load Cornell Corpus

In [0]:
import pickle
_corpus_name = "CornellMovie"
_voc_name = "word2vec"


small_batch_size = 5
MAX_LENGTH = 30  # Maximum sentence length to consider
int2emotion = ['negative','positive']
num_emotion = 2


from google.colab import drive
drive.mount('/content/drive')


voc = Voc(_corpus_name,_voc_name) # Need to run the cell down below first
with open ('drive/My Drive/ECM/MovieDialogue/cornell_train', 'rb') as fp:
    movie_train = pickle.load(fp)
with open ('drive/My Drive/ECM/MovieDialogue/cornell_dev', 'rb') as fp:
    movie_dev = pickle.load(fp)
for data in movie_train:
    voc.addSentence(data[0])
    voc.addSentence(data[1])
for data in movie_dev:
    voc.addSentence(data[0])
    voc.addSentence(data[1])

In [0]:
# Configure models

model_name = 'ecm_model_imemory_bpemb_fix'

use_embedding =  True
use_imemory = True
use_ememory = False

attn_model = 'dot'
#attn_model = 'general'
#attn_model = 'concat'

hidden_size = 100 # must match pretrained word2vec embedding size!!!!
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64
# Set checkpoint to load from; set to None if starting from scratch
loadFilename = None
checkpoint_iter = 2000



# filepath = "content/ecm_model_withimemory_bpemb/DailyDialogue/2-2_100/2000_checkpoint.tar"
# loadFilename = os.path.join('ecm_model_imemory_bpemb_notfix/DailyDialogue/2-2_100/2000_checkpoint.tar')

# Load model if a loadFilename is provided
if loadFilename:
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename)
    
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']
    
print('Building encoder and decoder ...')
# Initialize word embeddings



wm = voc.weights_matrix if voc.version == "bpemb" else voc.weights_matrix[:voc.num_words]
num_embeddings, embedding_dim = wm.shape
embedding = nn.Embedding(num_embeddings, embedding_dim)
embedding.load_state_dict({'weight': torch.Tensor(wm)})
emo_embedding = nn.Embedding(num_emotion,embedding_dim)


if use_embedding:
        embedding.weight.requires_grad = False

if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = AttnDecoderRNN(attn_model, embedding, emo_embedding, hidden_size, voc.weights_matrix.shape[0], decoder_n_layers, 
                         dropout,use_emb=use_embedding, use_imemory=use_imemory, use_ememory=use_ememory)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

In [0]:
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 10000
print_every = 5
save_every = 100

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations
print("Starting Training!")
trainIters(model_name, voc, movie_train, movie_dev, encoder, decoder, encoder_optimizer, decoder_optimizer,
           embedding, emo_embedding, encoder_n_layers, decoder_n_layers, n_iteration, batch_size,
           print_every, save_every, clip, _corpus_name, loadFilename)

Building optimizers ...
Starting Training!
Initializing ...
Training...
Iteration: 5; Percent complete: 0.1%; Average loss: 5.9180
Iteration: 10; Percent complete: 0.1%; Average loss: 5.8343
Iteration: 15; Percent complete: 0.1%; Average loss: 5.9597
Iteration: 20; Percent complete: 0.2%; Average loss: 5.9079
Iteration: 25; Percent complete: 0.2%; Average loss: 5.9060
Iteration: 30; Percent complete: 0.3%; Average loss: 5.8603
Iteration: 35; Percent complete: 0.4%; Average loss: 5.9027
Iteration: 40; Percent complete: 0.4%; Average loss: 5.8077
Iteration: 45; Percent complete: 0.4%; Average loss: 5.9042
Iteration: 50; Percent complete: 0.5%; Average loss: 5.8957
Iteration: 55; Percent complete: 0.5%; Average loss: 5.9369
Iteration: 60; Percent complete: 0.6%; Average loss: 5.8236
Iteration: 65; Percent complete: 0.7%; Average loss: 5.8812
Iteration: 70; Percent complete: 0.7%; Average loss: 5.9722
Iteration: 75; Percent complete: 0.8%; Average loss: 5.8754
Iteration: 80; Percent comple

# Seq2seq **Model**

## LSTM Model - sentiment classifier

In [0]:
# _*_ coding: utf-8 _*_

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F

class LSTMClassifier(nn.Module):
	def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_dim, embedding):
		super(LSTMClassifier, self).__init__()
		
		"""
		Arguments
		---------
		batch_size : Size of the batch which is same as the batch_size of the data returned by the TorchText BucketIterator
		output_size : 2 = (pos, neg)
		hidden_sie : Size of the hidden_state of the LSTM
		vocab_size : Size of the vocabulary containing unique words
		embedding_length : Embeddding dimension of GloVe word embeddings
		weights : Pre-trained GloVe word_embeddings which we will use to create our word_embedding look-up table 
		
		"""
		
		self.batch_size = batch_size
		self.output_size = output_size
		self.hidden_size = hidden_size
		self.vocab_size = vocab_size
		self.embedding_dim = embedding_dim
		self.word_embeddings = embedding
# 		self.word_embeddings = nn.Embedding(vocab_size, embedding_length)# Initializing the look-up table.
# 		self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False) # Assigning the look-up table to the pre-trained GloVe word embedding.
		self.lstm = nn.LSTM(embedding_dim, hidden_size)
		self.label = nn.Linear(hidden_size, output_size)
		
	def forward(self, input_sentence, batch_size=None):
	
		""" 
		Parameters
		----------
		input_sentence: input_sentence of shape = (batch_size, num_sequences)
		batch_size : default = None. Used only for prediction on a single sentence after training (batch_size = 1)
		
		Returns
		-------
		Output of the linear layer containing logits for positive & negative class which receives its input as the final_hidden_state of the LSTM
		final_output.shape = (batch_size, output_size)
		
		"""
		
		''' Here we will map all the indexes present in the input sequence to the corresponding word vector using our pre-trained word_embedddins.'''
		input = self.word_embeddings(input_sentence) # embedded input of shape = (batch_size, num_sequences,  embedding_length)
# 		input = input.permute(1, 0, 2) # input.size() = (num_sequences, batch_size, embedding_length)
		if batch_size is None:
			h_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda()) # Initial hidden state of the LSTM
			c_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda()) # Initial cell state of the LSTM
		else:
			h_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())
			c_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())
		output, (final_hidden_state, final_cell_state) = self.lstm(input, (h_0, c_0))
		final_output = self.label(final_hidden_state[-1]) # final_hidden_state.size() = (1, batch_size, hidden_size) & final_output.size() = (batch_size, output_size)
		
		return final_output

## Train and Loss Function - LSTM Model

In [0]:
def clip_gradient(model, clip_value):
    params = list(filter(lambda p: p.grad is not None, model.parameters()))
    for p in params:
        p.grad.data.clamp_(-clip_value, clip_value)
def train2(model,model_optimizer,input_variable,target,batch_size,clip):
    model_optimizer.zero_grad()
    loss = 0
    print_losses = []
    n_totals = 0
    input_variable = input_variable.to(device)
    prediction = model(input_variable)
    target = target.to(device)
    loss = loss_fn(prediction, target)
    num_corrects = (torch.max(prediction,1)[1].view(target.size()).data == target.data).float().sum()
    acc = 100.0 * num_corrects/batch_size
    loss.backward()

    # Clip gradients: gradients are modified in place
    clip_gradient(model, 1e-1)
    
    # Adjust model weights
    model_optimizer.step()
    
    return loss,acc
def train_iters2(model_name,voc,pairs,dev_pairs,embedding,model,model_optimizer,n_iteration,batch_size,print_every,save_every,clip,corpus_name,loadFilename=None):
    
    # Load batches for each iteration
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]
    dev_batches = [batch2TrainData(voc, [random.choice(dev_pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]
    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    print_acc = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        # Extract fields from batch
        input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = training_batch
        # Run a training iteration with batch

        train_loss, train_acc = train2(model,model_optimizer,input_variable,emo_in,batch_size, clip)
        print_loss += train_loss
        print_acc += train_acc
        train_loss, train_acc = train2(model,model_optimizer,target_variable,emo_out,batch_size, clip)
        print_loss += train_loss
        print_acc += train_acc
      
        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print_acc_avg = print_acc / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}; Average acc: {:.4f} ".format(iteration, iteration / n_iteration * 100, print_loss,print_acc_avg))
    
            print_loss = 0
            print_acc = 0

        # Save checkpoint
        if (iteration % save_every == 0):
            dev_batch = dev_batches[iteration-1]
            input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = dev_batch
            
            dev_loss, dev_acc = train2(model,model_optimizer,input_variable,emo_in,batch_size,clip)
            dev_loss2, dev_acc2 = train2(model,model_optimizer,target_variable,emo_out,batch_size,clip)
            
            
#             print(f'Train Loss: {print_loss_avg:.3f}, Train Acc: {print_acc_avg:.2f}%, Val. Loss: {val_loss:3f}, Val. Acc: {val_acc:.2f}%')
            
            print("Iteration: {}; Dev loss: {:.4f}; Dev acc: {:.4f}".format(iteration, dev_loss+dev_loss2,dev_acc+dev_acc2))
            
            
            directory = os.path.join(model_name, _corpus_name)
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'iteration': iteration,
                'model': model.state_dict(),
                'embedding': embedding.state_dict(),
                'model_opt': model_optimizer.state_dict(),
                'loss': loss
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))
  

## Encoder, Decoder and Attension

In [0]:
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        #   because our input size is a word embedding with number of features == hidden_size
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # Pack padded batch of sequences for RNN module
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:]
        # Return output and final hidden state
        return outputs, hidden
# Luong attention layer
class Attn(torch.nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = torch.nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = torch.nn.Linear(self.hidden_size * 2, hidden_size)
            self.v = torch.nn.Parameter(torch.FloatTensor(hidden_size))

    def dot_score(self, hidden, encoder_output):
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)

        # Transpose max_length and batch_size dimensions
        attn_energies = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)
      
class AttnDecoderRNN(nn.Module):
    def __init__(self,attn_model,embedding,emotion_embedding,hidden_size,output_size,n_layers=1,dropout=0.1,use_emb=False,use_imemory=False,use_ememory=False):
        super(AttnDecoderRNN, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout
        self.use_emb = use_emb
        self.use_imemory = use_imemory
        self.use_ememory = use_ememory

        # Define layers
        self.embedding = embedding
        self.emotion_embedding = emotion_embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.concat_1 = nn.Linear(hidden_size * 2, hidden_size)
        self.concat_3 = nn.Linear(hidden_size * 2, hidden_size)

        self.out = nn.Linear(hidden_size, output_size)
        
        
        # DIY layers
        self.read_linear = nn.Linear(hidden_size*(self.n_layers+1),hidden_size)
        self.write_linear = nn.Linear(hidden_size,hidden_size)
        self.gru_concat = nn.Linear(hidden_size*2,hidden_size)
        

        self.attn = Attn(attn_model, hidden_size)
    def forward(self, input_step, emotion, last_hidden, encoder_outputs):
        # Note: we run this one step (word) at a time
        # Get embedding of current input word
        embedded = self.embedding(input_step) # 1,64,300
        embedded = self.embedding_dropout(embedded)
        
        if self.use_emb and self.use_imemory:
          
            if emotion.size()!=embedded.size():
                emotion = self.emotion_embedding(emotion).unsqueeze(dim=0) # mem_write
            
            _,tmp_size,_ = last_hidden.size()

            read_gate = torch.sigmoid(self.read_linear(torch.cat([embedded,torch.reshape(last_hidden,(1,tmp_size,-1))],dim=2)))
            mem_read = torch.mul(emotion,read_gate)
            
            gru_input = self.gru_concat(torch.cat([embedded,mem_read],dim=2))
            rnn_output, hidden = self.gru(gru_input, last_hidden)
            
            write_gate = torch.sigmoid(self.write_linear(rnn_output))
            emotion = torch.mul(write_gate,mem_read)
          
          
          

            attn_weights = self.attn(rnn_output, encoder_outputs)
            # Multiply attention weights to encoder outputs to get new "weighted sum" context vector
            context = attn_weights.bmm(encoder_outputs.transpose(0, 1))
            # Concatenate weighted context vector and GRU output using Luong eq. 5
            rnn_output = rnn_output.squeeze(0)
            context = context.squeeze(1)
            concat_input = torch.cat((rnn_output, context), 1)
            concat_output = torch.tanh(self.concat_3(concat_input))
            # Predict next word using Luong eq. 6
            output = self.out(concat_output)
            output = F.softmax(output, dim=1)
            return output,hidden,emotion
        elif self.use_emb:
            emotion_embedded = self.emotion_embedding(emotion).unsqueeze(dim=0)
            embedded = self.concat_1(torch.cat([embedded,emotion_embedded],dim = 2))

        # Forward through unidirectional GRU
        rnn_output, hidden = self.gru(embedded, last_hidden)
        # Calculate attention weights from the current GRU output
        attn_weights = self.attn(rnn_output, encoder_outputs)
        # Multiply attention weights to encoder outputs to get new "weighted sum" context vector
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))
        # Concatenate weighted context vector and GRU output using Luong eq. 5
        rnn_output = rnn_output.squeeze(0)
        context = context.squeeze(1)
        concat_input = torch.cat((rnn_output, context), 1)
        concat_output = torch.tanh(self.concat_3(concat_input))
        # Predict next word using Luong eq. 6
        output = self.out(concat_output)
        output = F.softmax(output, dim=1)
        # Return output and final hidden state
        return output,hidden,input_emotion      

## Train and Loss function

In [0]:
def maskNLLLoss(inp,emotion,target,mask,decoder):
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    loss = crossEntropy.masked_select(mask).mean()
    if decoder.use_imemory:
        emo_loss = torch.norm(emotion)
        if math.isnan(emo_loss): loss+= emo_loss
    loss = loss.to(device)
    return loss, nTotal.item()
def train(input_variable,input_emotion,lengths, target_variable, mask, max_target_len, encoder, decoder,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):

    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

#     Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)
    input_emotion = input_emotion.to(device)
    

    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Determine if we are using teacher forcing this iteration
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    # Forward batch of sequences through decoder one time step at a time
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden,input_emotion = decoder(
                decoder_input,input_emotion,decoder_hidden, encoder_outputs
            )
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output,input_emotion,target_variable[t], mask[t],decoder)
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden,input_emotion = decoder(
                decoder_input,input_emotion,decoder_hidden, encoder_outputs
            )
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output,input_emotion,target_variable[t], mask[t],decoder)
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal

    # Perform backpropatation
    loss.backward()

    # Clip gradients: gradients are modified in place
    _ = torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = torch.nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses) / n_totals

def trainIters(model_name, voc, pairs, dev_pairs, encoder, decoder, encoder_optimizer, 
               decoder_optimizer, embedding,emo_embedding, encoder_n_layers, decoder_n_layers, n_iteration, batch_size, print_every, save_every, clip, corpus_name, loadFilename=None):

    # Load batches for each iteration
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]
    dev_batches = [batch2TrainData(voc, [random.choice(dev_pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]
    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        # Extract fields from batch
        input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = training_batch
        # Run a training iteration with batch
        loss = train(input_variable,emo_out,lengths, target_variable, mask, max_target_len, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, batch_size, clip)

        print_loss += loss

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg))
            print_loss = 0

        # Save checkpoint
        if (iteration % save_every == 0):
            dev_batch = dev_batches[iteration-1]
            input_variable2, lengths2, target_variable2, mask2, max_target_len2,emo_in2,emo_out2 = dev_batch
            
            dev_loss = train(input_variable2,emo_out2, lengths2, target_variable2, mask2, max_target_len2, encoder,
                     decoder,encoder_optimizer, decoder_optimizer, batch_size, clip)
            print("Iteration: {}; Dev loss: {:.4f}".format(iteration, dev_loss))
            directory = os.path.join(model_name, _corpus_name, '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict(),
                'emo_embedding':emo_embedding.state_dict()
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))

## Searcher

In [0]:
class BeamSearchNode(object):
    def __init__(self, hiddenstate,hiddenemotion,previousNode, wordId, logProb, length, past=[]):
        '''
        :param hiddenstate:
        :param previousNode:
        :param wordId:
        :param logProb:
        :param length:
        '''
        self.h = hiddenstate
        self.e = hiddenemotion
        self.prevNode = previousNode
        self.wordid = wordId
        self.logp = logProb
        self.leng = length
        self.past = past
        
    def __lt__(self, other):      
#         return self.logp < other.logp
        return self.eval() < other.eval()
    def eval(self, alpha=1.0):
        reward = 0
        # Add here a function for shaping a reward
        return self.logp / float(self.leng - 1 + 1e-6) + alpha * reward
           
class MyTopKDecoder(nn.Module):
    def __init__(self, encoder, decoder,k):
        super(MyTopKDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.k = k
    def forward(self, input_seq,target_emotion,input_length,num_output, max_length):
        # Forward input through encoder model
        encoder_output, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        emotion = torch.LongTensor([target_emotion]).to(device)
        return beam_decode(decoder, emotion, decoder_hidden, encoder_output=encoder_output,topk=self.k,num_output = num_output,debug=True)     
      
class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq,target_emotion, input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
#         # Initialize decoder input with SOS_token
#         decoder_input = torch.ones(1, 1, dtype=torch.long) * SOS_token
#         # Initialize tensors to append decoded words to
#         all_tokens = torch.zeros([0], dtype=torch.long)
#         all_scores = torch.zeros([0])
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Iteratively decode one word token at a time
        decoder_emotion = torch.LongTensor([target_emotion]).to(device)
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden, decoder_emotion = self.decoder(decoder_input,decoder_emotion, decoder_hidden, encoder_outputs)
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores
      

In [0]:
beam_width = 15
max_qsize = 10000
max_past_word = 20
similar_word_len = 2
def beam_decode(decoder,decoder_emotion,decoder_hidden,encoder_output,topk,num_output,debug=False):
    sent_breaker = ['.','.','!','?',':']
    
    
    # Start with the start of the sentence token
#     decoder_input = torch.ones(1, 1, dtype=torch.long) * SOS_token
    decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
    # Number of sentence to generate
    endnodes = []
    number_required = min((topk + 1), topk - len(endnodes))

    # starting node -  hidden vector, previous node, word id, logp, length
    node = BeamSearchNode(decoder_hidden, decoder_emotion, None, decoder_input, 0, 1)
    nodes = PriorityQueue()
    # start the queue
    nodes.put((-node.eval(), node))
    qsize = 1
    # start beam search
    while True:
        
        # give up when decoding takes too long
        if qsize > max_qsize or nodes.empty(): break
        
        # fetch the best node
        score, n = nodes.get()
        decoder_input = n.wordid
        decoder_hidden = n.h
        if n.wordid.item() == EOS_token and n.prevNode != None:
            endnodes.append((score, n))
            # if we reached maximum # of sentences required
            if len(endnodes) >= number_required:
                break
            else:
                continue
        # decode for one step using decoder
        decoder_output,decoder_hidden,decoder_emotion = decoder(decoder_input,decoder_emotion,decoder_hidden,encoder_output)
#         decoder_hidden = decoder_hidden[:, idx, :].unsqueeze(0)
#         decoder_hidden = torch.unsqueeze(decoder_hidden,0)
        # PUT HERE REAL BEAM SEARCH OF TOP
        log_prob, indexes = torch.topk(decoder_output, beam_width)
#         torch.unsqueeze(indexes[0][new_k], 0)
        nextnodes = []
        word_past = n.past.copy()
        for new_k in range(beam_width):
            decoded_t = indexes[0][new_k].view(1, -1)
          
#             log_p = log_prob[0][new_k].item()
#             word_past.add(decoded_t.item())
#             node = BeamSearchNode(decoder_hidden,decoder_emotion, n,decoded_t,n.logp + log_p, n.leng + 1,word_past)
#             score = -node.eval()
#             nextnodes.append((score, node))
#             if (voc.index2word[decoded_t.item()] in sent_breaker) and n.leng < 10 or decoded_t.item() in word_past:
#                   or decoded_t.item() in word_past
            eval_1 = voc.bpemb.decode_ids([decoded_t.item()]) in sent_breaker
            eval_2 = decoded_t.item() in word_past
            eval_3 = len(voc.bpemb.decode_ids([decoded_t.item()]))>similar_word_len
            if (eval_1 and n.leng < max_past_word ) or (eval_2 and eval_3):
                continue
            else:   
#                 print("current word is {}".format(voc.bpemb.decode_ids([decoded_t.item()])))
                log_p = log_prob[0][new_k].item()
                word_past.append(decoded_t.item())
                if len(word_past)>max_past_word:
                    word_past = word_past[-max_past_word:]
                node = BeamSearchNode(decoder_hidden,decoder_emotion, n,decoded_t,n.logp + log_p, n.leng + 1,word_past)
                score = -node.eval()
                nextnodes.append((score, node))
        # put them into queue
        for i in range(len(nextnodes)):
            score, nn = nextnodes[i]
            nodes.put((score, nn))
            # increase qsize
        qsize += len(nextnodes) - 1
    
    # choose nbest paths, back trace them
    if len(endnodes) == 0:
        endnodes = [nodes.get() for _ in range(min(topk,nodes.qsize()))]
    utterances = []
#     print(len(endnodes))
    for score, n in sorted(endnodes, key=operator.itemgetter(0)):

        utterance = []
        utterance.append(n.wordid)
        # back trace
        while n.prevNode != None:
            n = n.prevNode
            utterance.append(n.wordid)
        
        utterance = utterance[::-1]
        utterances.append(utterance)


    return utterances

## Evaluation

In [0]:
def evaluateInput(encoder, decoder, searcher, voc, num_output=5, max_length=10):
    input_sentence = ''
    while(1):
        try:
            # Get input sentence
            input_sentence = input('> ')
            # Check if it is quit case
            if input_sentence == 'q' or input_sentence == 'quit': break
            # Normalize sentence
#             input_sentence = normalizeString(input_sentence)
            
            # words -> indexes
            indexes_batch = [indexesFromSentence(voc, input_sentence)]
            

            # Create lengths tensor
            lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
            # Transpose dimensions of batch to match models' expectations
            input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
            
            # Use appropriate device
            input_batch = input_batch.to(device)
            lengths = lengths.to(device)
            
            
            
            # Decode sentence with searcher
            output = []
            for e in range(num_emotion):
                if type(searcher) == MyTopKDecoder:
                    
                    tmp = searcher(input_batch,e,lengths, num_output, max_length)
#                     res = tmp[e][:num_output]
#                     print(tmp)
                    for i in range(min(len(tmp),num_output)):
                        if voc.version == "bpemb":
                            padding = [0,1,2]
                            words = [token.item() for token in tmp[i]]
                            filtered = filter(lambda x: True if x not in padding else False, words)
                            print('{}: '.format(int2emotion[e]), voc.bpemb.decode_ids(list(filtered)))
                        else:
                            decoded_words = [voc.index2word[token.item()] for token in tmp[i]]
                            decoded_words[:] = [x for x in decoded_words if not (x == 'EOS' or x == 'PAD' or x == 'SOS')]
                            print('{}: '.format(int2emotion[e]), ' '.join(decoded_words))
                        
                elif type(searcher) == GreedySearchDecoder:
                    tokens, scores = searcher(input_batch,e,lengths, max_length)
                    if voc.version == "bpemb":
                        padding = [0,1,2]
                        
                        words = [token.item() for token in tokens]
                        filtered = filter(lambda x: True if x not in padding else False, words)
                        print('{}: '.format(int2emotion[e]), voc.bpemb.decode_ids(list(filtered)))
                    else:
                        decoded_words = [voc.index2word[token.item()] for token in tokens]
                        decoded_words[:] = [x for x in decoded_words if not (x == 'EOS' or x == 'PAD' or x == 'SOS')]
                        print('{}: '.format(int2emotion[e]), ' '.join(decoded_words))

        except KeyError:
            print("Error: Encountered unknown word.")

# Run Model

## Load Data

In [0]:
_corpus_name = "DailyDialogue"
_voc_name = "bpemb"


small_batch_size = 5
MAX_LENGTH = 30  # Maximum sentence length to consider
int2emotion = ['Anger','Happiness','Sadness','Surpise','Other']
num_emotion = 5



from google.colab import drive
drive.mount('/content/drive')

train_path = "drive/My Drive/ECM/train"
dev_path = "drive/My Drive/ECM/dev"
voc = Voc(_corpus_name,_voc_name) # Need to run the cell down below first
train_set = read_data(train_path,voc)
dev_set = read_data(dev_path,voc)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
downloading https://nlp.h-its.org/bpemb/en/en.wiki.bpe.vs10000.model


100%|██████████| 400869/400869 [00:01<00:00, 381920.24B/s]


downloading https://nlp.h-its.org/bpemb/en/en.wiki.bpe.vs10000.d100.w2v.bin.tar.gz


100%|██████████| 3784656/3784656 [00:02<00:00, 1435556.13B/s]


    reading data pair 10000
['▁y', 'es', '▁i', '▁d', '▁like', '▁another', '▁cup', '▁of', '▁coffee', '▁afterwards', '▁.', '▁make', '▁it', '▁hot', '▁ple', 'ase', '▁.']
['▁you', '▁ve', '▁got', '▁it', '▁sir', '▁.']
    reading data pair 20000
['▁there', '▁are', '▁hundreds', '▁and', '▁hundreds', '▁.', '▁english', '▁is', '▁particularly', '▁rich', '▁in', '▁id', 'i', 'om', 'atic', '▁express', 'ions', '▁.']
['▁can', '▁you', '▁give', '▁us', '▁an', '▁example', '▁', '?']
    reading data pair 30000
['▁why', '▁did', '▁you', '▁lie', '▁to', '▁me', '▁in', '▁the', '▁em', 'ail', '▁', '?']
['▁i', '▁didn', '▁t', '▁lie', '▁.', '▁you', '▁just', '▁didn', '▁t', '▁ask', '▁me', '▁my', '▁real', '▁name', '▁.']
    reading data pair 10000
['▁how', '▁n', 'ice', '▁i', '▁can', '▁see', '▁you', '▁are', '▁bus', 'ying', '▁pack', 'ing', '▁.', '▁i', '▁m', '▁sor', 'ry', '▁to', '▁inter', 'rupt', '▁you', '▁.']
['▁that', '▁s', '▁all', '▁right', '▁.', '▁i', '▁m', '▁almost', '▁finished', '▁.', 'do', '▁you', '▁need', '▁my', '▁hel

In [0]:
# Configure models

model_name = 'ecm_model_imemory_bpemb_notfix'

use_embedding =  True
use_imemory = True
use_ememory = False

attn_model = 'dot'
#attn_model = 'general'
#attn_model = 'concat'

hidden_size = 100 # must match pretrained word2vec embedding size!!!!
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64
# Set checkpoint to load from; set to None if starting from scratch
loadFilename = None
checkpoint_iter = 2000



# filepath = "content/ecm_model_withimemory_bpemb/DailyDialogue/2-2_100/2000_checkpoint.tar"
# loadFilename = os.path.join('ecm_model_imemory_bpemb_notfix/DailyDialogue/2-2_100/2000_checkpoint.tar')

# Load model if a loadFilename is provided
if loadFilename:
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename)
    
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']
    
print('Building encoder and decoder ...')
# Initialize word embeddings



wm = voc.weights_matrix if voc.version == "bpemb" else voc.weights_matrix[:voc.num_words]
num_embeddings, embedding_dim = wm.shape
embedding = nn.Embedding(num_embeddings, embedding_dim)
embedding.load_state_dict({'weight': torch.Tensor(wm)})
emo_embedding = nn.Embedding(num_emotion,embedding_dim)


# if use_embedding:
#         embedding.weight.requires_grad = False

if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = AttnDecoderRNN(attn_model, embedding, emo_embedding, hidden_size, voc.weights_matrix.shape[0], decoder_n_layers, 
                         dropout,use_emb=use_embedding, use_imemory=use_imemory, use_ememory=use_ememory)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

Building encoder and decoder ...
Models built and ready to go!


In [0]:
loadFilename = None

In [0]:
voc.weights_matrix.shape[0]

10000

## Run Sentiment Classifier Training

In [0]:
# Configure models

# TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_dataset()



learning_rate = 2e-5
batch_size = 64
output_size = 2
hidden_size = 100

wm = voc.weights_matrix if voc.version == "bpemb" else voc.weights_matrix[:voc.num_words]
num_embeddings, embedding_dim = wm.shape
embedding = nn.Embedding(num_embeddings, embedding_dim)
embedding.load_state_dict({'weight': torch.Tensor(wm)})

print('Building LSTM Model ...')
model = LSTMClassifier(batch_size, output_size, hidden_size, num_embeddings, embedding_dim, embedding)
loss_fn = F.cross_entropy
model = model.to(device)
print('Models built and ready to go!')

# model_name = 'ecm_model_imemory_bpemb_notfix'


# hidden_size = 100 # must match pretrained word2vec embedding size!!!!
# batch_size = 64
# # Set checkpoint to load from; set to None if starting from scratch
# loadFilename = None
# checkpoint_iter = 2000

# # filepath = "content/ecm_model_withimemory_bpemb/DailyDialogue/2-2_100/2000_checkpoint.tar"
# # loadFilename = os.path.join('ecm_model_imemory_bpemb_notfix/DailyDialogue/2-2_100/2000_checkpoint.tar')

# # Load model if a loadFilename is provided
# if loadFilename:
#     # If loading on same machine the model was trained on
#     checkpoint = torch.load(loadFilename)
#     model_sd = checkpoint['model']
#     model_optimizer_sd = checkpoint['model_optimizer']
#     embedding_sd = checkpoint['embedding']
#     voc.__dict__ = checkpoint['voc_dict']
    
# print('Building LSTM Model ...')
# # Initialize word embeddings

# wm = voc.weights_matrix if voc.version == "bpemb" else voc.weights_matrix[:voc.num_words]
# num_embeddings, embedding_dim = wm.shape
# embedding = nn.Embedding(num_embeddings, embedding_dim)
# embedding.load_state_dict({'weight': torch.Tensor(wm)})
# emo_embedding = nn.Embedding(num_emotion,embedding_dim)


# if use_embedding:
#     embedding.weight.requires_grad = False

# if loadFilename:
#     embedding.load_state_dict(embedding_sd)
# # Initialize encoder & decoder models




# if loadFilename:
#     encoder.load_state_dict(encoder_sd)
#     decoder.load_state_dict(decoder_sd)
# # Use appropriate device
# encoder = encoder.to(device)
# decoder = decoder.to(device)
# print('Models built and ready to go!')



Building LSTM Model ...
Models built and ready to go!


In [0]:
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 100
print_every = 1
save_every = 50

# Ensure dropout layers are in train mode
model.train()

# Initialize optimizers
print('Building optimizers ...')
model_optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# if loadFilename:
#     encoder_optimizer.load_state_dict(encoder_optimizer_sd)
#     decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations
print("Starting Training!")
train_iters2(model_name, voc, train_set, dev_set, embedding, model, model_optimizer, n_iteration, batch_size,
           print_every, save_every, clip, _corpus_name)



In [0]:
train_iters2(model_name, voc, train_set, dev_set, model, model_optimizer, embedding, n_iteration, batch_size,
           print_every, save_every, clip, _corpus_name, loadFilename

## Run training

In [0]:
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 100
print_every = 5
save_every = 100

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations
print("Starting Training!")
trainIters(model_name, voc, train_set, dev_set, encoder, decoder, encoder_optimizer, decoder_optimizer,
           embedding, emo_embedding, encoder_n_layers, decoder_n_layers, n_iteration, batch_size,
           print_every, save_every, clip, _corpus_name, loadFilename)

Building optimizers ...
Starting Training!
Initializing ...
Training...


RuntimeError: ignored

## Save Model to Google Drive

In [0]:
# from google.colab import drive
# drive.mount('/content/gdrive')
# !zip -r cb_model.zip cb_model
# !cp cb_model.zip 'gdrive/My Drive/'

# content/ecm_model_pretrained2/DailyDialogue/2-2_300/1400_checkpoint.tar
# content/ecm_model_pretrained2/DailyDialogue/2-2_300/2000_checkpoint.tar
# content/ecm_model_withimemory_embfix/DailyDialogue/2-2_300/2000_checkpoint.tar
# content/ecm_model_withimemory/DailyDialogue/2-2_100/2000_checkpoint.tar
# ! cp ecm_model_withimemory/DailyDialogue/2-2_300/2000_checkpoint.tar 
# content/ecm_model_withimemory_bpemb/DailyDialogue/2-2_100/5000_checkpoint.tar
# content/ecm_model_imemory_bpemb_notfix/DailyDialogue/2-2_100/5000_checkpoint.tar
# content/ecm_model_imemory_bpemb_fix/CornellMovie/2-2_100/10000_checkpoint.tar
! mv ecm_model_imemory_bpemb_fix/CornellMovie/2-2_100/10000_checkpoint.tar ecm_imemory_bpemb_nofix_10000_cornell_checkpoint.tar
! cp ecm_imemory_bpemb_nofix_10000_cornell_checkpoint.tar 'drive/My Drive/ECM'


In [0]:
!sudo rm -rf ~/.nv

# Testing

In [0]:
MAX_LENGTH = 300
# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
searcher = MyTopKDecoder(encoder, decoder,k=10)

# searcher = GreedySearchDecoder(encoder,decoder)
# Begin chatting (uncomment and run the following line to begin)
evaluateInput(encoder, decoder, searcher,voc,num_output=1,max_length=MAX_LENGTH)


> whom do you love ?
negative:  i want to tell me
positive:  i want to tell me me
> i love you .
negative:  i want to tell me what i dont know
positive:  i want to tell me what i dont know
> how is life ?
negative:  i know what youre gonna tell me me
positive:  i know what youre gonna tell me me
> i want a lunch with you .
negative:  i know youre gonna tell me
positive:  i know youre gonna tell me
> beautiful ?
negative:  i know what youre gonna tell me i dont want to see you
positive:  i know what youre gonna tell me i dont want to see you
> tired ?
negative:  i know what youre gonna tell me me
positive:  i know what youre gonna tell me me


KeyboardInterrupt: ignored

In [0]:
MAX_LENGTH = 30
# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
# searcher = MyTopKDecoder(encoder, decoder,k=1)
searcher = GreedySearchDecoder(encoder,decoder)
# Begin chatting (uncomment and run the following line to begin)
evaluateInput(encoder, decoder, searcher,voc,num_output=1,max_length=MAX_LENGTH)


> hello world .
negative:  i dont know
positive:  i dont know
> hello
negative:  i dont know
positive:  i dont know
> who is your mother ?
negative:  i dont know
positive:  i dont know
> ?
negative:  i dont know
positive:  i dont know
> laugh
negative:  i dont know
positive:  i dont know
> smile 
negative:  i dont know
positive:  i dont know


KeyboardInterrupt: ignored

# TTesting

In [0]:
my_batch = batch2TrainData(voc, [random.choice(train_set) for _ in range(batch_size)])
input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = my_batch
print(input_variable.size())
print(emo_in)
print(target_variable.size())
print(emo_out)
train_loss, train_acc = train2(model,model_optimizer,input_variable.to(device),emo_in.to(device),batch_size, clip)


torch.Size([23, 64])
tensor([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1,
        1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1])
torch.Size([25, 64])
tensor([1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0,
        0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1,
        1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0])


In [0]:
train_acc

tensor(45.3125, device='cuda:0')

In [0]:
wm = voc.weights_matrix if voc.version == "bpemb" else voc.weights_matrix[:voc.num_words]
num_embeddings, embedding_dim = wm.shape
tmp_emb = nn.Embedding(num_embeddings, embedding_dim)
tmp_emb.load_state_dict({'weight': torch.Tensor(wm)})
input = tmp_emb(input_variable)
h_0 = Variable(torch.zeros(1, batch_size, hidden_size))
c_0 = Variable(torch.zeros(1, batch_size, hidden_size))

# lstm = nn.LSTM(embedding_dim, hidden_size)
# output, (final_hidden_state, final_cell_state) = lstm(input, (h_0, c_0))

In [0]:
lstm = nn.LSTM(embedding_dim, hidden_size)
input = tmp_emb(input_variable)
input.size()
lstm(input, (h_0, c_0))[0].size()

torch.Size([22, 64, 100])

In [0]:
h_0.size()

torch.Size([1, 64, 100])

In [0]:
read_linear = nn.Linear(hidden_size,hidden_size)
write_linear = nn.Linear(hidden_size,hidden_size)
concat_emo = nn.Linear(hidden_size+emo_dim,hidden_size) 
concat_2 = nn.Linear(hidden_size*2,hidden_size)
my_gru = nn.GRU(hidden_size, hidden_size, 2, dropout=0.1)


hidden_size=300
emo_dim = 300
num_emo = 7
emb = nn.Embedding(num_embeddings, hidden_size)
emb2 = nn.Embedding(num_emo,emo_dim)



my_batch = batch2TrainData(voc, [random.choice(train_set) for _ in range(batch_size)])
input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = my_batch

# loss = train(input_variable,emo_out,lengths, target_variable, mask, max_target_len, encoder,
#                      decoder, encoder_optimizer, decoder_optimizer, batch_size, clip)
encoder_outputs, encoder_hidden = encoder(input_variable, lengths)
decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
decoder_hidden = encoder_hidden[:decoder.n_layers]

NameError: ignored

In [0]:
a,b,c = decoder_hidden.size()
# torch.reshape(decoder_hidden,(1,-1,-1))
b

64

In [0]:


# mem_read = torch.mul(emotion_embedded,read_gate)
# embedded = concat_2(torch.cat([embedded,mem_read],dim = 2))

# rnn_output, hidden = my_gru(embedded, decoder_hidden)
# write_gate = torch.sigmoid(write_linear(rnn_output))
# emotion_update = torch.mul(write_gate,emotion_embedded)


torch.Size([1, 64, 300]) torch.Size([2, 64, 300])


In [0]:
embedded = embedding(decoder_input)
emo_embedding = nn.Embedding(num_emotion,emo_emb_dim)

emotion_embedded = emo_embedding(emo_out).unsqueeze(dim=0)
# torch.cat([embedded,emotion_embedded],dim = 3).size()
emotion_embedded.size()

torch.Size([1, 64, 300])

In [0]:
# emo_embedding(emo_out).size()
# torch.cat([emotion_embedded,embedded],dim=2).size()
read_linear = nn.Linear(hidden_size,hidden_size)

read_gate = torch.sigmoid(read_linear(embedded))

torch.mul(emotion_embedded,read_gate).size()

torch.Size([1, 64, 300])

In [0]:
my_batch = batch2TrainData(voc, [random.choice(train_set) for _ in range(batch_size)])
input_variable, lengths, target_variable, mask, max_target_len,emo_in,emo_out = my_batch

# loss = train(input_variable,emo_out,lengths, target_variable, mask, max_target_len, encoder,
#                      decoder, encoder_optimizer, decoder_optimizer, batch_size, clip)
encoder_outputs, encoder_hidden = encoder(input_variable, lengths)
decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
decoder_hidden = encoder_hidden[:decoder.n_layers]
use_teacher_forcing = False

decoder_output, decoder_hidden,decoder_emo = decoder(decoder_input,emo_out,decoder_hidden,encoder_outputs)
_, topi = decoder_output.topk(1)
decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
#             decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
# mask_loss, nTotal = maskNLLLoss(decoder_output,decoder_emo,target_variable[0], mask[0],decoder)
# def maskNLLLoss(inp,emotion,target,mask,decoder):
nTotal = mask[0].sum()

In [0]:
# torch.gather(decoder_output, 1, target_variable[0].view(-1, 1))
target_variable[0].view(-1, 1)
# crossEntropy = -torch.log(torch.gather(decoder_output, 1, target_variable[0].view(-1, 1)).squeeze(1))
# loss = crossEntropy.masked_select(mask[0]).mean()
# if decoder.use_imemory:
#     emo_loss = torch.norm(decoder_emo)
#     if math.isnan(emo_loss): loss+= emo_loss
#     loss = loss.to(device)
# return loss, nTotal.item()


tensor([[ 467],
        [ 326],
        [8407],
        [5451],
        [ 280],
        [ 351],
        [ 107],
        [ 107],
        [ 107],
        [ 386],
        [ 216],
        [ 386],
        [4743],
        [ 538],
        [1515],
        [   7],
        [1294],
        [2950],
        [ 379],
        [1616],
        [ 155],
        [ 386],
        [1220],
        [ 386],
        [ 386],
        [ 467],
        [4743],
        [ 386],
        [3410],
        [ 386],
        [ 120],
        [ 538],
        [ 756],
        [ 386],
        [   7],
        [ 386],
        [7227],
        [3410],
        [ 107],
        [ 415],
        [ 107],
        [ 192],
        [ 643],
        [8407],
        [ 415],
        [ 155],
        [8326],
        [3410],
        [ 386],
        [  34],
        [ 879],
        [9368],
        [ 155],
        [ 643],
        [5451],
        [3410],
        [ 386],
        [ 386],
        [ 155],
        [ 386],
        [ 107],
        [ 155],
        

In [0]:
indexes_batch = [indexesFromSentence(voc, "hello world")]
input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
searcher = GreedySearchDecoder(encoder,decoder)
encoder_outputs, encoder_hidden = encoder(input_batch,lengths)
decoder_hidden = encoder_hidden[:decoder.n_layers]
decoder_input = torch.ones(1, 1, dtype=torch.long) * SOS_token
all_tokens = torch.zeros([0], dtype=torch.long)
all_scores = torch.zeros([0])
decoder_emotion = torch.LongTensor([4])
decoder_output, decoder_hidden, decoder_emotion = decoder(decoder_input,decoder_emotion, decoder_hidden, encoder_outputs)

decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
all_scores = torch.cat((all_scores, decoder_scores), dim=0)
decoder_input = torch.unsqueeze(decoder_input, 0)

decoder_output, decoder_hidden, decoder_emotion = decoder(decoder_input,decoder_emotion, decoder_hidden, encoder_outputs)

decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
all_scores = torch.cat((all_scores, decoder_scores), dim=0)
decoder_input = torch.unsqueeze(decoder_input, 0)

decoder_output, decoder_hidden, decoder_emotion = decoder(decoder_input,decoder_emotion, decoder_hidden, encoder_outputs)

# tmp = searcher(input_batch,4,lengths, 30)
# def forward(self, input_seq,target_emotion, input_length, max_length):
    # Forward input through encoder model
# encoder_outputs, encoder_hidden = encoder(input_seq, input_length)
#     # Prepare encoder's final hidden layer to be first hidden input to the decoder
#     decoder_hidden = encoder_hidden[:decoder.n_layers]
#     # Initialize decoder input with SOS_token
#     decoder_input = torch.ones(1, 1, dtype=torch.long) * SOS_token
#     # Initialize tensors to append decoded words to
#     all_tokens = torch.zeros([0], dtype=torch.long)
#     all_scores = torch.zeros([0])
#     # Iteratively decode one word token at a time
#     decoder_emotion = torch.LongTensor([target_emotion])
#     for _ in range(max_length):
        # Forward pass through decoder


In [0]:
#emotion_embedded.bmm(read_gate)
# read_gate.size()
torch.mul(emotion_embedded,read_gate).size()

torch.Size([1, 64, 300])

In [0]:
embedded = embedding(decoder_input)
emotion_embedded = emo_embedding(emo_out).unsqueeze(dim=0)
emb = torch.cat([embedded,emotion_embedded],dim = 2)
my_gru = nn.GRU(hidden_size*2, hidden_size, 2, dropout=0.1)
my_cat = nn.Linear(hidden_size * 2, hidden_size)
rnn_output, hidden = my_gru(emb, decoder_hidden)
print(embedded.size(),emotion_embedded.size(),emb.size())
print(rnn_output.size(),hidden.size())


torch.Size([1, 64, 100]) torch.Size([1, 64, 100]) torch.Size([1, 64, 200])
torch.Size([1, 64, 100]) torch.Size([2, 64, 100])


In [0]:
indexes_batch = [indexesFromSentence(voc, input_sentence)]
            
            
            
            # Create lengths tensor
            # Transpose dimensions of batch to match models' expectations
            input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
            # Decode sentence with searcher
            output = []
            for e in range(num_emotion):
                if e == 1 or e == 2: 
                    continue
                if type(searcher) == MyTopKDecoder:
                    tmp = searcher(input_batch,e,lengths, max_length)
#                     res = tmp[e][:num_output]
                    for i in range(min(len(tmp[0]),num_output)):
                        decoded_words = [voc.index2word[token.item()] for token in tmp[0][i]]
                        decoded_words[:] = [x for x in decoded_words if not (x == 'EOS' or x == 'PAD' or x == 'SOS')]
                        print('{}: '.format(int2emotion[e]), ' '.join(decoded_words))
                else:
                    tokens, scores = searcher(input_batch,e,lengths, max_length)
                    decoded_words = [voc.index2word[token.item()] for token in tokens]
                    decoded_words[:] = [x for x in decoded_words if not (x == 'EOS' or x == 'PAD' or x == 'SOS')]
                    print('{}: '.format(int2emotion[e]), ' '.join(decoded_words))
