In [1]:
from src.model import *
from src.preprocessing import * 
from src.utils import *
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import numpy as np
import time
import pickle

In [2]:
def load_model(loadFilename,params):
    # hyperparameters
    USE_CUDA = False#torch.cuda.is_available()
    device = torch.device("cuda" if USE_CUDA else "cpu")
    PAD_token = 0  # Used for padding short sentences
    SOS_token = 1  # Start-of-sentence token
    EOS_token = 2  # End-of-sentence token
    MAX_LENGTH = params['MAX_LENGTH']  # Maximum sentence length to consider
    MIN_COUNT = params['MIN_COUNT']    # Minimum word count threshold for trimming
    save_dir = params['save_dir']
    emo_dict = params['emo_dict']
    model_name = params['model_name']
    corpus_name = params['corpus_name']
    attn_model = params['attn_model']
    voc = Voc(corpus_name,max_length=MAX_LENGTH,min_count=MIN_COUNT)
    hidden_size = params['hidden_size']
    encoder_n_layers = params['encoder_n_layers']
    decoder_n_layers = params['decoder_n_layers']
    dropout = params['dropout']
    batch_size = params['batch_size']
    # number of emotion
    num_emotions = params['num_emotions']
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename, map_location='cpu')
    # If loading a model trained on GPU to CPU
    # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']
    emotion_words = checkpoint['external_memory']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    emotion_embedding = nn.Embedding(num_emotions, hidden_size)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, emotion_embedding, hidden_size,
                                  voc.num_words, emotion_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    return encoder,decoder,voc

In [3]:
emo_dict ={ 0: 'no emotion', 1: 'anger', 2: 'disgust', 
            3: 'fear', 4: 'happiness', 
            5: 'sadness', 6: 'surprise'}
params = {
    'MAX_LENGTH':20,
    'MIN_COUNT':1,
    'save_dir':os.path.join('data','save'),
    'emo_dict':emo_dict,
    'model_name':'emotion_model1',
    'corpus_name':'daily_dialogue',
    'attn_model':'dot',
    'hidden_size':500,
    'encoder_n_layers':2,
    'decoder_n_layers':2,
    'num_emotions':7,
    'dropout':0.1,
    'batch_size':64,
}

In [4]:
encoder1,decoder1,voc1 = load_model('data/pre_trained/fully_work_ECM_4000checkpoint_500_hidden.tar',params)


Building encoder and decoder ...


In [5]:
encoder2,decoder2,voc2 = load_model('data/pre_trained/5000_checkpoint.tar',params)


Building encoder and decoder ...


# beam search for testing

In [6]:
class BeamSearchDecoder(nn.Module):
    '''
    Beam search decode
    '''
    def __init__(self, encoder, decoder,num_word,device):
        '''

        :param encoder: torch nn
        :param decoder: torch nn
        :param num_word: int vocabulary size
        '''
        super(BeamSearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.num_word = num_word
        self.device = device

    def forward(self, input_seq,target_emotions,input_length, max_length):
        SOS_token = 1
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=self.device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=self.device, dtype=torch.long)
        all_words_order = torch.zeros((1,self.num_word),device=self.device,dtype=torch.long)
        all_scores = torch.zeros([0], device=self.device)
        all_scores_array = torch.zeros((1,self.num_word),device=self.device,dtype=torch.float)
        # Set initial context value,last_rnn_output, internal_memory
        context_input = torch.ones(1,self.decoder.hidden_size,dtype=torch.float)
        context_input = context_input.to(self.device)
        # last_rnn_output = torch.FloatTensor(hidden_size)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden,target_emotions,context_input = self.decoder(
                decoder_input,target_emotions, decoder_hidden,
                context_input, encoder_outputs
            )
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            decoder_input_order = torch.argsort(decoder_output,dim=1,descending=True)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            all_scores_array = torch.cat((all_scores_array,decoder_output),dim = 0)
            all_words_order = torch.cat((all_words_order,decoder_input_order), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        sequences = self.beam_search(all_scores_array,3)
        return sequences
    def beam_search(self,array,k):
        array = array.tolist()
        sequences = [[list(), 1.0]]
        # walk over each step in sequence
        for row in array:
            all_candidates = list()
            # expand each current candidate
            for i in range(len(sequences)):
                seq, score = sequences[i]
                for j in range(len(row)):
                    candidate = [seq + [j], score * -np.log(row[j] + 1e-8)]
                    all_candidates.append(candidate)
            # order all candidates by score
            ordered = sorted(all_candidates, key=lambda tup:tup[1])
            # select k best
            sequences = ordered[:k]
        return sequences


In [19]:
def evaluate(searcher, voc, sentence, emotions,device,max_length):
    emotions = int(emotions)
    emotions = torch.LongTensor([emotions])
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    emotions = emotions.to(device)
    # indexes -> words 
    sequences = searcher(input_batch, emotions, lengths, max_length)
    decoded_words = beam_decode(sequences, voc)
    return decoded_words
def beam_decode(sequences, voc):
    for each in sequences:
        score = each[-1]
        for idxs in each:
            return [[voc.index2word[idx] for idx in idxs[:-1]],score]

In [8]:
# Set dropout layers to eval mode
USE_CUDA = False#torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")
encoder1.eval()
decoder1.eval()
encoder2.eval()
decoder2.eval()
# Initialize search module
searcher1 = BeamSearchDecoder(encoder1,decoder1,voc1.num_words,device)
searcher2 = BeamSearchDecoder(encoder2,decoder2,voc2.num_words,device)

In [9]:
def get_response(num_emotions,sentence,searcher,voc):
    sentence = sentence.lower()
    responses = []
    for i in range(num_emotions):
        responses.append(evaluate(searcher,voc,sentence,i,device,voc.max_length))
    responses.sort(key = lambda x:x[1],reverse = True)
    output_words =[]
    for each in responses[0][0]:
        if each == 'EOS':
            return ' '.join(output_words)
        elif each == 'PAD':
            continue
        else:
            output_words.append(each)
    return ' '.join(output_words)

In [2]:
with open('jupyter/processed_train_large.pickle','rb') as f:
    pairs = pickle.load(f)
    pairs_emotion = pickle.load(f)
    #voc = pickle.load(f)

In [3]:
test_pairs = pairs[-1000:]
test_pairs_emotion = pairs_emotion[-1000:]

In [7]:
original = []
for sentence,emotions in zip(test_pairs,test_pairs_emotion):
    original.append((sentence[0],emotions[0],sentence[1],emotions[1]))

In [8]:
pd.DataFrame(original,columns=['post','post_emotion','response','response_emotion'])

Unnamed: 0,post,post_emotion,response,response_emotion
0,how s it coming ?,4,you don t think i m insane ?,4
1,ah i can do it perfectly .,0,how much does it cost ?,0
2,any other bags dr . johnson ?,4,just that . careful it s heavy .,4
3,you think it looks nice ?,1,i honestly think it looks great .,1
4,oh it s the way you talk .,0,is it . . . is it really ?,2
5,i think that is really cool of you .,0,i m glad to hear that .,1
6,how do you like your job here linda ?,1,it s interesting i like being an editor .,0
7,i hope that you had good journey .,0,yes . thank you .,0
8,i m supposed to call in .,0,there s a phone in the car .,0
9,i want to go home .,2,for twenty thousand dollars .,2


In [4]:
test_pairs

[['how s it coming ?', 'you don t think i m insane ?'],
 ['ah i can do it perfectly .', 'how much does it cost ?'],
 ['any other bags dr . johnson ?', 'just that . careful it s heavy .'],
 ['you think it looks nice ?', 'i honestly think it looks great .'],
 ['oh it s the way you talk .', 'is it . . . is it really ?'],
 ['i think that is really cool of you .', 'i m glad to hear that .'],
 ['how do you like your job here linda ?',
  'it s interesting i like being an editor .'],
 ['i hope that you had good journey .', 'yes . thank you .'],
 ['i m supposed to call in .', 'there s a phone in the car .'],
 ['i want to go home .', 'for twenty thousand dollars .'],
 ['how old is he today ?', 'it s his th birthday .'],
 ['i apologize for their tardiness .',
  'i was late for work because of them .'],
 ['son of a bitch !', 'okay maybe ask this guy .'],
 ['what the fuck happened ?', 'i don t know . . .'],
 ['and you know what ?', 'i think she was .'],
 ['i hope i m not intruding ?', 'be my guest 

In [30]:
result = []
for sentence,emotion in zip(test_pairs,test_pairs_emotion):
    post = sentence[0]
    target_emotion = emotion[1]
    try:
        response = evaluate(searcher1,voc1,post,0,device,voc1.max_length)
        response = ' '.join([i for i in response[0] if i not in ['EOS','PAD']])
        result.append((post, response,target_emotion))
    except KeyError as e:
        print(e)
        continue

'bitch'
'fuck'
'intruding'
'drank'
'hung'
'stark'
'mason'
'limb'
'cooking'
'sieu'
'fucker'
'marie'
'widow'
'panes'
'fuck'
'git'
'diz'
'raymond'
'hell'
'dorothy'
'attacked'
'emmett'
'lenny'
'annie'
'worthless'
'kiss'
'gut'
'qualifies'
'steal'
'yo'
'traditional'
'conversation'
'terrance'
'complicated'
'johnny'
'comic'
'storage'
'fucking'
'horses'
'permission'
'cent'
'sandy'
'rough'
'wisdom'
'conner'
'.who'
'closer'
'rub'
'spirit'
'conor'
'jail'
'locks'
'chasing'
'shock'
'peel'
'hearing'
'fall'
'sherman'
'fucking'
'tick'
'deputy'
'goddamn'
'steed'
'sandy'
'ross'
'parry'
'union'
'threaten'
'target'
'snap'
'content'
'em'
'solid'
'clothing'
'bout'
'clicks'
'palm'
'shuttle'
'rid'
'hell'
'nowhere'
'disappear'
'dunno'
'angel'
'vada'
'ichiro'
'impact'
'fergus'
'hell'
'rid'
'hugo'
'naked'
'pepper'
'.deborah'
'explosion'
'magnum'
'yah'
'matthew'
'sweetie'
'dean'
'bontecou'
'leland'
'jam'
'gag'
'newspapers'
'lied'
'toe'
'armed'
'era'
'bout'
'flew'
'jefferson'
'.a'
'damn'
'al'
'closest'
'palm'
'shit

In [35]:
test_pairs

[['how s it coming ?', 'you don t think i m insane ?'],
 ['ah i can do it perfectly .', 'how much does it cost ?'],
 ['any other bags dr . johnson ?', 'just that . careful it s heavy .'],
 ['you think it looks nice ?', 'i honestly think it looks great .'],
 ['oh it s the way you talk .', 'is it . . . is it really ?'],
 ['i think that is really cool of you .', 'i m glad to hear that .'],
 ['how do you like your job here linda ?',
  'it s interesting i like being an editor .'],
 ['i hope that you had good journey .', 'yes . thank you .'],
 ['i m supposed to call in .', 'there s a phone in the car .'],
 ['i want to go home .', 'for twenty thousand dollars .'],
 ['how old is he today ?', 'it s his th birthday .'],
 ['i apologize for their tardiness .',
  'i was late for work because of them .'],
 ['son of a bitch !', 'okay maybe ask this guy .'],
 ['what the fuck happened ?', 'i don t know . . .'],
 ['and you know what ?', 'i think she was .'],
 ['i hope i m not intruding ?', 'be my guest 

In [36]:
df = pd.DataFrame(result,columns=['post','response','emotion'])

In [38]:
df.to_csv('Seq2Seq_evaluation.csv',index = False)

In [28]:
response

[['PAD', 'it', 'looks', 'like', 'it', 'very', 'well', '.', 'EOS', 'EOS'],
 4.7091967863147312e-19]

In [26]:
sentence = 'how s it coming ?'
evaluate(searcher1,voc1,sentence,0,device,voc1.max_length)

[['PAD', 'it', 'looks', 'like', 'it', 'very', 'well', '.', 'EOS', 'EOS'],
 4.7295348845359053e-19]

In [86]:
def play(num_emotions,sentence,searcher1,searcher2,voc1,voc2):
    print('Human:',sentence)
    while True:
        sentence = get_response(num_emotions,sentence,searcher1,voc1)
        print('Bot1:',sentence)
        sentence = get_response(num_emotions,sentence,searcher2,voc2)
        print('Bot2:',sentence)

In [88]:
start_sentence = 'where is your wife'
play(7,start_sentence,searcher1,searcher2,voc1,voc2)

Human: where is your wife
Bot1: she is in the bedroom .
Bot2: what is her ?
Bot1: she is expecting you don t .
Bot2: well i wish you .
Bot1: you too .
Bot2: how s going to dance .
Bot1: i ll say .
Bot2: i d like any more questions .
Bot1: give them in the line price .
Bot2: the night is .
Bot1: i have to get in the bedroom .
Bot2: have you ever any pay ?
Bot1: no . i have only one mid .
Bot2: how about the pay or one ?
Bot1: cash or cash please .
Bot2: would you like to pay ?
Bot1: i d be very pleased .
Bot2: thank you very much .
Bot1: you re welcome .


KeyboardInterrupt: 