## Data parsing part

In [1]:
import pickle
import codecs
import re
import os
import time
import numpy as np
import config

In [2]:
def preProBuildWordVocab(word_count_threshold=5, all_words_path=config.all_words_path):
    # borrowed this function from NeuralTalk

    if not os.path.exists(all_words_path):
        parse_all_words(all_words_path)

    corpus = open(all_words_path, 'r').read().split('\n')[:-1]
    captions = np.asarray(corpus, dtype=np.object)

    captions = map(lambda x: x.replace('.', ''), captions)
    captions = map(lambda x: x.replace(',', ''), captions)
    captions = map(lambda x: x.replace('"', ''), captions)
    captions = map(lambda x: x.replace('\n', ''), captions)
    captions = map(lambda x: x.replace('?', ''), captions)
    captions = map(lambda x: x.replace('!', ''), captions)
    captions = map(lambda x: x.replace('\\', ''), captions)
    captions = map(lambda x: x.replace('/', ''), captions)

    print('preprocessing word counts and creating vocab based on word count threshold %d' % (word_count_threshold))
    word_counts = {}
    nsents = 0
    for sent in captions:
        nsents += 1
        for w in sent.lower().split(' '):
           word_counts[w] = word_counts.get(w, 0) + 1
    vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
    print('filtered words from %d to %d' % (len(word_counts), len(vocab)))

    ixtoword = {}
    ixtoword[0] = '<pad>'
    ixtoword[1] = '<bos>'
    ixtoword[2] = '<eos>'
    ixtoword[3] = '<unk>'

    wordtoix = {}
    wordtoix['<pad>'] = 0
    wordtoix['<bos>'] = 1
    wordtoix['<eos>'] = 2
    wordtoix['<unk>'] = 3

    for idx, w in enumerate(vocab):
        wordtoix[w] = idx+4
        ixtoword[idx+4] = w

    word_counts['<pad>'] = nsents
    word_counts['<bos>'] = nsents
    word_counts['<eos>'] = nsents
    word_counts['<unk>'] = nsents

    bias_init_vector = np.array([1.0 * word_counts[ixtoword[i]] for i in ixtoword])
    bias_init_vector /= np.sum(bias_init_vector) # normalize to frequencies
    bias_init_vector = np.log(bias_init_vector)
    bias_init_vector -= np.max(bias_init_vector) # shift to nice numeric range

    return wordtoix, ixtoword, bias_init_vector

In [3]:
def parse_all_words(all_words_path):
    raw_movie_lines = open('data/movie_lines.txt', 'r', encoding='utf-8', errors='ignore').read().split('\n')[:-1]

    with codecs.open(all_words_path, "w", encoding='utf-8', errors='ignore') as f:
        for line in raw_movie_lines:
            line = line.split(' +++$+++ ')
            utterance = line[-1]
            f.write(utterance + '\n')

In [4]:
""" Extract only the vocabulary part of the data """
def refine(data):
    words = re.findall("[a-zA-Z'-]+", data)
    words = ["".join(word.split("'")) for word in words]
    # words = ["".join(word.split("-")) for word in words]
    data = ' '.join(words)
    return data

Loading the sentences, and convert them into the form of a dict of "id : content".

In [5]:
parse_all_words(config.all_words_path)

In [6]:
raw_movie_lines = open('data/movie_lines.txt', 'r', encoding='utf-8', errors='ignore').read().split('\n')[:-1]

In [7]:
raw_movie_lines

['L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!',
 'L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!',
 'L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.',
 'L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?',
 "L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go.",
 'L924 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ Wow',
 "L872 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Okay -- you're gonna need to learn how to lie.",
 'L871 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ No',
 'L870 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I\'m kidding.  You know how sometimes you just become this "persona"?  And you don\'t know how to quit?',
 'L869 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Like my fear of wearing pastels?',
 'L868 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ The "real you".',
 'L867 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ What good stuff?',
 "L866 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ I figured yo

In [8]:
utterance_dict = {}
with codecs.open('data/tokenized_all_words.txt', "w", encoding='utf-8', errors='ignore') as f:
    for line in raw_movie_lines:
        line = line.split(' +++$+++ ')
        line_ID = line[0]
        utterance = line[-1]
        utterance_dict[line_ID] = utterance
        utterance = " ".join([refine(w) for w in utterance.lower().split()])
        f.write(utterance + '\n')

In [9]:
utterance_dict

{'L1045': 'They do not!',
 'L1044': 'They do to!',
 'L985': 'I hope so.',
 'L984': 'She okay?',
 'L925': "Let's go.",
 'L924': 'Wow',
 'L872': "Okay -- you're gonna need to learn how to lie.",
 'L871': 'No',
 'L870': 'I\'m kidding.  You know how sometimes you just become this "persona"?  And you don\'t know how to quit?',
 'L869': 'Like my fear of wearing pastels?',
 'L868': 'The "real you".',
 'L867': 'What good stuff?',
 'L866': "I figured you'd get to the good stuff eventually.",
 'L865': 'Thank God!  If I had to hear one more story about your coiffure...',
 'L864': "Me.  This endless ...blonde babble. I'm like, boring myself.",
 'L863': 'What crap?',
 'L862': 'do you listen to this crap?',
 'L861': 'No...',
 'L860': 'Then Guillermo says, "If you go any lighter, you\'re gonna look like an extra on 90210."',
 'L699': 'You always been this selfish?',
 'L698': 'But',
 'L697': "Then that's all you had to say.",
 'L696': 'Well, no...',
 'L695': "You never wanted to go out with 'me, did y

In [10]:
pickle.dump(utterance_dict, open('data/utterance_dict', 'wb'), True)

## Feature extraction

In [11]:
from gensim.models import word2vec, KeyedVectors

### Train a word2vec model using this corpus

In [12]:
WORD_VECTOR_SIZE = 300

raw_movie_conversations = open('data/movie_conversations.txt', 'r').read().split('\n')[:-1]

utterance_dict = pickle.load(open('data/utterance_dict', 'rb'))

ts = time.time()
corpus = word2vec.Text8Corpus("data/tokenized_all_words.txt")
#word_vector = word2vec.Word2Vec(corpus, size=WORD_VECTOR_SIZE)
#word_vector.wv.save_word2vec_format(u"model/word_vector.bin", binary=True)
word_vector = KeyedVectors.load_word2vec_format('model/word_vector.bin', binary=True)
print("Time Elapsed: {} secs\n".format(time.time() - ts))

Time Elapsed: 0.4996676445007324 secs



In [13]:
""" Extract only the vocabulary part of the data """
def refine(data):
    words = re.findall("[a-zA-Z'-]+", data)
    words = ["".join(word.split("'")) for word in words]
    # words = ["".join(word.split("-")) for word in words]
    data = ' '.join(words)
    return data

In [15]:
ts = time.time()
conversations = []
print('len conversation', len(raw_movie_conversations))
con_count = 0
traindata_count = 0
for conversation in raw_movie_conversations:
    conversation = conversation.split(' +++$+++ ')[-1]
    conversation = conversation.replace('[', '')
    conversation = conversation.replace(']', '')
    conversation = conversation.replace('\'', '')
    conversation = conversation.split(', ')
    assert len(conversation) > 1
    for i in range(len(conversation)-1):
        con_a = utterance_dict[conversation[i+1]].strip()
        con_b = utterance_dict[conversation[i]].strip()
        if len(con_a.split()) <= 22 and len(con_b.split()) <= 22:
            con_a = [refine(w) for w in con_a.lower().split()]
            # con_a = [word_vector[w] if w in word_vector else np.zeros(WORD_VECTOR_SIZE) for w in con_a]
            conversations.append((con_a, con_b))
            traindata_count += 1
    con_count += 1
    if con_count % 1000 == 0:
        print('con_count {}, traindata_count {}'.format(con_count, traindata_count))
pickle.dump(conversations, open('data/reversed_conversations_lenmax22', 'wb'), True)
print("Time Elapsed: {} secs\n".format(time.time() - ts))

len conversation 83097
con_count 1000, traindata_count 2049
con_count 2000, traindata_count 3996
con_count 3000, traindata_count 6425
con_count 4000, traindata_count 8353
con_count 5000, traindata_count 10654
con_count 6000, traindata_count 12707
con_count 7000, traindata_count 14666
con_count 8000, traindata_count 16673
con_count 9000, traindata_count 18578
con_count 10000, traindata_count 20317
con_count 11000, traindata_count 22826
con_count 12000, traindata_count 25611
con_count 13000, traindata_count 27879
con_count 14000, traindata_count 30057
con_count 15000, traindata_count 32631
con_count 16000, traindata_count 34686
con_count 17000, traindata_count 36849
con_count 18000, traindata_count 38890
con_count 19000, traindata_count 41103
con_count 20000, traindata_count 43175
con_count 21000, traindata_count 45123
con_count 22000, traindata_count 47305
con_count 23000, traindata_count 48998
con_count 24000, traindata_count 51571
con_count 25000, traindata_count 53672
con_count 26000

### Some statistics

In [16]:
# some statistics of training data
max_a = -1
max_b = -1
max_a_ind = -1
max_b_ind = -1
sum_a = 0.
sum_b = 0.
len_a_list = []
len_b_list = []
for i in range(len(conversations)):
    len_a = len(conversations[i][0])
    len_b = len(conversations[i][1].split())
    if len_a > max_a:
        max_a = len_a
        max_a_ind = i
    if len_b > max_b:
        max_b = len_b
        max_b_ind = i
    sum_a += len_a
    sum_b += len_b
    len_a_list.append(len_a)
    len_b_list.append(len_b)
np.save("data/reversed_lenmax22_a_list", np.array(len_a_list))
np.save("data/reversed_lenmax22_b_list", np.array(len_b_list))
print("max_a_ind {}, max_b_ind {}".format(max_a_ind, max_b_ind))
print("max_a {}, max_b {}, avg_a {}, avg_b {}".format(max_a, max_b, sum_a/len(conversations), sum_b/len(conversations)))

max_a_ind 10, max_b_ind 0
max_a 22, max_b 22, avg_a 7.424128170416531, avg_b 7.383465165974871


The length of a sentence is limited to 22, as is shown above.

In [None]:
ts = time.time()
conversations = []
# former_sents = []
print('len conversation', len(raw_movie_conversations))
con_count = 0
traindata_count = 0
for conversation in raw_movie_conversations:
    conversation = conversation.split(' +++$+++ ')[-1]
    conversation = conversation.replace('[', '')
    conversation = conversation.replace(']', '')
    conversation = conversation.replace('\'', '')
    conversation = conversation.split(', ')
    assert len(conversation) > 1
    con_a_1 = ''
    for i in range(len(conversation)-1):
        con_a_2 = utterance_dict[conversation[i]]
        con_b = utterance_dict[conversation[i+1]]
        if len(con_a_1.split()) <= 22 and len(con_a_2.split()) <= 22 and len(con_b.split()) <= 22:
            con_a = "{} {}".format(con_a_1, con_a_2)
            con_a = [refine(w) for w in con_a.lower().split()]
            # con_a = [word_vector[w] if w in word_vector else np.zeros(WORD_VECTOR_SIZE) for w in con_a]
            conversations.append((con_a, con_b, con_a_2))
            # former_sents.append(con_a_2)
            traindata_count += 1
        con_a_1 = con_a_2
    con_count += 1
    if con_count % 1000 == 0:
        print('con_count {}, traindata_count {}'.format(con_count, traindata_count))
pickle.dump(conversations, open('data/conversations_lenmax22_formersents2_with_former', 'wb'), True)
# pickle.dump(former_sents, open('data/conversations_lenmax22_former_sents', 'wb'), True)
print("Time Elapsed: {} secs\n".format(time.time() - ts))

In [None]:
ts = time.time()
conversations = []
# former_sents = []
print('len conversation', len(raw_movie_conversations))
con_count = 0
traindata_count = 0
for conversation in raw_movie_conversations:
    conversation = conversation.split(' +++$+++ ')[-1]
    conversation = conversation.replace('[', '')
    conversation = conversation.replace(']', '')
    conversation = conversation.replace('\'', '')
    conversation = conversation.split(', ')
    assert len(conversation) > 1
    con_a_1 = ''
    for i in range(len(conversation)-1):
        con_a_2 = utterance_dict[conversation[i]]
        con_b = utterance_dict[conversation[i+1]]
        if len(con_a_1.split()) <= 22 and len(con_a_2.split()) <= 22 and len(con_b.split()) <= 22:
            con_a = "{} {}".format(con_a_1, con_a_2)
            con_a = [refine(w) for w in con_a.lower().split()]
            # con_a = [word_vector[w] if w in word_vector else np.zeros(WORD_VECTOR_SIZE) for w in con_a]
            conversations.append((con_a, con_b))
            # former_sents.append(con_a_2)
            traindata_count += 1
        con_a_1 = con_a_2
    con_count += 1
    if con_count % 1000 == 0:
        print('con_count {}, traindata_count {}'.format(con_count, traindata_count))
pickle.dump(conversations, open('data/conversations_lenmax22_former_sents2', 'wb'), True)
print("Time Elapsed: {} secs\n".format(time.time() - ts))

In [None]:
ts = time.time()
conversations = []
print('len conversation', len(raw_movie_conversations))
con_count = 0
traindata_count = 0
for conversation in raw_movie_conversations:
    conversation = conversation.split(' +++$+++ ')[-1]
    conversation = conversation.replace('[', '')
    conversation = conversation.replace(']', '')
    conversation = conversation.replace('\'', '')
    conversation = conversation.split(', ')
    assert len(conversation) > 1
    for i in range(len(conversation)-1):
        con_a = utterance_dict[conversation[i]]
        con_b = utterance_dict[conversation[i+1]]
        if len(con_a.split()) <= 22 and len(con_b.split()) <= 22:
            con_a = [refine(w) for w in con_a.lower().split()]
            # con_a = [word_vector[w] if w in word_vector else np.zeros(WORD_VECTOR_SIZE) for w in con_a]
            conversations.append((con_a, con_b))
            traindata_count += 1
    con_count += 1
    if con_count % 1000 == 0:
        print('con_count {}, traindata_count {}'.format(con_count, traindata_count))
pickle.dump(conversations, open('data/conversations_lenmax22', 'wb'), True)
print("Time Elapsed: {} secs\n".format(time.time() - ts))

Now we've created files that in forms of "1 input 1 output" and "2 input 1 output".

Before going to the next steps, we define some data reading methods.

In [17]:
import random

In [18]:
class Data_Reader:
    def __init__(self, cur_train_index=0, load_list=False):
        self.training_data = pickle.load(open(config.training_data_path, 'rb'))
        self.data_size = len(self.training_data)
        if load_list:
            self.shuffle_list = pickle.load(open(config.index_list_file, 'rb'))
        else:    
            self.shuffle_list = self.shuffle_index()
        self.train_index = cur_train_index

    def get_batch_num(self, batch_size):
        return self.data_size // batch_size

    def shuffle_index(self):
        shuffle_index_list = random.sample(range(self.data_size), self.data_size)
        pickle.dump(shuffle_index_list, open(config.index_list_file, 'wb'), True)
        return shuffle_index_list

    def generate_batch_index(self, batch_size):
        if self.train_index + batch_size > self.data_size:
            batch_index = self.shuffle_list[self.train_index:self.data_size]
            self.shuffle_list = self.shuffle_index()
            remain_size = batch_size - (self.data_size - self.train_index)
            batch_index += self.shuffle_list[:remain_size]
            self.train_index = remain_size
        else:
            batch_index = self.shuffle_list[self.train_index:self.train_index+batch_size]
            self.train_index += batch_size

        return batch_index

    def generate_training_batch(self, batch_size):
        batch_index = self.generate_batch_index(batch_size)
        batch_X = [self.training_data[i][0] for i in batch_index]   # batch_size of conv_a
        batch_Y = [self.training_data[i][1] for i in batch_index]   # batch_size of conv_b

        return batch_X, batch_Y

    def generate_training_batch_with_former(self, batch_size):
        batch_index = self.generate_batch_index(batch_size)
        batch_X = [self.training_data[i][0] for i in batch_index]   # batch_size of conv_a
        batch_Y = [self.training_data[i][1] for i in batch_index]   # batch_size of conv_b
        former = [self.training_data[i][2] for i in batch_index]    # batch_size of former utterance

        return batch_X, batch_Y, former

    def generate_testing_batch(self, batch_size):
        batch_index = self.generate_batch_index(batch_size)
        batch_X = [self.training_data[i][0] for i in batch_index]   # batch_size of conv_a

        return batch_X

## Train a seq2seq model
This step is needed before training under policy gradient

First we define the network of the chatbot.

In [108]:
# coding=utf-8

import tensorflow as tf
import numpy as np

class Seq2Seq_chatbot():
    def __init__(self, dim_wordvec, n_words, dim_hidden, batch_size, n_encode_lstm_step, n_decode_lstm_step, bias_init_vector=None, lr=0.0001):
        print("init model....")
        self.dim_wordvec = dim_wordvec
        self.dim_hidden = dim_hidden
        self.batch_size = batch_size
        self.n_words = n_words
        self.n_encode_lstm_step = n_encode_lstm_step
        self.n_decode_lstm_step = n_decode_lstm_step
        self.lr = lr

        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_hidden], -0.1, 0.1), name='Wemb')

        #self.lstm1 = tf.contrib.rnn.BasicLSTMCell(dim_hidden, forget_bias=0.0, state_is_tuple=False)
        #self.lstm2 = tf.contrib.rnn.BasicLSTMCell(dim_hidden, forget_bias=0.0, state_is_tuple=False)

        self.lstm1 = tf.contrib.rnn.BasicLSTMCell(dim_hidden, state_is_tuple=False)
        self.lstm2 = tf.contrib.rnn.BasicLSTMCell(dim_hidden, state_is_tuple=False)

        self.encode_vector_W = tf.Variable(tf.random_uniform([dim_wordvec, dim_hidden], -0.1, 0.1), name='encode_vector_W')
        self.encode_vector_b = tf.Variable(tf.zeros([dim_hidden]), name='encode_vector_b')

        self.embed_word_W = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')
        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
        else:
            self.embed_word_b = tf.Variable(tf.zeros([n_words]), name='embed_word_b')

    def build_model(self):
        print("building model....")
        word_vectors = tf.placeholder(tf.float32, [self.batch_size, self.n_encode_lstm_step, self.dim_wordvec])

        caption = tf.placeholder(tf.int32, [self.batch_size, self.n_decode_lstm_step+1])
        caption_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_decode_lstm_step+1])

        word_vectors_flat = tf.reshape(word_vectors, [-1, self.dim_wordvec])
        wordvec_emb = tf.nn.xw_plus_b(word_vectors_flat, self.encode_vector_W, self.encode_vector_b ) # (batch_size*n_encode_lstm_step, dim_hidden)
        wordvec_emb = tf.reshape(wordvec_emb, [self.batch_size, self.n_encode_lstm_step, self.dim_hidden])

        state1 = tf.zeros([self.batch_size, self.lstm1.state_size])
        state2 = tf.zeros([self.batch_size, self.lstm2.state_size])
        padding = tf.zeros([self.batch_size, self.dim_hidden])

        probs = []
        entropies = []
        loss = 0.0

        ##############################  Encoding Stage ##################################
        print("encoding stage....")
        for i in range(0, self.n_encode_lstm_step):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(wordvec_emb[:, i, :], state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([padding, output1], 1), state2)

        ############################# Decoding Stage ######################################
        print("decoding stage....")
        for i in range(0, self.n_decode_lstm_step):
            with tf.device("/cpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:, i])

            tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(padding, state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([current_embed, output1], 1), state2)

            labels = tf.expand_dims(caption[:, i+1], 1)
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
            concated = tf.concat([indices, labels], 1)
            onehot_labels = tf.sparse_to_dense(concated, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)

            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b)
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit_words, labels=onehot_labels)
            cross_entropy = cross_entropy * caption_mask[:, i]
            entropies.append(cross_entropy)
            probs.append(logit_words)

            current_loss = tf.reduce_sum(cross_entropy)/self.batch_size
            loss = loss + current_loss
        
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):#tf.AUTO_REUSE):
            train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)

        inter_value = {
            'probs': probs,
            'entropies': entropies
        }

        return train_op, loss, word_vectors, caption, caption_mask, inter_value

    def build_generator(self):
        print("building generator....")
        word_vectors = tf.placeholder(tf.float32, [1, self.n_encode_lstm_step, self.dim_wordvec])

        word_vectors_flat = tf.reshape(word_vectors, [-1, self.dim_wordvec])
        wordvec_emb = tf.nn.xw_plus_b(word_vectors_flat, self.encode_vector_W, self.encode_vector_b)
        wordvec_emb = tf.reshape(wordvec_emb, [1, self.n_encode_lstm_step, self.dim_hidden])

        state1 = tf.zeros([1, self.lstm1.state_size])
        state2 = tf.zeros([1, self.lstm2.state_size])
        padding = tf.zeros([1, self.dim_hidden])

        generated_words = []

        probs = []
        embeds = []

        for i in range(0, self.n_encode_lstm_step):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(wordvec_emb[:, i, :], state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([padding, output1], 1), state2)

        for i in range(0, self.n_decode_lstm_step):
            tf.get_variable_scope().reuse_variables()

            if i == 0:
                with tf.device('/cpu:0'):
                    current_embed = tf.nn.embedding_lookup(self.Wemb, tf.ones([1], dtype=tf.int64))

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(padding, state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([current_embed, output1], 1), state2)

            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b)
            max_prob_index = tf.argmax(logit_words, 1)[0]
            generated_words.append(max_prob_index)
            probs.append(logit_words)

            with tf.device("/cpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb, max_prob_index)
                current_embed = tf.expand_dims(current_embed, 0)

            embeds.append(current_embed)

        return word_vectors, generated_words, probs, embeds

Now start training. We need several epochs to get a baseline model. Here the number is 30.

In [20]:
### Global Parameters ###
checkpoint = config.CHECKPOINT
model_path = config.train_model_path
model_name = config.train_model_name
start_epoch = config.start_epoch

word_count_threshold = config.WC_threshold

### Train Parameters ###
dim_wordvec = 300
dim_hidden = 1000

n_encode_lstm_step = 22 + 22
n_decode_lstm_step = 22

epochs = 30 # 500
batch_size = 100
learning_rate = 0.0001

In [21]:
def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.):
    if not hasattr(sequences, '__len__'):
        raise ValueError('`sequences` must be iterable.')
    lengths = []
    for x in sequences:
        if not hasattr(x, '__len__'):
            raise ValueError('`sequences` must be a list of iterables. '
                             'Found non-iterable: ' + str(x))
        lengths.append(len(x))

    num_samples = len(sequences)
    if maxlen is None:
        maxlen = np.max(lengths)

    # take the sample shape from the first non empty sequence
    # checking for consistency in the main loop below.
    sample_shape = tuple()
    for s in sequences:
        if len(s) > 0:
            sample_shape = np.asarray(s).shape[1:]
            break

    x = (np.ones((num_samples, maxlen) + sample_shape) * value).astype(dtype)
    for idx, s in enumerate(sequences):
        if not len(s):
            continue  # empty list/array was found
        if truncating == 'pre':
            trunc = s[-maxlen:]
        elif truncating == 'post':
            trunc = s[:maxlen]
        else:
            raise ValueError('Truncating type "%s" not understood' % truncating)

        # check `trunc` has expected shape
        trunc = np.asarray(trunc, dtype=dtype)
        if trunc.shape[1:] != sample_shape:
            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
                             (trunc.shape[1:], idx, sample_shape))

        if padding == 'post':
            x[idx, :len(trunc)] = trunc
        elif padding == 'pre':
            x[idx, -len(trunc):] = trunc
        else:
            raise ValueError('Padding type "%s" not understood' % padding)
    return x

In [22]:
def train():
    wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(word_count_threshold=word_count_threshold)
    word_vector = KeyedVectors.load_word2vec_format('model/word_vector.bin', binary=True)

    model = Seq2Seq_chatbot(
            dim_wordvec=dim_wordvec,
            n_words=len(wordtoix),
            dim_hidden=dim_hidden,
            batch_size=batch_size,
            n_encode_lstm_step=n_encode_lstm_step,
            n_decode_lstm_step=n_decode_lstm_step,
            bias_init_vector=bias_init_vector,
            lr=learning_rate)

    train_op, tf_loss, word_vectors, tf_caption, tf_caption_mask, inter_value = model.build_model()

    saver = tf.train.Saver(max_to_keep=100)

    sess = tf.InteractiveSession()
    
    checkpoint = False
    
    if checkpoint:
        print("Use Model {}.".format(model_name))
        saver.restore(sess, os.path.join(model_path, model_name))
        print("Model {} restored.".format(model_name))
    else:
        print("Restart training...")
        tf.global_variables_initializer().run()

    dr = Data_Reader()

    for epoch in range(start_epoch, epochs):
        print("epoch {}/{}".format(epoch, epochs))
        n_batch = dr.get_batch_num(batch_size)
        for batch in range(n_batch):
            print("batch {}/{}".format(batch, n_batch))
            start_time = time.time()

            batch_X, batch_Y = dr.generate_training_batch(batch_size)

            for i in range(len(batch_X)):
                batch_X[i] = [word_vector[w] if w in word_vector else np.zeros(dim_wordvec) for w in batch_X[i]]
                # batch_X[i].insert(0, np.random.normal(size=(dim_wordvec,))) # insert random normal at the first step
                if len(batch_X[i]) > n_encode_lstm_step:
                    batch_X[i] = batch_X[i][:n_encode_lstm_step]
                else:
                    for _ in range(len(batch_X[i]), n_encode_lstm_step):
                        batch_X[i].append(np.zeros(dim_wordvec))

            current_feats = np.array(batch_X)

            current_captions = batch_Y
            current_captions = map(lambda x: '<bos> ' + x, current_captions)
            current_captions = map(lambda x: x.replace('.', ''), current_captions)
            current_captions = map(lambda x: x.replace(',', ''), current_captions)
            current_captions = map(lambda x: x.replace('"', ''), current_captions)
            current_captions = map(lambda x: x.replace('\n', ''), current_captions)
            current_captions = map(lambda x: x.replace('?', ''), current_captions)
            current_captions = map(lambda x: x.replace('!', ''), current_captions)
            current_captions = map(lambda x: x.replace('\\', ''), current_captions)
            current_captions = map(lambda x: x.replace('/', ''), current_captions)

            for idx, each_cap in enumerate(current_captions):
                word = each_cap.lower().split(' ')
                if len(word) < n_decode_lstm_step:
                    current_captions[idx] = current_captions[idx] + ' <eos>'
                else:
                    new_word = ''
                    for i in range(n_decode_lstm_step-1):
                        new_word = new_word + word[i] + ' '
                    print(new_word)
                    print(idx)
                    print(current_captions[idx])
                    current_captions[idx] = new_word + '<eos>'

            current_caption_ind = []
            for cap in current_captions:
                current_word_ind = []
                for word in cap.lower().split(' '):
                    if word in wordtoix:
                        current_word_ind.append(wordtoix[word])
                    else:
                        current_word_ind.append(wordtoix['<unk>'])
                current_caption_ind.append(current_word_ind)

            current_caption_matrix = pad_sequences(current_caption_ind, padding='post', maxlen=n_decode_lstm_step)
            current_caption_matrix = np.hstack([current_caption_matrix, np.zeros([len(current_caption_matrix), 1])]).astype(int)
            current_caption_masks = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
            nonzeros = np.array(map(lambda x: (x != 0).sum() + 1, current_caption_matrix))

            for ind, row in enumerate(current_caption_masks):
                row[:nonzeros[ind]] = 1

            if batch % 100 == 0:
                _, loss_val = sess.run(
                        [train_op, tf_loss],
                        feed_dict={
                            word_vectors: current_feats,
                            tf_caption: current_caption_matrix,
                            tf_caption_mask: current_caption_masks
                        })
                print("Epoch: {}, batch: {}, loss: {}, Elapsed time: {}".format(epoch, batch, loss_val, time.time() - start_time))
            else:
                _ = sess.run(train_op,
                             feed_dict={
                                word_vectors: current_feats,
                                tf_caption: current_caption_matrix,
                                tf_caption_mask: current_caption_masks
                            })


        print("Epoch ", epoch, " is done. Saving the model ...")
        saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)

In [None]:
train()

Use these code below to test the seq2seq model.

In [124]:
import sys

#=====================================================
# Global Parameters
#=====================================================
default_model_path = './model/Seq2Seq/model-77-2'
testing_data_path = 'sample_input.txt' #if len(sys.argv) <= 2 else sys.argv[2]
output_path = 'sample_output_S2S.txt' #if len(sys.argv) <= 3 else sys.argv[3]

word_count_threshold = config.WC_threshold

In [110]:
#=====================================================
# Train Parameters
#=====================================================
dim_wordvec = 300
dim_hidden = 1000

n_encode_lstm_step = 22 + 1 # one random normal as the first timestep
n_decode_lstm_step = 22

batch_size = 1

In [111]:
""" Extract only the vocabulary part of the data """
def refine(data):
    words = re.findall("[a-zA-Z'-]+", data)
    words = ["".join(word.split("'")) for word in words]
    # words = ["".join(word.split("-")) for word in words]
    data = ' '.join(words)
    return data

In [125]:
testing_data = open(testing_data_path, 'r').read().split('\n')

word_vector = KeyedVectors.load_word2vec_format('model/word_vector.bin', binary=True)


In [113]:
_, ixtoword, bias_init_vector = preProBuildWordVocab(word_count_threshold=word_count_threshold)

preprocessing word counts and creating vocab based on word count threshold 20
filtered words from 76029 to 6847


In [140]:
model = Seq2Seq_chatbot(
        dim_wordvec=dim_wordvec,
        n_words=len(ixtoword),
        dim_hidden=dim_hidden,
        batch_size=batch_size,
        n_encode_lstm_step=n_encode_lstm_step,
        n_decode_lstm_step=n_decode_lstm_step,
        bias_init_vector=bias_init_vector)

word_vectors, caption_tf, probs, _ = model.build_generator()

sess = tf.get_default_session() or tf.InteractiveSession()

sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

init model....
building generator....


In [142]:
saver.restore(sess, default_model_path)


Use default model

INFO:tensorflow:Restoring parameters from ./model/Seq2Seq/model-77-2


In [143]:
with open(output_path, 'w') as out:
    generated_sentences = []
    bleu_score_avg = [0., 0.]
    for idx, question in enumerate(testing_data):
        print('question =>', question)

        question = [refine(w) for w in question.lower().split()]
        question = [word_vector[w] if w in word_vector else np.zeros(dim_wordvec) for w in question]
        question.insert(0, np.random.normal(size=(dim_wordvec,))) # insert random normal at the first step

        if len(question) > n_encode_lstm_step:
            question = question[:n_encode_lstm_step]
        else:
            for _ in range(len(question), n_encode_lstm_step):
                question.append(np.zeros(dim_wordvec))

        question = np.array([question]) # 1x22x300

        generated_word_index, prob_logit = sess.run([caption_tf, probs], feed_dict={word_vectors: question})

        # remove <unk> to second high prob. word
        for i in range(len(generated_word_index)):
            if generated_word_index[i] == 3:
                sort_prob_logit = sorted(prob_logit[i][0])
                maxindex = np.where(prob_logit[i][0] == sort_prob_logit[-1])[0][0]
                secmaxindex = np.where(prob_logit[i][0] == sort_prob_logit[-2])[0][0]
                generated_word_index[i] = secmaxindex

        generated_words = []
        for ind in generated_word_index:
            generated_words.append(ixtoword[ind])

        # generate sentence
        punctuation = np.argmax(np.array(generated_words) == '<eos>') + 1
        generated_words = generated_words[:punctuation]
        generated_sentence = ' '.join(generated_words)

        # modify the output sentence 
        generated_sentence = generated_sentence.replace('<bos> ', '')
        generated_sentence = generated_sentence.replace(' <eos>', '')
        generated_sentence = generated_sentence.replace('--', '')
        generated_sentence = generated_sentence.split('  ')
        for i in range(len(generated_sentence)):
            generated_sentence[i] = generated_sentence[i].strip()
            if len(generated_sentence[i]) > 1:
                generated_sentence[i] = generated_sentence[i][0].upper() + generated_sentence[i][1:] + '.'
            else:
                generated_sentence[i] = generated_sentence[i].upper()
        generated_sentence = ' '.join(generated_sentence)
        generated_sentence = generated_sentence.replace(' i ', ' I ')
        generated_sentence = generated_sentence.replace("i'm", "I'm")
        generated_sentence = generated_sentence.replace("i'd", "I'd")
        generated_sentence = generated_sentence.replace("i'll", "I'll")
        generated_sentence = generated_sentence.replace("i'v", "I'v")
        generated_sentence = generated_sentence.replace(" - ", "")

        print('generated_sentence =>', generated_sentence)
        out.write(generated_sentence + '\n')


question => Have you heard about 'machine learning and having it deep and structured'?
generated_sentence => Misunderstood respect infection.
question => How are you?
generated_sentence => Phoenix.
question => What's your name?
generated_sentence => Sneak.
question => Hello
generated_sentence => Shirts.
question => Thank God!  If I had to hear one more story about your coiffure
generated_sentence => Ambition buddy forward they infection jurisdiction bored invited 22.
question => You never wanted to go out with me, did you?
generated_sentence => Cell his neo infection mere.
question => I guess I thought I was protecting you.
generated_sentence => Assuming cell order itself accused they cell order itself andrew gamble accused sits.
question => Forget his reputation.  Do you think we've got a plan or not?
generated_sentence => Benny 'round sank treasure they cell benny songs they toast did 22 baron camp.
question => You didn't have a choice?
generated_sentence => Pills cell guards curiosi

### Now we create a RL model

First we need to define the graph of RL model.

In [144]:
class PolicyGradient_chatbot():
    def __init__(self, dim_wordvec, n_words, dim_hidden, batch_size, n_encode_lstm_step, n_decode_lstm_step, bias_init_vector=None, lr=0.0001):
        self.dim_wordvec = dim_wordvec
        self.dim_hidden = dim_hidden
        self.batch_size = batch_size
        self.n_words = n_words
        self.n_encode_lstm_step = n_encode_lstm_step
        self.n_decode_lstm_step = n_decode_lstm_step
        self.lr = lr

        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_hidden], -0.1, 0.1), name='Wemb')

        self.lstm1 = tf.contrib.rnn.BasicLSTMCell(dim_hidden, state_is_tuple=False)
        self.lstm2 = tf.contrib.rnn.BasicLSTMCell(dim_hidden, state_is_tuple=False)

        self.encode_vector_W = tf.Variable(tf.random_uniform([dim_wordvec, dim_hidden], -0.1, 0.1), name='encode_vector_W')
        self.encode_vector_b = tf.Variable(tf.zeros([dim_hidden]), name='encode_vector_b')

        self.embed_word_W = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')
        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
        else:
            self.embed_word_b = tf.Variable(tf.zeros([n_words]), name='embed_word_b')

    def build_model(self):
        word_vectors = tf.placeholder(tf.float32, [self.batch_size, self.n_encode_lstm_step, self.dim_wordvec])

        caption = tf.placeholder(tf.int32, [self.batch_size, self.n_decode_lstm_step+1])
        caption_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_decode_lstm_step+1])

        word_vectors_flat = tf.reshape(word_vectors, [-1, self.dim_wordvec])
        wordvec_emb = tf.nn.xw_plus_b(word_vectors_flat, self.encode_vector_W, self.encode_vector_b ) # (batch_size*n_encode_lstm_step, dim_hidden)
        wordvec_emb = tf.reshape(wordvec_emb, [self.batch_size, self.n_encode_lstm_step, self.dim_hidden])

        reward = tf.placeholder(tf.float32, [self.batch_size, self.n_decode_lstm_step])

        state1 = tf.zeros([self.batch_size, self.lstm1.state_size])
        state2 = tf.zeros([self.batch_size, self.lstm2.state_size])
        padding = tf.zeros([self.batch_size, self.dim_hidden])

        entropies = []
        loss = 0.
        pg_loss = 0.  # policy gradient loss

        ##############################  Encoding Stage ##################################
        for i in range(0, self.n_encode_lstm_step):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(wordvec_emb[:, i, :], state1)
                # states.append(state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([padding, output1], 1), state2)

        ############################# Decoding Stage ######################################
        for i in range(0, self.n_decode_lstm_step):
            with tf.device("/cpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:, i])

            tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(padding, state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([current_embed, output1], 1), state2)

            labels = tf.expand_dims(caption[:, i+1], 1)
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
            concated = tf.concat([indices, labels], 1)
            onehot_labels = tf.sparse_to_dense(concated, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)

            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b)

            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit_words, labels=onehot_labels)
            cross_entropy = cross_entropy * caption_mask[:, i]
            entropies.append(cross_entropy)
            pg_cross_entropy = cross_entropy * reward[:, i]

            pg_current_loss = tf.reduce_sum(pg_cross_entropy) / self.batch_size
            pg_loss = pg_loss + pg_current_loss

        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            train_op = tf.train.AdamOptimizer(self.lr).minimize(pg_loss)

        input_tensors = {
            'word_vectors': word_vectors,
            'caption': caption,
            'caption_mask': caption_mask,
            'reward': reward
        }

        feats = {
            'entropies': entropies
        }

        return train_op, pg_loss, input_tensors, feats

    def build_generator(self):
        word_vectors = tf.placeholder(tf.float32, [self.batch_size, self.n_encode_lstm_step, self.dim_wordvec])

        word_vectors_flat = tf.reshape(word_vectors, [-1, self.dim_wordvec])
        wordvec_emb = tf.nn.xw_plus_b(word_vectors_flat, self.encode_vector_W, self.encode_vector_b)
        wordvec_emb = tf.reshape(wordvec_emb, [self.batch_size, self.n_encode_lstm_step, self.dim_hidden])

        state1 = tf.zeros([self.batch_size, self.lstm1.state_size])
        state2 = tf.zeros([self.batch_size, self.lstm2.state_size])
        padding = tf.zeros([self.batch_size, self.dim_hidden])

        generated_words = []

        probs = []
        embeds = []
        states = []

        for i in range(0, self.n_encode_lstm_step):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(wordvec_emb[:, i, :], state1)
                states.append(state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([padding, output1], 1), state2)

        for i in range(0, self.n_decode_lstm_step):
            tf.get_variable_scope().reuse_variables()

            if i == 0:
                # <bos>
                with tf.device('/cpu:0'):
                    current_embed = tf.nn.embedding_lookup(self.Wemb, tf.ones([self.batch_size], dtype=tf.int64))

            with tf.variable_scope("LSTM1"):
                output1, state1 = self.lstm1(padding, state1)

            with tf.variable_scope("LSTM2"):
                output2, state2 = self.lstm2(tf.concat([current_embed, output1], 1), state2)

            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b)
            max_prob_index = tf.argmax(logit_words, 1)
            generated_words.append(max_prob_index)
            probs.append(logit_words)

            with tf.device("/cpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb, max_prob_index)

            embeds.append(current_embed)

        feats = {
            'probs': probs,
            'embeds': embeds,
            'states': states
        }

        return word_vectors, generated_words, feats

This network is quite similar to the one used in seq2seq model. The difference is that this one is trained to increase reward function.

Now we train this model

In [146]:
import copy

sys.path.append("python")
import re

from scipy import spatial
import math

In [147]:
### Global Parameters ###
checkpoint = config.CHECKPOINT
model_path = config.train_model_path
model_name = config.train_model_name
start_epoch = config.start_epoch
start_batch = config.start_batch

# reversed model
reversed_model_path = config.reversed_model_path
reversed_model_name = config.reversed_model_name

word_count_threshold = config.WC_threshold
r_word_count_threshold = config.reversed_WC_threshold

# dialog simulation turns
max_turns = config.MAX_TURNS

dull_set = ["I don't know what you're talking about.", "I don't know.", "You don't know.", "You know what I mean.", "I know what you mean.", "You know what I'm saying.", "You don't know anything."]

### Train Parameters ###
training_type = config.training_type    # 'normal' for seq2seq training, 'pg' for policy gradient

dim_wordvec = 300
dim_hidden = 1000

n_encode_lstm_step = 22 + 22
n_decode_lstm_step = 22

r_n_encode_lstm_step = 22
r_n_decode_lstm_step = 22

learning_rate = 0.0001
epochs = 500
batch_size = config.batch_size
reversed_batch_size = config.batch_size

Some of these functions are copied (some with small modifications) from above. They are serving similar purpose. For convenience they are copied here.

In [148]:
def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.):
    if not hasattr(sequences, '__len__'):
        raise ValueError('`sequences` must be iterable.')
    lengths = []
    for x in sequences:
        if not hasattr(x, '__len__'):
            raise ValueError('`sequences` must be a list of iterables. '
                             'Found non-iterable: ' + str(x))
        lengths.append(len(x))

    num_samples = len(sequences)
    if maxlen is None:
        maxlen = np.max(lengths)

    # take the sample shape from the first non empty sequence
    # checking for consistency in the main loop below.
    sample_shape = tuple()
    for s in sequences:
        if len(s) > 0:
            sample_shape = np.asarray(s).shape[1:]
            break

    x = (np.ones((num_samples, maxlen) + sample_shape) * value).astype(dtype)
    for idx, s in enumerate(sequences):
        if not len(s):
            continue  # empty list/array was found
        if truncating == 'pre':
            trunc = s[-maxlen:]
        elif truncating == 'post':
            trunc = s[:maxlen]
        else:
            raise ValueError('Truncating type "%s" not understood' % truncating)

        # check `trunc` has expected shape
        trunc = np.asarray(trunc, dtype=dtype)
        if trunc.shape[1:] != sample_shape:
            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
                             (trunc.shape[1:], idx, sample_shape))

        if padding == 'post':
            x[idx, :len(trunc)] = trunc
        elif padding == 'pre':
            x[idx, -len(trunc):] = trunc
        else:
            raise ValueError('Padding type "%s" not understood' % padding)
    return x

""" Extract only the vocabulary part of the data """
def refine(data):
    words = re.findall("[a-zA-Z'-]+", data)
    words = ["".join(word.split("'")) for word in words]
    # words = ["".join(word.split("-")) for word in words]
    data = ' '.join(words)
    return data

def make_batch_X(batch_X, n_encode_lstm_step, dim_wordvec, word_vector, noise=False):
    for i in range(len(batch_X)):
        batch_X[i] = [word_vector[w] if w in word_vector else np.zeros(dim_wordvec) for w in batch_X[i]]
        if noise:
            batch_X[i].insert(0, np.random.normal(size=(dim_wordvec,))) # insert random normal at the first step

        if len(batch_X[i]) > n_encode_lstm_step:
            batch_X[i] = batch_X[i][:n_encode_lstm_step]
        else:
            for _ in range(len(batch_X[i]), n_encode_lstm_step):
                batch_X[i].append(np.zeros(dim_wordvec))

    current_feats = np.array(batch_X)
    return current_feats

def make_batch_Y(batch_Y, wordtoix, n_decode_lstm_step):
    current_captions = batch_Y
    current_captions = map(lambda x: '<bos> ' + x, current_captions)
    current_captions = map(lambda x: x.replace('.', ''), current_captions)
    current_captions = map(lambda x: x.replace(',', ''), current_captions)
    current_captions = map(lambda x: x.replace('"', ''), current_captions)
    current_captions = map(lambda x: x.replace('\n', ''), current_captions)
    current_captions = map(lambda x: x.replace('?', ''), current_captions)
    current_captions = map(lambda x: x.replace('!', ''), current_captions)
    current_captions = map(lambda x: x.replace('\\', ''), current_captions)
    current_captions = map(lambda x: x.replace('/', ''), current_captions)

    for idx, each_cap in enumerate(current_captions):
        word = each_cap.lower().split(' ')
        if len(word) < n_decode_lstm_step:
            current_captions[idx] = current_captions[idx] + ' <eos>'
        else:
            new_word = ''
            for i in range(n_decode_lstm_step-1):
                new_word = new_word + word[i] + ' '
            current_captions[idx] = new_word + '<eos>'

    current_caption_ind = []
    for cap in current_captions:
        current_word_ind = []
        for word in cap.lower().split(' '):
            if word in wordtoix:
                current_word_ind.append(wordtoix[word])
            else:
                current_word_ind.append(wordtoix['<unk>'])
        current_caption_ind.append(current_word_ind)

    current_caption_matrix = pad_sequences(current_caption_ind, padding='post', maxlen=n_decode_lstm_step)
    current_caption_matrix = np.hstack([current_caption_matrix, np.zeros([len(current_caption_matrix), 1])]).astype(int)
    current_caption_masks = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
    nonzeros = np.array(map(lambda x: (x != 0).sum() + 1, current_caption_matrix))

    for ind, row in enumerate(current_caption_masks):
        row[:nonzeros[ind]] = 1

    return current_caption_matrix, current_caption_masks

def index2sentence(generated_word_index, prob_logit, ixtoword):
    # remove <unk> to second high prob. word
    for i in range(len(generated_word_index)):
        if generated_word_index[i] == 3 or generated_word_index[i] <= 1:
            sort_prob_logit = sorted(prob_logit[i])
            curindex = np.where(prob_logit[i] == sort_prob_logit[-2])[0][0]
            count = 1
            while curindex <= 3:
                curindex = np.where(prob_logit[i] == sort_prob_logit[(-2)-count])[0][0]
                count += 1

            generated_word_index[i] = curindex

    generated_words = []
    for ind in generated_word_index:
        generated_words.append(ixtoword[ind])

    # generate sentence
    punctuation = np.argmax(np.array(generated_words) == '<eos>') + 1
    generated_words = generated_words[:punctuation]
    generated_sentence = ' '.join(generated_words)

    # modify the output sentence 
    generated_sentence = generated_sentence.replace('<bos> ', '')
    generated_sentence = generated_sentence.replace('<eos>', '')
    generated_sentence = generated_sentence.replace(' <eos>', '')
    generated_sentence = generated_sentence.replace('--', '')
    generated_sentence = generated_sentence.split('  ')
    for i in range(len(generated_sentence)):
        generated_sentence[i] = generated_sentence[i].strip()
        if len(generated_sentence[i]) > 1:
            generated_sentence[i] = generated_sentence[i][0].upper() + generated_sentence[i][1:] + '.'
        else:
            generated_sentence[i] = generated_sentence[i].upper()
    generated_sentence = ' '.join(generated_sentence)
    generated_sentence = generated_sentence.replace(' i ', ' I ')
    generated_sentence = generated_sentence.replace("i'm", "I'm")
    generated_sentence = generated_sentence.replace("i'd", "I'd")

    return generated_sentence

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def count_rewards(dull_loss, forward_entropy, backward_entropy, forward_target, backward_target, reward_type='pg'):
    ''' args:
            generated_word_indexs:  <type 'numpy.ndarray'>  
                                    word indexs generated by pre-trained model
                                    shape: (batch_size, n_decode_lstm_step)
            inference_feats:        <type 'dict'>  
                                    some features generated during inference
                                    keys:
                                        'probs': 
                                            shape: (n_decode_lstm_step, batch_size, n_words)
                                        'embeds': 
                                            shape: (n_decode_lstm_step, batch_size, dim_hidden)
                                            current word embeddings at each decode stage
                                        'states': 
                                            shape: (n_encode_lstm_step, batch_size, dim_hidden)
                                            LSTM_1's hidden state at each encode stage
    '''

    # normal training, rewards all equal to 1
    if reward_type == 'normal':
        return np.ones([batch_size, n_decode_lstm_step])

    if reward_type == 'pg':
        forward_entropy = np.array(forward_entropy).reshape(batch_size, n_decode_lstm_step)
        backward_entropy = np.array(backward_entropy).reshape(batch_size, n_decode_lstm_step)
        total_loss = np.zeros([batch_size, n_decode_lstm_step])

        for i in range(batch_size):
            # ease of answering
            total_loss[i, :] += dull_loss[i]
    
            # information flow
            # cosine_sim = 1 - spatial.distance.cosine(embeds[0][-1], embeds[1][-1])
            # IF = cosine_sim * (-1)
    
            # semantic coherence
            forward_len = len(forward_target[i].split())
            backward_len = len(backward_target[i].split())
            if forward_len > 0:
                total_loss[i, :] += (np.sum(forward_entropy[i]) / forward_len)
            if backward_len > 0:
                total_loss[i, :] += (np.sum(backward_entropy[i]) / backward_len)

        total_loss = sigmoid(total_loss) * 1.1

        return total_loss

In [None]:
def train():
    global dull_set

    wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(word_count_threshold=word_count_threshold)
    word_vector = KeyedVectors.load_word2vec_format('model/word_vector.bin', binary=True)

    if len(dull_set) > batch_size:
        dull_set = dull_set[:batch_size]
    else:
        for _ in range(len(dull_set), batch_size):
            dull_set.append('')
    dull_matrix, dull_mask = make_batch_Y(
                                batch_Y=dull_set, 
                                wordtoix=wordtoix, 
                                n_decode_lstm_step=n_decode_lstm_step)

    ones_reward = np.ones([batch_size, n_decode_lstm_step])

    g1 = tf.Graph()
    g2 = tf.Graph()

    default_graph = tf.get_default_graph() 

    with g1.as_default():
        model = PolicyGradient_chatbot(
                dim_wordvec=dim_wordvec,
                n_words=len(wordtoix),
                dim_hidden=dim_hidden,
                batch_size=batch_size,
                n_encode_lstm_step=n_encode_lstm_step,
                n_decode_lstm_step=n_decode_lstm_step,
                bias_init_vector=bias_init_vector,
                lr=learning_rate)
        train_op, loss, input_tensors, inter_value = model.build_model()
        tf_states, tf_actions, tf_feats = model.build_generator()
        sess = tf.InteractiveSession()
        saver = tf.train.Saver(max_to_keep=100)
        if checkpoint:
            print("Use Model {}.".format(model_name))
            saver.restore(sess, os.path.join(model_path, model_name))
            print("Model {} restored.".format(model_name))
        else:
            print("Restart training...")
            tf.global_variables_initializer().run()

    r_wordtoix, r_ixtoword, r_bias_init_vector = preProBuildWordVocab(word_count_threshold=r_word_count_threshold)
    with g2.as_default():
        reversed_model = Seq2Seq_chatbot(
            dim_wordvec=dim_wordvec,
            n_words=len(r_wordtoix),
            dim_hidden=dim_hidden,
            batch_size=reversed_batch_size,
            n_encode_lstm_step=r_n_encode_lstm_step,
            n_decode_lstm_step=r_n_decode_lstm_step,
            bias_init_vector=r_bias_init_vector,
            lr=learning_rate)
        _, _, word_vectors, caption, caption_mask, reverse_inter = reversed_model.build_model()
        sess2 = tf.InteractiveSession()
        saver2 = tf.train.Saver()
        saver2.restore(sess2, os.path.join(reversed_model_path, reversed_model_name))
        print("Reversed model {} restored.".format(reversed_model_name))


    dr = Data_Reader(cur_train_index=config.cur_train_index, load_list=config.load_list)

    for epoch in range(start_epoch, epochs):
        n_batch = dr.get_batch_num(batch_size)
        sb = start_batch if epoch == start_epoch else 0
        for batch in range(sb, n_batch):
            start_time = time.time()

            batch_X, batch_Y, former = dr.generate_training_batch_with_former(batch_size)

            current_feats = make_batch_X(
                            batch_X=copy.deepcopy(batch_X), 
                            n_encode_lstm_step=n_encode_lstm_step, 
                            dim_wordvec=dim_wordvec,
                            word_vector=word_vector)

            current_caption_matrix, current_caption_masks = make_batch_Y(
                                                                batch_Y=copy.deepcopy(batch_Y), 
                                                                wordtoix=wordtoix, 
                                                                n_decode_lstm_step=n_decode_lstm_step)

            if training_type == 'pg':
                # action: generate batch_size sents
                action_word_indexs, inference_feats = sess.run([tf_actions, tf_feats],
                                                                feed_dict={
                                                                   tf_states: current_feats
                                                                })
                action_word_indexs = np.array(action_word_indexs).reshape(batch_size, n_decode_lstm_step)
                action_probs = np.array(inference_feats['probs']).reshape(batch_size, n_decode_lstm_step, -1)

                actions = []
                actions_list = []
                for i in range(len(action_word_indexs)):
                    action = index2sentence(
                                generated_word_index=action_word_indexs[i], 
                                prob_logit=action_probs[i],
                                ixtoword=ixtoword)
                    actions.append(action)
                    actions_list.append(action.split())

                action_feats = make_batch_X(
                                batch_X=copy.deepcopy(actions_list), 
                                n_encode_lstm_step=n_encode_lstm_step, 
                                dim_wordvec=dim_wordvec,
                                word_vector=word_vector)

                action_caption_matrix, action_caption_masks = make_batch_Y(
                                                                batch_Y=copy.deepcopy(actions), 
                                                                wordtoix=wordtoix, 
                                                                n_decode_lstm_step=n_decode_lstm_step)

                # ease of answering
                dull_loss = []
                for vector in action_feats:
                    action_batch_X = np.array([vector for _ in range(batch_size)])
                    d_loss = sess.run(loss,
                                 feed_dict={
                                    input_tensors['word_vectors']: action_batch_X,
                                    input_tensors['caption']: dull_matrix,
                                    input_tensors['caption_mask']: dull_mask,
                                    input_tensors['reward']: ones_reward
                                })
                    d_loss = d_loss * -1. / len(dull_set)
                    dull_loss.append(d_loss)

                # Information Flow
                pass

                # semantic coherence
                forward_inter = sess.run(inter_value,
                                 feed_dict={
                                    input_tensors['word_vectors']: current_feats,
                                    input_tensors['caption']: action_caption_matrix,
                                    input_tensors['caption_mask']: action_caption_masks,
                                    input_tensors['reward']: ones_reward
                                })
                forward_entropies = forward_inter['entropies']
                former_caption_matrix, former_caption_masks = make_batch_Y(
                                                                batch_Y=copy.deepcopy(former), 
                                                                wordtoix=wordtoix, 
                                                                n_decode_lstm_step=n_decode_lstm_step)
                action_feats = make_batch_X(
                                batch_X=copy.deepcopy(actions_list), 
                                n_encode_lstm_step=r_n_encode_lstm_step, 
                                dim_wordvec=dim_wordvec,
                                word_vector=word_vector)
                backward_inter = sess2.run(reverse_inter,
                                 feed_dict={
                                    word_vectors: action_feats,
                                    caption: former_caption_matrix,
                                    caption_mask: former_caption_masks
                                })
                backward_entropies = backward_inter['entropies']

                # reward: count goodness of actions
                rewards = count_rewards(dull_loss, forward_entropies, backward_entropies, actions, former, reward_type='pg')
    
                # policy gradient: train batch with rewards
                if batch % 10 == 0:
                    _, loss_val = sess.run(
                            [train_op, loss],
                            feed_dict={
                                input_tensors['word_vectors']: current_feats,
                                input_tensors['caption']: current_caption_matrix,
                                input_tensors['caption_mask']: current_caption_masks,
                                input_tensors['reward']: rewards
                            })
                    print("Epoch: {}, batch: {}, loss: {}, Elapsed time: {}".format(epoch, batch, loss_val, time.time() - start_time))
                else:
                    _ = sess.run(train_op,
                                 feed_dict={
                                    input_tensors['word_vectors']: current_feats,
                                    input_tensors['caption']: current_caption_matrix,
                                    input_tensors['caption_mask']: current_caption_masks,
                                    input_tensors['reward']: rewards
                                })
                if batch % 1000 == 0 and batch != 0:
                    print("Epoch {} batch {} is done. Saving the model ...".format(epoch, batch))
                    saver.save(sess, os.path.join(model_path, 'model-{}-{}'.format(epoch, batch)))
            if training_type == 'normal':
                if batch % 10 == 0:
                    _, loss_val = sess.run(
                            [train_op, loss],
                            feed_dict={
                                input_tensors['word_vectors']: current_feats,
                                input_tensors['caption']: current_caption_matrix,
                                input_tensors['caption_mask']: current_caption_masks,
                                input_tensors['reward']: ones_reward
                            })
                    print("Epoch: {}, batch: {}, loss: {}, Elapsed time: {}".format(epoch, batch, loss_val, time.time() - start_time))
                else:
                    _ = sess.run(train_op,
                                 feed_dict={
                                    input_tensors['word_vectors']: current_feats,
                                    input_tensors['caption']: current_caption_matrix,
                                    input_tensors['caption_mask']: current_caption_masks,
                                    input_tensors['reward']: ones_reward
                                })

        print("Epoch ", epoch, " is done. Saving the model ...")
        saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)

Then we test it

In [153]:
#=====================================================
# Global Parameters
#=====================================================
default_model_path = './model/RL/model-56-3000-2'
testing_data_path = 'sample_input.txt' #if len(sys.argv) <= 2 else sys.argv[2]
output_path = 'sample_output_RL.txt' #if len(sys.argv) <= 3 else sys.argv[3]

word_count_threshold = config.WC_threshold

#=====================================================
# Train Parameters
#=====================================================
dim_wordvec = 300
dim_hidden = 1000

n_encode_lstm_step = 22 + 1 # one random normal as the first timestep
n_decode_lstm_step = 22

batch_size = 1

In [154]:
""" Extract only the vocabulary part of the data """
def refine(data):
    words = re.findall("[a-zA-Z'-]+", data)
    words = ["".join(word.split("'")) for word in words]
    # words = ["".join(word.split("-")) for word in words]
    data = ' '.join(words)
    return data

In [156]:
testing_data = open(testing_data_path, 'r').read().split('\n')

word_vector = KeyedVectors.load_word2vec_format('model/word_vector.bin', binary=True)

_, ixtoword, bias_init_vector = preProBuildWordVocab(word_count_threshold=word_count_threshold)

preprocessing word counts and creating vocab based on word count threshold 20
filtered words from 76029 to 6847


In [157]:
model = PolicyGradient_chatbot(
        dim_wordvec=dim_wordvec,
        n_words=len(ixtoword),
        dim_hidden=dim_hidden,
        batch_size=batch_size,
        n_encode_lstm_step=n_encode_lstm_step,
        n_decode_lstm_step=n_decode_lstm_step,
        bias_init_vector=bias_init_vector)

word_vectors, caption_tf, feats = model.build_generator()

sess = tf.InteractiveSession()

saver = tf.train.Saver()



In [158]:
saver.restore(sess, default_model_path)


Use default model

INFO:tensorflow:Restoring parameters from ./model/RL/model-56-3000-2


In [159]:
with open(output_path, 'w') as out:
    generated_sentences = []
    bleu_score_avg = [0., 0.]
    for idx, question in enumerate(testing_data):
        print('question =>', question)

        question = [refine(w) for w in question.lower().split()]
        question = [word_vector[w] if w in word_vector else np.zeros(dim_wordvec) for w in question]
        question.insert(0, np.random.normal(size=(dim_wordvec,))) # insert random normal at the first step

        if len(question) > n_encode_lstm_step:
            question = question[:n_encode_lstm_step]
        else:
            for _ in range(len(question), n_encode_lstm_step):
                question.append(np.zeros(dim_wordvec))

        question = np.array([question]) # 1x22x300

        generated_word_index, prob_logit = sess.run([caption_tf, feats['probs']], feed_dict={word_vectors: question})
        generated_word_index = np.array(generated_word_index).reshape(batch_size, n_decode_lstm_step)[0]
        prob_logit = np.array(prob_logit).reshape(batch_size, n_decode_lstm_step, -1)[0]
        # print('generated_word_index.shape', generated_word_index.shape)
        # print('prob_logit.shape', prob_logit.shape)

        # remove <unk> to second high prob. word
        # print('generated_word_index', generated_word_index)
        for i in range(len(generated_word_index)):
            if generated_word_index[i] == 3:
                sort_prob_logit = sorted(prob_logit[i])
                # print('max val', sort_prob_logit[-1])
                # print('second max val', sort_prob_logit[-2])
                maxindex = np.where(prob_logit[i] == sort_prob_logit[-1])[0][0]
                secmaxindex = np.where(prob_logit[i] == sort_prob_logit[-2])[0][0]
                # print('max ind', maxindex, ixtoword[maxindex])
                # print('second max ind', secmaxindex, ixtoword[secmaxindex])
                generated_word_index[i] = secmaxindex
        # print('generated_word_index', generated_word_index)

        generated_words = []
        for ind in generated_word_index:
            generated_words.append(ixtoword[ind])

        # generate sentence
        punctuation = np.argmax(np.array(generated_words) == '<eos>') + 1
        generated_words = generated_words[:punctuation]
        generated_sentence = ' '.join(generated_words)

        # modify the output sentence 
        generated_sentence = generated_sentence.replace('<bos> ', '')
        generated_sentence = generated_sentence.replace(' <eos>', '')
        generated_sentence = generated_sentence.replace('--', '')
        generated_sentence = generated_sentence.split('  ')
        for i in range(len(generated_sentence)):
            generated_sentence[i] = generated_sentence[i].strip()
            if len(generated_sentence[i]) > 1:
                generated_sentence[i] = generated_sentence[i][0].upper() + generated_sentence[i][1:] + '.'
            else:
                generated_sentence[i] = generated_sentence[i].upper()
        generated_sentence = ' '.join(generated_sentence)
        generated_sentence = generated_sentence.replace(' i ', ' I ')
        generated_sentence = generated_sentence.replace("i'm", "I'm")
        generated_sentence = generated_sentence.replace("i'd", "I'd")
        generated_sentence = generated_sentence.replace("i'll", "I'll")
        generated_sentence = generated_sentence.replace("i'v", "I'v")
        generated_sentence = generated_sentence.replace(" - ", "")

        print('generated_sentence =>', generated_sentence)
        out.write(generated_sentence + '\n')

question => Have you heard about 'machine learning and having it deep and structured'?
generated_sentence => Sneak prison.
question => How are you?
generated_sentence => Medal jack's.
question => What's your name?
generated_sentence => Infection.
question => Hello
generated_sentence => Cell how's surprise purely brain johnnie covers curiosity lingerie invited poisoned joined johnnie invited poisoned poisoned illness misery kent.
question => Thank God!  If I had to hear one more story about your coiffure
generated_sentence => Misunderstood cell mere tower.
question => You never wanted to go out with me, did you?
generated_sentence => Raped mere they warned they assuming cell idiot extreme curiosity accuse infection they infection russia base cell his.
question => I guess I thought I was protecting you.
generated_sentence => Lit cell businessman chief cell peel assuming cell benny lie curiosity plenty aren't mac unbelievable.
question => Forget his reputation.  Do you think we've got a p

Try to generate some dialogues.
The following steps will try to generate dialogues, both A and B are the trained RL model.

In [204]:
#=====================================================
# Global Parameters
#=====================================================
default_model_path = './model/Seq2Seq/model-77-2'
default_simulate_type = 2  # type 1 use one former sent, type 2 use two former sents

testing_data_path = 'sample_input.txt'
output_path = 'sample_dialog_output.txt'

max_turns = config.MAX_TURNS
word_count_threshold = config.WC_threshold

In [205]:
#=====================================================
# Train Parameters
#=====================================================
dim_wordvec = 300
dim_hidden = 1000

n_encode_lstm_step = 22  # need to plus 1 later, because one random normal as the first timestep
n_decode_lstm_step = 22

batch_size = 1

""" Extract only the vocabulary part of the data """
def refine(data):
    words = re.findall("[a-zA-Z'-]+", data)
    words = ["".join(word.split("'")) for word in words]
    # words = ["".join(word.split("-")) for word in words]
    data = ' '.join(words)
    return data

In [206]:
def generate_question_vector(state, word_vector, dim_wordvec, n_encode_lstm_step):
    state = [refine(w) for w in state.lower().split()]
    state = [word_vector[w] if w in word_vector else np.zeros(dim_wordvec) for w in state]
    state.insert(0, np.random.normal(size=(dim_wordvec,))) # insert random normal at the first step

    if len(state) > n_encode_lstm_step:
        state = state[:n_encode_lstm_step]
    else:
        for _ in range(len(state), n_encode_lstm_step):
            state.append(np.zeros(dim_wordvec))

    return np.array([state]) # 1 x n_encode_lstm_step x dim_wordvec

def generate_answer_sentence(generated_word_index, prob_logit, ixtoword):
    # remove <unk> to second high prob. word
    for i in range(len(generated_word_index)):
        if generated_word_index[i] == 3:
            sort_prob_logit = sorted(prob_logit[i][0])
            # print('max val', sort_prob_logit[-1])
            # print('second max val', sort_prob_logit[-2])
            maxindex = np.where(prob_logit[i][0] == sort_prob_logit[-1])[0][0]
            secmaxindex = np.where(prob_logit[i][0] == sort_prob_logit[-2])[0][0]
            # print('max ind', maxindex, ixtoword[maxindex])
            # print('second max ind', secmaxindex, ixtoword[secmaxindex])
            generated_word_index[i] = secmaxindex

    generated_words = []
    for ind in generated_word_index:
        generated_words.append(ixtoword[ind])

    # generate sentence
    punctuation = np.argmax(np.array(generated_words) == '<eos>') + 1
    generated_words = generated_words[:punctuation]
    generated_sentence = ' '.join(generated_words)

    # modify the output sentence 
    generated_sentence = generated_sentence.replace('<bos> ', '')
    generated_sentence = generated_sentence.replace(' <eos>', '')
    generated_sentence = generated_sentence.replace('--', '')
    generated_sentence = generated_sentence.split('  ')
    for i in range(len(generated_sentence)):
        generated_sentence[i] = generated_sentence[i].strip()
        if len(generated_sentence[i]) > 1:
            generated_sentence[i] = generated_sentence[i][0].upper() + generated_sentence[i][1:] + '.'
        else:
            generated_sentence[i] = generated_sentence[i].upper()
    generated_sentence = ' '.join(generated_sentence)
    generated_sentence = generated_sentence.replace(' i ', ' I ')
    generated_sentence = generated_sentence.replace("i'm", "I'm")
    generated_sentence = generated_sentence.replace("i'd", "I'd")

    return generated_sentence

def init_history(simulate_type, start_sentence):
    history = []
    history += ['' for _ in range(simulate_type-1)]
    history.append(start_sentence)
    return history

def get_cur_state(simulate_type, dialog_history):
    return ' '.join(dialog_history[-1*simulate_type:]).strip()

In [207]:
testing_data = open(testing_data_path, 'r').read().split('\n')

word_vector = KeyedVectors.load_word2vec_format('model/word_vector.bin', binary=True)

_, ixtoword, bias_init_vector = preProBuildWordVocab(word_count_threshold=word_count_threshold)

preprocessing word counts and creating vocab based on word count threshold 20
filtered words from 76029 to 6847


In [208]:
model = Seq2Seq_chatbot(
        dim_wordvec=dim_wordvec,
        n_words=len(ixtoword),
        dim_hidden=dim_hidden,
        batch_size=batch_size,
        n_encode_lstm_step=n_encode_lstm_step,
        n_decode_lstm_step=n_decode_lstm_step,
        bias_init_vector=bias_init_vector)

word_vectors, caption_tf, probs, _ = model.build_generator()

sess = tf.InteractiveSession()

saver = tf.train.Saver()

init model....
building generator....




In [209]:
saver.restore(sess, default_model_path)


Use default model

INFO:tensorflow:Restoring parameters from ./model/Seq2Seq/model-77-2


In [211]:
model_path = default_model_path
simulate_type = default_simulate_type

with open(output_path, 'w') as out:
    for idx, start_sentence in enumerate(testing_data):
        print('dialog {}'.format(idx))
        print('A => {}'.format(start_sentence))
        out.write('dialog {}\nA: {}\n'.format(idx, start_sentence))

        dialog_history = init_history(simulate_type, start_sentence)

        for turn in range(max_turns):
            question = generate_question_vector(state=get_cur_state(simulate_type, dialog_history), 
                                                word_vector=word_vector, 
                                                dim_wordvec=dim_wordvec, 
                                                n_encode_lstm_step=n_encode_lstm_step)

            generated_word_index, prob_logit = sess.run([caption_tf, probs], feed_dict={word_vectors: question})

            generated_sentence = generate_answer_sentence(generated_word_index=generated_word_index, 
                                                          prob_logit=prob_logit, 
                                                          ixtoword=ixtoword)

            dialog_history.append(generated_sentence)
            print('B => {}'.format(generated_sentence))

            question_2 = generate_question_vector(state=get_cur_state(simulate_type, dialog_history), 
                                                word_vector=word_vector, 
                                                dim_wordvec=dim_wordvec, 
                                                n_encode_lstm_step=n_encode_lstm_step)

            generated_word_index, prob_logit = sess.run([caption_tf, probs], feed_dict={word_vectors: question_2})

            generated_sentence_2 = generate_answer_sentence(generated_word_index=generated_word_index, 
                                                              prob_logit=prob_logit, 
                                                              ixtoword=ixtoword)

            dialog_history.append(generated_sentence_2)
            print('A => {}'.format(generated_sentence_2))
            out.write('B: {}\nA: {}\n'.format(generated_sentence, generated_sentence_2))

dialog 0
A => Have you heard about 'machine learning and having it deep and structured'?
B => Target along 22 labor.
A => Verbal.
B => Backed likely human they loans envelope thirty-six frame assuming loans seventh illness.
A => Sean joined shocking dearest.
B => Sean.
A => Criminal soup victor they deck curiosity flat victor desire plot victor say: benny mister 22 samuel snow following.
B => Sean strings plenty apology.
A => Sneak benny cia punk meals hero.
B => Chief cell danger danger they assuming infection peel jeremy surgery kent error wear treasure they agreement drove infection neo.
A => Assuming problem benny cell sounded punk sophisticated.
B => Maid.
A => Cell labor criminal they wise vietnam routine.
B => Problem benny infection cia invited funeral mac 22 lifted mysterious infection neo.
A => Assuming pills 22 me chess channel victor sara chet permission.
B => Natives treasure alike.
A => Sneak buddy wives <u>know<u> they cell how's.
B => Super.
A => Problem misery treasure

A => While.
B => Sneak they cell questioning they misunderstood cell pro ninety reason target.
A => Problem teacher avoid lie curiosity questioning buddy let's tiny even.
B => Sean drove brain guards loose hero.
A => Sean snow breakdown hero johnnie infection.
dialog 7
A => Forget his reputation.  Do you think we've got a plan or not?
B => Sneak cell benny neo junior begging underwater aren't mac 22 jury form cell idiot 22 particles planted counts sank treasure.
A => Benny donnie buddy library muscles cell wishes dwayne apology.
B => Personally burke senate diary diary <u>are<u> <u>are<u> <u>are<u> carryin' johnnie stupidity pains times.
A => Snow labor.
B => Verbal verbal verbal they verbal people's cell.
A => Shirts.
B => Assuming sean wise underground they toast such sweetie buddy marijuana 500 they wise error 22 filled they burning.
A => Assuming carryin' misery skipper acted assuming cell surgery cell riding fisher teacher ninety children backed cell mere peel.
B => Benny disappoi

B => Sean poisoned sean robbed mac torture johnnie infection feature kinda along.
A => 22 problem such say: dwayne super neo ass isolated.
B => Sean drove cell neo they medal patchett.
A => Danger danger they problem.
B => Easily sexual buddy cotton they wise along 22 hand liz buddy leading mac buddy spells misery sean times 22.
A => Low 500 22 400 purely mama's bored invited fisher.
B => Parking invited.
A => Criminal.
B => Verbal safe land nut breakdown 500 feel or 22 outta camp 500 buddy.
A => During apology invited infection surgery ninety cell dwayne sounded buddy marijuana along buddy walkin'.
dialog 13
A => Are you saying that someone is paying you to be our maid and doesn't want us to know who he is?
B => Verbal assuming cell quitting report stupidity flame did curiosity stanley 22 possessed 500 22 pretend dumb coupla burned curiosity.
A => Verbal curiosity.
B => Pike.
A => Sean.
B => Swana 22 creeps pretend gear buddy.
A => Problem.
B => Cell.
A => Sean sank deck curiosity nat

B => Assuming problem misery ninety problem brain florida times treasure.
A => Sean mere brain drove curiosity loyalty witches.
B => 22 responsible unless cleveland times destiny criminal times unless toast became internet breakdown shit's.
A => 22 scotty assuming 22 they brain jeremy guards underwater mac trapped they sarge they criminal sarge they human.
B => Verbal cell neo they treasure match gentle doors they super treasure they compare treasure extreme curiosity cia.
A => Benny accuse parking invited gear invited cell benny surgery cell shocking curiosity bored they illness misery family sank seriously jury.
B => Sean johnnie brain.
A => Snow storm poisoned danger stupidity swiss guards nail quarter fisher strictly z held.
B => Problem benny stupidity bored punk meals.
A => 500 sean stupidity cia even dearest chart curiosity buddy they bored witches they bored witches.
B => Assuming cell dwayne burned 22 particles suit.
A => Assuming times assuming buddy anything assuming parking

KeyboardInterrupt: 

I have to admit the results are not making much sense. I'd guess the reason here is that the model is not properly trained. According to the paper, the model needs to be trained to a stable status on Seq2Seq before it can be trained on RL, which takes so much time that we cannot finish the task. For this example, we only trained the seq2seq for 30 epochs.

### Other utils

If indicated "tensorflow interactive session is running", use the next block to stop and refresh tensorflow graph.

In [203]:
sess.close()
sess
tf.reset_default_graph()