[View in Colaboratory](https://colab.research.google.com/github/higepon/tensorflow_seq2seq_chatbot/blob/master/seq2seq.ipynb)

### Chatbot based on Seq2Seq Beam Search + Attention + Reinforcement Learning (Experimental)
- Tensorflow 1.4.0+ is required.
- This is based on [NMT Tutorial](https://github.com/tensorflow/nmt).
- Experiment [notes](https://github.com/higepon/tensorflow_seq2seq_chatbot/wiki).



In [0]:
# Special commands should be located here.
# System packages: PPA tooling, the google-drive-ocamlfuse Drive mounter,
# and the MeCab tokenizer with its IPA dictionaries.
# NOTE(review): with the redirect order `2>&1 > /dev/null` only stdout is
# discarded; stderr still reaches the cell output.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
!apt-get -qq install -y mecab libmecab-dev mecab-ipadic mecab-ipadic-utf8

# Python packages imported by the cells below.
!pip -q install git+https://github.com/mrahtz/easy-tf-log#egg=easy-tf-log[tf]
!pip install pushbullet.py
!pip install tweepy pyyaml
!pip install mecab-python3

def auth_google_drive():
  """Authorise google-drive-ocamlfuse, unless a 'drive' path already exists.

  Uses the application-default Google credentials for the client id and
  secret, prints the authorisation URL emitted by google-drive-ocamlfuse,
  then reads the verification code interactively and pipes it back to the
  tool.
  """
  # Generate creds for the Drive FUSE library.
  if not os.path.exists('drive'):
    from oauth2client.client import GoogleCredentials
    creds = GoogleCredentials.get_application_default()
    import getpass
    # First invocation: only the line containing the auth URL is shown.
    !google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
    # getpass keeps the pasted verification code out of the cell output.
    vcode = getpass.getpass()
    # Second invocation: feed the verification code on stdin.
    !echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}  

def mount_google_drive():
  """Create the 'drive' directory and mount Google Drive on it.

  Does nothing when a 'drive' path already exists.
  """
  if not os.path.exists('drive'):
    os.makedirs('drive', exist_ok=True)
    !google-drive-ocamlfuse drive 
    
def kill_docker():
  """Send SIGKILL to every process this user may signal (`kill -9 -1`).

  NOTE(review): presumably used to force-reset the Colab runtime; all
  in-memory state is lost.
  """
  !kill -9 -1  


In [0]:
import urllib.request

# Fetch the memory_util helper next to the notebook so that it can be
# imported right below.
# NOTE(review): the module is downloaded from the master branch of a
# third-party repository and executed as-is; pin a commit hash if
# reproducibility or supply-chain safety matters.
response = urllib.request.urlopen("https://raw.githubusercontent.com/yaroslavvb/memory_util/master/memory_util.py")
# Close both the HTTP response and the output file deterministically so the
# file is fully flushed to disk before the import reads it.
with response, open("memory_util.py", "wb") as memory_util_file:
    memory_util_file.write(response.read())
import memory_util

In [0]:
from __future__ import print_function

import copy as copy
import datetime
import hashlib
import json
import os
import os.path
import random
import re
import shutil
from enum import Enum, auto

import MeCab
import easy_tf_log
import matplotlib.pyplot as plt
import random as random
import numpy as np
import tensorflow as tf
import tweepy
import yaml
from easy_tf_log import tflog
from google.colab import auth
from google.colab import files
from pushbullet import Pushbullet
from tensorflow.python.layers import core as layers_core
from tensorflow.python.platform import gfile

# Generate auth tokens for Colab
auth.authenticate_user()


In [0]:
#kill_docker()

In [0]:
def client_id():
    """Return the alias ('client1', 'client2', ...) of the current account.

    Reads the e-mail address stored under ``id_token.email`` in
    /content/datalab/adc.json, hashes it with MD5 and looks the hex digest
    up in a fixed table.

    Raises:
        KeyError: if the hashed e-mail is not one of the known clients.
    """
    known_clients = {
        'dfc1d5b22ba03430800179d23e522f6f': 'client1',
        'f8e857a2d792038820ebb2ae8d803f7c': 'client2',
        '7628f983785173edabbde501ef8f781d': 'client3',
    }
    with open('/content/datalab/adc.json') as adc_file:
        credentials = json.load(adc_file)
    email_bytes = credentials['id_token']['email'].encode('utf-8')
    digest = hashlib.md5(email_bytes).hexdigest()
    return known_clients[digest]


# Resolve the client alias once and reuse it, instead of reading and hashing
# the credentials file a second time just to print it.
current_client_id = client_id()
print(current_client_id)

In [0]:
# Authorise google-drive-ocamlfuse (interactive unless 'drive' already
# exists) and mount Google Drive at ./drive.
auth_google_drive()
mount_google_drive()

# Folder inside the mounted Drive that Shell copies config and model files
# from and to.
drive_path = 'drive/seq2seq_data'

In [0]:
class Mode(Enum):
    """Run modes of this notebook; the active one is the module-level `mode`.

    Values are the consecutive integers 1..5 in declaration order.
    """
    Test = 1
    TrainSeq2Seq = 2
    TrainSeq2SeqSwapped = 3
    TrainRL = 4
    TweetBot = 5
    


# Active run mode for this session; to switch, comment out the current
# assignment and uncomment exactly one of the alternatives.
mode = Mode.Test
#mode = Mode.TrainSeq2Seq
#mode = Mode.TrainSeq2SeqSwapped
#mode = Mode.TrainRL
#mode = Mode.TweetBot


class Shell:
    """File-system helpers that move files between the local working
    directory and the Google Drive folder at the module-level ``drive_path``.
    """

    @staticmethod
    def download_file_if_necessary(file_name):
        """Copy ``file_name`` from the Drive folder unless it exists locally."""
        if os.path.exists(file_name):
            return
        print("downloading {}...".format(file_name))
        shutil.copy2(os.path.join(drive_path, file_name), file_name)
        print("downloaded")

    @staticmethod
    def download_model_data_if_necessary(model_path):
        """Copy every file of ``model_path`` from Drive into the local dir.

        The local directory is created when missing. Despite the name, the
        copy always runs and overwrites local files of the same name.
        """
        os.makedirs(model_path, exist_ok=True)
        print("Downloading model files...")
        src_dir = os.path.join(drive_path, model_path)
        Shell.copy_all_files(src_dir, model_path)
        print("done")

    @staticmethod
    def copy_all_files(src_dir, dst_dir):
        """Copy each direct entry of ``src_dir`` into ``dst_dir``.

        A missing ``src_dir`` is silently ignored. Entries are copied with
        ``shutil.copy2``, so ``src_dir`` is expected to contain files only.
        """
        if os.path.exists(src_dir):
            for f in os.listdir(src_dir):
                shutil.copy2(os.path.join(src_dir, f), os.path.join(dst_dir, f))

    @staticmethod
    def remove_all_files(target_dir):
        """Delete every file below ``target_dir`` (recursively).

        Directories themselves are left in place.
        """
        for f in Shell.listdir(target_dir):
            os.remove(f)

    @staticmethod
    def remove_matched_files(target_dir, pattern):
        """Delete files below ``target_dir`` whose path matches ``pattern``.

        ``pattern`` is applied with ``re.match`` to the absolute path, so it
        normally has to start with ``.*``.
        """
        for f in Shell.listdir(target_dir):
            if re.match(pattern, f):
                os.remove(f)

    @staticmethod
    def download_logs(path):
        """Offer every file below ``path`` matching '.*events' as a browser
        download through ``google.colab.files``."""
        for f in Shell.listdir(path):
            if re.match('.*events', f):
                files.download(f)

    @staticmethod
    def remove_saved_model(hparams):
        """Empty ``hparams.model_path`` both locally and on Drive.

        Both directories are created first when they do not exist.
        """
        os.makedirs(hparams.model_path, exist_ok=True)
        Shell.remove_all_files(hparams.model_path)
        drive_model_path = os.path.join(drive_path, hparams.model_path)
        os.makedirs(drive_model_path, exist_ok=True)
        Shell.remove_all_files(drive_model_path)

    @staticmethod
    def copy_saved_model(src_hparams, dst_hparams):
        """Copy the local model files of ``src_hparams`` to ``dst_hparams``."""
        Shell.copy_all_files(src_hparams.model_path, dst_hparams.model_path)
        # rm tf.logs from source so that it wouldn't be mixed in dest tf.logs.
        Shell.remove_matched_files(dst_hparams.model_path, ".*events.*")

    @staticmethod
    def listdir(target_dir):
        """Yield the absolute path of every file below ``target_dir``."""
        for dir_path, _, file_names in os.walk(target_dir):
            for f in file_names:
                yield os.path.abspath(os.path.join(dir_path, f))

    @staticmethod
    def list_model_file(path):
        """Return the file names that make up the latest checkpoint in ``path``.

        The step number is parsed from the first 'ChatbotModel-<digits>'
        entry of ``path/checkpoint``. The result is 'checkpoint' followed by
        every entry of ``path`` whose name contains that number or
        'events.out'.

        Raises:
            ValueError: if the checkpoint file names no ChatbotModel step.
        """
        # The context manager closes the handle even if read() fails.
        with open('{}/checkpoint'.format(path)) as checkpoint_file:
            text = checkpoint_file.read()
        print(text)
        m = re.match(r".*ChatbotModel\-(\d+)", text)
        if m is None:
            raise ValueError(
                "no ChatbotModel-<step> entry found in {}/checkpoint".format(
                    path))
        model_name = m.group(1)
        # Named model_files so the google.colab `files` module is not shadowed.
        # NOTE(review): the step number is matched as a bare substring, so
        # step "120" also selects files of step "1200" if both are present.
        model_files = ["checkpoint"]
        model_files.extend(
            x for x in os.listdir(path)
            if re.search(model_name, x) or re.search('events.out', x))
        return model_files

    @staticmethod
    def save_model_in_drive(model_path):
        """Replace the Drive copy of ``model_path`` with the latest checkpoint.

        The local checkpoint is listed before the Drive directory is emptied,
        so a missing or unreadable local checkpoint leaves the existing Drive
        copy untouched.
        """
        path = os.path.join(drive_path, model_path)
        os.makedirs(path, exist_ok=True)
        model_files = Shell.list_model_file(model_path)
        Shell.remove_all_files(path)
        print("Saving model in Google Drive...")
        for file_name in model_files:
            print("Saving ", file_name)
            shutil.copy2(os.path.join(model_path, file_name),
                         os.path.join(path, file_name))
        print("done")


config_path = 'config.yml'
Shell.download_file_if_necessary(config_path)
# safe_load builds plain Python objects only and, unlike yaml.load without an
# explicit Loader, keeps working on PyYAML 6+. The context manager closes the
# config file once the key has been read.
with open(config_path, 'rt') as f:
    push_key = yaml.safe_load(f)['pushbullet']['api_key']

# Pushbullet client built from the API key stored in config.yml.
pb = Pushbullet(push_key)

# Note for myself.
# You've summarized Seq2Seq
# at http://d.hatena.ne.jp/higepon/20171210/1512887715.

# If you see following error, it means your max(len(tweets of training set)) <  decoder_length.
# This should be a bug somewhere in build_decoder, but couldn't find one yet.
# You can workaround by setting hparams.decoder_length=max len of tweet in training set.
# InvalidArgumentError: logits and labels must have the same first dimension, got logits shape [48,50] and labels shape [54]
#	 [[Node: root/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits = SparseSoftmaxCrossEntropyWithLogits[T=DT_FLOAT, Tlabels=DT_INT32, 

# Show the TensorFlow version in use (the notebook header asks for 1.4.0+).
print(tf.__version__)

def info(message, hparams):
    """Print ``message`` only when ``hparams.debug_verbose`` is truthy."""
    if not hparams.debug_verbose:
        return
    print(message)


def has_gpu0():
    """Return True when TensorFlow reports "/device:GPU:0" as its GPU device."""
    gpu_name = tf.test.gpu_device_name()
    return gpu_name == "/device:GPU:0"

class ModelDirectory(Enum):
    """Relative directory paths, one per named model configuration."""
    tweet_large = 'model/tweet_large'
    tweet_large_rl = 'model/tweet_large_rl'
    tweet_large_swapped = 'model/tweet_large_swapped'
    tweet_small = 'model/tweet_small'
    tweet_small_swapped = 'model/tweet_small_swapped'
    tweet_small_rl = 'model/tweet_small_rl'
    conversations_small = 'model/conversations_small'
    conversations_small_backward = 'model/conversations_small_backward'
    conversations_small_rl = 'model/conversations_small_rl'
    test_multiple1 = 'model/test_multiple1'
    test_multiple2 = 'model/test_multiple2'
    test_multiple3 = 'model/test_multiple3'
    test_distributed = 'model/test_distributed'

    @staticmethod
    def create_all_directories():
        """Create the directory of every member; existing ones are kept."""
        for path in (member.value for member in ModelDirectory):
            os.makedirs(path, exist_ok=True)


# todo
# collect all initializer
# Make sure every directory named in ModelDirectory exists locally.
ModelDirectory.create_all_directories()

# Baseline hyper-parameters with deliberately tiny sizes. The other
# configurations below are derived from it with deepcopy + override_from_dict.
base_hparams = tf.contrib.training.HParams(
    machine=current_client_id,
    batch_size=3,
    num_units=6,
    num_layers=2,
    vocab_size=9,
    embedding_size=8,
    learning_rate=0.01,
    learning_rate_decay=0.99,
    use_attention=False,
    encoder_length=5,
    decoder_length=5,
    max_gradient_norm=5.0,
    beam_width=2,
    num_train_steps=100,
    debug_verbose=False,
    # Placeholder text: derived configurations are expected to override it.
    model_path='Please override model_directory',
    # Ids and surface forms of the special vocabulary entries.
    sos_id=0,
    eos_id=1,
    pad_id=2,
    unk_id=3,
    sos_token="[SOS]",
    eos_token="[EOS]",
    pad_token="[PAD]",
    unk_token="[UNK]",
)

# Test configuration: beam_width 0 and a much larger learning rate.
test_hparams = copy.deepcopy(base_hparams).override_from_dict(
    {'beam_width': 0, 'num_train_steps': 100, 'learning_rate': 0.5})

# Same as test_hparams but with use_attention switched on.
test_attention_hparams = copy.deepcopy(test_hparams).override_from_dict(
    {'use_attention': True})


In [0]:
def print_hparams(hparams):
    """Print a dict holding a fixed subset of ``hparams``.

    Each listed key is read with ``hparams.get(key)``; token ids, token
    strings and ``debug_verbose`` are not part of the output.
    """
    reported_keys = (
        'machine', 'batch_size', 'num_units', 'num_layers', 'vocab_size',
        'embedding_size', 'learning_rate', 'learning_rate_decay',
        'use_attention', 'encoder_length', 'decoder_length',
        'max_gradient_norm', 'beam_width', 'num_train_steps', 'model_path',
    )
    print({name: hparams.get(name) for name in reported_keys})


In [0]:
import tensorflow as tf

# For debug purpose: clear the default graph so that re-running the cells
# below builds the model into an empty graph.
tf.reset_default_graph()


class ChatbotModel:
    def __init__(self, sess, hparams, model_path, scope='ChatbotModel'):
        """Build the whole graph (encoder, decoders, optimizers, summaries).

        Args:
            sess: session used by every method that calls ``sess.run``.
            hparams: hyper-parameters; also kept on ``self.hparams``.
            model_path: directory used by ``save`` and ``restore``.
            scope: variable scope name, also used as checkpoint file prefix.
        """
        self.sess = sess
        # todo remove
        self.hparams = hparams

        # todo
        self.model_path = model_path
        self.scope = scope
        # Sampled replies in previous session,
        # this is necessary to back propagation.
        # Created before _build_decoder, which reads self.sampled.
        self.sampled = tf.placeholder(tf.int32, name="sampled")

        # Used to store previously inferred by beam_search.
        # Created before _build_decoder, which reads self.beam_predicted_ids.
        self.beam_predicted_ids = tf.placeholder(tf.int32,
                                                 name="beam_predicted_ids")
        self.enc_inputs, self.enc_inputs_lengths, enc_outputs, enc_state, emb_encoder = self._build_encoder(
            hparams, scope)

        self.dec_inputs, self.dec_tgt_lengths, self._logits, self.sample_logits, self.sample_replies, self.sample_log_prob, self.infer_logits, self.replies, self.beam_replies, self.log_probs_beam_op = self._build_decoder(
            hparams, self.enc_inputs_lengths, emb_encoder,
            enc_state, enc_outputs)
        self._log_probs = tf.nn.log_softmax(self.infer_logits)

        # The reward placeholder must exist before the RL optimizers are
        # built, and _build_seq2seq_optimizer must run first because the RL
        # optimizers read self.global_step.
        self.reward = tf.placeholder(tf.float32, name="reward")
        self.tgt_labels, self.global_step, self.loss, self.train_op = self._build_seq2seq_optimizer(
            hparams, self._logits)
        self.rl_loss, self.rl_train_op = self._build_rl_optimizer(hparams)
        self.beam_rl_loss, self.beam_rl_train_op = self._build_beam_rl_optimizer(
            hparams)

        # Both summaries report self.loss; they only differ in their tag.
        self.train_loss_summary = tf.summary.scalar("loss", self.loss)
        self.val_loss_summary = tf.summary.scalar("validation_loss",
                                                  self.loss)
        self.merged_summary = tf.summary.merge_all()

        # Initialize saver after model created
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    def train(self, enc_inputs, enc_inputs_lengths, target_labels,
              dec_inputs, dec_target_lengths):

        feed_dict = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths,
            self.tgt_labels: target_labels,
            self.dec_inputs: dec_inputs,
            self.dec_tgt_lengths: dec_target_lengths,
        }
        _, global_step, summary = self.sess.run(
            [self.train_op, self.global_step, self.train_loss_summary],
            feed_dict=feed_dict)

        return global_step, summary

    def infer(self, enc_inputs, enc_inputs_lengths):
        infer_feed_dic = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths,
        }
        return self.sess.run(self.replies, feed_dict=infer_feed_dic)

    def log_probs(self, enc_inputs, enc_inputs_lengths):
        print("enc_inputs.shape", enc_inputs.shape)    
        print("enc_inputs_lengths", enc_inputs_lengths)
        infer_feed_dic = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths,
        }
        return self.sess.run(self._log_probs, feed_dict=infer_feed_dic)

    def infer_beam_search(self, enc_inputs, enc_inputs_lengths):
        """
        :return: (replies: [batch_size, decoder_length, beam_size],
                  logits: [batch_size, decoder_length, vocab_size]))
        """
        infer_feed_dic = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths,
        }
        return self.sess.run([self.beam_replies, self.infer_logits], 
                             feed_dict=infer_feed_dic)

    def sample(self, enc_inputs, enc_inputs_lengths):
        infer_feed_dic = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths,
        }

        replies, logits = self.sess.run(
            [self.sample_replies, self.sample_logits],
            feed_dict=infer_feed_dic)
        return replies, logits

    def batch_loss(self, enc_inputs, enc_inputs_lengths, tgt_labels,
                   dec_inputs, dec_tgt_lengths):
        feed_dict = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths,
            self.tgt_labels: tgt_labels,
            self.dec_inputs: dec_inputs,
            self.dec_tgt_lengths: dec_tgt_lengths,
        }
        return self.sess.run([self.loss, self.val_loss_summary],
                             feed_dict=feed_dict)

    def seq_len(self, seq):
        try:
            # length includes the first eos_id.
            return seq.index(self.hparams.eos_id) + 1
        except ValueError:
            return self.hparams.encoder_length

    def train_with_reward(self, enc_inputs, enc_inputs_lengths, sampled,
                          reward):
        feed_dict = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths,
            self.sampled: sampled,
            self.reward: reward
        }

        _, global_step = self.sess.run(
            [self.rl_train_op, self.global_step],
            feed_dict=feed_dict)
        return global_step

    def log_probs_beam(self, enc_inputs, enc_inputs_lengths,
                       beam_predicted_ids):
        """ Return log_probs_beam: [[batch_size, dec_length, beam_size]
            This is used in rl training, where we need P_seq2seq(a| p_i, q_i)
        """
        feed_dict = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths,
            self.beam_predicted_ids: beam_predicted_ids,
        }

        return self.sess.run(self.log_probs_beam_op, feed_dict=feed_dict)

    def train_beam_with_reward(self, enc_inputs, enc_inputs_lengths,
                               beam_predicted_ids,
                               reward):
        feed_dict = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths,
            self.beam_predicted_ids: beam_predicted_ids,
            self.reward: reward
        }

        _, global_step = self.sess.run(
            [self.beam_rl_train_op, self.global_step],
            feed_dict=feed_dict)
        return global_step

    def reward_ease_of_answering(self, max_len, enc_inputs,
                                 enc_inputs_lengths,
                                 dull_responses):
        """ Return reward for ease of answering.
            See Deep Reinforcement Learning for Dialogue Generation
            for more details.

        Args:
            enc_inputs: [encoder_length, batch_size], eg) tweets
            dull_responses: [number of pre-defined dull responses,
            decoder_length or less than decoder_length].
            eg) [["I'm", "Good"], ["fine"]]

        Returns:
            Return reward for ease of answering.
        """
        inference_feed_dict = {
            self.enc_inputs: enc_inputs,
            self.enc_inputs_lengths: enc_inputs_lengths
        }

        # Logits
        #   logits_value: [batch_size, actual_decoder_length, vocab_size]
        logits_batch_value = self.sess.run(self.infer_logits,
                                           feed_dict=inference_feed_dict)

        # Note that encoder_inputs here is time major.
        reward = np.ones((self.hparams.batch_size, max_len))
        print(
            "constructing reward{}{}".format(self.hparams.batch_size, max_len))
        # For each batch: [actual_decoder_length, vocab_size]
        for i, logits in enumerate(logits_batch_value):
            p_array = []
            for dull_response in dull_responses:
                p = 1
                # Note that dull_response and _logits don't
                # always have same length, but zip takes care of the case.
                for word_id, logit in zip(dull_response, logits):
                    # Apply softmax first, see definition of softmax.
                    norm = (self._softmax(logit))[word_id]
                    p *= norm
                # This is P(dull_response|encoder_input)
                p = np.log(p) / len(dull_response)
                p_array.append(p)
            batch_p = np.sum(p_array) / len(dull_responses)
            batch_reward = -batch_p
            for j in range(max_len):
                reward[i][j] = batch_reward
        return reward

    def save(self, model_path=None):
        if model_path is None:
            model_path = self.model_path
        model_dir = "{}/{}".format(model_path, self.scope)
        self.saver.save(self.sess, model_dir, global_step=self.global_step)

    def restore(self):
        """Restore the latest checkpoint found in ``self.model_path``.

        Returns:
            True when a checkpoint was restored, False (after printing a
            notice) when the directory holds no checkpoint state.
        """
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        if not ckpt:
            print("Created fresh model.")
            return False
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        return True

    @staticmethod
    def _softmax(x):
        return np.exp(x) / np.sum(np.exp(x), axis=0)

    def _build_rl_optimizer(self, hparams):
        """Build the loss and train op for RL on sampled replies.

        The loss is ``-sum(sample_log_prob * reward) / batch_size``. Before
        it is evaluated, the first two dimensions of ``sample_log_prob`` and
        ``reward`` are asserted to be equal and the reward is printed.

        Returns:
            (loss, train_op)
        """
        # todo mask the sampling results
        sample_log_prob_shape = tf.shape(self.sample_log_prob)
        reward_shape = tf.shape(self.reward)
        # tf.Print wrappers: identity ops that log their inputs when run.
        reward_shape_print = tf.Print(reward_shape,
                                      [reward_shape],
                                      message="reward_shape")
        reward_print = tf.Print(self.reward,
                                [self.reward],
                                message="reward")

        asserts = [tf.assert_equal(sample_log_prob_shape[0],
                                   reward_shape_print[0],
                                   [self.sample_log_prob,
                                    self.reward]),
                   tf.assert_equal(sample_log_prob_shape[1],
                                   reward_shape_print[1],
                                   [self.sample_log_prob,
                                    self.reward]), reward_print
                   ]
        # The control dependency forces the asserts (and the reward print) to
        # run whenever the loss is evaluated.
        with tf.control_dependencies(asserts):
            loss = -tf.reduce_sum(
                self.sample_log_prob * self.reward) / tf.to_float(
                hparams.batch_size)
        train_op = self._build_optimizer_with_loss(self.global_step, hparams,
                                                   loss)
        return loss, train_op

    def _build_beam_rl_optimizer(self, hparams):
        """Build the loss and train op for RL on beam-search predictions.

        The loss is ``-sum(log_probs_beam_op * reward)`` divided by
        ``batch_size * beam_width``. The full shapes of both factors are
        asserted to be equal before the loss is evaluated.

        NOTE(review): with ``beam_width == 0`` (as in test_hparams) the
        divisor is 0 — confirm this op is never run in that configuration.

        Returns:
            (loss, train_op)
        """
        # todo mask the sampling results
        beam_log_probs_shape = tf.shape(self.log_probs_beam_op)
        # Logs several related shapes alongside for debugging.
        beam_log_probs_shape_print = tf.Print(beam_log_probs_shape,
                                              [beam_log_probs_shape,
                                               tf.shape(self.infer_logits),
                                               tf.shape(
                                                   self.beam_predicted_ids),
                                               tf.shape(self.beam_replies)],
                                              "beam_log_probs_shape")
        reward_shape = tf.shape(self.reward)
        reward_shape_print = tf.Print(reward_shape, [reward_shape],
                                      message="reward_shape")
        asserts = [tf.assert_equal(beam_log_probs_shape_print,
                                   reward_shape_print,
                                   [beam_log_probs_shape_print,
                                    self.reward])]
        with tf.control_dependencies(asserts):
            loss = -tf.reduce_sum(
                self.log_probs_beam_op * self.reward) / tf.to_float(
                hparams.batch_size * hparams.beam_width)
        train_op = self._build_optimizer_with_loss(self.global_step, hparams,
                                                   loss)
        return loss, train_op

    def _build_optimizer_with_loss(self, global_step, hparams, loss):
        """Return a train op minimising ``loss`` with plain gradient descent.

        Gradients are taken with respect to all trainable variables and
        clipped by global norm to ``hparams.max_gradient_norm``. Applying
        them also increments ``global_step``. The learning rate is the
        constant ``hparams.learning_rate``; ``hparams.learning_rate_decay``
        is not used here.
        """
        params = tf.trainable_variables()
        optimizer = tf.train.GradientDescentOptimizer(hparams.learning_rate)
        gradients = tf.gradients(loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, hparams.max_gradient_norm)
        # Place the update ops on the GPU when one is available.
        with tf.device(self.available_device()):
            train_op = optimizer.apply_gradients(
                zip(clipped_gradients, params), global_step=global_step)
        return train_op

    def _build_seq2seq_optimizer(self, hparams, logits):
        """Build the supervised cross-entropy loss and its train op.

        Also creates the ``tgt_labels`` placeholder and the non-trainable
        ``global_step`` variable.

        Returns:
            (tgt_labels, global_step, loss, train_op)
        """
        # Target labels
        #   As described in doc for sparse_softmax_cross_entropy_with_logits,
        #   labels should be [batch_size, decoder_target_lengths]
        #   instead of [batch_size, decoder_target_lengths, vocab_size].
        #   So labels should have indices instead of vocab_size classes.
        tgt_labels = tf.placeholder(tf.int32, shape=(
            hparams.batch_size, hparams.decoder_length), name="tgt_labels")
        # Loss
        #   tgt_labels: [batch_size, decoder_length]
        #   _logits: [batch_size, decoder_length, vocab_size]
        #   crossent: [batch_size, decoder_length]
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tgt_labels, logits=logits)
        # Mask out positions beyond each target's length so they do not
        # contribute to the loss.
        tgt_weights = tf.sequence_mask(self.dec_tgt_lengths,
                                       hparams.decoder_length,
                                       dtype=logits.dtype)
        crossent = crossent * tgt_weights
        crossent_by_batch = tf.reduce_sum(crossent, axis=1)
        # Summed over time steps, averaged over the batch.
        loss = tf.reduce_sum(crossent_by_batch) / tf.to_float(
            hparams.batch_size)
        # Train
        global_step = tf.get_variable(name="global_step", shape=[],
                                      dtype=tf.int32,
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        train_op = self._build_optimizer_with_loss(global_step, hparams, loss)
        return tgt_labels, global_step, loss, train_op

    @staticmethod
    def available_device():
        """Return '/gpu:0' when has_gpu0() is true, otherwise '/cpu:0'.

        Prints a banner every time the GPU is selected.
        """
        if not has_gpu0():
            return '/cpu:0'
        print("$$$ GPU ENABLED $$$")
        return '/gpu:0'

    @staticmethod
    def _build_encoder(hparams, scope):
        """Build the encoder: placeholders, shared embedding and LSTM stack.

        Returns:
            (enc_inputs, enc_inputs_lengths, enc_outputs, enc_state,
            emb_encoder); the first two are placeholders and the last is the
            embedding matrix that the decoder reuses.
        """
        # Encoder
        #   enc_inputs: [encoder_length, batch_size]
        #   This is time major where encoder_length comes
        #   first instead of batch_size.
        #   enc_inputs_lengths: [batch_size]
        enc_inputs = tf.placeholder(tf.int32, shape=(
            hparams.encoder_length, hparams.batch_size), name="enc_inputs")
        enc_inputs_lengths = tf.placeholder(tf.int32,
                                            shape=hparams.batch_size,
                                            name="enc_inputs_lengths")

        # Embedding
        #   We originally didn't share embedding between encoder and decoder.
        #   But now we share it. It makes much easier to calculate rewards.
        #   Matrix for embedding: [vocab_size, embedding_size]
        #   Should be shared between training and inference.
        with tf.variable_scope(scope):
            emb_encoder = tf.get_variable("emb_encoder",
                                          [hparams.vocab_size,
                                           hparams.embedding_size])

        # Look up embedding:
        #   enc_inputs: [encoder_length, batch_size]
        #   enc_emb_inputs: [encoder_length, batch_size, embedding_size]
        enc_emb_inputs = tf.nn.embedding_lookup(emb_encoder, enc_inputs)

        # LSTM cell.
        with tf.variable_scope(scope):
            # Should be shared between training and inference.
            # One BasicLSTMCell per layer, stacked into a MultiRNNCell.
            cells = []
            for _ in range(hparams.num_layers):
                cells.append(
                    tf.nn.rnn_cell.BasicLSTMCell(hparams.num_units))
            encoder_cell = tf.contrib.rnn.MultiRNNCell(cells)

            # Run Dynamic RNN
            #   enc_outputs: [encoder_length, batch_size, num_units]
            #   enc_state: [batch_size, num_units],
            #   this is final state of the cell for each batch.
            enc_outputs, enc_state = tf.nn.dynamic_rnn(encoder_cell,
                                                       enc_emb_inputs,
                                                       time_major=True,
                                                       dtype=tf.float32,
                                                       sequence_length=enc_inputs_lengths)

        return enc_inputs, enc_inputs_lengths, enc_outputs, enc_state, emb_encoder

    @staticmethod
    def _build_training_decoder(hparams, enc_inputs_lengths,
                                enc_state, enc_outputs, dec_cell,
                                dec_emb_inputs, dec_tgt_lengths,
                                projection_layer, scope):
        """Build the teacher-forced decoder used for supervised training.

        Returns:
            (logits, wrapped_dec_cell, initial_state): the projected decoder
            outputs, plus the cell and initial state obtained from
            ``_attention_wrapper``.
        """

        # Batch size taken from the fed lengths at run time.
        dynamic_batch_size = tf.shape(enc_inputs_lengths)[0]
        # reuse=False: this is the first call, which creates the variables.
        # NOTE(review): presumably wraps dec_cell with attention only when
        # hparams.use_attention is set — _attention_wrapper is defined
        # elsewhere; confirm.
        initial_state, wrapped_dec_cell = ChatbotModel._attention_wrapper(
            dec_cell, dynamic_batch_size, enc_inputs_lengths, enc_outputs,
            enc_state, hparams, scope, reuse=False)

        # Decoder with helper:
        #   dec_emb_inputs: [decoder_length, batch_size, embedding_size]
        #   dec_tgt_lengths: [batch_size] vector,
        #   which represents each target sequence length.
        with tf.variable_scope(scope):
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                dec_emb_inputs,
                dec_tgt_lengths,
                time_major=True)

        # Decoder and decode
        with tf.variable_scope(scope):
            with tf.variable_scope("training"):
                training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    wrapped_dec_cell, training_helper, initial_state,
                    output_layer=projection_layer)

        # Dynamic decoding
        #   final_outputs.rnn_output: [batch_size, decoder_length,
        #                             vocab_size], list of RNN state.
        #   final_outputs.sample_id: [batch_size, decoder_length],
        #                            list of argmax of rnn_output.
        #   final_state: [batch_size, num_units],
        #                list of final state of RNN on decode process.
        #   final_sequence_lengths: [batch_size], list of each decoded sequence.
        with tf.variable_scope(scope):
            final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
                training_decoder)

        if hparams.debug_verbose:
            print("rnn_output.shape=", final_outputs.rnn_output.shape)
            print("sample_id.shape=", final_outputs.sample_id.shape)
            print("final_state=", _final_state)
            print("final_sequence_lengths.shape=",
                  _final_sequence_lengths.shape)

        logits = final_outputs.rnn_output
        return logits, wrapped_dec_cell, initial_state

    def _build_decoder(self, hparams, enc_inputs_lengths, embedding_encoder,
                       enc_state, enc_outputs):
        """Build the training, greedy, beam-search and sampling decoders.

        All four share one decoder cell stack, one projection layer and the
        encoder's embedding matrix. ``self.beam_predicted_ids`` and
        ``self.sampled`` must already exist when this is called.

        Returns:
            (dec_inputs, dec_tgt_lengths, logits, sample_logits,
            sample_replies, sample_log_prob, infer_logits, replies,
            beam_replies, beam_log_probs)
        """
        # Decoder input
        #   dec_inputs: [decoder_length, batch_size]
        #   dec_tgt_lengths: [batch_size]
        #   This is ground truth target inputs for training.
        dec_inputs = tf.placeholder(tf.int32, shape=(
            hparams.decoder_length, hparams.batch_size), name="dec_inputs")
        dec_tgt_lengths = tf.placeholder(tf.int32,
                                         shape=hparams.batch_size,
                                         name="dec_tgt_lengths")

        # Look up embedding:
        #   dec_inputs: [decoder_length, batch_size]
        #   decoder_emb_inp: [decoder_length, batch_size, embedding_size]
        dec_emb_inputs = tf.nn.embedding_lookup(embedding_encoder,
                                                dec_inputs)

        # https://stackoverflow.com/questions/39573188/output-projection-in-seq2seq-model-tensorflow
        # Internally, a neural network operates on dense vectors of some size,
        # often 256, 512 or 1024 floats (let's say 512 for here).
        # But at the end it needs to predict a word
        # from the vocabulary which is often much larger,
        # e.g., 40000 words. Output projection is the final linear layer
        # that converts (projects) from the internal representation
        #  to the larger one.
        # So, for example, it can consist of a 512 x 40000 parameter matrix
        # and a 40000 parameter for the bias vector.
        # Note that the layer built here has no bias (use_bias=False).
        projection_layer = layers_core.Dense(hparams.vocab_size, use_bias=False)

        # We share this between training and inference.
        cells = []
        for _ in range(hparams.num_layers):
            cells.append(tf.nn.rnn_cell.BasicLSTMCell(hparams.num_units))
        dec_cell = tf.contrib.rnn.MultiRNNCell(cells)

        # Training graph
        # wrapped_dec_cell and initial_state are not used further in this
        # method; the inference builders below receive dec_cell instead.
        logits, wrapped_dec_cell, initial_state = self._build_training_decoder(
            hparams, enc_inputs_lengths, enc_state, enc_outputs,
            dec_cell, dec_emb_inputs, dec_tgt_lengths,
            projection_layer, self.scope)

        # Greedy Inference graph
        infer_logits, replies = self._build_greedy_inference(hparams,
                                                             embedding_encoder,
                                                             enc_state,
                                                             enc_inputs_lengths,
                                                             enc_outputs,
                                                             dec_cell,
                                                             projection_layer,
                                                             self.scope)

        # Beam Search Inference graph
        beam_replies = self._build_beam_search_inference(hparams,
                                                         enc_inputs_lengths,
                                                         embedding_encoder,
                                                         enc_state,
                                                         enc_outputs,
                                                         dec_cell,
                                                         projection_layer,
                                                         self.scope)

        # Computed from the greedy inference logits and the fed
        # beam_predicted_ids placeholder.
        beam_log_probs = self._log_probs_beam(infer_logits,
                                              self.beam_predicted_ids)

        # Sample Inference graph
        sample_logits, sample_replies = self._build_sample_inference(hparams,
                                                                     embedding_encoder,
                                                                     enc_state,
                                                                     enc_inputs_lengths,
                                                                     enc_outputs,
                                                                     dec_cell,
                                                                     projection_layer,
                                                                     self.scope)
        indices = self._convert_indices(self.sampled)
        #        print_indices0 = tf.Print(indices, [tf.shape(indices)],
        #                                  message="OPT:indices.shape")
        #        print_indices1 = tf.Print(print_indices0, [tf.shape(sample_logits)],
        #                                  message="OPT:sample_logits.shape")
        #        print_indices2 = tf.Print(print_indices1, [tf.shape(sample_replies)],
        #                                  message="OPT:sample_replies.shape")

        # NOTE(review): values are gathered straight from sample_logits; no
        # log_softmax is applied in this method — confirm whether the sample
        # logits are already normalised before treating them as log-probs.
        sample_log_prob = tf.gather_nd(sample_logits, indices)
        # Identity op that logs the gathered tensor's shape when evaluated.
        sample_log_prob0 = tf.Print(sample_log_prob,
                                    [tf.shape(sample_log_prob)],
                                    message="OPT:sample_log_prob")
        return dec_inputs, dec_tgt_lengths, logits, sample_logits, sample_replies, sample_log_prob0, infer_logits, replies, beam_replies, beam_log_probs

    @staticmethod
    def _build_greedy_inference(hparams, embedding_encoder, enc_state,
                                encoder_inputs_lengths, encoder_outputs,
                                dec_cell, projection_layer, scope):
        """Build the greedy inference branch of the decoder.

        The batch size is taken at run time from ``encoder_inputs_lengths``;
        every sequence starts with ``hparams.sos_id`` and ``hparams.eos_id``
        is used as the end token of the greedy helper.

        Returns:
            The ``(infer_logits, replies)`` pair produced by
            ``ChatbotModel._dynamic_decode``.
        """
        batch_size = tf.shape(encoder_inputs_lengths)[0]
        start_tokens = tf.fill([batch_size], hparams.sos_id)
        greedy_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding_encoder, start_tokens, hparams.eos_id)

        return ChatbotModel._dynamic_decode(
            dec_cell, batch_size, encoder_inputs_lengths, encoder_outputs,
            enc_state, hparams, greedy_helper, projection_layer, scope)

    @staticmethod
    def _build_beam_search_inference(hparams, encoder_inputs_lengths,
                                     embedding_encoder, enc_state,
                                     encoder_outputs, dec_cell,
                                     projection_layer, scope):
        """Build the beam-search inference branch of the decoder.

        With ``hparams.use_attention`` the encoder outputs, final state and
        input lengths are tiled ``hparams.beam_width`` times and wrapped in a
        Luong attention cell; otherwise only the encoder state is tiled and
        ``dec_cell`` is used directly.

        Returns:
            ``predicted_ids`` of the beam search decoder output.
        """
        assert hparams.beam_width != 0

        beam_width = hparams.beam_width
        batch_size = tf.shape(encoder_inputs_lengths)[0]

        # https://github.com/tensorflow/tensorflow/issues/11904
        with tf.variable_scope(scope, reuse=True):
            if hparams.use_attention:
                # encoder_outputs is time major, so transpose it to batch
                # major: [batch_size, encoder_length, num_units]
                memory = tf.transpose(encoder_outputs, [1, 0, 2])

                tiled_memory = tf.contrib.seq2seq.tile_batch(
                    memory, multiplier=beam_width)
                tiled_final_state = tf.contrib.seq2seq.tile_batch(
                    enc_state, multiplier=beam_width)
                tiled_lengths = tf.contrib.seq2seq.tile_batch(
                    encoder_inputs_lengths, multiplier=beam_width)

                attention = tf.contrib.seq2seq.LuongAttention(
                    hparams.num_units, tiled_memory,
                    memory_sequence_length=tiled_lengths)

                beam_cell = tf.contrib.seq2seq.AttentionWrapper(
                    dec_cell, attention,
                    attention_layer_size=hparams.num_units)

                # The zero state must cover every beam of every batch entry
                # before the tiled encoder state is cloned into it.
                zero_state = beam_cell.zero_state(
                    dtype=tf.float32,
                    batch_size=batch_size * beam_width)
                beam_initial_state = zero_state.clone(
                    cell_state=tiled_final_state)
            else:
                beam_cell = dec_cell
                beam_initial_state = tf.contrib.seq2seq.tile_batch(
                    enc_state,
                    multiplier=beam_width)

        beam_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
            cell=beam_cell,
            embedding=embedding_encoder,
            start_tokens=tf.fill([batch_size], hparams.sos_id),
            end_token=hparams.eos_id,
            initial_state=beam_initial_state,
            beam_width=beam_width,
            output_layer=projection_layer,
            length_penalty_weight=0.0)

        # The reply is capped at encoder_length steps: we are targeting
        # tweets, and this lets the reply be passed to another seq2seq
        # model without shortening it.
        with tf.variable_scope(scope, reuse=True):
            decoded = tf.contrib.seq2seq.dynamic_decode(
                beam_decoder, maximum_iterations=hparams.encoder_length)
        beam_outputs, _, _ = decoded
        return beam_outputs.predicted_ids

    @staticmethod
    def _build_sample_inference(hparams, embedding_encoder, enc_state,
                                enc_inputs_lengths, enc_outputs,
                                dec_cell, projection_layer, scope,
                                softmax_temperature=1.5):
        """Build the sampling inference branch of the decoder.

        Instead of taking the argmax at every step, the next token is drawn
        by ``tf.contrib.seq2seq.SampleEmbeddingHelper``.

        Args:
            hparams: provides ``sos_id`` and ``eos_id`` here and is forwarded
                to ``ChatbotModel._dynamic_decode``.
            embedding_encoder: embedding passed to the sampling helper.
            enc_state, enc_inputs_lengths, enc_outputs: encoder results,
                forwarded unchanged to ``ChatbotModel._dynamic_decode``.
            dec_cell: decoder cell shared with the other decoder branches.
            projection_layer: output layer applied by the decoder.
            scope: variable scope forwarded to ``_dynamic_decode``.
            softmax_temperature: temperature handed to the sampling helper.
                Defaults to 1.5, the value that used to be hard-coded.

        Returns:
            The ``(infer_logits, replies)`` pair produced by
            ``ChatbotModel._dynamic_decode``.
        """
        dynamic_batch_size = tf.shape(enc_inputs_lengths)[0]
        inference_helper = tf.contrib.seq2seq.SampleEmbeddingHelper(
            embedding_encoder,
            tf.fill([dynamic_batch_size], hparams.sos_id), hparams.eos_id,
            softmax_temperature=softmax_temperature)

        infer_logits, replies = ChatbotModel._dynamic_decode(
            dec_cell, dynamic_batch_size, enc_inputs_lengths, enc_outputs,
            enc_state, hparams, inference_helper, projection_layer, scope)
        return infer_logits, replies

    @staticmethod
    def _dynamic_decode(dec_cell, dynamic_batch_size,
                        enc_inputs_lengths, enc_outputs, enc_state,
                        hparams, dec_helper, projection_layer, scope):
        """Decode with ``dec_helper`` and return logits and sampled ids.

        The decoder cell and its initial state come from
        ``ChatbotModel._attention_wrapper``; decoding runs for at most
        ``hparams.encoder_length`` steps.

        Returns:
            ``(infer_logits, replies)``: the ``rnn_output`` and ``sample_id``
            fields of the decoder output.
        """
        start_state, cell = ChatbotModel._attention_wrapper(
            dec_cell, dynamic_batch_size, enc_inputs_lengths, enc_outputs,
            enc_state, hparams, scope)

        with tf.variable_scope(scope), tf.variable_scope("infer"):
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell, dec_helper, start_state,
                output_layer=projection_layer)

        # The reply is capped at encoder_length steps: we are targeting
        # tweets, and this lets the reply be passed to another seq2seq
        # model without shortening it.
        step_limit = hparams.encoder_length

        # Decoding reuses the variables of the attention wrapper.
        with tf.variable_scope(scope, reuse=True):
            decoded = tf.contrib.seq2seq.dynamic_decode(
                decoder, maximum_iterations=step_limit)
        outputs, _, _ = decoded

        # infer_logits (rather than the training logits) is used when
        # calculating log_prob, because it does not require the
        # decoder_target_lengths input.
        return outputs.rnn_output, outputs.sample_id

    @staticmethod
    def _attention_wrapper(dec_cell, dynamic_batch_size, enc_inputs_lengths,
                           enc_outputs, enc_state, hparams, scope, reuse=True):
        """Return the decoder's initial state and (possibly wrapped) cell.

        Without ``hparams.use_attention`` the encoder state and ``dec_cell``
        are returned untouched. Otherwise ``dec_cell`` is wrapped with Luong
        attention over the encoder outputs and the wrapper's zero state is
        cloned with ``enc_state`` as its cell state.

        Returns:
            ``(initial_state, wrapped_dec_cell)``
        """
        # See https://github.com/tensorflow/tensorflow/issues/11904
        with tf.variable_scope(scope, reuse=reuse):
            if not hparams.use_attention:
                return enc_state, dec_cell

            # enc_outputs is time major, so transpose it to batch major:
            # [batch_size, encoder_length, num_units]
            memory = tf.transpose(enc_outputs, [1, 0, 2])

            attention = tf.contrib.seq2seq.LuongAttention(
                hparams.num_units,
                memory,
                memory_sequence_length=enc_inputs_lengths)

            attention_cell = tf.contrib.seq2seq.AttentionWrapper(
                dec_cell, attention,
                attention_layer_size=hparams.num_units)

            zero_state = attention_cell.zero_state(dynamic_batch_size,
                                                   tf.float32)
            return zero_state.clone(cell_state=enc_state), attention_cell

    # Convert sampled_indices to indices for tf.gather_nd.
    @staticmethod
    def _convert_indices(sampled_indices):
        """Turn token ids into (row, column, token) triples for tf.gather_nd.

        Args:
            sampled_indices: tensor whose first two dimensions are read as
                (batch_size, dec_length); each entry is a token id.

        Returns:
            A tensor stacking, along a new axis 2, the batch index, the
            time-step index and the token id of every entry.

        The ``tf.Print`` ops are debugging aids: they log shapes whenever
        the returned tensor is evaluated.
        """
        print_sampled_indices = tf.Print(sampled_indices,
                                         [tf.shape(sampled_indices)],
                                         message="sampled_indices")
        batch_size = tf.shape(print_sampled_indices)[0]
        dec_length = tf.shape(print_sampled_indices)[1]
        print_batch_size = tf.Print(batch_size, [batch_size, dec_length],
                                    message="(batch_size, dec_length)")
        # Row index repeated across every time step. `axis` replaces the
        # deprecated `dim` keyword of tf.expand_dims.
        first_indices = tf.tile(
            tf.expand_dims(tf.range(print_batch_size), axis=1),
            [1, dec_length])
        # Time-step index repeated for every row.
        second_indices = tf.reshape(
            tf.tile(tf.range(dec_length), [print_batch_size]),
            [print_batch_size, dec_length])
        print_first_indices = tf.Print(first_indices, [tf.shape(first_indices),
                                                       tf.shape(
                                                           second_indices)],
                                       message="(first_indices, second_indices)")
        return tf.stack([print_first_indices, second_indices, sampled_indices],
                        axis=2)

    @staticmethod
    def _log_probs_beam(logits, predicted_ids):
        """Return the log-probabilities of the ids chosen by beam search.

        Args:
            logits: [batch_size, dec_length, vocab_size]
            predicted_ids: [batch_size, beam_width, dec_length]

        Returns:
            log_prob: [batch_size, beam_width, dec_length]
        """
        # log_softmax normalises over the last axis of logits.
        log_probs = tf.nn.log_softmax(logits)

        def gather_for_beam(beam_index):
            # Log-probabilities of the ids predicted by a single beam.
            return tf.gather_nd(log_probs, ChatbotModel._convert_indices(
                predicted_ids[:, beam_index]))

        beam_width = tf.shape(predicted_ids)[1]
        first_beam = tf.constant(0)
        per_beam = tf.TensorArray(tf.float32, size=1, dynamic_size=True)

        def has_more_beams(beam_index, _):
            return tf.less(beam_index, beam_width)

        def process_beam(beam_index, collected):
            return [tf.add(beam_index, 1),
                    collected.write(beam_index, gather_for_beam(beam_index))]

        _, per_beam = tf.while_loop(has_more_beams, process_beam,
                                    [first_beam, per_beam])
        # The stacked array has the beam axis first; swap it with the
        # batch axis.
        return tf.transpose(per_beam.stack(), [1, 0, 2])



In [0]:
class TrainDataSource:
    """Bundle the datasets and vocabulary generated from one source file.

    Attributes set by the constructor: ``train_dataset``, ``valid_dataset``,
    ``vocab``, ``rev_vocab`` and ``vocab_path``.
    """

    def __init__(self, source_path, hparams, vocab_path=None):
        Shell.download_file_if_necessary(source_path)
        data_generator = TrainDataGenerator(source_path=source_path,
                                            hparams=hparams)
        # data_generator.remove_generated()
        dataset, self.vocab, self.rev_vocab = data_generator.generate(
            vocab_path)
        # No shuffling here: two data sources need to stay aligned.
        self.train_dataset = dataset.repeat()
        self.vocab_path = data_generator.vocab_path
        # todo(higepon): Use actual validation dataset.
        # For now validation repeats the training dataset.
        self.valid_dataset = dataset.repeat()


class Trainer:
    def __init__(self):
        self.loss_step = []
        self.val_losses = []
        self.reward_step = []
        self.reward_average = []
        self.last_saved_time = datetime.datetime.now()
        self.last_stats_time = datetime.datetime.now()
        self.num_stats_per = 20
        self.reward_history = []
        self._valid_tweets = ["„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô„ÄÇÂØí„ÅÑ„Åß„Åô„Å≠„ÄÇ", "„Åï„Å¶Â∏∞„Çç„ÅÜ„ÄÇÊòéÊó•„ÅØÊó©„ÅÑ„ÄÇ", "‰ªäÂõû„ÇÇ„Çà„Çç„Åó„Åè„Åß„Åô„ÄÇ"]

    def train_seq2seq_rl(self, seq2seq_hparams, hparams, source_path, resume):
        """Prepare a seq2seq model, then run RL training against it.

        Unless ``resume`` is truthy the seq2seq model is trained first (see
        ``_resume_if_necessary``). The seq2seq model is then re-created,
        must restore successfully, and is passed to ``train_rl``.
        """
        self._resume_if_necessary(resume, source_path, hparams, seq2seq_hparams)
        device = self._available_device()
        with tf.device(device):
            seq2seq_model = self.create_model(seq2seq_hparams)
            was_restored = seq2seq_model.restore()
            assert was_restored
        self.train_rl(hparams, source_path, self._valid_tweets, seq2seq_model)

    def train_seq2seq_beam_rl(self, seq2seq_hparams, hparams, source_path,
                              resume):
        """Prepare a seq2seq model, then run beam-search RL training.

        Unless ``resume`` is truthy the seq2seq model is trained first (see
        ``_resume_if_necessary``). The seq2seq model is then re-created and
        must restore successfully.

        NOTE(review): the final call passes four positional arguments, but
        ``train_beam_rl`` as defined in this class takes five required ones
        (rl_hparams, seq2seq_hparams, backward_hparams, seq2seq_source_path,
        rl_source_path). This call looks stale and needs to be reconciled
        with that signature.
        """
        self._resume_if_necessary(resume, source_path, hparams, seq2seq_hparams)
        device = self._available_device()
        with tf.device(device):
            seq2seq_model = self.create_model(seq2seq_hparams)
            was_restored = seq2seq_model.restore()
            assert was_restored
        self.train_beam_rl(hparams, source_path, self._valid_tweets, seq2seq_model)

    def _resume_if_necessary(self, resume, source_path, hparams, seq2seq_hparams):
        if resume:
            return
        self.train_seq2seq(seq2seq_hparams, source_path, self._valid_tweets,
                           should_clean_saved_model=False)
        Shell.copy_all_files(seq2seq_hparams.model_path, hparams.model_path)

    def train_rl(self, hparams, source_path, tweets=None, seq2seq_model=None):
        """Train the model with rewards computed on sampled replies.

        Each step samples replies from the model, scores them with
        ``seq2seq_model.reward_ease_of_answering`` and passes the reward to
        ``model.train_with_reward``. Every 100 steps the model is saved and
        restored, and the inferences for ``tweets`` are printed.

        Args:
            hparams: hyper-parameters of the RL model; ``model_path``,
                ``num_train_steps`` and ``batch_size`` are read here.
            source_path: training data source handed to ``TrainDataSource``.
            tweets: tweets printed at every checkpoint; defaults to none.
            seq2seq_model: model that scores the sampled replies. Required,
                despite the ``None`` default kept for signature
                compatibility.

        Raises:
            ValueError: if ``seq2seq_model`` is not provided.
        """
        if seq2seq_model is None:
            # Fail before any expensive setup instead of with an
            # AttributeError in the middle of the first training step.
            raise ValueError("train_rl requires a seq2seq_model")
        # A None sentinel avoids a list default shared across calls.
        tweets = [] if tweets is None else tweets

        print("===== Train RL {} ====".format(source_path))
        now = datetime.datetime.today().strftime("%Y%m%d%H%M%S")
        print("{}_rl_test".format(now))
        print("hparams")
        print_hparams(hparams)

        data_source = TrainDataSource(source_path, hparams)
        easy_tf_log.set_dir(hparams.model_path)
        Shell.download_model_data_if_necessary(hparams.model_path)
        device = self._available_device()
        with tf.device(device):
            model = self.create_model(hparams)

        vocab = data_source.vocab
        rev_vocab = data_source.rev_vocab
        infer_helper = InferenceHelper(model, vocab, rev_vocab)

        graph = model.sess.graph
        # Not referenced again; the binding is kept so the writer's lifetime
        # is unchanged.
        writer = tf.summary.FileWriter(hparams.model_path, graph)
        last_saved_time = datetime.datetime.now()
        with graph.as_default():
            train_data_next = data_source.train_dataset.make_one_shot_iterator().get_next()

            # One ordinary training step before the reward-driven loop.
            data = model.sess.run(train_data_next)
            model.train(data[0], data[1], data[2], data[3], data[4])

            # The dull responses do not change between steps, so they are
            # tokenized once.
            dull_responses_ids = self._dull_response_ids(infer_helper)

            good_value_key = "reward"
            avg_good_value = 0
            for step in range(hparams.num_train_steps):
                train_data = model.sess.run(train_data_next)
                sampled_replies, _ = model.sample(train_data[0], train_data[1])
                for i in range(hparams.batch_size):
                    print("{}->{}\n".format(
                        infer_helper.ids_to_words(train_data[0][:, i]),
                        infer_helper.ids_to_words(sampled_replies[i])))

                # sampled_replies are adjusted to enc_inputs because the
                # seq2seq model needs a fixed length, but the reward uses
                # the actual max_len of the sampled replies.
                # (`_reward_for_test` is a drop-in debugging alternative to
                # the reward computed below.)
                enc_inputs, enc_inputs_lengths = self.format_enc_inputs(
                    hparams, model, sampled_replies)
                max_len = len(sampled_replies[0])
                reward = seq2seq_model.reward_ease_of_answering(
                    max_len,
                    enc_inputs,
                    enc_inputs_lengths, dull_responses_ids)
                print("reward", reward)
                good_value = np.mean(reward)

                avg_good_value += good_value
                if step != 0 and step % 20 == 0:
                    print("{}:{}".format(good_value_key, good_value))
                if step != 0 and step % 60 == 0:
                    self._print_log(good_value_key, avg_good_value / 60)
                    avg_good_value = 0

                global_step = model.train_with_reward(train_data[0],
                                                      train_data[1],
                                                      sampled_replies, reward)
                if step != 0 and step % 100 == 0:
                    print("save and restore")
                    model.save()
                    is_restored = model.restore()
                    assert is_restored
                    self._print_inferences(step, tweets, infer_helper)
                    now = datetime.datetime.now()
                    print("delta:", (now - last_saved_time).total_seconds())
                    last_saved_time = now
                    print("step={}, global_step={}".format(step, global_step))

    def train_beam_rl(self, rl_hparams, seq2seq_hparams, backward_hparams,
                      seq2seq_source_path, rl_source_path, tweets=None):
        """Train the RL model with rewards computed on beam-search replies.

        For every beam-search reply two per-reply scores are computed and
        summed into the reward handed to ``model.train_beam_with_reward``:

        * ``reward_s``: mean of ``seq2seq_model.log_probs_beam`` over the
          reply tokens up to and including the first ``eos_id``;
        * ``reward_qi``: mean of ``backward_model.log_probs`` evaluated at
          the target ids taken from the RL data source.

        Every 100 steps the model is saved and restored, and the inferences
        for ``tweets`` are printed.

        Args:
            rl_hparams: hyper-parameters of the model being trained.
            seq2seq_hparams: hyper-parameters of the forward scoring model.
            backward_hparams: hyper-parameters of the backward scoring model.
            seq2seq_source_path: data source providing the encoder inputs.
            rl_source_path: data source providing the backward targets.
            tweets: tweets printed at every checkpoint; defaults to none.
        """
        # A None sentinel avoids a list default shared across calls.
        tweets = [] if tweets is None else tweets

        print("===== Train Beam RL {} ====".format(seq2seq_source_path))
        now = datetime.datetime.today().strftime("%Y%m%d%H%M%S")
        print("{}_beam_rl_test".format(now))
        print("rl_hparams")
        print_hparams(rl_hparams)

        seq2seq_data_source = TrainDataSource(seq2seq_source_path, rl_hparams)
        rl_data_source = TrainDataSource(rl_source_path, rl_hparams)
        easy_tf_log.set_dir(rl_hparams.model_path)
        Shell.download_model_data_if_necessary(rl_hparams.model_path)
        device = self._available_device()
        with tf.device(device):
            model = self.create_model(rl_hparams)
            seq2seq_model = self.create_model(seq2seq_hparams)
            backward_model = self.create_model(backward_hparams)

        vocab = seq2seq_data_source.vocab
        rev_vocab = seq2seq_data_source.rev_vocab
        infer_helper = InferenceHelper(model, vocab, rev_vocab)

        graph = model.sess.graph
        # Not referenced again; the binding is kept so the writer's lifetime
        # is unchanged.
        writer = tf.summary.FileWriter(rl_hparams.model_path, graph)
        last_saved_time = datetime.datetime.now()
        with graph.as_default():
            seq2seq_train_data_next = seq2seq_data_source.train_dataset.make_one_shot_iterator().get_next()
            rl_train_data_next = rl_data_source.train_dataset.make_one_shot_iterator().get_next()

            # One ordinary training step before the reward-driven loop.
            data = model.sess.run(seq2seq_train_data_next)
            model.train(data[0], data[1], data[2], data[3], data[4])

            avg_good_value = 0
            for step in range(rl_hparams.num_train_steps):
                seq2seq_train_data = model.sess.run(seq2seq_train_data_next)
                rl_train_data = model.sess.run(rl_train_data_next)
                beam_predicted_ids, _ = model.infer_beam_search(
                    seq2seq_train_data[0], seq2seq_train_data[1])

                # beam_predicted_ids is indexed as [batch, time, beam].
                max_len = len(beam_predicted_ids[0, :, 0])

                # log_probs_beam: [batch_size, dec_length, beam_width]
                log_probs_beam = seq2seq_model.log_probs_beam(
                    seq2seq_train_data[0],
                    seq2seq_train_data[1],
                    beam_predicted_ids)
                print("xxxxxx, log_prob_beams,", log_probs_beam.shape)

                # Calc 1/N_a * logP_seq2seq(a|p_i, q_i) for each beam_predicted
                reward_s = np.zeros(
                    (rl_hparams.batch_size, max_len, rl_hparams.beam_width))
                for batch in range(rl_hparams.batch_size):
                    for beam in range(rl_hparams.beam_width):
                        print("log_probs_beam[batch, :, beam]", log_probs_beam[batch, :, beam].shape)
                        # NOTE(review): the original author marked this
                        # indexing as suspicious; confirm the axis order.
                        tweet = beam_predicted_ids[batch][:, beam]
                        print("tweet", tweet)
                        tweet_len = 0
                        p = 0
                        # Sum the log-probabilities up to and including eos.
                        for i, token_id in enumerate(tweet):
                            p += log_probs_beam[batch][i][beam]
                            tweet_len += 1
                            if token_id == rl_hparams.eos_id:
                                break

                        assert tweet_len != 0
                        p /= tweet_len
                        print("p", p)
                        # The same per-reply score is used at every step.
                        reward_s[batch, :, beam] = p
                print("reward_s", reward_s)

                # TODO
                #   Change log_prob_beam to return actual log_prob using
                #   log softmax. Which axis?
                #   Change and rename model.logits to return log_prob using
                #   log softmax.

                # Calc 1/N_qi * logP_backward(qi|a)
                # todo: vectorized implementation here
                # Open question from the original author: is log_probs taken
                # after softmax, or is it raw logits?
                #       probs = tf.nn.softmax(logits)
                # log_probs = tf.nn.log_softmax(logits)
                reward_qi = np.zeros(
                    (rl_hparams.batch_size, max_len, rl_hparams.beam_width))

                # target label with eos.
                # [batch_size, dec_length]
                qi = rl_train_data[2]
                for beam in range(rl_hparams.beam_width):
                    replies = beam_predicted_ids[:, :, beam]
                    a_enc_inputs, a_enc_inputs_lengths = self.format_enc_inputs(rl_hparams, model, replies)

                    print("a_enc_inputs.shape", a_enc_inputs.shape)

                    print("a_enc_inputs_lengths", a_enc_inputs_lengths)

                    # [batch_size, dec_len, vocab_size]
                    log_probs = backward_model.log_probs(a_enc_inputs,
                                                         a_enc_inputs_lengths)
                    print("log_probs", log_probs.shape)
                    for batch in range(rl_hparams.batch_size):
                        tweet = qi[batch]
                        print("tweet", tweet)
                        tweet_len = 0
                        p = 0
                        # NOTE(review): flagged as suspicious by the original
                        # author. The values are log-probabilities, so they
                        # are summed rather than multiplied.
                        for i, token_id in enumerate(tweet):
                            # log_probs shape is supposed to be
                            # [batch_size, dec_length, vocab_size], but it
                            # sometimes becomes
                            # [batch_size, smaller_value, vocab_size].
                            # This is because we're using GreedyDecoder:
                            # dynamic_decode finishes the decoding process
                            # when it sees eos_id, so if every reply ends
                            # early the time axis is shorter.
                            if i < len(log_probs[batch]):
                                p += log_probs[batch][i][token_id]
                            tweet_len += 1
                            if token_id == rl_hparams.eos_id:
                                break

                        assert tweet_len != 0
                        p /= tweet_len
                        print("p2", p)
                        # The same per-reply score is used at every step.
                        reward_qi[batch, :, beam] = p

                for i in range(rl_hparams.batch_size):
                    print("[{}]:{}".format(i, infer_helper.ids_to_words(seq2seq_train_data[0][:, i])))
                    for j in range(rl_hparams.beam_width):
                        print("    beam_predicted[{}]:{} prob={:06.2f} backward_prob={:06.2f}".format(j, infer_helper.ids_to_words(
                            beam_predicted_ids[i, :, j]), reward_s[i][0][j].item(), reward_qi[i][0][j].item()))

                print("reward_qi", reward_qi)

                good_value = 1
                good_value_key = "beam"
                # NOTE(review): rebinding kept from the original; from here
                # on the hyper-parameters are read from the model.
                rl_hparams = model.hparams

                print("beam max_len", max_len)
                reward = reward_s + reward_qi

                avg_good_value += good_value
                if step != 0 and step % 20 == 0:
                    print("{}:{}".format(good_value_key, good_value))
                if step != 0 and step % 60 == 0:
                    self._print_log(good_value_key, avg_good_value / 60)
                    avg_good_value = 0

                global_step = model.train_beam_with_reward(seq2seq_train_data[0],
                                                           seq2seq_train_data[1],
                                                           beam_predicted_ids,
                                                           reward)
                if step != 0 and step % 100 == 0:
                    print("save and restore")
                    model.save()
                    is_restored = model.restore()
                    assert is_restored
                    self._print_inferences(step, tweets, infer_helper)
                    now = datetime.datetime.now()
                    print("delta:", (now - last_saved_time).total_seconds())
                    last_saved_time = now
                    print("step={}, global_step={}".format(step, global_step))

    def format_enc_inputs(self, hparams, model, replies):
        enc_inputs = []
        enc_inputs_lengths = []

        # replies: [batch_size, dec_length]
        for reply in replies:
            reply_len = model.seq_len(reply.tolist())
            # Safe guard: sampled reply has sometimes 0 len.
            #            adjusted_len = hparams.encoder_length if reply_len == 0 else reply_len
            enc_inputs_lengths.append(reply_len)
            if reply_len <= hparams.encoder_length:
                padded_reply = np.append(reply, ([hparams.pad_id] * (hparams.encoder_length - len(reply))))
                enc_inputs.append(padded_reply)
            else:
                raise Exception(
                    "Inferred"
                    " reply shouldn't be longer than encoder_input")

        # Expected enc_inputs param is time major.
        enc_inputs = np.transpose(np.array(enc_inputs))
        return enc_inputs, enc_inputs_lengths

    @staticmethod
    def _reward_for_test(model, sampled_replies):
        max_len = len(sampled_replies[0])
        # default negative reward
        reward = np.ones((model.hparams.batch_size, max_len)) * -1.0
        good_value = 0
        for i, reply in enumerate(sampled_replies):
            reply_len = model.input_length(reply.tolist())
            if reply_len == 8 or reply_len == 0 or reply_len == 1:
                for r in range(max_len):
                    reward[i][r] = -1.0
            else:
                good_value += 1
                for r in range(max_len):
                    reward[i][r] = 1.0
        return good_value, reward

    def _dull_response_ids(self, infer_helper):
        dull_responses = ['„Åì„Çå', "„Åù„Çå", "„ÅÇ„Çä„Åå„Å®„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô", "„Åä„Å§„Åã„Çå", "„Åù„Çå„Å™www",
                          "„Åä„ÅØ„Çà(Àô-Àô)",
                          "„Åä„ÅØ„ÇàÔºÅ", "„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„ÅôÔºÅ", "„Åä„Å§„Åã„Çå„Åï„Åæ„Åß„Åô"]
        dull_responses_ids = [self.tokenize(infer_helper, text) for text in
                              dull_responses]
        return dull_responses_ids

    def train_seq2seq_swapped(self, hparams, tweets_path, validation_tweets,
                              should_clean_saved_model=True, vocab_path=None):
        """Train seq2seq on the source/target-swapped version of the data.

        The tweets file is downloaded if necessary, a swapped copy is
        generated from it, and training is delegated to ``train_seq2seq``,
        whose result is returned.
        """
        Shell.download_file_if_necessary(tweets_path)
        swapped_tweets_path = (
            TrainDataGenerator.generate_source_target_swapped(tweets_path))
        return self.train_seq2seq(
            hparams, swapped_tweets_path, validation_tweets,
            should_clean_saved_model, vocab_path)

    def train_seq2seq(self, hparams, tweets_path, val_tweets,
                      should_clean_saved_model=True, vocab_path=None):
        """Train a seq2seq model on ``tweets_path``.

        The saved model under ``hparams.model_path`` is cleaned first unless
        ``should_clean_saved_model`` is falsy. Returns whatever
        ``_train_loop`` returns.
        """
        print("===== Train Seq2Seq {} ====".format(tweets_path))
        print_hparams(hparams)

        if should_clean_saved_model:
            clean_model_path(hparams.model_path)

        source = TrainDataSource(tweets_path, hparams, vocab_path)
        return self._train_loop(source, hparams, val_tweets)

    def _print_inferences(self, global_step, tweets, helper, ):
        print("==== {} ====".format(global_step))
        len_array = []
        for tweet in tweets:
            len_array.append(len(helper.inferences(tweet)[0]))
            helper.print_inferences(tweet)
        self._print_log('average reply len', np.mean(len_array))

    @staticmethod
    def create_model(hparams):
        """Create a ChatbotModel in its own graph and session.

        The model is built under the 'root' variable scope. If
        ``model.restore()`` reports failure, the session's global variables
        are initialised instead.
        """
        # See https://www.tensorflow.org/tutorials/using_gpu#allowing_gpu_memory_growth
        session_config = tf.ConfigProto(log_device_placement=False)
        session_config.gpu_options.allow_growth = True

        graph = tf.Graph()
        session = tf.Session(graph=graph, config=session_config)
        with graph.as_default(), tf.variable_scope('root'):
            model = ChatbotModel(session, hparams,
                                 model_path=hparams.model_path)
            restored = model.restore()
            if not restored:
                session.run(tf.global_variables_initializer())

        return model

    def _train_loop(self, data_source,
                    hparams, tweets):
        """Create the model and run the generic training loop on it.

        Model data is downloaded if necessary, the model is created on the
        available device, and every training step calls ``model.train`` with
        the first five elements of the fetched batch.
        """
        Shell.download_model_data_if_necessary(hparams.model_path)

        with tf.device(self._available_device()):
            model = self.create_model(hparams)

        def train_step(**kwargs):
            # The training loop passes the fetched batch as `train_data`.
            batch = kwargs['train_data']
            return model.train(*(batch[k] for k in range(5)))

        return self._generic_train_loop(data_source, hparams, model,
                                        tweets, train_step)

    @staticmethod
    def _available_device():
        """Return ``'/gpu:0'`` when ``has_gpu0()`` is true, else ``'/cpu:0'``.

        A banner is printed when the GPU is selected.
        """
        if not has_gpu0():
            return '/cpu:0'
        print("$$$ GPU ENABLED $$$")
        return '/gpu:0'

    @staticmethod
    def tokenize(infer_helper, text):
        """Split ``text`` with MeCab (``-Owakati``) and map the words to ids.

        Args:
            infer_helper: object whose ``words_to_ids`` converts the word list.
            text: text to tokenize.

        Returns:
            The result of ``infer_helper.words_to_ids`` for the split words.
        """
        wakati_tagger = MeCab.Tagger("-Owakati")
        segmented = wakati_tagger.parse(text)
        return infer_helper.words_to_ids(segmented.split())

    def _generic_train_loop(self, data_source, hparams,
                            model,
                            tweets, train_func):
        """Run ``_raw_train_loop`` and send a push note when it fails.

        A ``KeyboardInterrupt`` is propagated without a notification.  Any
        other ``Exception`` is reported through ``pb.push_note`` and then
        re-raised.

        Returns:
            Whatever ``self._raw_train_loop`` returns.
        """
        try:
            result = self._raw_train_loop(data_source, hparams, model,
                                          train_func, tweets)
        except KeyboardInterrupt:
            # Manual interruption: no push notification.
            raise
        except Exception as e:
            pb.push_note("Train error", str(e))
            raise
        return result

    def _raw_train_loop(self, data_source, hparams,
                        model, train_func,
                        tweets):
        """Run ``hparams.num_train_steps`` training steps with periodic checks.

        Every step pulls one batch from ``data_source.train_dataset``, passes
        it to ``train_func(train_data=...)`` and writes the returned summary.
        Every ``self.num_stats_per`` steps (except step 0) the model is saved
        and immediately restored, sample inferences are printed, the
        validation loss is computed, plots are refreshed and the model is
        possibly copied to Drive.  On other steps a ``'.'`` is printed.

        Args:
            data_source: provides ``vocab``, ``rev_vocab``, ``train_dataset``
                and ``valid_dataset``.
            hparams: hyper-parameters (``model_path``, ``num_train_steps``).
            model: model exposing ``sess``, ``save`` and ``restore``.
            train_func: callable invoked as ``train_func(train_data=batch)``
                and returning ``(step, summary)``.
            tweets: tweets used for the periodic sample inferences.

        Returns:
            Tuple ``(model, infer_helper)``.
        """
        vocab = data_source.vocab
        rev_vocab = data_source.rev_vocab
        infer_helper = InferenceHelper(model, vocab, rev_vocab)
        graph = model.sess.graph
        with graph.as_default():
            train_data_next = data_source.train_dataset.make_one_shot_iterator().get_next()
            val_data_next = data_source.valid_dataset.make_one_shot_iterator().get_next()
            easy_tf_log.set_dir(hparams.model_path)
            writer = tf.summary.FileWriter(hparams.model_path, graph)
            try:
                self.last_saved_time = datetime.datetime.now()
                for i in range(hparams.num_train_steps):
                    train_data = model.sess.run(train_data_next)

                    step, summary = train_func(
                        train_data=train_data,
                    )
                    writer.add_summary(summary, step)

                    if i != 0 and i % self.num_stats_per == 0:
                        model.save(hparams.model_path)
                        # Restore right after saving and fail loudly if the
                        # checkpoint that was just written cannot be loaded.
                        is_restored = model.restore()
                        assert is_restored
                        self._print_inferences(step, tweets, infer_helper)
                        self._compute_val_loss(step, model, val_data_next,
                                               writer)
                        #                    self._print_stats(hparams, learning_rate)
                        self._plot_if_necessary()
                        self._save_model_in_drive(hparams)
                    else:
                        print('.', end='')
            finally:
                # Flush pending summaries and release the event file even
                # when training stops with an exception.
                writer.close()
        return model, infer_helper

    def _plot_if_necessary(self):
        if len(self.reward_average) > 0 and len(self.reward_average) % 30 == 0:
            self._plot(self.reward_step, self.reward_average,
                       y_label='reward average')
            self._plot(self.loss_step, self.val_losses,
                       y_label='validation_loss')

    def _print_stats(self, hparams, learning_rate):
        print("learning rate", learning_rate)
        delta = (
                    datetime.datetime.now() - self.last_stats_time).total_seconds() * 1000
        self._print_log("msec/data",
                        delta / hparams.batch_size / self.num_stats_per)
        self.last_stats_time = datetime.datetime.now()

    def _save_model_in_drive(self, hparams):
        now = datetime.datetime.now()
        delta_in_min = (now - self.last_saved_time).total_seconds() / 60

        if delta_in_min >= 60:
            self.last_saved_time = datetime.datetime.now()
            Shell.save_model_in_drive(hparams.model_path)

    @staticmethod
    def _print_log(key, value):
        """Send ``value`` to ``tflog`` and print it rounded to one decimal.

        The ``tflog`` key is ``key`` suffixed with ``[current_client_id]``;
        the printed line uses the plain ``key``.
        """
        tagged_key = "{}[{}]".format(key, current_client_id)
        tflog(tagged_key, value)
        rounded = round(value, 1)
        print("{}={}".format(key, rounded))

    @staticmethod
    def _plot(x, y, x_label="step", y_label='y'):
        """Plot ``y`` against ``x`` with matplotlib and show the figure.

        The series label and the y-axis label are both
        ``"<current_client_id>_<y_label>"``.
        """
        series_name = "{}_{}".format(current_client_id, y_label)
        plt.plot(x, y, label=series_name)
        plt.plot()
        plt.ylabel(series_name)
        plt.xlabel(x_label)
        plt.legend()
        plt.show()

    def _compute_val_loss(self, global_step, model, val_data_next,
                          writer):
        """Evaluate one validation batch and record its loss.

        The batch is fetched by running ``val_data_next`` in ``model.sess``
        and its first five elements are passed to ``model.batch_loss``.  The
        loss summary is written to ``writer``, the loss is logged, and both
        ``self.loss_step`` and ``self.val_losses`` are extended.

        Returns:
            The validation loss converted with ``.item()``.
        """
        batch = model.sess.run(val_data_next)
        raw_loss, loss_summary = model.batch_loss(batch[0], batch[1],
                                                  batch[2], batch[3],
                                                  batch[4])
        # np.float64 to native float
        val_loss = raw_loss.item()
        writer.add_summary(loss_summary, global_step)
        self._print_log("validation loss", val_loss)
        self.loss_step.append(global_step)
        self.val_losses.append(val_loss)
        return val_loss







In [0]:
class InferenceHelper:
    """Convert raw text into model inputs and model output ids back to text.

    Attributes:
        model: model exposing ``hparams``, ``infer`` and
            ``infer_beam_search``.
        vocab: mapping from word to id.
        rev_vocab: sequence mapping id to word.
    """

    def __init__(self, model, vocab, rev_vocab):
        self.model = model
        self.vocab = vocab
        self.rev_vocab = rev_vocab

    def inferences(self, tweet):
        """Return the replies inferred for ``tweet``.

        The first element comes from ``model.infer``; it is followed by
        ``hparams.beam_width`` candidates from ``model.infer_beam_search``.
        Every reply is passed through ``sanitize_text``.
        """
        encoder_inputs, encoder_inputs_lengths = self.create_inference_input(
            tweet)
        replies = self.model.infer(encoder_inputs, encoder_inputs_lengths)
        all_infer = [
            self.sanitize_text(self.ids_to_words(replies[0].tolist()))]
        beam_replies, _ = self.model.infer_beam_search(
            encoder_inputs, encoder_inputs_lengths)
        all_infer.extend(
            self.sanitize_text(self.ids_to_words(beam_replies[0][:, i]))
            for i in range(self.model.hparams.beam_width))
        return all_infer

    def sanitize_text(self, line):
        """Replace ``[EOS]`` with a space and ``[UNK]`` with a marker."""
        line = re.sub(r"\[EOS\]", " ", line)
        line = re.sub(r"\[UNK\]", "üí©", line)
        return line

    def print_inferences(self, tweet):
        """Print ``tweet`` followed by every inferred reply, one per line."""
        print(tweet)
        for i, reply in enumerate(self.inferences(tweet)):
            print("    [{}]{}".format(i, reply))

    def words_to_ids(self, words):
        """Map words to ids; unknown words become ``hparams.unk_id``."""
        return [
            self.vocab[word] if word in self.vocab
            else self.model.hparams.unk_id
            for word in words
        ]

    def ids_to_words(self, ids):
        """Concatenate, without separator, the words for ``ids``."""
        return "".join(self.rev_vocab[word_id] for word_id in ids)

    def create_inference_input(self, text):
        """Build encoder inputs and lengths for a single text.

        The text is sanitized, tokenized with MeCab, truncated to
        ``hparams.encoder_length`` ids and padded with ``hparams.pad_id``.
        The same column is repeated ``hparams.batch_size`` times.

        Returns:
            Tuple ``(inputs, lengths)``: ``inputs`` is an int array of shape
            ``(encoder_length, batch_size)`` and ``lengths`` an int array of
            shape ``(batch_size,)`` holding the unpadded id count.
        """
        hparams = self.model.hparams
        text = TrainDataGenerator.sanitize_line(text)
        tagger = MeCab.Tagger("-Owakati")
        words = tagger.parse(text).split()
        ids = self.words_to_ids(words)[:hparams.encoder_length]
        len_ids = len(ids)
        ids.extend([hparams.pad_id] * (hparams.encoder_length - len_ids))
        # The builtin int is used as dtype: the np.int alias no longer
        # exists in current NumPy releases.
        column = np.array(ids, dtype=int).reshape(-1, 1)
        inference_encoder_inputs = np.tile(column, (1, hparams.batch_size))
        inference_encoder_inputs_lengths = np.full(hparams.batch_size,
                                                   len_ids, dtype=int)
        return inference_encoder_inputs, inference_encoder_inputs_lengths


In [0]:
class ConversationTrainDataGenerator:
    """Derive seq2seq and RL training files from a three-line conversation file."""

    def __init__(self):
        return

    # Generate the following files from the conversations_txt file.
    # Let p_i:   line 3i     in the txt file, which is the original tweet.
    #     q_i:   line 3i + 1 in the txt file, which is the reply to the tweet.
    #     p_i+1: line 3i + 2 in the txt file, which is the reply to the reply.
    # (A) <basename>_seq2seq<ext>, to train p_seq2seq and p_seq2seq_backward
    #     line 2i: p_i + q_i
    #     line 2i+1: p_i+1
    #
    # (B) <basename>_rl<ext>, to train p_rl.
    #     line 2i: p_i + q_i
    #     line 2i+1: q_i
    #
    # (A) and (B) should share the vocabulary.
    def generate(self, conversations_txt):
        """Write the ``_seq2seq`` and ``_rl`` files next to ``conversations_txt``.

        The input is decoded as UTF-8 and consumed three lines at a time;
        a trailing incomplete group of one or two lines is ignored.
        """
        basename, extension = os.path.splitext(conversations_txt)
        seq2seq_path = "{}_seq2seq{}".format(basename, extension)
        rl_path = "{}_rl{}".format(basename, extension)
        # The outputs are written explicitly as UTF-8, the encoding the input
        # is decoded with, instead of the platform default encoding.
        with open(seq2seq_path, "w", encoding="utf-8") as s_out, \
                open(rl_path, "w", encoding="utf-8") as r_out, \
                gfile.GFile(conversations_txt, mode="rb") as fin:
            tweet = None
            reply = None
            for i, line in enumerate(fin):
                line = line.decode('utf-8').rstrip()
                position = i % 3
                if position == 0:
                    tweet = line
                elif position == 1:
                    reply = line
                else:
                    self._write(s_out, tweet, reply, line)
                    self._write(r_out, tweet, reply, reply)

    def _write(self, s_out, tweet, reply, reply2):
        """Write ``"<tweet> <reply>"`` on one line and ``reply2`` on the next."""
        s_out.write(tweet)
        s_out.write(' ')
        s_out.write(reply)
        s_out.write('\n')
        s_out.write(reply2)
        s_out.write('\n')

        
class TrainDataGenerator:
    """Turn a tweet/reply text file into id files and a ``tf.data`` dataset.

    The source file is read as alternating lines: even lines (0-based) are
    tweets, odd lines are their replies.  Every intermediate file is written
    next to the source file with a suffix inserted before the extension
    (``_enc``, ``_dec``, ``_vocab`` ...).  Most generation steps return early
    when their output already exists; call ``remove_generated`` to force a
    regeneration.
    """

    def __init__(self, source_path, hparams):
        self.source_path = source_path
        self.hparams = hparams
        basename, extension = os.path.splitext(self.source_path)

        def derived(suffix):
            return "{}_{}{}".format(basename, suffix, extension)

        self.enc_path = derived("enc")
        self.dec_path = derived("dec")
        self.enc_idx_path = derived("enc_idx")
        self.dec_idx_path = derived("dec_idx")
        self.dec_idx_eos_path = derived("dec_idx_eos")
        self.dec_idx_sos_path = derived("dec_idx_sos")
        self.dec_idx_len_path = derived("dec_idx_len")

        self.enc_idx_padded_path = derived("enc_idx_padded")
        self.enc_idx_len_path = derived("enc_idx_len")

        self.vocab_path = derived("vocab")

        self.generated_files = [
            self.enc_path,
            self.dec_path,
            self.enc_idx_path,
            self.dec_idx_path,
            self.dec_idx_eos_path,
            self.dec_idx_sos_path,
            self.dec_idx_len_path,
            self.enc_idx_padded_path,
            self.vocab_path,
            self.enc_idx_len_path,
        ]
        self.max_vocab_size = hparams.vocab_size
        # Special tokens always occupy the first vocabulary entries.
        self.start_vocabs = [hparams.sos_token, hparams.eos_token,
                             hparams.pad_token, hparams.unk_token]
        self.tagger = MeCab.Tagger("-Owakati")

    def remove_generated(self):
        """Delete every generated file that currently exists."""
        for f in self.generated_files:
            if os.path.exists(f):
                os.remove(f)

    def generate(self, vocab_path=None):
        """Generate all intermediate files and build the dataset.

        Args:
            vocab_path: when given, this vocabulary file is copied to
                ``self.vocab_path`` instead of building one from the data.

        Returns:
            Tuple ``(dataset, vocab, rev_vocab)`` from ``_create_dataset``.
        """
        print("generating enc and dec files...")
        self._generate_enc_dec()
        print("generating vocab file...")
        if vocab_path is None:
            self._generate_vocab()
        else:
            shutil.copyfile(vocab_path, self.vocab_path)
        print("loading vocab...")
        vocab, _ = self._load_vocab()
        print("generating id files...")
        self._generate_id_file(self.enc_path, self.enc_idx_path, vocab)
        self._generate_id_file(self.dec_path, self.dec_idx_path, vocab)
        print("generating padded input file...")
        self._generate_enc_idx_padded(self.enc_idx_path,
                                      self.enc_idx_padded_path,
                                      self.enc_idx_len_path,
                                      self.hparams.encoder_length)
        print("generating dec eos/sos files...")
        self._generate_dec_idx_eos(self.dec_idx_path, self.dec_idx_eos_path,
                                   self.hparams.decoder_length)
        self._generate_dec_idx_sos(self.dec_idx_path, self.dec_idx_sos_path,
                                   self.dec_idx_len_path,
                                   self.hparams.decoder_length)
        print("done")
        return self._create_dataset()

    def _generate_id_file(self, source_path, dest_path, vocab):
        """Tokenize each line of ``source_path`` and write its vocabulary ids.

        Words missing from ``vocab`` are written as ``hparams.unk_id``.
        Skipped when ``dest_path`` already exists.
        """
        if gfile.Exists(dest_path):
            return
        with gfile.GFile(source_path, mode="rb") as f, \
                gfile.GFile(dest_path, mode="wb") as of:
            for line in f:
                words = self.tagger.parse(line.decode('utf-8')).split()
                ids = [vocab.get(w, self.hparams.unk_id) for w in words]
                of.write(" ".join(str(word_id) for word_id in ids) + "\n")

    def _load_vocab(self):
        """Read ``self.vocab_path``; return ``(word -> id, id -> word list)``."""
        with gfile.GFile(self.vocab_path, mode="r") as f:
            rev_vocab = [line.strip() for line in f.readlines()]
        # Dictionary of (word, idx)
        vocab = {word: idx for idx, word in enumerate(rev_vocab)}
        return vocab, rev_vocab

    def _generate_vocab(self):
        """Write the vocabulary file: special tokens, then words by frequency.

        The list is cut to ``self.max_vocab_size`` entries.  Skipped when the
        vocabulary file already exists.
        """
        if gfile.Exists(self.vocab_path):
            return
        vocab_dic = self._build_vocab_dic(self.enc_path)
        vocab_dic = self._build_vocab_dic(self.dec_path, vocab_dic)
        vocab_list = self.start_vocabs + sorted(vocab_dic, key=vocab_dic.get,
                                                reverse=True)
        if len(vocab_list) > self.max_vocab_size:
            vocab_list = vocab_list[:self.max_vocab_size]
        with gfile.GFile(self.vocab_path, mode="w") as vocab_file:
            for w in vocab_list:
                vocab_file.write(w + "\n")

    def _generate_enc_dec(self):
        """Split the source file into encoder (tweet) and decoder (reply) files.

        Lines are sanitized first; a pair is written only when both its
        sanitized tweet and reply are non-empty strings.  Skipped when both
        output files already exist.
        """
        if gfile.Exists(self.enc_path) and gfile.Exists(self.dec_path):
            return
        with gfile.GFile(self.source_path, mode="rb") as f, \
                gfile.GFile(self.enc_path, mode="w+") as ef, \
                gfile.GFile(self.dec_path, mode="w+") as df:
            tweet = None
            for i, line in enumerate(f):
                line = self.sanitize_line(line.decode('utf-8'))
                if i % 2 == 0:
                    tweet = line
                    continue
                reply = line
                if tweet and reply:
                    ef.write(tweet)
                    df.write(reply)
                tweet = None

    def _pad_ids(self, ids, max_line_len):
        """Right-pad ``ids`` in place with ``pad_id`` up to ``max_line_len``."""
        if len(ids) < max_line_len:
            ids.extend([self.hparams.pad_id] * (max_line_len - len(ids)))
        return ids

    @staticmethod
    def _write_ids(fout, ids):
        """Write ``ids`` to ``fout`` as one space-separated line."""
        fout.write(" ".join(str(x) for x in ids))
        fout.write("\n")

    def _generate_enc_idx_padded(self, source_path, dest_path, dest_len_path,
                                 max_line_len):
        """Write padded encoder ids and their unpadded lengths.

        Lines longer than ``max_line_len`` keep their LAST ``max_line_len``
        ids.  Skipped when ``dest_path`` already exists.
        """
        if gfile.Exists(dest_path):
            return
        with open(source_path) as fin, open(dest_path, "w") as fout, \
                open(dest_len_path, "w") as flen:
            for line in fin:
                ids = [int(x) for x in line.split()]
                if len(ids) > max_line_len:
                    # Keep the tail of the line rather than its head.
                    ids = ids[-max_line_len:]
                flen.write(str(len(ids)))
                flen.write("\n")
                self._write_ids(fout, self._pad_ids(ids, max_line_len))

    # read decoder_idx file and append eos at the end of idx list.
    def _generate_dec_idx_eos(self, source_path, dest_path, max_line_len):
        """Write decoder ids followed by ``eos_id`` and padding.

        Lines longer than ``max_line_len - 1`` keep their LAST
        ``max_line_len - 1`` ids.  Skipped when ``dest_path`` already exists.
        """
        if gfile.Exists(dest_path):
            return
        with open(source_path) as fin, open(dest_path, "w") as fout:
            for line in fin:
                ids = [int(x) for x in line.split()]
                if len(ids) > max_line_len - 1:
                    # NOTE(review): this keeps the tail of an over-long reply
                    # while _generate_dec_idx_sos keeps the head, so the two
                    # files do not cover the same tokens for such replies --
                    # confirm this is intended.
                    ids = ids[-(max_line_len - 1):]
                ids.append(self.hparams.eos_id)
                self._write_ids(fout, self._pad_ids(ids, max_line_len))

    # read decoder_idx file and put sos at the beginning of the idx list.
    # also write out length of index list.
    def _generate_dec_idx_sos(self, source_path, dest_path, dest_len_path,
                              max_line_len):
        """Write decoder ids prefixed with ``sos_id``, padded, plus lengths.

        The prefixed list is cut to its FIRST ``max_line_len`` ids; the
        length written to ``dest_len_path`` is measured before padding.
        Skipped when ``dest_path`` already exists.
        """
        if gfile.Exists(dest_path):
            return
        with open(source_path) as fin, open(dest_path, "w") as fout, \
                open(dest_len_path, "w") as flen:
            for line in fin:
                ids = [self.hparams.sos_id]
                ids.extend(int(x) for x in line.split())
                if len(ids) > max_line_len:
                    ids = ids[:max_line_len]
                flen.write(str(len(ids)))
                flen.write("\n")
                self._write_ids(fout, self._pad_ids(ids, max_line_len))

    @staticmethod
    def sanitize_line(line):
        """Remove @mentions and URLs from ``line`` and strip leading blanks.

        Everything from the first ``http://`` or ``https://`` up to the end
        of the line is removed.
        """
        # Mentions are dropped rather than replaced by a USERNAME token:
        # the replacement made the USERNAME token show up everywhere.
        line = re.sub(r"@([A-Za-z0-9_]+)", "", line)
        # Remove URL
        line = re.sub(r'https?:\/\/.*', "", line)
        line = line.lstrip()
        return line

    @staticmethod
    def generate_source_target_swapped(source_path):
        """Write a copy of ``source_path`` with each line pair swapped.

        Returns:
            Path of the written ``<basename>_swapped<ext>`` file.
        """
        basename, extension = os.path.splitext(source_path)
        dest_path = "{}_swapped{}".format(basename, extension)
        with gfile.GFile(source_path, mode="rb") as fin, \
                gfile.GFile(dest_path, mode="w+") as fout:
            temp = None
            for i, line in enumerate(fin):
                if i % 2 == 0:
                    temp = line
                else:
                    fout.write(line)
                    fout.write(temp)
                    temp = None
        return dest_path

    def _build_vocab_dic(self, source_path, vocab_dic=None):
        """Count word occurrences in ``source_path``.

        Args:
            source_path: text file to tokenize line by line.
            vocab_dic: optional existing ``word -> count`` dict to update in
                place; a new dict is created when omitted.

        Returns:
            The updated ``word -> count`` dict.
        """
        # A fresh dict per call: a mutable default would be shared by every
        # call and accumulate counts across unrelated files and instances.
        if vocab_dic is None:
            vocab_dic = {}
        with gfile.GFile(source_path, mode="r") as f:
            for line in f:
                for word in self.tagger.parse(line).split():
                    vocab_dic[word] = vocab_dic.get(word, 0) + 1
        return vocab_dic

    @staticmethod
    def _read_file(source_path):
        """Return the whole content of ``source_path`` decoded as UTF-8."""
        # UTF-8 matches how the vocabulary is written and how _load_vocab
        # reads it; the context manager closes the file even on error.
        with open(source_path, encoding="utf-8") as f:
            return f.read()

    def _read_vocab(self, source_path):
        """Read a vocabulary file; return ``(word -> id, id -> word list)``."""
        rev_vocab = [line.strip()
                     for line in self._read_file(source_path).splitlines()]
        vocab = {word: idx for idx, word in enumerate(rev_vocab)}
        return vocab, rev_vocab

    def text_line_split_dataset(self, filename):
        """Return a dataset yielding each line of ``filename`` as int32 ids."""
        return tf.data.TextLineDataset(filename).map(self.split_to_int_values)

    @staticmethod
    def split_to_int_values(x):
        """Split the string tensor ``x`` on whitespace and parse int32s."""
        return tf.string_to_number(tf.string_split([x]).values, tf.int32)

    def _create_dataset(self):
        """Zip the generated id files into one batched dataset.

        Each element is ``(tweets, tweet lengths, replies ending with eos,
        replies starting with sos, reply lengths)``.  Tweets and the
        sos-prefixed replies are transposed after batching; batches smaller
        than ``hparams.batch_size`` are dropped.

        Returns:
            Tuple ``(dataset, vocab, rev_vocab)``.
        """

        def batched(dataset):
            return dataset.apply(
                tf.contrib.data.batch_and_drop_remainder(
                    self.hparams.batch_size))

        tweets_dataset = self.text_line_split_dataset(self.enc_idx_padded_path)
        tweets_lengths_dataset = tf.data.TextLineDataset(
            self.enc_idx_len_path)

        replies_sos_dataset = self.text_line_split_dataset(
            self.dec_idx_sos_path)
        replies_eos_dataset = self.text_line_split_dataset(
            self.dec_idx_eos_path)
        replies_sos_lengths_dataset = tf.data.TextLineDataset(
            self.dec_idx_len_path)

        tweets_transposed = batched(tweets_dataset).map(
            lambda x: tf.transpose(x))
        tweets_lengths = batched(tweets_lengths_dataset)

        replies_with_eos_suffix = batched(replies_eos_dataset)
        replies_with_sos_prefix = batched(replies_sos_dataset).map(
            lambda x: tf.transpose(x))
        replies_with_sos_suffix_lengths = batched(replies_sos_lengths_dataset)
        vocab, rev_vocab = self._read_vocab(self.vocab_path)
        return tf.data.Dataset.zip((tweets_transposed, tweets_lengths,
                                    replies_with_eos_suffix,
                                    replies_with_sos_prefix,
                                    replies_with_sos_suffix_lengths)), vocab, rev_vocab

In [0]:
# Helper functions to test
def make_test_training_data(hparams):
    """Create one random training batch for the smoke tests.

    Args:
        hparams: provides ``encoder_length``, ``decoder_length``,
            ``batch_size``, ``vocab_size``, ``sos_id`` and ``eos_id``.

    Returns:
        Tuple ``(first_tweet, encoder_inputs, encoder_inputs_lengths,
        target_labels, decoder_inputs)``:
        ``encoder_inputs`` has shape ``(encoder_length, batch_size)``,
        ``target_labels`` has shape ``(batch_size, decoder_length)`` and ends
        each row with ``eos_id``, ``decoder_inputs`` has shape
        ``(decoder_length, batch_size)`` and starts each column with
        ``sos_id``.  ``first_tweet`` is the tweet of sample 0.
    """
    # The builtin int is used as dtype: the np.int alias no longer exists in
    # current NumPy releases.
    train_encoder_inputs = np.empty(
        (hparams.encoder_length, hparams.batch_size), dtype=int)
    train_encoder_inputs_lengths = np.empty(hparams.batch_size, dtype=int)
    training_target_labels = np.empty(
        (hparams.batch_size, hparams.decoder_length), dtype=int)
    training_decoder_inputs = np.empty(
        (hparams.decoder_length, hparams.batch_size), dtype=int)

    # We keep first tweet to validate inference.
    first_tweet = None

    for i in range(hparams.batch_size):
        # Tweet
        tweet = np.random.randint(low=0, high=hparams.vocab_size,
                                  size=hparams.encoder_length)
        train_encoder_inputs[:, i] = tweet
        train_encoder_inputs_lengths[i] = len(tweet)
        # Reply
        #   Note that low = 2, as 0 and 1 are reserved.
        reply = np.random.randint(low=2, high=hparams.vocab_size,
                                  size=hparams.decoder_length - 1)

        training_target_label = np.concatenate(
            (reply, np.array([hparams.eos_id])))
        training_target_labels[i] = training_target_label

        training_decoder_input = np.concatenate(([hparams.sos_id], reply))
        training_decoder_inputs[:, i] = training_decoder_input

        if i == 0:
            first_tweet = tweet
            info("0th tweet={}".format(tweet), hparams)
            info("0th reply_with_eos_suffix={}".format(training_target_label),
                 hparams)
            info("0th reply_with_sos_prefix={}".format(training_decoder_input),
                 hparams)

    # Dump the arrays once, after every sample has been filled in; doing it
    # inside the loop logged partially uninitialised arrays on each pass.
    info("Tweets", hparams)
    info(train_encoder_inputs, hparams)
    info("Replies", hparams)
    info(training_target_labels, hparams)
    info(training_decoder_inputs, hparams)
    return first_tweet, train_encoder_inputs, train_encoder_inputs_lengths, training_target_labels, training_decoder_inputs


def test_training(test_hparams, model):
    """Train ``model`` on one random batch and print a few diagnostics.

    The model is trained for ``test_hparams.num_train_steps`` steps on the
    batch from ``make_test_training_data`` and saved every 15 steps.  Then
    log-probabilities, the ease-of-answering reward and the inferred reply
    for the first tweet are printed.
    """
    if test_hparams.use_attention:
        print("==== training model[attention] ====")
    else:
        print("==== training model ====")
    first_tweet, train_encoder_inputs, train_encoder_inputs_lengths, training_target_labels, training_decoder_inputs = make_test_training_data(
        test_hparams)
    for i in range(test_hparams.num_train_steps):
        _ = model.train(train_encoder_inputs,
                        train_encoder_inputs_lengths,
                        training_target_labels,
                        training_decoder_inputs,
                        np.ones(test_hparams.batch_size,
                                dtype=int) * test_hparams.decoder_length)
        if i % 5 == 0 and test_hparams.debug_verbose:
            print('.', end='')

        if i % 15 == 0:
            model.save()

    # Single-column inference batch holding the first training tweet.
    # The builtin int is used as dtype: the np.int alias no longer exists in
    # current NumPy releases.
    inference_encoder_inputs = np.empty((test_hparams.encoder_length, 1),
                                        dtype=int)
    inference_encoder_inputs[:, 0] = first_tweet
    inference_encoder_inputs_lengths = np.array([len(first_tweet)], dtype=int)

    # testing
    log_prob54 = model.log_prob(inference_encoder_inputs,
                                inference_encoder_inputs_lengths,
                                np.array([5, 4]))
    log_prob65 = model.log_prob(inference_encoder_inputs,
                                inference_encoder_inputs_lengths,
                                np.array([6, 5]))
    print("log_prob for 54", log_prob54)
    print("log_prob for 65", log_prob65)

    reward = model.reward_ease_of_answering(test_hparams.encoder_length,
                                            inference_encoder_inputs,
                                            inference_encoder_inputs_lengths,
                                            np.array([[5], [6]]))
    print("reward=", reward)

    if test_hparams.debug_verbose:
        print(inference_encoder_inputs)
    replies = model.infer(inference_encoder_inputs,
                          inference_encoder_inputs_lengths)
    print("Infered replies", replies[0])
    print("Expected replies", training_target_labels[0])


def test_distributed_pattern(hparams):
    """Train, save, restore and run every inference mode on a random batch.

    ``hparams.model_path`` is emptied first.  After training for
    ``hparams.num_train_steps`` steps the model is saved and restored, then
    greedy inference, beam search and sampling are run on a batch made of
    the first training tweet repeated ``hparams.batch_size`` times.
    """
    shutil.rmtree(hparams.model_path, ignore_errors=True)
    os.makedirs(hparams.model_path, exist_ok=True)

    print('==== test_distributed_pattern[{} {}] ===='.format(
        'attention' if hparams.use_attention else '',
        'beam' if hparams.beam_width > 0 else ''))

    first_tweet, train_encoder_inputs, train_encoder_inputs_lengths, training_target_labels, training_decoder_inputs = make_test_training_data(
        hparams)

    model = Trainer().create_model(hparams)

    for i in range(hparams.num_train_steps):
        _ = model.train(train_encoder_inputs,
                        train_encoder_inputs_lengths,
                        training_target_labels,
                        training_decoder_inputs,
                        np.ones(hparams.batch_size,
                                dtype=int) * hparams.decoder_length)

    model.save()

    def build_inference_inputs():
        # Fresh arrays on every call, with the first tweet in each column.
        # The builtin int is used as dtype: the np.int alias no longer
        # exists in current NumPy releases.
        column = np.asarray(first_tweet, dtype=int).reshape(-1, 1)
        inputs = np.tile(column, (1, hparams.batch_size))
        lengths = np.full(hparams.batch_size, len(first_tweet), dtype=int)
        return inputs, lengths

    inference_encoder_inputs, inference_encoder_inputs_lengths = build_inference_inputs()

    model.restore()
    replies = model.infer(inference_encoder_inputs,
                          inference_encoder_inputs_lengths)
    print("Inferred replies", replies[0])

    beam_replies, logits = model.infer_beam_search(
        inference_encoder_inputs, inference_encoder_inputs_lengths)

    print("logits", logits[0])
    print("Inferred replies candidate0", beam_replies[0][:, 0])
    print("Inferred replies candidate1", beam_replies[0][:, 1])

    inference_encoder_inputs, inference_encoder_inputs_lengths = build_inference_inputs()

    replies = model.sample(inference_encoder_inputs,
                           inference_encoder_inputs_lengths)
    print("sample replies", replies[0])
    print("Expected replies", training_target_labels[0])


In [0]:
def test_distributed_one(enable_attention):
    """Run ``test_distributed_pattern`` with a beam width of 2.

    Args:
        enable_attention: value used for the ``use_attention`` hyper-parameter.
    """
    overrides = {
        'model_path': ModelDirectory.test_distributed.value,
        'use_attention': enable_attention,
        'beam_width': 2,
    }
    # Work on a copy so the shared test_hparams stay untouched.
    hparams = copy.deepcopy(test_hparams).override_from_dict(overrides)
    test_distributed_pattern(hparams)


# In Test mode, run the distributed pattern test first without attention and
# then with attention.
if mode == Mode.Test:
    test_distributed_one(enable_attention=False)
    test_distributed_one(enable_attention=True)



In [0]:
def clean_model_path(model_path):
    """Make ``model_path`` an existing, empty directory.

    An existing directory is deleted with all its content and recreated.
    A missing directory is simply created, so the function also works on the
    very first run.
    """
    if os.path.isdir(model_path):
        shutil.rmtree(model_path)
    os.makedirs(model_path, exist_ok=True)


def print_header(text):
    """Print ``text`` framed by rows of ``=`` characters on a single line."""
    header_line = "============== {} ==============".format(text)
    print(header_line)


def test_tweets_small_swapped(hparams):
    """Train the swapped seq2seq model on ``tweets_small.txt``.

    The ``replies`` list is passed to ``Trainer.train_seq2seq_swapped`` as
    its third argument.
    """
    replies = ["@higepon „Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„ÅôÔºÅ", "„Åä„Å§„Åã„Çå„Åï„Åæ„Éº„ÄÇÊ∞ó„Çí„Å§„Åë„Å¶„ÄÇ", "„Åì„Å°„Çâ„Åì„Åù„Çà„Çç„Åó„Åè„ÅäÈ°ò„ÅÑ„Åó„Åæ„Åô„ÄÇ"]
    trainer = Trainer()
    trainer.train_seq2seq_swapped(hparams, "tweets_small.txt", replies)


# Hyper-parameters for the small tweet data set used in Test mode.
# NOTE: this stopped working after the vocab size was changed.
tweet_small_hparams = copy.deepcopy(base_hparams).override_from_dict(
    {
        'batch_size': 6,  # the number of tweets should be divisible by batch_size
        'encoder_length': 8,
        'decoder_length': 8,
        'num_units': 256,
        'num_layers': 2,
        'vocab_size': 34,
        'embedding_size': 40,
        'beam_width': 2,  # for faster iteration, this should be 10
        'num_train_steps': 200,
        'model_path': ModelDirectory.tweet_small.value,
        'learning_rate': 0.05,
        'use_attention': True,
    })

# Same settings as above; only the model directory differs.
tweet_small_swapped_hparams = copy.deepcopy(
    tweet_small_hparams).override_from_dict(
    {'model_path': ModelDirectory.tweet_small_swapped.value})

# In Test mode: delete the previously generated data files, train on the
# small data set, then train the swapped variant.
if mode == Mode.Test:
    tweets_path = "tweets_small.txt"
    TrainDataGenerator(tweets_path, tweet_small_hparams).remove_generated()
    trainer = Trainer()
    trainer.train_seq2seq(tweet_small_hparams, tweets_path,
                          ["„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô„ÄÇÂØí„ÅÑ„Åß„Åô„Å≠„ÄÇ", "„Åï„Å¶Â∏∞„Çç„ÅÜ„ÄÇÊòéÊó•„ÅØÊó©„ÅÑ„ÄÇ", "‰ªäÂõû„ÇÇ„Çà„Çç„Åó„Åè„Åß„Åô„ÄÇ"])
    test_tweets_small_swapped(tweet_small_swapped_hparams)


In [0]:
def test_tweets_large(hparams):
    """Train the seq2seq model on ``tweets_conversation.txt``.

    The saved model directory is kept (``should_clean_saved_model=False``).
    The ``tweets`` list is passed to ``Trainer.train_seq2seq`` as its third
    argument.
    """
    tweets = ["„Åï„Å¶Á¶èÂ≤°Ë°å„Å£„Å¶„Åç„Åæ„ÅôÔºÅ", "Ë™∞„ÅãÈ£≤„Åø„Å´Ë°å„Åì„ÅÜ", "ÁÜ±„Åß„Å¶„Çã„Åë„Å©„ÄÅ„Åß„ÇÇ„Å™„Çì„ÅãÈ£ü„Åπ„Å™„Åç„ÇÉ„Éº„Å®ÊÄù„Å£„Å¶„Ç¢„Ç§„ÇπË≤∑„Åä„ÅÜ„Å®„Åó„Åü„ÅÆ",
              "‰ªäÊó•„ÅÆ„Éâ„É©„ÉûÈù¢ÁôΩ„Åù„ÅÜÔºÅ", "„ÅäËÖπ„Åô„ÅÑ„Åü„Éº", "„Åä„ÇÑ„Åô„ÅøÔΩû", "„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô„ÄÇÂØí„ÅÑ„Åß„Åô„Å≠„ÄÇ",
              "„Åï„Å¶Â∏∞„Çç„ÅÜ„ÄÇÊòéÊó•„ÅØÊó©„ÅÑ„ÄÇ", "‰ªäÂõû„ÇÇ„Çà„Çç„Åó„Åè„Åß„Åô„ÄÇ", "„Å∞„ÅÑ„Å®„Åä„ÇèÔºÅ"]
    trainer = Trainer()
    trainer.train_seq2seq(hparams, "tweets_conversation.txt", tweets,
                          should_clean_saved_model=False)
    # NOTE(review): assumes Trainer exposes a `model` attribute; nothing in
    # the visible training methods assigns it -- confirm.
    return trainer.model


def test_tweets_large_swapped(hparams):
    """Train the swapped seq2seq model on ``tweets_large.txt``.

    The saved model directory is kept (``should_clean_saved_model=False``).
    The ``tweets`` list is passed to ``Trainer.train_seq2seq_swapped`` as its
    third argument.
    """
    tweets = ["‰ªäÊó•„ÅÆ„Éâ„É©„ÉûÈù¢ÁôΩ„Åù„ÅÜÔºÅ", "„ÅäËÖπ„Åô„ÅÑ„Åü„Éº", "„Åä„ÇÑ„Åô„ÅøÔΩû", "„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô„ÄÇÂØí„ÅÑ„Åß„Åô„Å≠„ÄÇ",
              "„Åï„Å¶Â∏∞„Çç„ÅÜ„ÄÇÊòéÊó•„ÅØÊó©„ÅÑ„ÄÇ", "‰ªäÂõû„ÇÇ„Çà„Çç„Åó„Åè„Åß„Åô„ÄÇ", "„Å∞„ÅÑ„Å®„Åä„ÇèÔºÅ"]
    trainer = Trainer()
    trainer.train_seq2seq_swapped(hparams, "tweets_large.txt", tweets,
                                  should_clean_saved_model=False)
    # NOTE(review): assumes Trainer exposes a `model` attribute; nothing in
    # the visible training methods assigns it -- confirm.
    return trainer.model


tweet_large_hparams = copy.deepcopy(base_hparams).override_from_dict(
    {
        # In typical seq2seq chatbot
        # num_layers=3, learning_rate=0.5, batch_size=64, vocab=20000-100000, learning_rate decay is 0.99, which is taken care as default parameter in AdamOptimizer.
        'batch_size': 64,  # of tweets should be dividable by batch_size
        'encoder_length': 28,
        'decoder_length': 28,
        'num_units': 1024,
        'num_layers': 3,
        'vocab_size': 60000,
    # conversations.txt actually has about 70K uniq words.
        'embedding_size': 1024,
        'beam_width': 2,  # for faster iteration, this should be 10
        'num_train_steps': 1000000,
        'model_path': ModelDirectory.tweet_large.value,
        'learning_rate': 0.5,
    # For vocab_size 50000, num_layers 3, num_units 1024, tweet_large, starting learning_rate 0.05 works well, change it t0 0.01 at perplexity 800, changed it to 0.005 at 200.
        'learning_rate_decay': 0.99,
        'use_attention': True,
        # testing new restore learning rate and no USERNAME TOKEN
    })

tweet_large_swapped_hparams = copy.deepcopy(
    tweet_large_hparams).override_from_dict(
    {
        'model_path': ModelDirectory.tweet_large_swapped.value
    })

#Shell.save_model_in_drive(tweet_large_hparams.model_path)

# Run the training entry point that matches the selected notebook mode;
# any other mode does nothing in this cell.
if mode == Mode.TrainSeq2Seq:
    print("train seq2seq")
    test_tweets_large(tweet_large_hparams)
elif mode == Mode.TrainSeq2SeqSwapped:
    print("train seq2seq swapped")
    test_tweets_large_swapped(tweet_large_swapped_hparams)


In [0]:
# First hparams argument of train_seq2seq_rl below: a deep copy of
# tweet_small_hparams with a short, 200-step schedule.
small_hparams = copy.deepcopy(tweet_small_hparams).override_from_dict({
    'learning_rate': 0.1,
    'batch_size': 16,
    'num_train_steps': 200,
})

# Second hparams argument of train_seq2seq_rl below: same base settings, but
# 2000 steps and its own model directory (ModelDirectory.tweet_small_rl).
rl_small_hparams = copy.deepcopy(tweet_small_hparams).override_from_dict({
    'learning_rate': 0.1,
    'batch_size': 16,
    'num_train_steps': 2000,
    'model_path': ModelDirectory.tweet_small_rl.value
})

# Only train when the notebook is in TrainRL mode.
if mode == Mode.TrainRL:
    with memory_util.capture_stderr() as stderr:
        try:
            trainer = Trainer()
            trainer.train_seq2seq_rl(small_hparams, rl_small_hparams,
                                     "tweets_small.txt", resume=False)
        except Exception:
            # Show whatever was captured on stderr, then re-raise the
            # original exception with its traceback untouched.
            print(stderr.getvalue())
            raise

In [0]:
conversations_small_hparams = copy.deepcopy(base_hparams).override_from_dict(
    {
        'batch_size': 6,  # of tweets should be dividable by batch_size
        'encoder_length': 8,
        'decoder_length': 8,
        'num_units': 256,
        'num_layers': 2,
        'vocab_size': 34,
        'embedding_size': 40,
        'beam_width': 2,  # for faster iteration, this should be 10
        'num_train_steps': 200,
        'model_path': ModelDirectory.conversations_small.value,
        'learning_rate': 0.05,
        'use_attention': True,
    })

conversations_small_backward_hparams = copy.deepcopy(
    conversations_small_hparams).override_from_dict(
    {
        'model_path': ModelDirectory.conversations_small_backward.value,
    })

conversations_small_rl_hparams = copy.deepcopy(
    conversations_small_hparams).override_from_dict(
    {
        'model_path': ModelDirectory.conversations_small_rl.value,
        'num_train_steps': 200,
    })

conversations_txt = "conversations_tiny.txt"
Shell.download_file_if_necessary(conversations_txt)
ConversationTrainDataGenerator().generate(conversations_txt)
#data_source = TrainDataSource("conversations_tiny_seq2seq.txt",
#                              conversations_small_hparams)
#vocab_path = data_source.vocab_path
#data_source2 = TrainDataSource("conversations_tiny_rl.txt",
#                              conversations_small_hparams, vocab_path)

with memory_util.capture_stderr() as stderr:
    try:
        trainer = Trainer()
        valid_tweets = ["„Åä„ÅØ„Çà„ÅÜ„Åî„Åñ„ÅÑ„Åæ„Åô„ÄÇÊòéÊó•„ÅØ„Çà„Çç„Åó„Åè„Åä„Å≠„Åå„ÅÑ„Åó„Åæ„Åô„ÄÇ„Åì„Å°„Çâ„Åì„Åù„Çà„Çç„Åó„Åè„Åä„Å≠„Åå„ÅÑ„Åó„Åæ„Åô„ÄÇ"]
        trainer.train_seq2seq(conversations_small_hparams,
                              "conversations_tiny_seq2seq.txt",
                              valid_tweets)
        trainer.train_seq2seq_swapped(conversations_small_backward_hparams,
                                      "conversations_tiny_seq2seq.txt",
                                      ["„Åæ„ÅÇ„Å™„Çì„Å®„Åã„Åå„Çì„Å∞„Çä„Åæ„Åó„Çá„ÅÜ„ÄÇ"], vocab_path="conversations_tiny_seq2seq_vocab.txt")

        Trainer().train_beam_rl(conversations_small_rl_hparams,
                                conversations_small_hparams,
                                conversations_small_backward_hparams,
                                "conversations_tiny_seq2seq.txt",

                                "conversations_tiny_rl.txt",
                                valid_tweets)
    except Exception as e:
        print(stderr.getvalue())
        raise (e)

!ls - lSh



In [0]:
# Long listing (sorted by size, human-readable sizes) of every file in the
# working directory whose name contains "vocab".
!ls -lSh *vocab*
#!cat conversations_tiny_rl_vocab.txt

In [0]:
# Remove every file in the working directory whose name starts with
# "conversations_tiny".
!rm conversations_tiny*
# Show the first lines of the copy kept under drive/seq2seq_data.
!head drive/seq2seq_data/conversations_tiny.txt
#!ls conversations_tiny*
#!head conversations_tiny_seq2seq.txt

In [0]:
# First hparams argument of train_seq2seq_beam_rl below: a deep copy of
# tweet_small_hparams with a short, 200-step schedule.
small_hparams = copy.deepcopy(tweet_small_hparams).override_from_dict({
    'learning_rate': 0.1,
    'batch_size': 16,
    'num_train_steps': 200,
})

# Second hparams argument of train_seq2seq_beam_rl below: same base settings,
# but 2000 steps and its own model directory (ModelDirectory.tweet_small_rl).
rl_small_hparams = copy.deepcopy(tweet_small_hparams).override_from_dict({
    'learning_rate': 0.1,
    'batch_size': 16,
    'num_train_steps': 2000,
    'model_path': ModelDirectory.tweet_small_rl.value
})

# NOTE: unlike the other RL cells, this one is not guarded by a mode check and
# runs whenever the cell is executed.
with memory_util.capture_stderr() as stderr:
    try:
        trainer = Trainer()
        trainer.train_seq2seq_beam_rl(small_hparams, rl_small_hparams,
                                      "tweets_small.txt", resume=False)
    except Exception:
        # Show whatever was captured on stderr, then re-raise the original
        # exception with its traceback untouched.
        print(stderr.getvalue())
        raise

In [0]:
# After an RL training run: hand the RL model directory to
# Shell.download_logs, then delete the files inside both small model
# directories and list what is left in each.
if mode == Mode.TrainRL:
  Shell.download_logs(rl_small_hparams.model_path)
  # $name.attr is expanded by IPython before the shell command runs.
  !rm $small_hparams.model_path/*
  !ls -lSh $small_hparams.model_path
  !rm $rl_small_hparams.model_path/*
  !ls -lSh $rl_small_hparams.model_path

In [0]:
# First hparams argument of train_seq2seq_rl below: tweet_large_hparams with
# the step count reduced to 40000.
large_hparams = copy.deepcopy(tweet_large_hparams).override_from_dict({
    'num_train_steps': 40000,
})

# Second hparams argument of train_seq2seq_rl below: tweet_large_hparams with
# 20000 steps and its own model directory (ModelDirectory.tweet_large_rl).
rl_large_hparams = copy.deepcopy(tweet_large_hparams).override_from_dict({
    'num_train_steps': 20000,
    'model_path': ModelDirectory.tweet_large_rl.value
})

# Only train when the notebook is in TrainRL mode.
if mode == Mode.TrainRL:
    with memory_util.capture_stderr() as stderr:
        try:
            trainer = Trainer()
            trainer.train_seq2seq_rl(large_hparams, rl_large_hparams,
                                     "tweets_large.txt", resume=False)
        except KeyboardInterrupt:
            # A manual interrupt is not re-raised: the captured stderr is
            # printed and the cell finishes normally.
            print(stderr.getvalue())
        except Exception:
            # Show whatever was captured on stderr, then re-raise the
            # original exception with its traceback untouched.
            print(stderr.getvalue())
            raise

In [0]:
#!cp model/tweet_small/* drive/
#!ls model/tweet_small
#!cp $rl_small_hparams.model_path/* drive/
# Hand the large RL model's directory to Shell.download_logs.
Shell.download_logs(rl_large_hparams.model_path)

In [0]:
# Remove tweets_large.txt from the current working directory.
!rm tweets_large.txt

In [0]:
# In Test mode, hand the tweet_small_rl model directory to Shell.download_logs.
if mode == Mode.Test:
    Shell.download_logs(ModelDirectory.tweet_small_rl.value)


In [0]:
# N.B: This would fail if we try to download logs in the previous cell.
# My guess is tflog is somehow locking the log file when running the cell.
#download_logs()


In [0]:
class StreamListener(tweepy.StreamListener):
    """Stream listener that answers tweets addressed to the bot.

    For every incoming status the listener ignores the bot's own tweets and,
    when the text starts with "@<bot screen name>", posts one randomly chosen
    reply produced by ``helper.inferences``.
    """

    def __init__(self, api, helper):
        """Remember the collaborators used while handling statuses.

        Args:
            api: tweepy API object; used to look up the authenticated account
                and to post replies.
            helper: object exposing ``inferences(text)``, which returns a
                sequence of candidate reply strings.
        """
        # Let the tweepy base class run its own initialisation first.
        super().__init__(api)
        self.api = api
        self.helper = helper

    def on_status(self, status):
        """Handle one incoming status.

        Args:
            status: tweepy status object; ``text``, ``id`` and
                ``author.screen_name`` are read.

        Returns:
            Always True.
        """
        # Author's task list, kept from the original:
        # done handle @reply only
        # done print reply
        # add model parameter
        # direct reply
        # unk reply
        # shuffle beam search
        print("{0}: {1}".format(status.text, status.author.screen_name))

        screen_name = status.author.screen_name
        # Look the bot's own account up once per status instead of calling
        # self.api.me() for every comparison.
        my_screen_name = self.api.me().screen_name

        # ignore my tweets
        if screen_name == my_screen_name:
            print("Ignored my tweet")
            return True

        if status.text.startswith("@{0}".format(my_screen_name)):
            replies = self.helper.inferences(status.text)
            # random.choice raises IndexError on an empty sequence, which
            # would take the whole stream down; skip the tweet instead.
            # len() is used rather than truthiness so that any sequence type
            # accepted by random.choice is handled.
            if len(replies) == 0:
                print("No reply candidates")
                return True

            reply = random.choice(replies)
            reply = "@" + screen_name + " " + reply
            print(reply)
            self.api.update_status(status=reply,
                                   in_reply_to_status_id=status.id)

        return True

    @staticmethod
    def on_error(status_code):
        """Print the error status code reported by the stream.

        Returns:
            Always True (presumably this tells tweepy to keep the stream
            alive - confirm against the installed tweepy version).
        """
        print(status_code)
        return True


def listener(hparams):
    """Restore the inference model and run the Twitter reply bot.

    Downloads the model data, builds the train/infer models, regenerates the
    vocabulary from "tweets_large.txt", restores the inference model, reads
    the Twitter credentials from "config.yml" and then streams the user
    timeline in an endless loop.

    Args:
        hparams: hyper-parameter object; ``model_path`` is read here and the
            object is also passed to the model factory and to
            TrainDataGenerator.

    This function contains an unconditional ``while True`` loop and therefore
    only ends when an exception propagates out of it.
    """
    Shell.download_model_data_if_necessary(hparams.model_path)

    # The factory needs a train graph/session as well, although only the
    # inference model is used below.
    rl_train_graph = tf.Graph()
    rl_infer_graph = tf.Graph()
    rl_train_sess = tf.Session(graph=rl_train_graph)
    rl_infer_sess = tf.Session(graph=rl_infer_graph)

    _, infer_model = create_train_infer_models_in_graphs(rl_train_graph,
                                                         rl_train_sess,
                                                         rl_infer_graph,
                                                         rl_infer_sess,
                                                         hparams)

    source_path = "tweets_large.txt"
    Shell.download_file_if_necessary(source_path)
    generator = TrainDataGenerator(source_path=source_path, hparams=hparams)
    _, vocab, rev_vocab = generator.generate()
    infer_model.restore()
    helper = InferenceHelper(infer_model, vocab, rev_vocab)

    config_path = 'config.yml'
    Shell.download_file_if_necessary(config_path)
    # The context manager closes the file handle. safe_load parses plain YAML
    # without constructing arbitrary Python objects, and unlike a bare
    # yaml.load(f) it needs no explicit Loader argument.
    with open(config_path, 'rt') as f:
        cfg = yaml.safe_load(f)['twitter']

    consumer_key = cfg['consumer_key']
    consumer_secret = cfg['consumer_secret']
    access_token = cfg['access_token']
    access_token_secret = cfg['access_token_secret']

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    while True:
        # NOTE: the try/except that used to wrap this body is disabled, so any
        # exception raised by the stream leaves the loop and ends the bot.
        # When userstream() returns, a new stream is opened immediately.
        stream = tweepy.Stream(auth=api.auth,
                               listener=StreamListener(api, helper))
        print("listener starting...")
        stream.userstream()


# Hyper-parameters for the Twitter bot: a deep copy of rl_dst_hparams with
# beam_width overridden to 50.
tweet_hparams = copy.deepcopy(rl_dst_hparams).override_from_dict(
    {'beam_width': 50})
# Start the bot only when the notebook runs in TweetBot mode.
if mode == Mode.TweetBot:
    listener(tweet_hparams)
