In [1]:
import numpy as np
import sentencepiece as spm
import tensorflow as tf

In [2]:
import model
from gpu_utils import assign_to_gpu

In [3]:
# Register a throw-away string flag named `f`.
# NOTE(review): presumably this absorbs the `-f <connection file>` argument that
# Jupyter passes to the kernel so flag parsing does not reject it — confirm.
tf.app.flags.DEFINE_string('f', default='', help='kernel')

In [4]:
# Command-line style configuration for the generation notebook.
# `FLAGS` is read by the model-building and generation cells below.
flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_string("model_dir", default='./EXP-natsume/',
      help="Estimator model_dir.")

flags.DEFINE_string("eval_ckpt_path", None,
      "Checkpoint to restore; if None, the latest checkpoint in model_dir "
      "is used.")

flags.DEFINE_string("spm_file", '../data/natsume/natsume.model',
      "Path to the SentencePiece model file.")
flags.DEFINE_integer("num_generate", 30,
      "Number of tokens to generate.")

# Model config
flags.DEFINE_integer("tgt_len", default=1,
      help="Number of steps to predict")
flags.DEFINE_integer("mem_len", default=640,
      help="Number of steps to cache")
flags.DEFINE_bool("same_length", default=True,
      help="Same length attention")
flags.DEFINE_integer("clamp_len", default=400,
      help="Clamp length")

flags.DEFINE_integer("n_layer", default=16,
      help="Number of layers.")
flags.DEFINE_integer("d_model", default=410,
      help="Dimension of the model.")
flags.DEFINE_integer("d_embed", default=410,
      help="Dimension of the embeddings.")
flags.DEFINE_integer("n_head", default=10,
      help="Number of attention heads.")
flags.DEFINE_integer("d_head", default=41,
      help="Dimension of each attention head.")
flags.DEFINE_integer("d_inner", default=2100,
      help="Dimension of inner hidden size in positionwise feed-forward.")
flags.DEFINE_float("dropout", default=0.0,
      help="Dropout rate.")
flags.DEFINE_float("dropatt", default=0.0,
      help="Attention dropout rate.")
flags.DEFINE_bool("untie_r", default=True,
      help="untie r_w_bias and r_r_bias")

# Adaptive Softmax / Embedding
flags.DEFINE_bool("tie_weight", default=True,
      help="Tie embedding and softmax weight.")
flags.DEFINE_integer("div_val", default=1,
      help="Divide the embedding size by this val for each bin")
flags.DEFINE_bool("proj_share_all_but_first", default=True,
      help="True to share all but first projs, False not to share.")
flags.DEFINE_bool("proj_same_dim", default=True,
      help="Project the bin with the same dimension.")

# Parameter initialization
flags.DEFINE_enum("init", default="normal",
      enum_values=["normal", "uniform"],
      help="Initialization method.")
flags.DEFINE_float("init_std", default=0.02,
      help="Initialization std when init is normal.")
flags.DEFINE_float("proj_init_std", default=0.01,
      help="Initialization std for embedding projection.")
# Used as the symmetric [-init_range, init_range] bounds of the uniform
# initializer, so it is a range rather than a standard deviation.
flags.DEFINE_float("init_range", default=0.1,
      help="Initialization range when init is uniform.")

In [5]:
def get_model_fn(n_token, cutoffs):
  """Return a function that builds the decoding graph via `model.decode`.

  Args:
    n_token: vocabulary size forwarded to `model.decode`.
    cutoffs: list of cutoffs forwarded to `model.decode`; its length also
      determines how many entries `tie_projs` has (len(cutoffs) + 1).

  Returns:
    A `model_fn(inp, tgt, mems, is_training)` callable that returns whatever
    `model.decode` returns.
  """
  def model_fn(inp, tgt, mems, is_training):
    """Build the graph for one tower.

    Args:
      inp: input tensor; its first two axes are swapped before decoding.
      tgt: unused here; kept so the signature stays compatible with callers.
      mems: per-layer memory tensors forwarded to `model.decode`.
      is_training: forwarded to `model.decode`.

    Returns:
      The value returned by `model.decode`.

    Raises:
      ValueError: if `FLAGS.init` is neither "uniform" nor "normal".
    """
    inp = tf.transpose(inp, [1, 0])

    if FLAGS.init == "uniform":
      initializer = tf.initializers.random_uniform(
          minval=-FLAGS.init_range,
          maxval=FLAGS.init_range,
          seed=None)
    elif FLAGS.init == "normal":
      initializer = tf.initializers.random_normal(
          stddev=FLAGS.init_std,
          seed=None)
    else:
      raise ValueError("Unsupported init method: {}".format(FLAGS.init))

    # The projection initializer must exist for every init method. Previously
    # it was only assigned in the "normal" branch, so --init=uniform failed with
    # an unbound local when `model.decode` was called.
    proj_initializer = tf.initializers.random_normal(
        stddev=FLAGS.proj_init_std,
        seed=None)

    # The first projection is never tied; the rest are tied only when
    # proj_share_all_but_first is set.
    share_rest = bool(FLAGS.proj_share_all_but_first)
    tie_projs = [False] + [share_rest] * len(cutoffs)

    probs = model.decode(
        dec_inp=inp,
        mems=mems,
        n_token=n_token,
        n_layer=FLAGS.n_layer,
        d_model=FLAGS.d_model,
        d_embed=FLAGS.d_embed,
        n_head=FLAGS.n_head,
        d_head=FLAGS.d_head,
        d_inner=FLAGS.d_inner,
        dropout=FLAGS.dropout,
        dropatt=FLAGS.dropatt,
        initializer=initializer,
        proj_initializer=proj_initializer,
        is_training=is_training,
        mem_len=FLAGS.mem_len,
        cutoffs=cutoffs,
        div_val=FLAGS.div_val,
        tie_projs=tie_projs,
        input_perms=None,
        target_perms=None,
        head_target=None,
        same_length=FLAGS.same_length,
        clamp_len=FLAGS.clamp_len,
        use_tpu=False,
        untie_r=FLAGS.untie_r,
        proj_same_dim=FLAGS.proj_same_dim)

    # number of parameters
    num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
    tf.logging.info('#params: {}'.format(num_params))

    # format_str = '{{:<{0}s}}\t{{}}'.format(
    #     max([len(v.name) for v in tf.trainable_variables()]))
    # for v in tf.trainable_variables():
    #   tf.logging.info(format_str.format(v.name, v.get_shape()))
    return probs

  return model_fn

In [6]:
def single_core_graph(n_token, cutoffs, is_training, inp, tgt, mems):
  """Build the model graph for a single device and return its output.

  Creates a model function with `get_model_fn(n_token, cutoffs)` and calls it
  immediately with the given input, target, memories and training flag.
  """
  build_tower = get_model_fn(n_token=n_token, cutoffs=cutoffs)
  return build_tower(inp=inp, tgt=tgt, mems=mems, is_training=is_training)

In [8]:
# Load the SentencePiece model located at the `spm_file` flag.
# `sp` is used by later cells to size the vocabulary (`get_piece_size`),
# encode prompts (`encode_as_ids`) and decode ids (`id_to_piece`).
sp = spm.SentencePieceProcessor()
sp.Load(FLAGS.spm_file)

True

In [9]:
# Build the inference graph for a single tower and prepare its memory inputs.
# `tower_mems` collects, per tower, the list of memory placeholders.
tower_mems = []

with tf.device(assign_to_gpu(0, "/gpu:0")), \
     tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
  # int32 token-id input of shape [1, None]; model_fn swaps its two axes
  # before decoding.
  inp_ph = tf.placeholder(tf.int32, [1, None])

  # One float32 memory placeholder per layer, each [mem_len, 1, d_model].
  mems_i = [tf.placeholder(tf.float32, [FLAGS.mem_len, 1, FLAGS.d_model])
            for _ in range(FLAGS.n_layer)]

  # `prob` is whatever `model.decode` returns; no adaptive-softmax cutoffs and
  # no targets are supplied, and the graph is built with is_training=False.
  prob = single_core_graph(
    n_token=sp.get_piece_size(),
    cutoffs=[],
    is_training=False,
    inp=inp_ph,
    tgt=None,
    mems=mems_i)

  tower_mems.append(mems_i)

# All-zero NumPy arrays with the same shapes as the memory placeholders
# (one list per tower, one array per layer); fed as the memory values later.
tower_mems_np = [
  [np.zeros([FLAGS.mem_len, 1, FLAGS.d_model], dtype=np.float32)
    for layer in range(FLAGS.n_layer)]
  for core in range(1)
]

# Created after the graph is built; used later to restore the checkpoint.
saver = tf.train.Saver()

INFO:tensorflow:#params: 44367520


In [8]:
# Encode a sample prompt and show it next to its space-separated piece ids.
start_string = 'パプリカ'
start_ids = sp.encode_as_ids(start_string)

print('{:s}({:s})'.format(start_string, ' '.join(map(str, start_ids))))

パプリカ(6 2639 4087 878 1048)


In [11]:
# NOTE(review): `context` is not referenced by any other cell visible in this
# notebook; confirm whether it is still needed.
context = ['野球', '試合', '東京ドーム']

In [10]:
# Open a session, initialise the variables, then overwrite them from a checkpoint.
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
sess.run(tf.global_variables_initializer())

# An explicit eval_ckpt_path takes precedence; otherwise fall back to the
# latest checkpoint found in model_dir.
eval_ckpt_path = FLAGS.eval_ckpt_path
if eval_ckpt_path is None:
  eval_ckpt_path = tf.train.latest_checkpoint(FLAGS.model_dir)
saver.restore(sess, eval_ckpt_path)

# Base feed: every memory placeholder of tower 0 mapped to its zero array.
feed_dict = dict(zip(tower_mems[0], tower_mems_np[0]))

INFO:tensorflow:Restoring parameters from ./EXP-natsume/model.ckpt


In [20]:
# Greedy generation: repeatedly feed the whole id sequence and append the
# arg-max id taken from the last position of the model output.
start_string = '猫'
start_ids = sp.encode_as_ids(start_string)
ids = list(start_ids)

# `fetches` is also read by the `gen` helper defined in a later cell.
fetches = [prob]

for _step in range(FLAGS.num_generate):
    feed_dict[inp_ph] = np.expand_dims(ids, 0)
    predictions = sess.run(fetches, feed_dict=feed_dict)[0]
    # Keep only the last time step and drop its leading singleton axis.
    last_step = np.squeeze(predictions[-1], 0)
    ids.append(np.argmax(last_step))

print(' '.join(sp.id_to_piece(i) for i in ids))

▁ 猫 といえども 相当の 猫 を 鼓 吹 する に相違ない 。 ▁ 御母さん は 、 今 ここ が 平生の 通り 落ち 付いて 来て 、 御前の 顔は さ ば か に 心得ている 。


In [23]:
def gen(prev_ids, text=None):
    """Append optional text to a sequence, predict one more id, and print it.

    Relies on the notebook globals `sp`, `sess`, `fetches`, `feed_dict` and
    `inp_ph` created by earlier cells; `feed_dict[inp_ph]` is overwritten.

    Args:
        prev_ids: iterable of piece ids generated so far (not modified).
        text: optional string to encode and append before predicting. A
            leading "▁" piece produced by the encoder is dropped.

    Returns:
        A new list: `prev_ids`, then the ids of `text` (if any), then the
        arg-max id taken from the last position of the model output.
    """
    ids = list(prev_ids)
    if text:
        extra_ids = list(sp.encode_as_ids(text))
        # Compare the piece itself rather than a hard-coded id (6 for the
        # natsume model), and tolerate text that encodes to no ids at all.
        if extra_ids and sp.id_to_piece(extra_ids[0]) == '▁':
            extra_ids = extra_ids[1:]
        ids.extend(extra_ids)
    feed_dict[inp_ph] = np.expand_dims(ids, 0)
    fetched = sess.run(fetches, feed_dict=feed_dict)
    # Keep only the last time step and drop its leading singleton axis.
    preds = np.squeeze(fetched[0][-1], 0)
    # Store a plain Python int so `ids` stays a homogeneous list of ints.
    ids.append(int(np.argmax(preds)))
    print(' '.join(sp.id_to_piece(i) for i in ids))
    return ids

In [24]:
# Step-by-step generation: predict one piece, inject '相当', predict again.
start_string = '猫'
start_ids = sp.encode_as_ids(start_string)

new_ids = start_ids
for extra_text in (None, '相当', None):
    new_ids = gen(new_ids, extra_text)

▁ 猫 といえども
▁ 猫 といえども 相 当 する
▁ 猫 といえども 相 当 する 。
