In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import os
import yaml

import tensorflow as tf
import numpy as np

from tensor2tensor.utils import trainer_utils as utils
from tensor2tensor.utils import decoding

In [2]:
%%javascript
require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
  }
});

<IPython.core.display.Javascript object>

In [3]:
%%bash
s=vie
t=eng
source ../../../config.sh $s $t
echo "exp_dir: $exp_dir" > config_local.yml
echo "s: $s" >> config_local.yml
echo "t: $t" >> config_local.yml
echo "trans_dir: $trans_dir" >> config_local.yml

In [4]:
with open("config_local.yml") as f:
    config = yaml.load(f)
exp_dir = config["exp_dir"]
s = config["s"]
t = config["t"]
trans_dir = config["trans_dir"]

use_lex = ""
emb_untrainable = ""
emb_random = ""
lex_cluster = ""
use_align = "_usealign"
with_padding = ""
# current version:
# allregular, all1d, all2d
# previous^2 version: 
# before1d, after1d, before2d, after2d, beforesimple, aftersimple, beforesimpletanh, aftersimpletanh
# previous version: 
# beforeaggregate, afteraggregate, before1daggregate, after1daggregate, before2daggregate, after2daggregate, all1daggregate, al2daggregate
attn = "allregular"
merge_ops = "inf" # 8000-8000, 8000, inf
dim = 300 # 512, 300
lr = 0.2
dropout = 0.1
layer = 2

PROBLEM = "translate_srctgt_lrlp"
if attn == "":
    MODEL = "transformer"
elif attn == "allregular" or attn == "all1d" or attn == "all2d":
    MODEL = "transformer_lex2"
else:
    MODEL = "transformer_lex"
HPARAMS = 'transformer_all'

DATA_DIR = os.path.join(trans_dir, "t2t_"+merge_ops+use_lex)
TRAIN_DIR= os.path.join(exp_dir, "_".join([
    "t2t"+attn, 
    "dim"+str(dim), 
    "layer"+str(layer), 
    "lr"+str(lr), 
    "dropout"+str(dropout), 
    "bpe"+merge_ops
    +use_lex
    +emb_untrainable
    +emb_random
    +lex_cluster
    +use_align
    +with_padding]))

FLAGS = tf.flags.FLAGS
FLAGS.problems = PROBLEM
FLAGS.hparams_set = HPARAMS
FLAGS.data_dir = DATA_DIR
FLAGS.model = MODEL

FLAGS.schedule = "train_and_evaluate"
print(TRAIN_DIR)

/home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.vie-eng.y1r1.v2/t2tallregular_dim300_layer2_lr0.2_dropout0.1_bpeinf_usealign


In [5]:
import sys
sys.path.append(TRAIN_DIR)
from reg_config.reg_hparams import *
from reg_config.reg_problems import *
from reg_config.reg_modalities import *
from reg_config.reg_models import *

hparams = utils.create_hparams(FLAGS.hparams_set, FLAGS.data_dir)

# SET EXTRA HYPER PARAMS HERE!
#hparams.null_slot = True

utils.add_problem_hparams(hparams, PROBLEM)

num_datashards = utils.devices.data_parallelism().n

mode = tf.estimator.ModeKeys.EVAL

input_fn = utils.input_fn_builder.build_input_fn(
    mode=mode,
    hparams=hparams,
    data_dir=DATA_DIR,
    num_datashards=num_datashards,
    worker_replicas=FLAGS.worker_replicas,
    worker_id=FLAGS.worker_id,
    batch_size=1)

inputs, target = input_fn()
features = inputs
features['targets'] = target

print(hparams.problems[0])

# vocab_id: inputs, or targets
def encode(string, vocab_id):
    return [hparams.problems[0].vocabulary[vocab_id].encode(string) + [1] + [0]]

def decode(ids, vocab_id):
    return hparams.problems[0].vocabulary[vocab_id].decode(np.squeeze(ids))

def to_tokens(ids, vocab_id):
    ids = np.squeeze(ids)
    tokenizer = hparams.problems[0].vocabulary[vocab_id]
    tokens = []
    for _id in ids:
        if _id == 0:
            tokens.append('<PAD>')
        elif _id == 1:
            tokens.append('<EOS>')
        else:
            if merge_ops != "inf":
                tokens.append(tokenizer._subtoken_id_to_subtoken_string(_id))
            else:
                tokens.append(tokenizer._safe_id_to_token(_id))
    return tokens

model_fn = utils.model_builder.build_model_fn(
    MODEL,
    problem_names=[PROBLEM],
    train_steps=FLAGS.train_steps,
    worker_id=FLAGS.worker_id,
    worker_replicas=FLAGS.worker_replicas,
    eval_run_autoregressive=FLAGS.eval_run_autoregressive,
    decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams))
est_spec = model_fn(features, target, mode, hparams)

with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    beam_out = model_fn(features, target, tf.contrib.learn.ModeKeys.INFER, hparams)

sv = tf.train.Supervisor(
    logdir=TRAIN_DIR,
    global_step=tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step'))
sess = sv.PrepareSession(config=tf.ConfigProto(allow_soft_placement=True))
sv.StartQueueRunners(
    sess,
    tf.get_default_graph().get_collection(tf.GraphKeys.QUEUE_RUNNERS))

INFO:tensorflow:datashard_devices: ['gpu:0']
INFO:tensorflow:caching_devices: None
INFO:tensorflow:batching_scheme = {'boundaries': [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28, 30, 33, 36, 39, 42, 46, 50, 55, 60, 66, 72, 79, 86, 94, 103, 113, 124, 136, 149, 163, 179, 196, 215, 236], 'batch_sizes': [240, 180, 180, 180, 144, 144, 144, 120, 120, 120, 90, 90, 90, 90, 80, 72, 72, 60, 60, 48, 48, 48, 40, 40, 36, 30, 30, 24, 24, 20, 20, 18, 18, 16, 15, 12, 12, 10, 10, 9, 8, 8], 'min_length': 0, 'max_length': 1000000000, 'shuffle_queue_size': 270, 'window_size': 720}
INFO:tensorflow:Updated batching_scheme = {'boundaries': [], 'batch_sizes': [1], 'min_length': 0, 'max_length': 1000000000, 'shuffle_queue_size': 270, 'window_size': 720}
INFO:tensorflow:Reading data files from /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.vie-eng.y1r1.v2/translation/t2t_inf/translate_srctgt_lrlp-dev*
[('batch_size_multiplier', 1), ('input_modality', {'inputs': ('symbol', 8003)}), 

[]

In [6]:
# Get the attention tensors from the graph.
# This need to be done using the training graph since the inference uses a tf.while_loop
# and you cant fetch tensors from inside a while_loop.

enc_atts = []
dec_atts = []
encdec_atts = []

for i in range(hparams.num_hidden_layers):
    enc_att = tf.get_default_graph().get_operation_by_name(
        "body/model/parallel_0/body/encoder/encoder_lex0/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]
    dec_att = tf.get_default_graph().get_operation_by_name(
        "body/model/parallel_0/body/decoder/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]
    encdec_att = tf.get_default_graph().get_operation_by_name(
        "body/model/parallel_0/body/decoder/layer_%i/encdec_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]
    enc_atts.append(enc_att)
    dec_atts.append(dec_att)
    encdec_atts.append(encdec_att)

print(enc_atts[0].shape.as_list())
print(dec_atts[0].shape.as_list())
print(encdec_atts[0].shape.as_list())
    
# if attn == 'after1daggregate':
#     enc_atts = []
#     dec_atts = []
#     encdec_atts = []

#     attn_1d = tf.get_default_graph().get_operation_by_name(
#         "body/model/parallel_0/body/encoder/self_attention/multihead_attention/dot_product_attention/attention_weights").values()[0]

#     for i in range(hparams.num_hidden_layers):
#         enc_att = tf.get_default_graph().get_operation_by_name(
#             "body/model/parallel_0/body/encoder/encoder_lex0/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]
#         dec_att = tf.get_default_graph().get_operation_by_name(
#             "body/model/parallel_0/body/decoder/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]
#         encdec_att = tf.get_default_graph().get_operation_by_name(
#             "body/model/parallel_0/body/decoder/layer_%i/encdec_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]

#         enc_atts.append(enc_att)
#         dec_atts.append(dec_att)
#         encdec_atts.append(encdec_att)
        
# elif attn == 'before1daggregate':
#     enc_atts = []
#     dec_atts = []
#     encdec_atts = []

#     attn_1d = tf.get_default_graph().get_operation_by_name(
#         "body/model/parallel_0/body/encoder/self_attention/multihead_attention/dot_product_attention/attention_weights").values()[0]

#     for i in range(hparams.num_hidden_layers):
#         enc_att = tf.get_default_graph().get_operation_by_name(
#             "body/model/parallel_0/body/encoder/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]
#         dec_att = tf.get_default_graph().get_operation_by_name(
#             "body/model/parallel_0/body/decoder/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]
#         encdec_att = tf.get_default_graph().get_operation_by_name(
#             "body/model/parallel_0/body/decoder/layer_%i/encdec_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]

#         enc_atts.append(enc_att)
#         dec_atts.append(dec_att)
#         encdec_atts.append(encdec_att)

INFO:tensorflow:global_step/sec: 0
[None, 4, None, None]
[None, 4, None, None]
[None, 4, None, None]


In [7]:
inp, out, logits = sess.run(
    [
        inputs['inputs'], 
        target, 
        est_spec.predictions['predictions']
    ])

print("Input:    ", decode(inp[0], "inputs"))
print("Gold:     ", decode(out[0], "targets"))
logits = np.squeeze(logits[0])
tokens = np.argmax(logits, axis=1)
print("Gold out: ", decode(tokens, "targets"))

Input:     = <EOS>
Gold:      = <EOS>
Gold out:  = <EOS>
INFO:tensorflow:Recording summary at step 31815.


In [8]:
src_sent = "đang chờ satio"
inp_ids = encode(src_sent, "inputs")
beam_decode = sess.run(
    beam_out.predictions['outputs'], 
    {inputs['inputs']: np.expand_dims(np.expand_dims(inp_ids, axis=2), axis=3),}
)
trans = decode(beam_decode[0], "targets")
print(trans)

output_ids = beam_decode

# Get attentions
# np_attn_1d, np_enc_atts, np_dec_atts, np_encdec_atts = sess.run(
#     [attn_1d, enc_atts, dec_atts, encdec_atts], 
#     {
#         inputs['inputs']: np.expand_dims(np.expand_dims(inp_ids, axis=2), axis=3),
#         target: np.expand_dims(np.expand_dims(output_ids, axis=2), axis=3),
#     }
# )

np_enc_atts, np_dec_atts, np_encdec_atts = sess.run(
    [enc_atts, dec_atts, encdec_atts], 
    {
        inputs['inputs']: np.expand_dims(np.expand_dims(inp_ids, axis=2), axis=3),
        target: np.expand_dims(np.expand_dims(output_ids, axis=2), axis=3),
    }
)

print(np.array(np_enc_atts).shape)
print(np.array(np_dec_atts).shape)
print(np.array(np_encdec_atts).shape)

waiting for the UNK waiting . <EOS> <pad> <pad> <pad>
(2, 1, 4, 5, 5)
(2, 1, 4, 10, 10)
(2, 1, 4, 10, 20)


In [9]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [10]:
%load_ext autoreload
%autoreload 2
import attn

inp_text = to_tokens(inp_ids, "inputs")
out_text = to_tokens(output_ids, "targets")

#print(inp_text)
#print(out_text)

# [num_layers, batch_size, num_heads, enc/dec_length, enc/dec_length]
# print(np.array(np_attn_1d).shape)
# print(np.array(np_enc_atts).shape)
# print(np.array(np_encdec_atts).shape)
# print(np.array(np_dec_atts).shape)

# number of layers is set in attention.js (line 345)
attn.show(
    inp_text, 
    out_text,  
    np_enc_atts, 
    np_dec_atts, 
    np_encdec_atts,
    TRAIN_DIR)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

INFO:tensorflow:Recording summary at step 31815.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Recording summary at step 31815.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Recording summary at step 31815.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Recording summary at step 31815.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Saving checkpoint to path /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.vie-eng.y1r1.v2/t2tallregular_dim300_layer2_lr0.2_dropout0.1_bpeinf_usealign/model.ckpt
INFO:tensorflow:Recording summary at step 31815.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Recording summary at step 31815.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Recording summary at step 31815.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Recording summary at step 31815.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Recording summary at step 31815.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Saving checkpoint to path /home/ec2-user/