In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json

import tensorflow as tf
import numpy as np

from tensor2tensor.utils import trainer_utils as utils
from tensor2tensor.visualization import attention
from tensor2tensor.utils import decoding

In [2]:
%%javascript
require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
  }
});

<IPython.core.display.Javascript object>

In [3]:
%%bash
s=som
t=eng
source ../../../config.sh $s $t
echo "exp_dir: $exp_dir" > config_local.yml
echo "s: $s" >> config_local.yml
echo "t: $t" >> config_local.yml
echo "trans_dir: $trans_dir" >> config_local.yml



In [4]:
import yaml
with open("config_local.yml") as f:
    config = yaml.load(f)
exp_dir = config["exp_dir"]
s = config["s"]
t = config["t"]
trans_dir = config["trans_dir"]

use_lex = ""
emb_untrainable = ""
emb_random = ""
lex_cluster = ""
# previos version: 
# before1d, after1d, before2d, after2d, beforesimple, aftersimple, beforesimpletanh, aftersimpletanh
# current version: 
# beforeaggregate, afteraggregate, before1daggregate, after1daggregate, before2daggregate, after2daggregate, all1daggregate, al2daggregate
attn = "before1daggregate"
merge_ops = "inf" # 8000-8000, 8000, inf
dim = 300 # 512, 300
lr = 0.2
dropout = 0.1
layer = 2

In [5]:
import os  

PROBLEM = "translate_srctgt_lrlp"
#MODEL = "transformer" if attn == "" else "transformer_lex"+attn
MODEL = "transformer" if attn == "" else "transformer_lex"
HPARAMS = 'transformer_all'

DATA_DIR = os.path.join(trans_dir, "t2t_"+merge_ops+use_lex)
TRAIN_DIR= os.path.join(exp_dir, "_".join([
    "t2t"+attn, 
    "dim"+str(dim), 
    "layer"+str(layer), 
    "lr"+str(lr), 
    "dropout"+str(dropout), 
    "bpe"+merge_ops
    +use_lex
    +emb_untrainable
    +emb_random
    +lex_cluster]))

FLAGS = tf.flags.FLAGS
FLAGS.problems = PROBLEM
FLAGS.hparams_set = HPARAMS
FLAGS.data_dir = DATA_DIR
FLAGS.model = MODEL

FLAGS.schedule = "train_and_evaluate"

In [6]:
import sys
sys.path.append(TRAIN_DIR)
from reg_config.reg_hparams import *
from reg_config.reg_problems import *
from reg_config.reg_modalities import *
from reg_config.reg_models import *

hparams = utils.create_hparams(FLAGS.hparams_set, FLAGS.data_dir)

# SET EXTRA HYPER PARAMS HERE!
#hparams.null_slot = True

utils.add_problem_hparams(hparams, PROBLEM)

num_datashards = utils.devices.data_parallelism().n

mode = tf.estimator.ModeKeys.EVAL

input_fn = utils.input_fn_builder.build_input_fn(
    mode=mode,
    hparams=hparams,
    data_dir=DATA_DIR,
    num_datashards=num_datashards,
    worker_replicas=FLAGS.worker_replicas,
    worker_id=FLAGS.worker_id,
    batch_size=1)

inputs, target = input_fn()
features = inputs
features['targets'] = target

INFO:tensorflow:datashard_devices: ['gpu:0']
INFO:tensorflow:caching_devices: None
INFO:tensorflow:batching_scheme = {'boundaries': [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28, 30, 33, 36, 39, 42, 46, 50, 55, 60, 66, 72, 79, 86, 94, 103, 113, 124, 136, 149, 163, 179, 196, 215, 236], 'batch_sizes': [240, 180, 180, 180, 144, 144, 144, 120, 120, 120, 90, 90, 90, 90, 80, 72, 72, 60, 60, 48, 48, 48, 40, 40, 36, 30, 30, 24, 24, 20, 20, 18, 18, 16, 15, 12, 12, 10, 10, 9, 8, 8], 'min_length': 0, 'max_length': 1000000000, 'shuffle_queue_size': 270, 'window_size': 720}
INFO:tensorflow:Updated batching_scheme = {'boundaries': [], 'batch_sizes': [1], 'min_length': 0, 'max_length': 1000000000, 'shuffle_queue_size': 270, 'window_size': 720}
INFO:tensorflow:Reading data files from /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/translation/t2t_inf/translate_srctgt_lrlp-dev*


In [7]:
print(hparams.problems[0])

# vocab_id: inputs, or targets
def encode(string, vocab_id):
    return [hparams.problems[0].vocabulary[vocab_id].encode(string) + [1] + [0]]

def decode(ids, vocab_id):
    return hparams.problems[0].vocabulary[vocab_id].decode(np.squeeze(ids))

def to_tokens(ids, vocab_id):
    ids = np.squeeze(ids)
    tokenizer = hparams.problems[0].vocabulary[vocab_id]
    tokens = []
    for _id in ids:
        if _id == 0:
            tokens.append('<PAD>')
        elif _id == 1:
            tokens.append('<EOS>')
        else:
            if merge_ops != "inf":
                tokens.append(tokenizer._subtoken_id_to_subtoken_string(_id))
            else:
                tokens.append(tokenizer._safe_id_to_token(_id))
    return tokens

[('batch_size_multiplier', 1), ('input_modality', {'inputs': ('symbol', 8003)}), ('input_space_id', 37), ('loss_multiplier', 1.0), ('max_expected_batch_size_per_shard', 64), ('target_modality', ('symbol', 8003)), ('target_space_id', 4), ('vocabulary', {'inputs': <tensor2tensor.data_generators.text_encoder.TokenTextEncoder object at 0x7faa1e305e80>, 'targets': <tensor2tensor.data_generators.text_encoder.TokenTextEncoder object at 0x7faa1e282e48>}), ('was_copy', False), ('was_reversed', False)]


In [8]:
model_fn = utils.model_builder.build_model_fn(
    MODEL,
    problem_names=[PROBLEM],
    train_steps=FLAGS.train_steps,
    worker_id=FLAGS.worker_id,
    worker_replicas=FLAGS.worker_replicas,
    eval_run_autoregressive=FLAGS.eval_run_autoregressive,
    decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams))
est_spec = model_fn(features, target, mode, hparams)

INFO:tensorflow:datashard_devices: ['gpu:0']
INFO:tensorflow:caching_devices: None
reading source vocab from: /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/translation/t2t_inf/vocab.someng.8000.som
source_vocab_size: 8003
reading target vocab from: /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/translation/t2t_inf/vocab.someng.8000.eng
target_vocab_size: 8003
reading source vocab from: /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/translation/t2t_inf/vocab.someng.8000.som
source_vocab_size: 8003
reading target vocab from: /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/translation/t2t_inf/vocab.someng.8000.eng
target_vocab_size: 8003
INFO:tensorflow:Doing model_fn_body took 1.635 sec.
INFO:tensorflow:This model_fn took 1.880 sec.


In [9]:
with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    beam_out = model_fn(features, target, tf.contrib.learn.ModeKeys.INFER, hparams)

INFO:tensorflow:datashard_devices: ['gpu:0']
INFO:tensorflow:caching_devices: None
reading source vocab from: /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/translation/t2t_inf/vocab.someng.8000.som
source_vocab_size: 8003
reading target vocab from: /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/translation/t2t_inf/vocab.someng.8000.eng
target_vocab_size: 8003
reading source vocab from: /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/translation/t2t_inf/vocab.someng.8000.som
source_vocab_size: 8003
reading target vocab from: /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/translation/t2t_inf/vocab.someng.8000.eng
target_vocab_size: 8003
INFO:tensorflow:Beam Decoding with beam size 4
INFO:tensorflow:Doing model_fn_body took 1.240 sec.
INFO:tensorflow:This model_fn took 1.411 sec.


In [10]:
sv = tf.train.Supervisor(
    logdir=TRAIN_DIR,
    global_step=tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step'))
sess = sv.PrepareSession(config=tf.ConfigProto(allow_soft_placement=True))
sv.StartQueueRunners(
    sess,
    tf.get_default_graph().get_collection(tf.GraphKeys.QUEUE_RUNNERS))

INFO:tensorflow:Restoring parameters from /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/t2tbefore1daggregate_dim300_layer2_lr0.2_dropout0.1_bpeinf/model.ckpt-152841
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path /home/ec2-user/kklab/Projects/lrlp/experiment_2017.08.04.som-eng.y2r1.v1/t2tbefore1daggregate_dim300_layer2_lr0.2_dropout0.1_bpeinf/model.ckpt
INFO:tensorflow:Starting queue runners.


[]

INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Recording summary at step 152841.


In [13]:
# Get the attention tensors from the graph.
# This need to be done using the training graph since the inference uses a tf.while_loop
# and you cant fetch tensors from inside a while_loop.

enc_atts = []
dec_atts = []
encdec_atts = []

attn_1d = tf.get_default_graph().get_operation_by_name(
    "body/model/parallel_0/body/encoder/self_attention/multihead_attention/dot_product_attention/attention_weights").values()[0]

for i in range(hparams.num_hidden_layers):
    enc_att = tf.get_default_graph().get_operation_by_name(
        "body/model/parallel_0/body/encoder/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]
    dec_att = tf.get_default_graph().get_operation_by_name(
        "body/model/parallel_0/body/decoder/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]
    encdec_att = tf.get_default_graph().get_operation_by_name(
        "body/model/parallel_0/body/decoder/layer_%i/encdec_attention/multihead_attention/dot_product_attention/attention_weights" % i).values()[0]

    enc_atts.append(enc_att)
    dec_atts.append(dec_att)
    encdec_atts.append(encdec_att)

INFO:tensorflow:Recording summary at step 152841.
INFO:tensorflow:global_step/sec: 0


In [14]:
inp, out, logits = sess.run([inputs['inputs'], target, est_spec.predictions['predictions']])

print("Input:    ", decode(inp[0], "inputs"))
print("Gold:     ", decode(out[0], "targets"))
logits = np.squeeze(logits[0])
tokens = np.argmax(logits, axis=1)
print("Gold out: ", decode(tokens, "targets"))

Input:     wuxuu intaasi ku daray in UNK UNK ay ku dileen UNK ay diyaaradaha dagaalka Kenya ka fuliyeen meel aan wax badan ka fogeyn Ceel-cadde oo ah meesha lagu laayey ciidamada Kenya . <EOS>
Gold:      he further stated that he was killed through an airstrike in a place not far from UNK where the attack took place . <EOS>
Gold out:  he added added that the killed killed by his air carried the place not far from the El the Kenyan killed place of <EOS>


In [15]:
src_sent = "dagaalka ayaa ka dhacay degmada Bayla ee gobolka Bari"

In [16]:
inp_ids = encode(src_sent, "inputs")
beam_decode = sess.run(
    beam_out.predictions['outputs'], 
    {inputs['inputs']: np.expand_dims(np.expand_dims(inp_ids, axis=2), axis=3),}
)
trans = decode(beam_decode[0], "targets")
print(trans)

the clashes occurred in the UNK district in the eastern region <EOS> <pad> <pad>
INFO:tensorflow:Recording summary at step 152841.
INFO:tensorflow:global_step/sec: 0


In [17]:
output_ids = beam_decode

# Get attentions
np_attn_1d, np_enc_atts, np_dec_atts, np_encdec_atts = sess.run(
    [attn_1d, enc_atts, dec_atts, encdec_atts], 
    {
        inputs['inputs']: np.expand_dims(np.expand_dims(inp_ids, axis=2), axis=3),
        target: np.expand_dims(np.expand_dims(output_ids, axis=2), axis=3),
    }
)

In [18]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [23]:
inp_text = to_tokens(inp_ids, "inputs")
out_text = to_tokens(output_ids, "targets")

# [num_layers, batch_size, num_heads, enc/dec_length, enc/dec_length]
print(np.array(np_enc_atts).shape)
print(np.array(np_attn_1d).shape)

# number of layers is set in attention.js (line 345)
#attention.show(inp_text, out_text, np_enc_atts, np_dec_atts, np_encdec_atts)

(2, 1, 4, 11, 11)
(1, 4, 44, 44)
INFO:tensorflow:Recording summary at step 152841.
