# Create Your Own Visualizations!
Instructions:
1. Install tensor2tensor and train a Transformer model following the instructions in the repository https://github.com/tensorflow/tensor2tensor.
2. Update cell 3 to point to your checkpoint; it is currently set up to read from the default checkpoint location that would be created by following the instructions above.
3. If you used custom hyperparameters, then update cell 4.
4. Run the notebook!

In [None]:
import NeuralSum as ns

import numpy as np
import os
os.environ['CUDA_VISIBLE_DEVICES']='1'

import tensorflow as tf

from tensor2tensor import problems
from tensor2tensor.bin import t2t_decoder  # To register the hparams set
from tensor2tensor.utils import registry
from tensor2tensor.utils import trainer_lib
from tensor2tensor.visualization import attention
from tensor2tensor.visualization import visualization

In [None]:
%%javascript
// Tell RequireJS where to load the `d3` module from: d3 v3.4.8 on the cdnjs
// CDN (protocol-relative URL, with the `.js` extension omitted).
// NOTE(review): presumably required by the attention visualization rendered
// in the last cell of the notebook — confirm.
require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
  }
});

## HParams

In [None]:
# PUT THE MODEL YOU WANT TO LOAD HERE!
# Directory that is later passed as `checkpoint_dir` when the TensorFlow
# session is created.
# NOTE(review): os.path.expanduser only expands a leading "~", so it leaves
# this relative path unchanged; it only matters if the path becomes "~/...".
CHECKPOINT = os.path.expanduser('./data/tensor2tensor/train')

In [None]:
# HParams
# The four values below are handed to AttentionVisualizer, which forwards
# them to build_model.
# NOTE(review): presumably these must match the settings the checkpoint was
# trained with, and the custom names are presumably registered by importing
# NeuralSum — confirm.
problem_name = 'summary_problem'  # looked up with problems.problem(...)
data_dir = os.path.expanduser('./data/tensor2tensor/data')  # used for hparams and feature encoders
model_name = "my_custom_transformer"  # looked up with registry.model(...)
hparams_set = "exp_6"  # passed to trainer_lib.create_hparams(...)

## Visualization

In [None]:
# visualizer = visualization.AttentionVisualizer(hparams_set, model_name, data_dir, problem_name, beam_size=1)

# Token id appended to every encoded input sequence (see `encode`).
EOS_ID = 1


class AttentionVisualizer(object):
    """Helper object for creating Attention visualizations.

    The constructor builds the model graph through ``build_model`` and keeps
    the resulting placeholders and tensors together with the problem's
    feature encoders, so that a raw input string can be turned into the data
    needed for the attention visualization.
    """

    def __init__(
            self, hparams_set, model_name, data_dir, problem_name, beam_size=1):
        """Build the graph and fetch the feature encoders.

        Args:
          hparams_set: name of the hparams set, forwarded to ``build_model``.
          model_name: name of the model, forwarded to ``build_model``.
          data_dir: data directory, forwarded to ``build_model`` and used to
            load the feature encoders.
          problem_name: name of the problem to look up.
          beam_size: beam size used by the inference graph.
        """
        inputs, targets, samples, att_mats = build_model(
            hparams_set, model_name, data_dir, problem_name,
            beam_size=beam_size)

        # Fetch the problem to obtain its text encoders.
        problem = problems.problem(problem_name)
        encoders = problem.feature_encoders(data_dir)

        self.inputs = inputs      # placeholder fed with encoded input ids
        self.targets = targets    # placeholder fed with decoded output ids
        self.att_mats = att_mats  # attention tensors returned by build_model
        self.samples = samples    # inference output tensor
        self.encoders = encoders  # feature encoders, indexed by 'inputs'

    @staticmethod
    def _flatten_ids(integers):
        """Drop size-1 axes from ``integers`` and return the ids as a list.

        ``np.squeeze`` collapses a single-token batch to a 0-d array, which
        cannot be iterated; ``np.atleast_1d`` turns that case back into a
        one-element sequence and leaves every other shape untouched.
        """
        return list(np.atleast_1d(np.squeeze(integers)))

    def encode(self, input_str):
        """Input str to a batch of ids, ready for inference.

        Args:
          input_str: raw input text.

        Returns:
          Array of shape (1, length, 1, 1) holding the encoded ids followed
          by ``EOS_ID``.
        """
        inputs = self.encoders['inputs'].encode(input_str) + [EOS_ID]
        # Make it 4D: one batch entry, `length` ids, two trailing unit axes.
        batch_inputs = np.reshape(inputs, [1, -1, 1, 1])
        return batch_inputs

    def decode(self, integers):
        """List (or array) of ints to str."""
        # NOTE(review): outputs are also decoded with the 'inputs' encoder;
        # this presumably relies on inputs and targets sharing a vocabulary
        # — confirm.
        return self.encoders['inputs'].decode(self._flatten_ids(integers))

    def decode_list(self, integers):
        """List (or array) of ints to list of str."""
        return self.encoders['inputs'].decode_list(self._flatten_ids(integers))

    def get_vis_data_from_string(self, sess, input_string):
        """Run the model on ``input_string`` and gather visualization data.

        Args:
          sess: session used to evaluate the graph.
          input_string: raw input text.

        Returns:
          Tuple ``(output_string, input_list, output_list, att_mats)``: the
          decoded output text, the decoded input tokens, the decoded output
          tokens and the evaluated attention tensors.
        """
        encoded_inputs = self.encode(input_string)

        # Run inference graph to get the output ids.
        out = sess.run(self.samples, {
            self.inputs: encoded_inputs,
        })

        # Feed the decoded output back in as targets to evaluate the
        # attention tensors for exactly this input/output pair.
        att_mats = sess.run(self.att_mats, {
            self.inputs: encoded_inputs,
            self.targets: np.reshape(out, [1, -1, 1, 1]),
        })

        output_string = self.decode(out)
        input_list = self.decode_list(encoded_inputs)
        output_list = self.decode_list(out)

        return output_string, input_list, output_list, att_mats

def build_model(hparams_set, model_name, data_dir, problem_name, beam_size=1):
    """Build the graph used for attention visualization.

    Args:
      hparams_set: name of the hparams set passed to
        ``trainer_lib.create_hparams``.
      model_name: name of the model looked up in the registry.
      data_dir: data directory passed to ``create_hparams``.
      problem_name: problem name passed to ``create_hparams``.
      beam_size: beam size passed to the model's ``infer`` call.

    Returns:
      Tuple ``(inputs, targets, samples, att_mats)``: the two int32
      placeholders, the ``'outputs'`` tensor of the inference call, and the
      attention tensors returned by ``get_att_mats``.
    """
    model_hparams = trainer_lib.create_hparams(
        hparams_set, data_dir=data_dir, problem_name=problem_name)
    model_cls = registry.model(model_name)
    model = model_cls(model_hparams, tf.estimator.ModeKeys.EVAL)

    # Batch size is fixed to 1; the sequence length is left dynamic.
    feature_shape = (1, None, 1, 1)
    inputs = tf.placeholder(tf.int32, shape=feature_shape, name='inputs')
    targets = tf.placeholder(tf.int32, shape=feature_shape, name='targets')

    # Call the model on inputs and targets first; the attention tensors are
    # collected from the model only after this call.
    model({'inputs': inputs, 'targets': targets})
    att_mats = get_att_mats(model)

    # Build the inference path inside the current variable scope with
    # reuse=True, so it shares the variables created above.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        decoded = model.infer({'inputs': inputs}, beam_size=beam_size)
        samples = decoded['outputs']

    return inputs, targets, samples, att_mats

def get_att_mats(translate_model, prefix='my_custom_transformer/body/'):
    """Collect the per-layer attention weights of a Transformer model.

    Args:
      translate_model: model exposing ``hparams.num_hidden_layers`` and an
        ``attention_weights`` mapping keyed by scope name.
      prefix: leading part of every attention-weight key, including the
        trailing slash. The default matches the model name used in this
        notebook; pass a different prefix (e.g. ``'transformer/body/'``)
        for a model registered under another name.

    Returns:
      Tuple ``(enc_atts, dec_atts, encdec_atts)``. Each element is a list
      with one entry per hidden layer, ordered by layer index: encoder
      self-attention, decoder self-attention and encoder-decoder attention.

    Raises:
      KeyError: if an expected key is missing from ``attention_weights``,
        typically because ``prefix`` does not match the model's scope.
    """
    postfix = '/multihead_attention/dot_product_attention'
    weights = translate_model.attention_weights
    num_layers = translate_model.hparams.num_hidden_layers

    def layer_atts(scope_template):
        # One lookup per layer; `scope_template` carries the %i layer slot.
        return [
            weights['%s%s%s' % (prefix, scope_template % i, postfix)]
            for i in range(num_layers)
        ]

    enc_atts = layer_atts('encoder/layer_%i/self_attention')
    dec_atts = layer_atts('decoder/layer_%i/self_attention')
    encdec_atts = layer_atts('decoder/layer_%i/encdec_attention')

    return enc_atts, dec_atts, encdec_atts

# Instantiate the visualizer. The constructor calls build_model, so the model
# graph is built when this line runs.
visualizer = AttentionVisualizer(hparams_set, model_name, data_dir, problem_name, beam_size=1)

In [None]:
# Create a non-trainable int64 variable named 'global_step' before the
# session is opened.
# NOTE(review): presumably needed so that the session machinery and the
# checkpoint restore can find a global step — confirm.
tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step')

# Open a session on the CHECKPOINT directory.
# NOTE(review): presumably this restores the latest checkpoint found there,
# and save_summaries_secs=0 is presumably meant to turn off summary writing
# — confirm. Checkpoint saving is left at its default, so this session may
# write new checkpoints into CHECKPOINT — confirm, and consider disabling it
# for a read-only visualization run.
sess = tf.train.MonitoredTrainingSession(
    checkpoint_dir=CHECKPOINT,
    save_summaries_secs=0,
)

In [None]:
# Sentence to run through the model.
input_sentence = "what started as a local controversy in salt lake city has evolved into a full-blown international scandal ."
# Returns the decoded output text, the input and output token lists, and the
# evaluated attention tensors for this input/output pair.
output_string, inp_text, out_text, att_mats = visualizer.get_vis_data_from_string(sess, input_sentence)
print(output_string)

## Interpreting the Visualizations
- The layers drop-down allows you to view the different Transformer layers, 0-indexed of course.
  - Tip: The first layer, last layer and 2nd to last layer are usually the most interpretable.
- The attention dropdown allows you to select different pairs of encoder-decoder attentions:
  - All: Shows all types of attentions together. NOTE: There is no relation between heads of the same color - between the decoder self attention and decoder-encoder attention since they do not share parameters.
  - Input - Input: Shows only the encoder self-attention.
  - Input - Output: Shows the decoder’s attention on the encoder. NOTE: Every decoder layer attends to the final layer of encoder so the visualization will show the attention on the final encoder layer regardless of what layer is selected in the drop down.
  - Output - Output: Shows only the decoder self-attention. NOTE: The visualization might be slightly misleading in the first layer, since the text shown is the target of the decoder; the input to the decoder at layer 0 is this text with a GO symbol prepended.
- The colored squares represent the different attention heads.
  - You can hide or show a given head by clicking on its color.
  - Double clicking a color will hide all other colors, double clicking on a color when it’s the only head showing will show all the heads again.
- You can hover over a word to see the individual attention weights for just that position.
  - Hovering over the words on the left will show what that position attended to.
  - Hovering over the words on the right will show what positions attended to it.

In [None]:
# att_mats holds the evaluated (encoder self-attention, decoder
# self-attention, encoder-decoder attention) lists; it is unpacked into three
# positional arguments after the input and output token lists.
attention.show(inp_text, out_text, *att_mats)