Things to add or try:
- Remove tar_vocab_to_int from decoding layer and just pass in the int for vocab_to_int['-PAD-']
- In preprocessing, valid_tags should be done before data_2_ids where I create duplicate rows for tag sets

# Import Data

In [11]:
# Source data contains ending delimiters/symbols for the Acronym and the Tag. </ACR> & </TAG>
# Target data contains a start delimiter <MEAN> and an end delimiter </MEAN>
# The <PAD> padding symbol is added in the network's graph and only padded to the size of the largest row in each current batch.

# Example Source. Padding added later.
# ['a', 'c', 'r', 'o', 'n', 'y', 'm', '</ACR>', '<TAG>', 'these', 'are', 'the', 'tags', '</TAG>']

# Example Target. Padding added later.
# ['<MEAN>', 'this', 'is', 'the', 'meaning', '</MEAN>']

# *Actual data in indexed integer format for use with embeddings.

In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# df = pd.read_pickle('DataSets/custom_acr_with_src_trgt.pkl')
# validation_set = pd.read_pickle('DataSets/custom_acr_with_src_trgt_validation.pkl')
vocab_to_int = pickle.load(open("DataSets/custom_vocab_to_int.p", "rb"))
int_to_vocab = pickle.load(open("DataSets/custom_int_to_vocab.p", "rb"))

embeddings_index = pickle.load(open("DataSets/embeddings_index.p", "rb"))

In [3]:
def read_int(text):
    temp = []
    for i in text:
        temp.append(int_to_vocab[i])
    print(' '.join(temp))

In [4]:
# print(read_int(df['Target'].iloc[2]), end='\n\n')
# read_int(df['Source'].iloc[2])

In [5]:
# source_int_text = [r for r in df['Source']]
# target_int_text = [r for r in df['Target']]
# source_int_text_val = [r for r in validation_set['Source']]
# target_int_text_val = [r for r in validation_set['Target']]

In [6]:
def get_data():
    df = pd.read_pickle('DataSets/custom_acr_with_src_trgt.pkl')
    validation_set = pd.read_pickle('DataSets/custom_acr_with_src_trgt_validation.pkl')
    
    src = [r for r in df['Source']] # Source Data in int form
    tgt = [r for r in df['Target']] # Target Data in int form
    src_val = [r for r in validation_set['Source']] # Validation Set Source Data in int form
    tgt_val = [r for r in validation_set['Target']] # Validation Set Target Data in int form
    return src, tgt, src_val, tgt_val

In [7]:
source_int_text, target_int_text, source_int_text_val, target_int_text_val = get_data()

In [8]:
# Not currently used because it is faster to train when sorting by source length

# Shuffle data
# from random import shuffle
# idx_shuffle = [i for i in range(len(source_int_text))]
# shuffle(idx_shuffle)
# source_int_text = [source_int_text[i] for i in idx_shuffle]
# target_int_text = [target_int_text[i] for i in idx_shuffle]

# Verify Version

In [9]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf
from tensorflow.python.layers.core import Dense
from tensorflow.python.ops.rnn_cell_impl import _zero_state_tensors

# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.1'), 'Please use TensorFlow version 1.1 or newer'
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.1.0
Default GPU Device: /gpu:0


# Create Word Embeddings From ConceptNet Numberbatch

In [10]:
# ConceptNet Numberbatch uses embedding dimension of 300
embedding_dim = 300

word_embedding_matrix = np.zeros((len(vocab_to_int), embedding_dim), dtype=np.float32)
for word, i in vocab_to_int.items():
    if word in embeddings_index:
        word_embedding_matrix[i] = embeddings_index[word]
    else:
        new_embedding = np.array(np.random.uniform(-1.0, 1.0, embedding_dim))
        embeddings_index[word] = new_embedding
        word_embedding_matrix[i] = new_embedding

# Define Network

In [11]:
# target_length = len(df['Target'].iloc[0])
# source_length = len(df['Source'].iloc[0])
source_length = len(source_int_text[0])
target_length = len(target_int_text[0])


vocab_size = len(vocab_to_int)

pad_int = vocab_to_int['<PAD>'] # <PAD> int value
target_start_int = vocab_to_int['<GO>']
target_stop_int = vocab_to_int['</MEAN>']

In [12]:
def model_inputs():
    """
    Create TF Placeholders for input, targets, learning rate, and lengths of source and target sequences.
    :return: Tuple (input, targets, learning rate, keep probability, target sequence length,
    max target sequence length, source sequence length)
    """
    inputs = tf.placeholder(tf.int32, [None, None], name='input')
    targets = tf.placeholder(tf.int32, [None, None], name='target')
    learning_rate = tf.placeholder(tf.float32, name='learning_rate')
    keep_prob = tf.placeholder(tf.float32, name='keep_probability')
    target_seq_len = tf.placeholder(tf.int32, (None,), name='target_sequence_length')
    max_target_len = tf.reduce_max(target_seq_len, name='max_target_length')
    source_seq_len = tf.placeholder(tf.int32, (None,), name='source_sequence_length')
    
    return inputs, targets, learning_rate, keep_prob, target_seq_len, max_target_len, source_seq_len

### Encoding

In [13]:
def encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob, source_seq_len, source_vocab_size):
    """
    Create encoding layer
    :param rnn_inputs: Inputs for the RNN
    :param rnn_size: RNN Size (number of units in the cell)
    :param num_layers: Number of layers
    :param keep_prob: Dropout keep probability
    :param source_sequence_length: a list of the lengths of each sequence in the batch
    :param source_vocab_size: vocabulary size of source data
    :param encoding_embedding_size: embedding size of source data
    :return: tuple (RNN out, RNN state)
    """
    # Embedding
    embedded_encoder_input = tf.nn.embedding_lookup(word_embedding_matrix, rnn_inputs)
    
    for layer in range(num_layers):
        with tf.variable_scope('encoder_{}'.format(layer)):
            cell_fw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob = keep_prob)
            
            cell_bw = tf.contrib.rnn.LSTMCell(rnn_size, tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob = keep_prob)
            
            out, state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, embedded_encoder_input, source_seq_len, dtype=tf.float32)
    
    # Join outputs since we are using a bidirectional RNN
    out = tf.concat(out, 2)
    
    return out, state

### Decoding

In [14]:
def process_decoder_input(tar_data, tar_vocab_to_int, batch_size):
    """
    Preprocess target data for encoding
    :param tar_data: Target Placehoder
    :param tar_vocab_to_int: Dictionary to go from the target words to an id
    :param batch_size: Batch Size
    :return: Preprocessed target data
    """
    # Slice off last timestep since it will never be used
    ending = tf.strided_slice(tar_data, [0, 0], [batch_size, -1], [1, 1])
    dec_input = tf.concat([tf.fill([batch_size, 1], tar_vocab_to_int['<GO>']), ending], 1)
    return dec_input

Decoding is broken in to 2 graphs that share weights, Training and Inference:
- Training takes targets as inputs to each time step
- Inference uses previous time step output as input

In [15]:
# Train Decoder......................................................................................................

def decoding_layer_train(enc_state, dec_cell, dec_embed_input, target_seq_len, max_batch_seq_len, output_layer, keep_prob):
    """
    Create a decoding layer for training
    :param enc_state: Encoder State
    :param dec_cell: Decoder RNN Cell
    :param dec_embed_input: Decoder embedded input
    :param target_seq_len: The lengths of each sequence in the target batch
    :param max_batch_seq_len: The length of the longest sequence in the batch
    :param output_layer: Function to apply the output layer
    :param keep_prob: Dropout keep probability
    :return: BasicDecoderOutput containing training logits and sample_id
    """
    train_helper = tf.contrib.seq2seq.TrainingHelper(inputs=dec_embed_input, sequence_length=target_seq_len, time_major=False)
    dec_cell = tf.contrib.rnn.DropoutWrapper(dec_cell, output_keep_prob=keep_prob)
    training_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, train_helper, enc_state, output_layer)
    out, _ = tf.contrib.seq2seq.dynamic_decode(training_decoder, impute_finished=True, maximum_iterations=max_batch_seq_len)
    
    return out

In [16]:
# Inference Decoder.....................................................................................................

def decoding_layer_infer(enc_state, dec_cell, dec_embeddings, start_seq_id, end_seq_id, max_tar_seq_len, vocab_size, output_layer, batch_size, keep_prob):
    """
    Create a decoding layer for inference
    :param enc_state: Encoder state
    :param dec_cell: Decoder RNN Cell
    :param dec_embeddings: Decoder embeddings
    :param start_seq_id: GO ID
    :param end_seq_id: EOS Id
    :param max_tar_seq_len: Maximum length of target sequences
    :param vocab_size: Size of decoder/target vocabulary
    :param output_layer: Function to apply the output layer
    :param batch_size: Batch size
    :param keep_prob: Dropout keep probability
    :return: BasicDecoderOutput containing inference logits and sample_id
    """
    start_tokens = tf.tile(tf.constant([start_seq_id], dtype=tf.int32), [batch_size], name='start_tokens')
    
    emb_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(dec_embeddings, start_tokens, end_seq_id)
    inference_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, emb_helper, enc_state, output_layer=output_layer)
    
    # Run the data through the RNN nodes. Does unrolling and returns outputs for each time step and final state of hidden layer.
    out, _ = tf.contrib.seq2seq.dynamic_decode(inference_decoder, impute_finished=True, maximum_iterations=max_tar_seq_len)
    
    return out

In [17]:
# Build the decoding layer...............................................................................................

def decoding_layer(dec_input, enc_state, enc_output, src_seq_len, tar_seq_len, max_tar_seq_len, rnn_size, num_layers, tar_vocab_to_int, tar_vocab_size, batch_size, keep_prob):
    """
    Create decoding layer
    :param dec_input: Decoder input
    :param enc_state: Encoder state
    :param tar_seq_len: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of target sequences
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param tar_vocab_to_int: Dictionary to go from the target words to an id
    :param tar_vocab_size: Size of target vocabulary
    :param batch_size: The size of the batch
    :param keep_prob: Dropout keep probability
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    # Embed Decoder Inputs
#     dec_embeddings = tf.Variable(tf.random_uniform([tar_vocab_size, dec_emb_size]))
#     dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)
    dec_embed_input = tf.nn.embedding_lookup(word_embedding_matrix, dec_input)
    
    for layer in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer)):
            lstm = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            drop = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)
    
    # Projection Layer, turns top hidden states to logit vectors of dimension tar_vocab_size
    output_layer = Dense(tar_vocab_size, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))
    
    attn_mech = tf.contrib.seq2seq.BahdanauAttention(rnn_size, enc_output, src_seq_len, normalize=False, name='BahdanauAttention')
    cell = tf.contrib.seq2seq.DynamicAttentionWrapper(drop, attn_mech, rnn_size)
    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(enc_state[0], _zero_state_tensors(rnn_size, batch_size, tf.float32))
    
    with tf.variable_scope("decode"):
        train_dec_out = decoding_layer_train(initial_state, 
                                             cell, 
                                             dec_embed_input, 
                                             tar_seq_len, 
                                             max_tar_seq_len, 
                                             output_layer, 
                                             keep_prob)
        
    with tf.variable_scope("decode", reuse=True):
        infer_dec_out = decoding_layer_infer(initial_state, 
                                             cell, 
                                             word_embedding_matrix, 
                                             target_start_int, 
                                             target_stop_int, 
                                             max_tar_seq_len, 
                                             tar_vocab_size, 
                                             output_layer, 
                                             batch_size, 
                                             keep_prob)
        
        return train_dec_out, infer_dec_out

# Build Network

In [18]:
# Build sequence to sequence model

def seq2seq_model(input_data, tar_data, keep_prob, batch_size, src_seq_len, tar_seq_len, max_tar_seq_len, vocab_size, 
                  rnn_size, num_layers, vocab_to_int):
    """
    Build the Sequence-to-Sequence part of the neural network
    :param input_data: Input placeholder
    :param tar_data: Target placeholder
    :param keep_prob: Dropout keep probability placeholder
    :param batch_size: Batch Size
    :param source_seq_len: Sequence Lengths of source sequences in the batch
    :param tar_seq_len: Sequence Lengths of target sequences in the batch
    :param vocab_size: Vocabulary size
    :param dec_emb: Decoder embedding size
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param tar_vocab_to_int: Dictionary to go from the target words to an id
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    enc_out, enc_state = encoding_layer(input_data, rnn_size, num_layers, keep_prob, source_seq_len, vocab_size)
    
    dec_input = process_decoder_input(tar_data, vocab_to_int, batch_size)
    
    train_dec_out, infer_dec_out = decoding_layer(dec_input, enc_state, enc_out, src_seq_len, tar_seq_len, max_tar_seq_len, rnn_size, num_layers, 
                                                  vocab_to_int, vocab_size, batch_size, keep_prob)
    
    return train_dec_out, infer_dec_out

### Hyperparameters

In [19]:
epochs = 1000
batch_size = 16
rnn_size = 256
num_layers = 4
learning_rate = 0.0005
keep_probability = 0.6

display_step = 50
update_check = 1000

# Build The Graph

In [20]:
save_path = 'checkpoints/dev'

train_graph = tf.Graph()
with train_graph.as_default():
    # Load the model inputs
    input_data, targets, lr, keep_prob, tar_seq_len, max_tar_seq_len, source_seq_len = model_inputs()
    
    # Create the training and inference logits
    train_logits, inference_logits = seq2seq_model(tf.reverse(input_data, [-1]),
                                                   targets,
                                                   keep_prob,
                                                   batch_size,
                                                   source_seq_len,
                                                   tar_seq_len,
                                                   max_tar_seq_len,
                                                   vocab_size,
                                                   rnn_size,
                                                   num_layers,
                                                   vocab_to_int) 
    training_logits = tf.identity(train_logits.rnn_output, name='logits')
    inference_logits = tf.identity(inference_logits.sample_id, name='predictions')
    
    masks = tf.sequence_mask(tar_seq_len, max_tar_seq_len, dtype=tf.float32, name='masks')
    
    with tf.name_scope("optimization"):
        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks)
        
        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)
        
        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)



### Pad Sentences

In [21]:
def pad_sentence_batch(sentence_batch, pad_int):
    """Pad sentences with <PAD> so that each sentence of a batch has the same length"""
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [pad_int] * (max_sentence - len(sentence)) for sentence in sentence_batch]

### Get Batches

In [22]:
def get_batches(sources, targets, batch_size, source_pad_int, target_pad_int):
    """Batch targets, sources, and the lengths of their sentences together"""
    for batch_i in range(0, len(sources)//batch_size):
        start_i = batch_i * batch_size

        # Slice the right amount for the batch
        sources_batch = sources[start_i:start_i + batch_size]
        targets_batch = targets[start_i:start_i + batch_size]

        # Pad
        pad_sources_batch = np.array(pad_sentence_batch(sources_batch, source_pad_int))
        pad_targets_batch = np.array(pad_sentence_batch(targets_batch, target_pad_int))

        # Need the lengths for the _lengths parameters
        pad_targets_lengths = []
        for target in pad_targets_batch:
            pad_targets_lengths.append(len(target))

        pad_source_lengths = []
        for source in pad_sources_batch:
            pad_source_lengths.append(len(source))

        yield pad_sources_batch, pad_targets_batch, pad_source_lengths, pad_targets_lengths

### Get Accuracy

In [None]:
def get_accuracy(target, logits):
    """
    Calculate accuracy
    """
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(
            target,
            [(0,0),(0,max_seq - target.shape[1])],
            'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(
            logits,
            [(0,0),(0,max_seq - logits.shape[1])],
            'constant')

    return np.mean(np.equal(target, logits))

# Train

In [None]:
# Split data to training and validation sets
# train_source = source_int_text[:train_split_size]
# train_target = target_int_text[:train_split_size]
# valid_source = source_int_text[train_split_size:]
# valid_target = target_int_text[train_split_size:]
train_source = source_int_text
train_target = target_int_text
valid_source = source_int_text_val
valid_target = target_int_text_val
(valid_sources_batch, 
 valid_targets_batch, 
 valid_sources_lengths, 
 valid_targets_lengths) = next(get_batches(valid_source, 
                                           valid_target, 
                                           batch_size, 
                                           pad_int, 
                                           pad_int))

stop_early = 0
update_loss = 0
batch_loss = 0
batch_total = 0
summary_update_loss = []

with tf.Session(graph=train_graph) as sess:
    #saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    
    # If we want to continue training a previous session
    #saver.restore(sess, save_path)
    
    for epoch_i in range(epochs):
        update_loss = 0
        batch_loss = 0
        for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in enumerate(
            get_batches(train_source, train_target, batch_size, pad_int, pad_int)):
        
            _, loss = sess.run([train_op, cost], {input_data: source_batch, 
                                                  targets: target_batch, 
                                                  lr: learning_rate, 
                                                  tar_seq_len: targets_lengths, 
                                                  source_seq_len: sources_lengths, 
                                                  keep_prob: keep_probability})
        
            batch_loss += loss
            update_loss += loss
        
            # Display Step
            if batch_i % display_step == 0 and batch_i > 0:
                batch_train_logits = sess.run(inference_logits, {input_data: source_batch, 
                                                                 source_seq_len: sources_lengths, 
                                                                 tar_seq_len: targets_lengths, 
                                                                 keep_prob: 1.0})

                batch_valid_logits = sess.run(inference_logits, {input_data: valid_sources_batch, 
                                                                 source_seq_len: valid_sources_lengths, 
                                                                 tar_seq_len: valid_targets_lengths,
                                                                 keep_prob: 1.0})

                train_acc = get_accuracy(target_batch, batch_train_logits)
                valid_acc = get_accuracy(valid_targets_batch, batch_valid_logits)
                
                
                
                #print('Sources Lengths: {}, Targets Lengths: {}'.format(max(sources_lengths), max(targets_lengths)))
                

                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.8f}, Validation Accuracy: {:>6.8f}, Loss: {:>6.8f}'
                          .format(epoch_i, batch_i, len(source_int_text) // batch_size, train_acc, valid_acc, batch_loss/display_step))
                batch_loss = 0
                
            if batch_i % update_check == 0 and batch_i > 0:
                print("Average loss for this update:", round(update_loss/update_check,3))
                summary_update_loss.append(update_loss)
                
                if update_loss <= min(summary_update_loss):
                    stop_early = 0
                    
                    print("New Record!")
                    saver = tf.train.Saver()
                    saver.save(sess, save_path)
                else:
                    print("No Improvement for " + str(stop_early) + " Counts")
                    stop_early += 1
    
                update_loss = 0
        
    # Save Model
#     saver = tf.train.Saver()
#     saver.save(sess, save_path)
#     print('Model Trained and Saved')

Epoch   0 Batch   50/8052 - Train Accuracy: 0.38541667, Validation Accuracy: 0.34375000, Loss: 7.10332812
Epoch   0 Batch  100/8052 - Train Accuracy: 0.37500000, Validation Accuracy: 0.34375000, Loss: 5.60577044
Epoch   0 Batch  150/8052 - Train Accuracy: 0.52678571, Validation Accuracy: 0.37500000, Loss: 5.26438894
Epoch   0 Batch  200/8052 - Train Accuracy: 0.41071429, Validation Accuracy: 0.34375000, Loss: 5.02235150
Epoch   0 Batch  250/8052 - Train Accuracy: 0.41666667, Validation Accuracy: 0.39583333, Loss: 4.86644246
Epoch   0 Batch  300/8052 - Train Accuracy: 0.40000000, Validation Accuracy: 0.41666667, Loss: 4.78379058
Epoch   0 Batch  350/8052 - Train Accuracy: 0.44791667, Validation Accuracy: 0.47916667, Loss: 4.83075263
Epoch   0 Batch  400/8052 - Train Accuracy: 0.48958333, Validation Accuracy: 0.50000000, Loss: 4.77324216
Epoch   0 Batch  450/8052 - Train Accuracy: 0.43750000, Validation Accuracy: 0.50000000, Loss: 4.64471961
Epoch   0 Batch  500/8052 - Train Accuracy: 0.

Epoch   0 Batch 3850/8052 - Train Accuracy: 0.48750000, Validation Accuracy: 0.53125000, Loss: 3.60844162
Epoch   0 Batch 3900/8052 - Train Accuracy: 0.59375000, Validation Accuracy: 0.57291667, Loss: 3.62539948
Epoch   0 Batch 3950/8052 - Train Accuracy: 0.59375000, Validation Accuracy: 0.57291667, Loss: 3.74106905
Epoch   0 Batch 4000/8052 - Train Accuracy: 0.52500000, Validation Accuracy: 0.60416667, Loss: 3.56265208
Average loss for this update: 3.643
New Record!
Epoch   0 Batch 4050/8052 - Train Accuracy: 0.58750000, Validation Accuracy: 0.60416667, Loss: 3.54360434
Epoch   0 Batch 4100/8052 - Train Accuracy: 0.52500000, Validation Accuracy: 0.58333333, Loss: 3.46649690
Epoch   0 Batch 4150/8052 - Train Accuracy: 0.60416667, Validation Accuracy: 0.57291667, Loss: 3.61316901
Epoch   0 Batch 4200/8052 - Train Accuracy: 0.57500000, Validation Accuracy: 0.57291667, Loss: 3.66052074
Epoch   0 Batch 4250/8052 - Train Accuracy: 0.63392857, Validation Accuracy: 0.56250000, Loss: 3.5350289

Epoch   0 Batch 7650/8052 - Train Accuracy: 0.57291667, Validation Accuracy: 0.57291667, Loss: 3.41058762
Epoch   0 Batch 7700/8052 - Train Accuracy: 0.64583333, Validation Accuracy: 0.56250000, Loss: 3.34725412
Epoch   0 Batch 7750/8052 - Train Accuracy: 0.55208333, Validation Accuracy: 0.57291667, Loss: 3.40646069
Epoch   0 Batch 7800/8052 - Train Accuracy: 0.59375000, Validation Accuracy: 0.59375000, Loss: 3.47576907
Epoch   0 Batch 7850/8052 - Train Accuracy: 0.58750000, Validation Accuracy: 0.59375000, Loss: 3.46912821
Epoch   0 Batch 7900/8052 - Train Accuracy: 0.51041667, Validation Accuracy: 0.59375000, Loss: 3.44072938
Epoch   0 Batch 7950/8052 - Train Accuracy: 0.57500000, Validation Accuracy: 0.60416667, Loss: 3.30074394
Epoch   0 Batch 8000/8052 - Train Accuracy: 0.61458333, Validation Accuracy: 0.58333333, Loss: 3.41941498
Average loss for this update: 3.42
New Record!
Epoch   0 Batch 8050/8052 - Train Accuracy: 0.59375000, Validation Accuracy: 0.59375000, Loss: 3.39045576

# Predictions

In [39]:
from random import randint
idx = randint(0, len(source_int_text)-1)
source_predict = source_int_text[idx]
for w in source_int_text[idx]:
    print(int_to_vocab[w])

r
l
s
s
a
</ACR>
expertise
aquatic
training
indigenous
bronze
surf
injury
sport
federation
recreation
inland
prevention
research
n
swimming
participation
education
pool
guideline
ambassador
</TAG>


In [40]:
for w in target_int_text[idx]:
    print(int_to_vocab[w])

royal
life
saving
society
australia
</MEAN>


In [51]:
# Custom acronym test
# cus_acr = "mosfet"
# cus_tags = "electronic circuit switch gate electricity component transistor source"

# cus_acr = "blt"
# cus_tags = "sandwich bread lunch dinner restaurant meal mustard cheese mayo"

cus_acr = "bjt"
cus_tags = "electronic circuit switch gate electricity component"

# cus_acr = "fpga"
# cus_tags = "electronic circuit integrated hardware language programmable logic blocks memory"

# cus_acr = "asic"
# cus_tags = "electronic circuit logic chip programmable hardware microprocessor memory rom ram eeprom"

# cus_acr = "byob"
# cus_tags = "party drinking alcohol invitation host guest"



source_predict = list(cus_acr) + ['</ACR>'] + cus_tags.split() + ['</TAG>']
source_predict = [vocab_to_int[w] for w in source_predict if w in set(vocab_to_int.keys())]

In [52]:
load_path = 'checkpoints/dev'

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_path + '.meta')
    loader.restore(sess, load_path)
    
    input_data = loaded_graph.get_tensor_by_name('input:0')
    logits = loaded_graph.get_tensor_by_name('predictions:0')
    target_sequence_length = loaded_graph.get_tensor_by_name('target_sequence_length:0')
    source_sequence_length = loaded_graph.get_tensor_by_name('source_sequence_length:0')
    keep_prob = loaded_graph.get_tensor_by_name('keep_probability:0')
    
    pred_logits = sess.run(logits, {input_data: [source_predict]*batch_size, 
                                    target_sequence_length: [12]*batch_size, 
                                    source_sequence_length: [len(source_predict)]*batch_size, 
                                    keep_prob: 1.0})[0]
    print('Inputs')
    print('  Word Ids: {}'.format([i for i in source_predict]))
    print('  Words:    {}'.format([int_to_vocab[i] for i in source_predict]))
    print('\nPrediction')
    print('  Word Ids: {}'.format([i for i in pred_logits]))
    print('  Words:    {}'.format(' '.join([int_to_vocab[i] for i in pred_logits])))

INFO:tensorflow:Restoring parameters from checkpoints/dev
Inputs
  Word Ids: [16162, 12649, 16512, 2, 3585, 10722, 1162, 3317, 13798, 20778, 4]
  Words:    ['b', 'j', 't', '</ACR>', 'electronic', 'circuit', 'switch', 'gate', 'electricity', 'component', '</TAG>']

Prediction
  Word Ids: [16982, 12169, 5614, 5]
  Words:    blue jet technology </MEAN>
