In [1]:
"""
    Possible improvement:
    - pre-trained language model
    - beam search
    
    TODO:
    - build word dictionary
    - add EOS on output sequence
    - replace named-entity
    - output projection, sampled softmax
    - extract arg_max and embed, update embedding only for "go"
    - decoder input for testing is only [GO, PAD, ..., PAD]
"""
# Emit a marker line so the notebook output shows that this first cell ran.
print 'Start!'

Start!


In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import variable_scope
from tensorflow.python.util import nest

In [3]:
""" 
    Setup Variables and Placeholders for Convolutional Sentence Encoder
"""
# Store layers weight & bias
# One bank of `num_filters` convolution filters is created per window size
# listed in `filter_sizes`.
num_filters = 100
filter_sizes = [3, 4, 5]

word_embedding_size = 150
sentence_length = 50 # fixed length
vocab_size = 42579

# Filter shapes follow the conv2d layout [height, width, in_channels, out_channels]:
# each filter covers `filter_sizes[k]` consecutive words across the full
# embedding width, on a single input channel.
filter_shape_1 = [filter_sizes[0], word_embedding_size, 1, num_filters]
filter_shape_2 = [filter_sizes[1], word_embedding_size, 1, num_filters]
filter_shape_3 = [filter_sizes[2], word_embedding_size, 1, num_filters]
weights = {
    'wc1': tf.Variable(tf.truncated_normal(filter_shape_1, stddev=0.1), name='wc1'),
    'wc2': tf.Variable(tf.truncated_normal(filter_shape_2, stddev=0.1), name='wc2'),
    'wc3': tf.Variable(tf.truncated_normal(filter_shape_3, stddev=0.1), name='wc3'),
    # Fix: the bounds were previously passed positionally as (1., -1.), i.e.
    # minval > maxval. The table is frozen (trainable=False) and is overwritten
    # by the checkpoint restore later in the notebook, so only the throw-away
    # initial values are affected.
    'word_embeddings': tf.Variable(
        tf.random_uniform([vocab_size, word_embedding_size], minval=-1., maxval=1.),
        trainable=False,
        name='word_embeddings_150_6_20')
}

# NOTE(review): `name=` below is an argument of tf.random_normal, not of
# tf.Variable, so the variables themselves get auto-generated names. This is
# left as-is on purpose: the notebook saves tf.trainable_variables() with a
# Saver, and renaming the variables would change the names written there.
biases = {
    'bc1': tf.Variable(tf.random_normal([num_filters], name='bc1')),
    'bc2': tf.Variable(tf.random_normal([num_filters], name='bc2')),
    'bc3': tf.Variable(tf.random_normal([num_filters], name='bc3'))
}

In [4]:
# Decoder and optimisation hyper-parameters.
summary_length = 45
batch_size = 5 # size of training batch
sentence_embedding_size = 300 # encoder input size
doc_embedding_size = 750 # hidden layer size
output_size = 1
learning_rate = 1e-3
# Passed to AdamOptimizer as beta1 / beta2 further down.
momentum_beta_1 = 0.99
momentum_beta_2 = 0.999

# NOTE(review): `_linear` is a private TensorFlow helper (leading underscore);
# it is called below as linear(args, output_size, bias) - confirm it still
# exists before upgrading TensorFlow.
linear = tf.nn.rnn_cell._linear

# Output projection parameters. `proj_w_t` is [vocab_size, doc_embedding_size]
# and is handed to the sampled softmax loss; its transpose `proj_w` is what
# MLP() multiplies decoder outputs with. `proj_b` has one entry per vocabulary word.
proj_w_t = tf.Variable(tf.random_uniform([vocab_size, doc_embedding_size], minval=-0.05, maxval=0.05), name='proj_w')
proj_w = tf.transpose(proj_w_t)
proj_b = tf.Variable(tf.random_uniform([vocab_size], minval=-0.05, maxval=0.05), name='proj_b')

# LSTM cells for the document encoder and the summary decoder; both use
# `doc_embedding_size` units and tuple-style state.
variable_dict = {
    "encoder_cell": tf.nn.rnn_cell.BasicLSTMCell(doc_embedding_size, state_is_tuple=True),
    "decoder_cell": tf.nn.rnn_cell.BasicLSTMCell(doc_embedding_size, state_is_tuple=True)
}

In [5]:
# Feed points of the graph, looked up by name in sentence_extractor(),
# train_step() and eval_test_step().
placeholders = {
    # Word ids of a document: [batch, 3 sentences, sentence_length words].
    "sentences_input_3": tf.placeholder(tf.int32, shape=[None, 3, sentence_length], name='input_bucket_3'),
    # Word ids of the summary: summary_length + 1 columns, so that the first
    # summary_length columns serve as decoder inputs and the last
    # summary_length columns as targets (see sentence_extractor).
    "summary_words": tf.placeholder(tf.int32, shape=[None, summary_length + 1]),
    # Selects between the sampled and the non-sampled decoder branch.
    "feedfw_sampling": tf.placeholder(tf.bool),
    # Two-element vector; train_step feeds [p, 1 - p].
    "sampled_prob": tf.placeholder(tf.float32, shape=[2]),
    # Dropout keep probability.
    "keep_prob": tf.placeholder(tf.float32)
}

In [6]:
def rnn_seq2seq_step(
        enc_cell,
        dec_cell,
        encoder_inputs,
        decoder_inputs,
        output_targets=None,
        is_sampled=True,
        sampled_prob=0.0,
        keep_prob=1.0,
        dtype=tf.float32):
    """Run the encoder over `encoder_inputs`, then the attention decoder.

    Args:
        enc_cell: RNN cell used by `rnn_encoder`.
        dec_cell: RNN cell used by `embedding_rnn_attention_decoder`.
        encoder_inputs: list of encoder input tensors, one per step.
        decoder_inputs: list of decoder input word-id tensors, one per step.
        output_targets, is_sampled, sampled_prob, keep_prob: forwarded
            unchanged to `embedding_rnn_attention_decoder`.
        dtype: dtype handed to the encoder.

    Returns:
        The list of decoder outputs; the final decoder state is discarded.
    """
    enc_step_outputs, final_enc_state = rnn_encoder(enc_cell, encoder_inputs, dtype=dtype)

    # Give every encoder output a length-1 middle axis and join them along it,
    # producing the tensor the decoder attends over.
    expanded_outputs = []
    for enc_out in enc_step_outputs:
        expanded_outputs.append(tf.reshape(enc_out, [-1, 1, enc_cell.output_size]))
    attention_states = tf.concat(1, expanded_outputs)

    dec_step_outputs, _ = embedding_rnn_attention_decoder(
        dec_cell,
        final_enc_state,
        enc_step_outputs,
        decoder_inputs,
        attention_states,
        output_targets=output_targets,
        is_sampled=is_sampled,
        sampled_prob=sampled_prob,
        keep_prob=keep_prob)

    return dec_step_outputs

In [7]:
def rnn_encoder(cell, encoder_inputs, dtype=tf.float32):
    """Unroll `cell` over `encoder_inputs` with tf.nn.rnn.

    Returns:
        (outputs, state) exactly as produced by tf.nn.rnn.
    """
    step_outputs, final_state = tf.nn.rnn(cell, encoder_inputs, dtype=dtype)
    return step_outputs, final_state

In [8]:
# TODO: curriculum learning.
# Open question: what happens at test time, when no inputs are provided to the decoder?
#   - if is_sampled is set, inp = prev (the previous output is fed back in);
#   - the decoder inputs would then just be GO, PAD, ..., PAD of fixed length.
# TODO: the output should be softmax-ed -> MLP.
# TODO: try a different attention mechanism, see eq. 10 from the paper.
# TODO: try a different fully connected layer architecture.
def embedding_rnn_attention_decoder(cell, 
        initial_state,   
        encoder_states, 
        decoder_inputs,
        attention_states,
        output_targets=None, 
        is_sampled=True,
        sampled_prob=0.0,
        num_heads=1,
        keep_prob=1.0,
        initial_state_attention=False,
        dtype=tf.float32
       ):
    """Attention decoder over embedded word ids with optional scheduled sampling.

    Args:
        cell: RNN cell, called as `cell(x, state)`; `cell.output_size` sets the
            width of every returned output.
        initial_state: initial decoder state (here: the final encoder state).
        encoder_states: currently unused.
        decoder_inputs: list of 1D batch-sized int32 tensors (word ids), one
            per decoding step.
        attention_states: 3D tensor [batch, attn_length, attn_size] to attend
            over; attn_size must be statically known.
        output_targets: currently unused.
        is_sampled: when True, from the second step on a coin flip decides
            whether the step input is replaced by the embedding of the argmax
            of `MLP(previous output)`.
        sampled_prob: weights of the coin flip over (feed previous, keep
            input); callers in this notebook feed a two-element [p, 1 - p]
            tensor. Its log is used as the multinomial logits.
        num_heads: number of attention heads.
        keep_prob: dropout keep probability forwarded to `MLP`.
        initial_state_attention: if True, the initial attention vectors are
            computed from `initial_state` instead of being zeros.
        dtype: dtype of the zero-initialised attention vectors.

    Returns:
        (outputs, state): the list of per-step outputs (each the linear
        "AttnOutputProjection" of cell output and attention reads) and the
        final cell state.

    Raises:
        ValueError: if the width of a step input cannot be inferred statically.

    NOTE(review): for every step after the first this calls
    `reuse_variables()` on whatever variable scope is current and never undoes
    it, so the caller's scope is left in reuse mode - confirm this is relied
    upon before restructuring.
    """
    # lookup the embeddings of the decoder inputs
    word_embeddings = weights['word_embeddings']

    decoder_embed_inputs = [tf.nn.embedding_lookup(word_embeddings, i)
                            for i in decoder_inputs]
    
    batch_size = tf.shape(decoder_inputs[0])[0]  # Needed for reshaping.
    attn_length = attention_states.get_shape()[1].value
    if attn_length is None:
        # Fix: this used to call a bare, undefined `shape(...)` and raised
        # NameError whenever the attention length was not statically known.
        attn_length = tf.shape(attention_states)[1]
    attn_size = attention_states.get_shape()[2].value

    # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
    hidden = tf.reshape(
        attention_states, [-1, attn_length, 1, attn_size])
    hidden_features = []
    v = []
    attention_vec_size = attn_size  # Size of query vectors for attention.
    for a in xrange(num_heads):
        k = variable_scope.get_variable("AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
        hidden_features.append(tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
        v.append(
            variable_scope.get_variable("AttnV_%d" % a, [attention_vec_size]))

    state = initial_state

    def attention(query):
        """Put attention masks on hidden using hidden_features and query."""
        ds = []  # Results of attention reads will be stored here.
        if nest.is_sequence(query):  # If the query is a tuple, flatten it.
            query_list = nest.flatten(query)
            for q in query_list:  # Check that ndims == 2 if specified.
                ndims = q.get_shape().ndims
                if ndims:
                    assert ndims == 2
            query = tf.concat(1, query_list)
        for a in xrange(num_heads):
            with variable_scope.variable_scope("Attention_%d" % a):
                y = linear(query, attention_vec_size, True)
                y = tf.reshape(y, [-1, 1, 1, attention_vec_size])
                # Attention mask is a softmax of v^T * tanh(...).
                s = tf.reduce_sum(
                    v[a] * tf.tanh(hidden_features[a] + y), [2, 3])
                # Kept under its own name so the head index `a` is not rebound.
                attn_weights = tf.nn.softmax(s)
                # Now calculate the attention-weighted vector d.
                d = tf.reduce_sum(
                    tf.reshape(attn_weights, [-1, attn_length, 1, 1]) * hidden,
                    [1, 2])
                ds.append(tf.reshape(d, [-1, attn_size]))
        return ds

    outputs = []
    prev = None
    batch_attn_size = tf.pack([batch_size, attn_size])
    attns = [tf.zeros(batch_attn_size, dtype=dtype)
             for _ in xrange(num_heads)]
    for a in attns:  # Ensure the second shape of attention vectors is set.
        a.set_shape([None, attn_size])
    if initial_state_attention:
        attns = attention(initial_state)
    for i, inp in enumerate(decoder_embed_inputs):
        update_embedding = True
        if i > 0:
            variable_scope.get_variable_scope().reuse_variables()
            update_embedding = False
        
        # Feed previous output as next cell input
        # If is_sampled is set, we use w'_i-1 instead of decoder_embed_inputs.
        if is_sampled and prev is not None:
            with variable_scope.variable_scope("loop_function", reuse=True):
                # Coin flip to determine if we feed forward the previous output.
                # It is drawn with graph ops so that it is re-sampled on every
                # run: log(sampled_prob) serves as the logits of a
                # two-outcome multinomial over (True, False).
                r_elements = tf.constant([True, False])
                rescaled_r_weights = tf.expand_dims(tf.log(sampled_prob), 0)
                indice = tf.multinomial(rescaled_r_weights, num_samples=1)
                feed_previous = tf.gather(r_elements, tf.squeeze(indice, [0,1]))
                
                # Both lambdas run while tf.cond is being built, i.e. before
                # `inp` is rebound, so `lambda: inp` returns the original
                # embedded decoder input of this step.
                inp = tf.cond(feed_previous, 
                              lambda: extract_argmax_and_embed(word_embeddings, MLP(prev, keep_prob), update_embedding),
                              lambda: inp
                             )
                
        # Merge input and previous attentions into one vector of the right size.
        input_size = inp.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError("Could not infer input size from input: %s" % inp.name)
        x = linear([inp] + attns, input_size, True)
        # Run the RNN.
        cell_output, state = cell(x, state)
        # Run the attention mechanism.
        if i == 0 and initial_state_attention:
            with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                               reuse=True):
                attns = attention(state)
        else:
            attns = attention(state)

        with variable_scope.variable_scope("AttnOutputProjection"):
            output = linear([cell_output] + attns, cell.output_size, True)
        
        # The output stays a dense vector of width cell.output_size; the
        # projection to vocabulary size happens in MLP() / the loss.
        if is_sampled:
            prev = output
        outputs.append(output)
        
    return outputs, state

In [9]:
def MLP(decoder_state, keep_prob=1.0):
    """Project a decoder output onto the vocabulary.

    Applies dropout with `keep_prob`, then the affine map given by the
    module-level `proj_w` / `proj_b`, then a ReLU.

    Returns:
        The rectified projection of `decoder_state`.
    """
    dropped = tf.nn.dropout(decoder_state, keep_prob)
    affine = tf.add(tf.matmul(dropped, proj_w), proj_b)
    return tf.nn.relu(affine)

In [10]:
def extract_argmax_and_embed(word_embeddings, projection_output, update_embedding=False):
    """Embed the highest-scoring symbol of `projection_output`.

    Args:
        word_embeddings: embedding table to look the symbol up in.
        projection_output: scores; the argmax is taken along axis 1.
        update_embedding: set True only for "GO" input. When falsy, gradients
            are stopped at the returned embedding.

    Returns:
        The embedding of the argmax symbol.
    """
    best_symbol = tf.arg_max(projection_output, 1)
    embedded = tf.nn.embedding_lookup(word_embeddings, best_symbol)
    if update_embedding:
        return embedded
    return tf.stop_gradient(embedded)

In [11]:
def convolutional_sentence_encoder(input_x, keep_prob):
    """Encode sentences of word ids into fixed-size vectors with a text CNN.

    For each window size in the module-level `filter_sizes`, the embedded
    sentence is convolved with the matching filter bank (`weights['wcK']`,
    `biases['bcK']`, K counted from 1), rectified, and max-pooled over all
    window positions. The pooled features of all banks are concatenated and
    passed through dropout.

    Args:
        input_x: int tensor of word ids with `sentence_length` ids per row.
        keep_prob: dropout keep probability applied to the final embedding.

    Returns:
        Tensor of shape [-1, len(filter_sizes) * num_filters].
    """
    word_embeddings = weights['word_embeddings']
    sentence_tensor = tf.nn.embedding_lookup(word_embeddings, input_x)
    # conv2d needs an explicit channel axis.
    sentence_tensor = tf.expand_dims(sentence_tensor, -1, name='expanded_sentence_tensor')

    # One conv -> bias -> relu -> max-pool stanza per filter bank. This
    # replaces three copy-pasted stanzas and creates the ops in the same order
    # and under the same explicit names (conv1/pool1, conv2/pool2, ...).
    pooled_outputs = []
    for bank, filter_size in enumerate(filter_sizes, 1):
        conv = tf.nn.conv2d(
            sentence_tensor,
            weights['wc%d' % bank],
            strides=[1, 1, 1, 1],
            padding="VALID",
            name="conv%d" % bank
        )
        conv = tf.add(conv, biases['bc%d' % bank])
        conv = tf.nn.relu(conv)
        # A VALID convolution leaves sentence_length - filter_size + 1
        # positions; pooling over all of them keeps one value per filter.
        pooled = tf.nn.max_pool(
            conv,
            ksize=[1, sentence_length - filter_size + 1, 1, 1],
            strides=[1, 1, 1, 1],
            padding='VALID',
            name="pool%d" % bank)
        pooled_outputs.append(pooled)

    num_total_filters = len(filter_sizes) * num_filters
    pool_h = tf.concat(3, pooled_outputs)
    pool_h = tf.reshape(pool_h, [-1, num_total_filters])
    pool_h = tf.nn.dropout(pool_h, keep_prob=keep_prob, name='final_sentence_embedding')
    
    return pool_h

In [12]:
def sampled_loss(inputs, labels, num_samples=512):
    """Sampled softmax loss against the module-level output projection.

    Args:
        inputs: decoder outputs to score.
        labels: target word ids; reshaped to a single column before use.
        num_samples: number of classes sampled per batch.

    Returns:
        The result of tf.nn.sampled_softmax_loss computed with `proj_w_t`,
        `proj_b` and `vocab_size` classes.
    """
    column_labels = tf.reshape(labels, [-1, 1])
    return tf.nn.sampled_softmax_loss(
        proj_w_t, proj_b, inputs, column_labels, num_samples, vocab_size)

# Alias under which the loss is handed to the sequence loss.
softmax_loss_function = sampled_loss

In [13]:
def sentence_extractor(sentence_num):
    """Build the encoder/decoder graph for documents of `sentence_num` sentences.

    Reads the `sentences_input_<sentence_num>`, `summary_words`,
    `feedfw_sampling`, `sampled_prob` and `keep_prob` placeholders, encodes
    every sentence with the convolutional encoder and decodes the summary with
    `rnn_seq2seq_step`.

    Returns:
        (decoder_outputs, summary_words_targets): the result of the tf.cond
        that selects between the sampled and the non-sampled decoder, and the
        list of per-step target word-id tensors.
    """
    # Switch deciding whether the graph runs curriculum-learning sampling
    # during the feed-forward pass.
    use_sampling = placeholders["feedfw_sampling"]
    dropout_keep = placeholders["keep_prob"]
    coin_weights = placeholders["sampled_prob"]
    summary_ids = placeholders["summary_words"]

    # Move the sentence axis first, then flatten to rows of `sentence_length`
    # word ids so that splitting along axis 0 yields one chunk per sentence
    # position.
    doc_ids = placeholders["sentences_input_{0}".format(sentence_num)]
    doc_ids = tf.transpose(doc_ids, perm=[1, 0, 2])
    doc_ids = tf.reshape(doc_ids, [-1, sentence_length])

    sentence_embeddings = [
        convolutional_sentence_encoder(sentence_chunk, dropout_keep)
        for sentence_chunk in tf.split(0, sentence_num, doc_ids)
    ]

    # One 1D tensor of word ids per summary position.
    summary_steps = [
        tf.squeeze(column, [1])
        for column in tf.split(1, summary_length + 1, summary_ids)
    ]

    # Inputs are every position but the last; targets are shifted by one.
    summary_words_inputs = summary_steps[:-1]
    summary_words_targets = summary_steps[1:]

    encoder_cell = variable_dict["encoder_cell"]
    decoder_cell = variable_dict["decoder_cell"]

    def sampled_decode():
        return rnn_seq2seq_step(
            encoder_cell,
            decoder_cell,
            sentence_embeddings,
            summary_words_inputs,
            sampled_prob=coin_weights,
            keep_prob=dropout_keep
        )

    def non_sampled_decode():
        return rnn_seq2seq_step(
            encoder_cell,
            decoder_cell,
            sentence_embeddings,
            summary_words_inputs,
            is_sampled=False,
            output_targets=summary_words_targets,
            keep_prob=dropout_keep
        )

    decoder_outputs = tf.cond(use_sampling, sampled_decode, non_sampled_decode)

    return decoder_outputs, summary_words_targets

# Build the graph for the 3-sentence bucket.
bucket3outputs, bucket3targets = sentence_extractor(3)

# One weight tensor per target position, all ones.
# NOTE(review): each has shape [1]; presumably this broadcasts over the batch
# inside sequence_loss - confirm.
loss_weights = [tf.ones([1]) for i in range(summary_length)]

# Sequence loss per bucket, using the sampled softmax defined above.
bucket_losses = {
    '3': tf.nn.seq2seq.sequence_loss(bucket3outputs, bucket3targets, loss_weights, softmax_loss_function=softmax_loss_function),
}

# Minimizing loss
# `global_step` is incremented by apply_gradients on every training step.
global_step = tf.Variable(0, name="global_step", trainable=False)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, 
                                   beta1=momentum_beta_1,
                                   beta2=momentum_beta_2)

# Keyed 'bucket_<n>'; train_step() looks its op up by that name.
train_ops = {
    'bucket_3': optimizer.apply_gradients(optimizer.compute_gradients(bucket_losses['3']), global_step=global_step)
}

# Must be created after every variable above (including the optimizer's).
init = tf.initialize_all_variables()

In [14]:
def train_step(sess, x_batch, y_batch, feedforward_sampling=False, sampled_prob=0.0, keep_prob=0.5):
    """Run one optimisation step and log its loss summary.

    Args:
        sess: session to run the step in.
        x_batch: 3-dimensional list of shape (batch size, sentences per
            document, words per sentence). The number of sentences of the
            first document selects the bucket.
        y_batch: 2-dimensional list of shape (batch size, summary size)
            holding the word ids of the summary.
        feedforward_sampling: set True to let the decoder feed back its own
            previous outputs; required for curriculum learning.
        sampled_prob: probability p of feeding back the previous output; fed
            to the graph as [p, 1 - p].
        keep_prob: dropout keep probability.

    Returns:
        (step, loss): the global step after the update and the batch loss.

    Relies on the module-level `train_summary_ops` and `train_summary_writer`,
    which are created in a later cell and must exist before the first call.
    """
    bucket_id = len(x_batch[0])
    bucket_key = '{0}'.format(bucket_id)
    
    input_dict = {placeholders['sentences_input_{0}'.format(bucket_id)]: x_batch, 
                  placeholders['summary_words']: y_batch,
                  placeholders["feedfw_sampling"]: feedforward_sampling,
                  placeholders['sampled_prob']: [sampled_prob, 1.0 - sampled_prob],
                  placeholders["keep_prob"]: keep_prob}
    
    # Fix: the summary op is now selected by bucket like the train op and the
    # loss, instead of being hard-coded to bucket '3'.
    _, step, loss, summaries = sess.run([train_ops['bucket_{0}'.format(bucket_id)], global_step, 
                                         bucket_losses[bucket_key], train_summary_ops[bucket_key]], input_dict)
    
    train_summary_writer.add_summary(summaries, step)
    train_summary_writer.flush()
    
    return step, loss

In [15]:
def eval_test_step(sess, x_batch, y_batch, feedforward_sampling=True, keep_prob=1.0):
    """Compute the loss of one batch without updating any parameter.

    The bucket is chosen from the number of sentences of the first document in
    `x_batch`; the sampling weights are fixed to [1.0, 0.].

    Returns:
        The value of that bucket's loss for the batch.
    """
    bucket_id = len(x_batch[0])

    feeds = {}
    feeds[placeholders['sentences_input_{0}'.format(bucket_id)]] = x_batch
    feeds[placeholders['summary_words']] = y_batch
    feeds[placeholders["feedfw_sampling"]] = feedforward_sampling
    feeds[placeholders['sampled_prob']] = [1.0, 0.]
    feeds[placeholders["keep_prob"]] = keep_prob

    return sess.run(bucket_losses['{0}'.format(bucket_id)], feeds)

In [16]:
import word_ex_batch_generator as bg
def generate_batch(batch_size, length, batch_type, include_filenames=False):
    """Fetch a batch from the batch generator and split it into parallel lists.

    Args:
        batch_size: number of items requested.
        length: bucket/length argument forwarded to the generator.
        batch_type: data split name forwarded to the generator
            (e.g. 'training', 'validation').
        include_filenames: when True, also return the third field of every item.

    Returns:
        (inputs, targets), or (inputs, targets, filenames) when
        `include_filenames` is set. All are real lists, so callers can take
        `len()` and index them on both Python 2 and Python 3 (`map` would
        return a lazy iterator on Python 3).
    """
    batch = bg.get_batch_with_filenames(length, batch_size, batch_type)
    random_batch = [item[0] for item in batch]
    random_batch_target = [item[1] for item in batch]

    if include_filenames:
        batch_filenames = [item[2] for item in batch]
        return random_batch, random_batch_target, batch_filenames

    return random_batch, random_batch_target

In [17]:
# Create the session and run the variable initializer built earlier.
sess = tf.Session()
sess.run(init)

# Replace the randomly initialised embedding table with values restored from
# disk; the saver maps the checkpoint key to the embedding variable.
# NOTE(review): the path is relative, so it depends on the kernel's working directory.
w_embedding_path = 'tf_variables/word_embeddings_150_6_20.var'
var_saver = tf.train.Saver({"word_embeddings_150_6_20": weights['word_embeddings']})
var_saver.restore(sess, w_embedding_path)

In [18]:
"""
    Initialize the summaries writers
"""
import os
import time

# Output directory for models and summaries
# A fresh sub-directory named after the current Unix time is used per run.
timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "we_runs", timestamp))
print("Writing to {}\n".format(out_dir))

# Summaries for loss and accuracy
# NOTE(review): only a loss summary is created; no accuracy is recorded.
loss_summary3 = tf.scalar_summary("sampled softmax loss", bucket_losses['3'])

# Train Summaries
# Keyed by bucket; train_step() runs the merged op alongside the train op.
train_summary_ops = {
    '3': tf.merge_summary([loss_summary3])
}
train_summary_dir = os.path.join(out_dir, "summaries", "train")
train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph)

# The validation loss is averaged in Python and fed back through a placeholder
# so that it can be written as a summary.
# NOTE(review): `current_val_acc` is not used by any summary below.
current_val_loss = tf.placeholder(tf.float32, name='validation_loss')
current_val_acc = tf.placeholder(tf.float32, name='validation_acc')
loss_summary = tf.scalar_summary("sampled softmax loss", current_val_loss)

# Dev summaries
dev_summary_op = tf.merge_summary([loss_summary])
dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph)

Writing to /home/putama/PutamaLab/NLP/summarization/we_runs/1481117900



In [19]:
import sys

# define the iterative training steps here
# Cadence, in global steps, of progress reports, validation passes and checkpoints.
report_every = 50
evaluate_every = 5000
checkpoint_every = 10000

num_epoch = 8

checkpoint_path = 'checkpoints_word_extractor_sampled/stored_variables.ckpt'
var_saver_2 = tf.train.Saver(tf.trainable_variables())

# Running totals since the last progress report.
total_loss = 0.
total_acc = 0.
step_counter = 0.

buckets = [10]
eval_buckets = [10]

# Probability of feeding back the decoder's own output, one entry per epoch.
schedules = [0., 0.05, 0.2, 0.2, 0.4, 0.4, 0.8, 0.8]

# Progress is printed to the process's stdout instead of the notebook cell.
# The redirect is wrapped in try/finally so that the notebook's stdout is
# restored, and the extra handle closed, even if training raises or is
# interrupted.
temp_stdout = sys.stdout
sys.stdout = open('/dev/stdout', 'w')
try:
    for epoch, schedule in zip(range(num_epoch), schedules):
        doc_trained = 0
        for bucket in buckets:
            while(bg.has_more(bucket, 'training')):
                x_batch, y_batch = generate_batch(5, bucket, 'training')

                if len(x_batch) == 0:
                    continue # signaling if the batch is empty

                current_step, current_loss = train_step(sess, x_batch, y_batch, feedforward_sampling=True, sampled_prob=schedule)
                total_loss += current_loss
                step_counter += 1
                doc_trained += len(x_batch)

                if current_step % report_every == 0:
                    print("ep {}: bucket: {}, doc_trained: {}, training step {}, loss avg {:g}".format(epoch, bucket,
                                                                                                       doc_trained,
                                                                                                       current_step,
                                                                                                       total_loss / step_counter))
                    total_loss = 0
                    total_acc = 0
                    step_counter = 0

                if current_step % evaluate_every == 0:
                    eval_loss = 0.
                    eval_acc = 0.
                    eval_counter = 0.
                    for eval_bucket in eval_buckets:
                        print('Evaluation on validation data bucket {0}:'.format(eval_bucket))
                        while(bg.has_more(eval_bucket, 'validation')):
                            x_val_batch, y_val_batch = generate_batch(5, eval_bucket, 'validation')

                            if len(x_val_batch) == 0:
                                continue # signaling if the batch is empty

                            val_loss = eval_test_step(sess, x_val_batch, y_val_batch)
                            eval_loss += val_loss
                            eval_counter += 1

                    if eval_counter > 0:
                        print("validation loss avg {:g}".format(eval_loss / eval_counter))

                        val_summaries = sess.run(dev_summary_op, feed_dict={
                                current_val_loss: eval_loss / eval_counter
                            })
                        dev_summary_writer.add_summary(val_summaries, current_step)
                        dev_summary_writer.flush()
                    else:
                        # Previously a ZeroDivisionError that aborted training.
                        print("validation produced no batches; skipping validation summary")

                    bg.reset_indices('validation')

                if current_step % checkpoint_every == 0:
                    ckpt_path = var_saver_2.save(sess, checkpoint_path+'.'+str(current_step))
                    print("Saved model checkpoint to {}\n".format(ckpt_path))

        # Rewind the training data before the next epoch.
        bg.reset_indices('training')
finally:
    redirected_stdout = sys.stdout
    sys.stdout = temp_stdout
    redirected_stdout.close()

In [20]:
# Project every decoder output of the 3-sentence bucket with MLP() (default
# keep_prob, i.e. no dropout). Despite the name, `word_ids` holds the
# projected score tensors, not word ids: picking a word is done later in
# Python by sample().
word_ids = []
for cell_out in bucket3outputs:
    cell_out_proj = MLP(cell_out)
#     cell_out_word = sample(cell_out_proj)
#     cell_out_word = tf.arg_max(cell_out_proj, 1)
#     word_ids.append(cell_out_word)
    word_ids.append(cell_out_proj)

In [21]:
def sample(a, temperature=1.0):
    """Draw an index at random from temperature-scaled weights.

    The weights are turned into a distribution proportional to
    (a + 1e-9) ** (1 / temperature); temperatures below 1 sharpen it.

    Args:
        a: 1-D numpy array of weights; expected to be non-negative, since the
            logarithm of `a + 1e-9` is taken.
        temperature: positive scaling factor.

    Returns:
        The sampled index as an int in [0, len(a) - 1].
    """
    # Imported here because the notebook only imports `random` in a later
    # cell; the function no longer depends on that cell having run.
    import random

    a = np.log(a+1e-9) / temperature
    exp_a = np.exp(a)  # computed once instead of twice
    a = exp_a / np.sum(exp_a)
    r = random.random() # range: [0,1)
    # Walk the cumulative distribution until it passes r.
    total = 0.0
    for i in range(len(a)):
        total += a[i]
        if total>r:
            return i
    # Rounding can leave the cumulative sum marginally below r.
    return len(a)-1

In [37]:
# Scratch cell: decode a single batch and print the generated summaries.
# NOTE(review): `sm` is not defined anywhere in the visible notebook, and
# `words_to_sentences` is defined in a cell that appears further down, so this
# cell only works with left-over kernel state - confirm where `sm` comes from.
# bg.reset_indices('test')
import random
buckets = [10]
for bucket in buckets:
    # NOTE(review): reads the 'training' split although the commented-out
    # line above refers to 'test'.
    while bg.has_more(bucket, 'training'):
        test_batch, y_batch, filenames = generate_batch(5, bucket, 'training', include_filenames=True)

        # Batches with at most one document are skipped as well, not only empty ones.
        if len(test_batch) <= 1:
                continue # signaling if the batch is empty
        
        print len(test_batch)
        
        # NOTE(review): `y_filler` is never used; the real summaries
        # (`y_batch`) are fed instead. 46 matches the width of the
        # `summary_words` placeholder.
        y_filler = np.zeros((len(test_batch), 46), dtype=np.int32)
        # NOTE(review): the key string has no `{}` field, so .format(bucket)
        # is a no-op and the 3-sentence placeholder is always used.
        input_dict = {placeholders['sentences_input_3'.format(bucket)]: test_batch,
              placeholders['summary_words']: y_batch,
              placeholders["feedfw_sampling"]: False,
              placeholders["sampled_prob"]: [1.0, 0.],
              placeholders["keep_prob"]: 1.0}

        # One score array per decoding step; stacked, the leading axis is the step.
        out_probs = sess.run(word_ids, feed_dict=input_dict)
        out_probs = np.array(out_probs)
  
        # Transposed layout: one row per document, one column per step.
        batch_results = np.zeros((out_probs.shape[1], out_probs.shape[0]), dtype=np.int32)
    
        for i in range(len(out_probs)):
            for j in range(len(out_probs[i])):
                batch_results[j][i] = sample(out_probs[i][j], temperature=0.5)

        for i, batch_result in enumerate(batch_results):
            print_f = sm.get_we_summary(filenames[i], words_to_sentences(batch_result))
            # Only consumed by the commented-out file write below.
            write_path = 'we_test_results_2/' + filenames[i].split('/')[-1] + '.pred'
#             with open(write_path, 'w') as f:
#                 f.write(print_f)
            print print_f
            print '------------------'
        # Both breaks stop after the first processed batch.
        break
    break

3
-- a strong earthquake struck off the south coast of Japan on sunday night local time , " jolting Tokyo and wide areas of eastern Japan , " the country 's Kyodo news agency reported
the 7.1 earthquake hit 200 miles ( 320 kilometers ) south - southwest of Tokyo at 7:55 p.m. ( 6:55 a.m. et ) , the United States Geological Survey reported
its epicenter was 188 miles ( 303 kilometers ) deep , the United States Geological Survey said
the Japan Meteorological Agency reported its magnitude as 6.9 , Kyodo said


Tremor hits south of island , " jolting Tokyo , " Kyodo news agency reported
United States Geological Survey measures quake as 7.1 magnitude


David sea allegedly shore three 60 to Taylor diverted province year , officers skydiver taught treating of Islamic_State across second speeding walls wild children mps speed to police luggage unit woman ripping after Alicante it took on not the not @entity ripping start 
------------------
-- an Ohio jury tuesday convicted a 53 - year - old ma

In [24]:
def words_to_sentences(words_id):
    """Turn a sequence of word ids into readable text.

    Ids are mapped to words through `bg.reverse_dictionary`. '<EOS>' becomes a
    newline, '<UNK>' becomes '@entity ', '<PAD>' and '<GO>' are dropped, and
    every other word is emitted followed by a single space.

    Returns:
        The assembled text.
    """
    tokens = [bg.reverse_dictionary[word_id] for word_id in words_id]
    pieces = []
    for token in tokens:
        if token == '<EOS>':
            pieces.append('\n')
        elif token == '<UNK>':
            pieces.append('@entity' + ' ')
        elif token not in ('<PAD>', '<GO>'):
            pieces.append(token + ' ')
    return ''.join(pieces)

In [28]:
# sess.close()
# Presumably rewinds the batch generator's 'test' split so it can be read
# again from the start - confirm against word_ex_batch_generator.
bg.reset_indices('test')