In [1]:
#standard libraries
import csv
import os
import time
from collections import namedtuple

#custom libraries
import tensorflow as tf
import pandas as pd

In [2]:
def read_file_format(filename_queue):
    reader = tf.TextLineReader(skip_header_lines=1)
    _, value = reader.read(filename_queue)

    record_defaults = [tf.constant([], dtype=tf.int32), tf.constant([], dtype=tf.int32)]
    _, col2 = tf.decode_csv(value, record_defaults=record_defaults)
    
    example = tf.stack([col2])
    return example

In [3]:
def input_pipeline(filenames, batch_size = 3, seq_length=3,
                   num_epochs = None, evaluation = False):   
    filename_queue = tf.train.string_input_producer(
        filenames, num_epochs=num_epochs, shuffle=False)

    example = read_file_format(filename_queue)
        
    min_after_dequeue = 10
    capacity = min_after_dequeue + 3 * batch_size
    example_batch = tf.train.batch(
        [example], batch_size=batch_size*seq_length, capacity=capacity
    )    

    label_batch = tf.concat(
        [example_batch[-1], example_batch[1:,0]],
        axis=0)

    example_batch = tf.reshape(example_batch, (batch_size, seq_length))
    label_batch = tf.reshape(label_batch, (batch_size, seq_length))

    return example_batch, label_batch

In [4]:
def _activation_summary(x):
    tensor_name = x.name
    tensor_name = tensor_name.replace(':', '_')
    tensor_name = tensor_name.replace('(', '_')
    tensor_name = tensor_name.replace(')', '_')
    tensor_name = tensor_name.replace(' ', '_')

    tf.summary.histogram(tensor_name + '/activations', x)
    tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))

In [5]:
def _add_loss_summaries(total_loss, averager, include_averaged_loss=False):
    # Compute the moving average of all individual losses and the total loss.
    losses = tf.get_collection('losses')
    if include_averaged_loss:
        loss_averages_op = averager.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
        l_name = l.name.replace(":", "_")

        tf.summary.scalar(l_name + '_raw_', l)        
        if include_averaged_loss:
            tf.summary.scalar(l_name + '_raw_', l)
            tf.summary.scalar(l_name, averager.average(l))
        
    if include_averaged_loss:
        return loss_averages_op
    else:
        return total_loss

In [6]:
class Model():
    
    def __init__(self, args, infer=False):
        if infer:
            self.batch_size = 1
            self.seq_length = 1
        else:
            self.batch_size = args.batch_size
            self.seq_length = args.seq_length

        self.x = tf.placeholder(tf.int32, shape=[args.batch_size, args.seq_length])
        self.y_ = tf.placeholder(tf.int32, shape=[args.batch_size, args.seq_length])

        self.lstm_cell = tf.contrib.rnn.BasicLSTMCell(args.rnn_size)
        self.lstm = tf.contrib.rnn.MultiRNNCell([self.lstm_cell]*args.num_layers)

        self.initial_state = self.lstm.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('lstm'):
            W = tf.get_variable('W', [args.rnn_size, args.vocab_size], tf.float32, tf.random_normal_initializer())
            b = tf.get_variable('b', [args.vocab_size], tf.float32, tf.constant_initializer(0.0))

            # Define Embedding
            embedding_mat = tf.get_variable('embedding', [args.vocab_size, args.rnn_size],
                                            tf.float32, tf.random_normal_initializer())
            embedding_output = tf.nn.embedding_lookup(embedding_mat, self.x)

            rnn_inputs = tf.split(axis=1, num_or_size_splits=self.seq_length, value=embedding_output)
            rnn_inputs = [tf.squeeze(x, [1]) for x in rnn_inputs]

            def loop(prev, _):
                prev = tf.matmul(prev, W) + b
                prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
                return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = tf.contrib.legacy_seq2seq.rnn_decoder(rnn_inputs,
                                                                    self.initial_state,
                                                                    self.lstm,
                                                                    loop_function=loop if infer else None,
                                                                    scope='lstm')
        output = tf.reshape(tf.concat(outputs,1), [-1, args.rnn_size])

        self.logits = tf.matmul(output, W) + b
        self.softmax_p = tf.nn.softmax(self.logits)

        self.loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [self.softmax_p],
            [self.y_],
            [tf.ones([args.batch_size * args.seq_length], dtype=tf.float32)]
        )
        tf.add_to_collection('losses', self.loss)
        tf.add_n(tf.get_collection('losses'), name='total_loss')
    
           # Generate moving averages of all losses and associated summaries.
        loss_avgs = tf.train.ExponentialMovingAverage(0.9, name='avg')
        losses = _add_loss_summaries(self.loss, loss_avgs, True)

        # Compute gradients.
        with tf.control_dependencies([losses]):
            opt = tf.train.AdamOptimizer(args.learning_rate)
            grads = opt.compute_gradients(self.loss)
            trunc_grads = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in grads]


        # Apply gradients.
        apply_gradient_op = opt.apply_gradients(trunc_grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

        # Add histograms for gradients.
        for grad, var in trunc_grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)


        if args.compute_variable_averages:
            variable_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
            variables_averages_op = variable_averages.apply(tf.trainable_variables())
            
            with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
                self.train_op = tf.no_op(name='train')
        else:
            with tf.control_dependencies([apply_gradient_op]):
                self.train_op = tf.no_op(name='train')

In [7]:
data_path = "data"

vocab_file = "vocab1.csv"

train_file = "train1.csv"

model_path = 'VanillaLSTM'

In [8]:
# Download/store Shakespeare data
full_model_dir = os.path.join(data_path, model_path)

# Make Model Directory
if not os.path.exists(full_model_dir):
    os.makedirs(full_model_dir)

In [9]:
vocab = pd.read_csv("{0}/{1}".format(data_path, vocab_file),
                    header=None)

In [10]:
class ArgStruct:
    def __init__(self, **entries):
        self.__dict__.update(entries)

In [11]:
arg_dict = {
    'data_path': data_path,
    'model_path': model_path,
    'rnn_size': 256,
    'num_layers': 1,
    'batch_size': 16,
    'seq_length': 16,
    'num_epochs': 1,
    'learning_rate': 0.0001,
    'momentum': 0.9,
    'logdir': 'TF_Logs',
    'vocab_size': len(vocab)+1,
    'save_every': 100,
    'print_every': 10,
    'compute_variable_averages': False
}

In [12]:
args = ArgStruct(**arg_dict)

In [13]:
with tf.Graph().as_default():

    global_step = tf.Variable(0, name='global_step', trainable=False)

    model = Model(args=args, infer=False)

    example_feed, label_feed = input_pipeline(
        ["{0}/{1}".format(args.data_path, train_file)],
        batch_size=args.batch_size,
        seq_length=args.seq_length,
        num_epochs=args.num_epochs)
    
    with tf.Session() as sess: 
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter(args.logdir, sess.graph)
        
        #initialize all variables
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        
        # Start populating the filename queue.
        coord = tf.train.Coordinator()  
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        
        saver = tf.train.Saver(tf.global_variables())
        
        while not coord.should_stop():
#             try:
                start_time = time.time()                

                example_batch, label_batch = sess.run([example_feed,
                                                      label_feed])

                result, summary =  sess.run(
                    [model.train_op, merged],
                    feed_dict={model.x: example_batch,
                               model.y_: label_batch})
                writer.add_summary(summary, global_step.eval())
                
                duration = time.time() - start_time
                if global_step.eval() % args.print_every == 0:
                    summary_nums = (global_step.eval(), duration, model.loss)
                    print('Iteration: {}, Last Step Duration: {}, Loss: {:.2f}'.format(*summary_nums))
        
                # Save the model and the vocab
                if global_step.eval() % args.save_every == 0:
                    # Save model
                    model_file_name = os.path.join(full_model_dir, 'model')
                    saver.save(sess, model_file_name, global_step=global_step)
                    print('Model Saved To: {}'.format(model_file_name))

#             except (tf.errors.OutOfRangeError, tf.errors.InvalidArgumentError) as e:
           
#                 print('Done training for %d epochs, %d steps.' % (args.num_epochs, global_step.eval()))
#                 # When done, ask the threads to stop.
#                 coord.request_stop()

        
        # Wait for threads to finish.
        coord.join(threads)
        sess.close()

ValueError: Moving average already computed for: sequence_loss_by_example/truediv:0