In [1]:
#standard libraries
import csv
import os
import time
import re
from functools import reduce

#custom libraries
import tensorflow as tf
import pandas as pd
import numpy as np

In [2]:
def read_file_format(filename_queue):
    reader = tf.TextLineReader(skip_header_lines=1)
    _, value = reader.read(filename_queue)

    record_defaults = [tf.constant([], dtype=tf.float32), tf.constant([], dtype=tf.int32)]
    _, col2 = tf.decode_csv(value, record_defaults=record_defaults)
    
    example = tf.stack([col2])
    return example

In [3]:
def input_pipeline(filenames, batch_size = 3, seq_length=3,
                   num_epochs = None, evaluation = False):   
    filename_queue = tf.train.string_input_producer(
        filenames, num_epochs=num_epochs, shuffle=False)

    example = read_file_format(filename_queue)
        
    min_after_dequeue = 10
    capacity = min_after_dequeue + 3 * batch_size
    example_batch = tf.train.batch(
        [example], batch_size=batch_size*seq_length, capacity=capacity
    )    

    label_batch = tf.concat(
        [example_batch[-1], example_batch[1:,0]],
        axis=0)

    example_batch = tf.reshape(example_batch, (batch_size, seq_length))
    label_batch = tf.reshape(label_batch, (batch_size, seq_length))

    return example_batch, label_batch

In [4]:
def _activation_summary(x):
    tensor_name = x.name
    tensor_name = tensor_name.replace(':', '_')
    tensor_name = tensor_name.replace('(', '_')
    tensor_name = tensor_name.replace(')', '_')
    tensor_name = tensor_name.replace(' ', '_')

    tf.summary.histogram(tensor_name + '/activations', x)
    tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))

In [5]:
def _add_loss_summaries(total_loss, averager, include_averaged_loss=False):
    # Compute the moving average of all individual losses and the total loss.
    losses = tf.get_collection('losses')
    if include_averaged_loss:
        loss_averages_op = averager.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
        l_name = l.name.replace(":", "_")

        tf.summary.scalar(l_name + '_raw_', l)        
        if include_averaged_loss:
            tf.summary.scalar(l_name + '_raw_', l)
            tf.summary.scalar(l_name, averager.average(l))
        
    if include_averaged_loss:
        return loss_averages_op
    else:
        return total_loss

In [6]:
def kl_divergence(P, Q, sample_size=1000): 
    '''
    Calculate KL Divergence between a reference distribution P and an approximating
    distribution Q using values drawn from the two distributions.
    
    P and Q are used here per canonical IT notation, giving KL(P||Q).

    Note that the typical approach in variational inference minimizes KL(Q||P),
    where the variational distribution Q is the reference distribution and
    P is the approximating distribution.
    
    Args:
    P: A tf.contrib.distributions.Distribution for reference distribution
    Q: A tf.contrib.distributions.Distribution for approximating distribution

    '''
    P_vals = P.sample([sample_size])
    
    P_probs = P.prob(P_vals)
    Q_probs = Q.prob(P_vals)
    
    PQ = tf.div(P_probs+1e-10, Q_probs+1e-10)
    safe_PQ = tf.where(tf.equal(PQ, 0.), tf.ones(PQ.shape), PQ) 
    log_PQ = tf.where(tf.equal(safe_PQ, 0.), tf.zeros(PQ.shape), tf.log(safe_PQ)) 
    return tf.reduce_sum(tf.multiply(P_probs, log_PQ))

In [7]:
def nll(y_, y_hat, vocab_size):
    y_ = tf.one_hot(tf.reshape(tf.cast(y_, tf.int32), [-1]), depth=vocab_size)
    return tf.losses.softmax_cross_entropy(y_, y_hat)   
    

In [8]:
def variational_free_energy(y_, y_hat, weight_dist, weight_prior, vocab_size, monte_carlo_size):
    '''
    Calculation variational free energy or evidence lower bound loss.
    
    It can be shown that the variational free energy is equivalent to the
    sum of the expected log loss of the network with w ~ Q(w) and the KL 
    Divergence of the variational distribution Q(w) from the weight prior P(w).
    '''

    nll_f = nll(y_, y_hat, vocab_size)

    sess = tf.get_default_session() 
    graph = tf.get_default_graph()
    weights = graph.get_tensor_by_name("weights/W:0")
    try:
        nll_eval = lambda x: sess.run(nll_f, feed_dict={weights: x.eval()})
        exp_nll = tf.contrib.bayesflow.monte_carlo.expectation(
            f=nll_eval,
            p=weight_dist,
            n=monte_carlo_size,
            name='expectation'
        )

    except:
        exp_nll = np.inf

    kld = kl_divergence(weight_dist, weight_prior)

    return tf.reduce_sum([exp_nll, kld])

In [9]:
class LSTM_Cell:
    
    def __init__(self, args, scope_name, current_weights=None):
        self.rnn_size = args.rnn_size
        self.num_proj = args.vocab_size
        self.input_size = args.batch_size * args.seq_length
        self.state_size = self.rnn_size * 2        
        
        if args.weight_noise_type == None:
            with tf.variable_scope(scope_name):
                self.W = tf.get_variable('W', [self.input_size + self.num_proj, 4 * self.rnn_size],
                                         tf.float32, tf.random_normal_initializer())
                self.b = tf.get_variable('b', [self.rnn_size * 4], tf.float32, tf.constant_initializer(0.0))

        if args.weight_noise_type == "adaptive":
            with tf.variable_scope(scope_name):
                
                self.W = tf.reshape(current_weights, [self.input_size + self.num_proj, 4 * self.rnn_size])

                self.b = tf.get_variable('b', [1, self.rnn_size * 4], tf.float32, tf.constant_initializer(0.0))
            
            
    def __call__(self, i, state):
        self.c_prev = tf.slice(state, [0, 0], [-1, self.rnn_size])
        self.h_prev = tf.slice(state, [0, self.rnn_size], [-1, self.num_proj])

        data = tf.concat([i, self.h_prev], 1)

        weighted = tf.matmul(data, self.W)

        self.i, self.j, self.f, self.o = tf.split(weighted, num_or_size_splits=4, axis=1)
        self.i_b, self.j_b, _, self.o_b = tf.split(self.b, num_or_size_splits=4, axis=1)
        
        self.c = (tf.sigmoid(self.f + args.forget_bias) * self.c_prev +
                  tf.sigmoid(self.i + self.i_b) * tf.tanh(self.j + self.j_b))
        self.h = tf.sigmoid(self.o + self.o_b) * tf.tanh(self.c)
        
        self.state = tf.concat([self.c, self.h], axis=1)
        return self.h, self.state
    
    def zero_state(self, batch_size, dtype):
        return tf.zeros([batch_size, self.state_size], dtype=dtype)

In [10]:
def update_weight_means(y_, y_hat, x, weight_dist, prior_dist, args):
 
    diff = tf.subtract(weight_dist.mean(), prior_dist.mean())
    centered = tf.div(diff, prior_dist.covariance())

    weight_sample = weight_dist.sample([args.gradient_sample_size])

    nll_f = nll(y_, y_hat, args.vocab_size)

    sess = tf.get_default_session()
    graph = tf.get_default_graph()

    weights = graph.get_tensor_by_name("weights/W:0")
    nll_grad = tf.gradients(nll_f, weights)
    try:
        nll_grads = []
        for i in range(args.gradient_sample_size):
            nll_grads.append(sess.run(nll_grad, feed_dict={weights: weight_sample[i,:].eval(),
                                                      x: x.eval()
                                                     }))
        update = centered + nll_grads
    except:
        print("Error encountered in mean gradient computation, setting update values to 0.")
        update = tf.zeros(shape=weight_dist.mean().shape)
    

In [11]:
def update_weight_variances(fn, variables):
##########NEED TO FINISH THIS AND MAKE SURE MEAN UPDATE WORKS

In [12]:
class Model():
    
    def __init__(self, args):

        self.batch_size = args.batch_size
        self.seq_length = args.seq_length

        self.x = tf.placeholder(tf.float32, shape=[args.batch_size, args.seq_length])
        self.y_ = tf.placeholder(tf.float32, shape=[args.batch_size, args.seq_length])
        
        with tf.variable_scope("weights"):
            if args.weight_noise_type == "adaptive":
                lstm_weight_size = (args.batch_size * args.seq_length + args.vocab_size) * (4 * args.rnn_size) * args.num_layers
                softmax_weight_size = (args.rnn_size * args.vocab_size)
                embedding_weight_size = (args.rnn_size * args.vocab_size)
                weight_size = lstm_weight_size + softmax_weight_size + embedding_weight_size

                prior_loc = [0.] * weight_size
                prior_scale_diag = [args.weight_prior_variance] * weight_size
                self.weight_prior = tf.contrib.distributions.MultivariateNormalDiag(
                        prior_loc,
                        prior_scale_diag
                )
                S_hat = tf.get_variable("S_hat", initializer=prior_scale_diag)
                S = tf.exp(S_hat) # make sure sigma matrix is positive

                mu = tf.get_variable("mu", initializer=prior_loc)

                self.weight_dist = tf.contrib.distributions.MultivariateNormalDiag(mu, S)

                weights_st = tf.contrib.bayesflow.stochastic_tensor.StochasticTensor(self.weight_dist)
                
                current_weights = tf.squeeze(weights_st, name="W")

                lstm_W, softmax_W, embedding_mat = tf.split(current_weights,
                                                            num_or_size_splits=[lstm_weight_size,
                                                                                softmax_weight_size,
                                                                                embedding_weight_size])
                lstm_W = tf.split(lstm_W, num_or_size_splits=args.num_layers)
            
            else:
                self.lstm_W = None

            self.lstm_cells = []
            for i in range(args.num_layers):
                self.lstm_cells.append(LSTM_Cell(args, "lstm_{0}".format(i), lstm_W[i]))
            self.lstm = tf.contrib.rnn.MultiRNNCell(self.lstm_cells)

            self.initial_state = self.lstm.zero_state(args.batch_size, tf.float32)

            if args.weight_noise_type is None:
                softmax_W = tf.get_variable('softmax_W', [args.rnn_size, args.vocab_size], tf.float32, tf.random_normal_initializer())
                embedding_mat = tf.get_variable('embedding', [args.vocab_size, args.rnn_size],
                                            tf.float32, tf.random_normal_initializer())

            elif args.weight_noise_type == "static":
                softmax_W = tf.get_variable('softmax_W', [args.rnn_size, args.vocab_size], tf.float32, tf.random_normal_initializer())
                softmax_noise = tf.truncated_normal([args.rnn_size, args.vocab_size], stddev=args.weight_prior_variance)
                softmax_W = softmax_W + softmax_noise

                embedding_mat = tf.get_variable('embedding', [args.vocab_size, args.rnn_size],
                                            tf.float32, tf.random_normal_initializer())
                embedding_noise = tf.truncated_normal([args.vocab_size, args.rnn_size], stddev=args.weight_prior_variance)
                embedding_mat = embedding_mat + embedding_noise


            elif args.weight_noise_type == "adaptive":
                softmax_W = tf.reshape(softmax_W, [args.rnn_size, args.vocab_size])
                embedding_mat = tf.reshape(embedding_mat, [args.vocab_size, args.rnn_size])

            else:
                raise Exception("Unrecognized value for weight_noise_type; " +
                                "recognized values are: None, 'static', and 'adaptive'.")

            b = tf.get_variable('b', [args.vocab_size], tf.float32, tf.constant_initializer(0.0))


            embedding_output = tf.nn.embedding_lookup(embedding_mat, tf.cast(self.x, tf.int32))

            rnn_inputs = tf.split(axis=1, num_or_size_splits=self.seq_length, value=embedding_output)
            rnn_inputs = [tf.squeeze(x, [1]) for x in rnn_inputs]

        outputs, last_state = tf.contrib.legacy_seq2seq.rnn_decoder(rnn_inputs,
                                                                    self.initial_state,
                                                                    self.lstm,
                                                                    scope='lstm')
        output = tf.reshape(tf.concat(outputs,1), [-1, args.rnn_size])

        self.logits = tf.matmul(output, softmax_W) + b
        self.softmax_p = tf.nn.softmax(self.logits)
        
    def train(self, args):
        
        t = time.time()
        if args.weight_noise_type is None:
            self.loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
                [self.softmax_p],
                [self.y_],
                [tf.ones([args.batch_size * args.seq_length], dtype=tf.float32)]
            )
        else:
            self.loss = variational_free_energy(
                self.y_,
                self.softmax_p,
                self.weight_dist,
                self.weight_prior,
                args.vocab_size,
                args.loss_sample_size
            )
        print("{0}s elapsed for loss calculation".format(str(time.time()-t)))

        tf.add_to_collection('losses', self.loss)
        tf.add_n(tf.get_collection('losses'), name='total_loss')

        opt = tf.train.AdamOptimizer(args.learning_rate)

        if args.weight_noise_type in [None, "static"]:
            grads = opt.compute_gradients(self.loss)
            trunc_grads = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in grads]
            apply_gradient_op = opt.apply_gradients(trunc_grads, global_step=global_step)

        else: 
            with tf.variable_scope("weights", reuse=True):
                means = tf.get_variable("mu")
                variances = tf.get_variable("S_hat")

            t = time.time()

#             variance_update = update_weight_variance(self.y_, self.softmax_p, self.weight_dist, self.weight_prior, args)
#             var_grads = opt.compute_gradients(self.loss, var_list=variances, grad_loss=variance_update)

#             trunc_var_grads = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in var_grads]
#             apply_var_grad_op = opt.apply_gradients(trunc_grads, global_step=global_step)

            means_update = update_weight_means(self.y_, self.softmax_p, self.x, self.weight_dist, self.weight_prior, args)
            mean_grads = opt.compute_gradients(self.loss, var_list=means, grad_loss=means_update)

            trunc_mean_grads = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in mean_grads]
            apply_mean_grad_op = opt.apply_gradients(trunc_mean_grads, global_step=global_step)

            apply_gradient_op = [apply_mean_grad_op] # + [apply_var_grad_op]
            print("{0}s elapsed for gradient/hessian calculation".format(str(time.time()-t)))


        for var in tf.global_variables():
            tf.summary.histogram(var.op.name, var)
        for grad, var in trunc_mean_grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/mean_gradients', grad)
#         for grad, var in trunc_var_grads:
#             if grad is not None:
#                 tf.summary.histogram(var.op.name + '/variance_gradients', grad)


        if args.compute_variable_averages:
            variable_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
            variables_averages_op = variable_averages.apply(tf.trainable_variables())

            with tf.control_dependencies(apply_gradient_op + [variables_averages_op]):
                self.train_op = tf.no_op(name='train')
        else:
            with tf.control_dependencies(apply_gradient_op):
                self.train_op = tf.no_op(name='train')
        
        return self.train_op

        
    def sample(self):
        flat_y_ = tf.expand_dims(tf.reshape(self.y_, [-1]), 1)
        flat_y_ = tf.cast(flat_y_, tf.int64)
        samples = tf.multinomial(self.softmax_p, 1)
        
        self.sampled_results = tf.concat([flat_y_, samples], axis=1)
        return self.sampled_results

In [13]:
data_path = "data"

vocab_file = "vocab1.csv"

train_file = "train1.csv"

model_path = 'VanillaLSTM'

In [14]:
# Download/store Shakespeare data
full_model_dir = os.path.join(data_path, model_path)

# Make Model Directory
if not os.path.exists(full_model_dir):
    os.makedirs(full_model_dir)

In [15]:
vocab = pd.read_csv("{0}/{1}".format(data_path, vocab_file),
                    header=None)

In [16]:
class ArgStruct:
    def __init__(self, **entries):
        self.__dict__.update(entries)

In [17]:
arg_dict = {
    'data_path': data_path,
    'model_path': model_path,
    'rnn_size': 256,
    'num_layers': 1,
    'batch_size': 16,
    'seq_length': 16,
    'forget_bias': 1.,
    'num_epochs': 1,
    'learning_rate': 0.0001,
    'momentum': 0.9,
    'logdir': 'TF_Logs',
    'vocab_size': len(vocab)+1,
    'save_every': 100,
    'print_every': 10,
    'compute_variable_averages': True,
    'weight_noise_type': "adaptive",
    'weight_prior_variance': 0.05,
    'loss_sample_size': 10000,
    'gradient_sample_size': 1
}

In [18]:
args = ArgStruct(**arg_dict)

In [19]:
with tf.Graph().as_default():

    global_step = tf.Variable(0, name='global_step', trainable=False)
    
    example_feed, label_feed = input_pipeline(
        ["{0}/{1}".format(args.data_path, train_file)],
        batch_size=args.batch_size,
        seq_length=args.seq_length,
        num_epochs=args.num_epochs)
    
    with tf.Session().as_default() as sess:
        model = Model(args=args)
        
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter(args.logdir, sess.graph)
        
        #initialize all variables
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        
        # Start populating the filename queue.
        coord = tf.train.Coordinator()  
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        
        saver = tf.train.Saver(tf.global_variables())
        
        train_op = model.train(args=args)
        sample_op = model.sample()
        
        while not coord.should_stop():
            try:
                start_time = time.time()                

                example_batch, label_batch = sess.run([example_feed,
                                                      label_feed])
                
                if global_step.eval() % args.print_every == 0:
                    latest_loss = sess.run([model.loss],
                                           feed_dict={model.x: example_batch,
                                                      model.y_: label_batch})
                    try:
                        summary_nums = (global_step.eval(), duration,
                                        np.mean(latest_loss))
                        print('Iteration: {0}, Last Step Duration: {1}, Loss: {2}'.format(*summary_nums))
                    except:
                        pass
                    results = sess.run([sample_op],
                                       feed_dict={model.x: example_batch,
                                                  model.y_: label_batch})
                    results = pd.DataFrame(results[0]).T
                    recode_results = results.replace(vocab.set_index(1).to_dict().get(0))
                    if not os.path.exists("translation.txt"):
                        recode_results.to_csv("translation.txt", header=False, index=False, sep="\t", mode="w")
                    else:
                        recode_results.to_csv("translation.txt", header=False, index=False, sep="\t", mode="a")


                result, summary =  sess.run(
                    [train_op, merged],
                    feed_dict={model.x: example_batch,
                               model.y_: label_batch})
                writer.add_summary(summary, global_step.eval())
                
                duration = time.time() - start_time
        
                # Save the model and the vocab
                if global_step.eval() % args.save_every == 0:
                    # Save model
                    model_file_name = os.path.join(full_model_dir, 'model')
                    saver.save(sess, model_file_name, global_step=global_step)
                    print('Model Saved To: {}'.format(model_file_name))

            except (tf.errors.OutOfRangeError, tf.errors.InvalidArgumentError) as e:
           
                print('Done training for %d epochs, %d steps.' % (args.num_epochs, global_step.eval()))
                # When done, ask the threads to stop.
                coord.request_stop()

        
        # Wait for threads to finish.
        coord.join(threads)
        sess.close()

0.24619221687316895s elapsed for loss calculation
Error encountered in mean gradient computation, setting update values to 0.
3.9057679176330566s elapsed for gradient/hessian calculation


FailedPreconditionError: Attempting to use uninitialized value weights/S_hat/avg
	 [[Node: weights/S_hat/avg/read = Identity[T=DT_FLOAT, _class=["loc:@weights/S_hat"], _device="/job:localhost/replica:0/task:0/cpu:0"](weights/S_hat/avg)]]

Caused by op 'weights/S_hat/avg/read', defined at:
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\IPython\core\interactiveshell.py", line 2683, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\IPython\core\interactiveshell.py", line 2787, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\IPython\core\interactiveshell.py", line 2847, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-19-19d907d6fa7b>", line 27, in <module>
    train_op = model.train(args=args)
  File "<ipython-input-12-29a5ff380e60>", line 157, in train
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\training\moving_averages.py", line 367, in apply
    colocate_with_primary=True)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\training\slot_creator.py", line 113, in create_slot
    return _create_slot_var(primary, val, "", validate_shape, None, None)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\training\slot_creator.py", line 66, in _create_slot_var
    validate_shape=validate_shape)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1065, in get_variable
    use_resource=use_resource, custom_getter=custom_getter)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 962, in get_variable
    use_resource=use_resource, custom_getter=custom_getter)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 367, in get_variable
    validate_shape=validate_shape, use_resource=use_resource)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 352, in _true_getter
    use_resource=use_resource)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 725, in _get_single_variable
    validate_shape=validate_shape)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\ops\variables.py", line 200, in __init__
    expected_shape=expected_shape)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\ops\variables.py", line 319, in _init_from_args
    self._snapshot = array_ops.identity(self._variable, name="read")
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1303, in identity
    result = _op_def_lib.apply_op("Identity", input=input, name=name)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Users\DailJa01\AppData\Local\Continuum\Anaconda3\envs\py35\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value weights/S_hat/avg
	 [[Node: weights/S_hat/avg/read = Identity[T=DT_FLOAT, _class=["loc:@weights/S_hat"], _device="/job:localhost/replica:0/task:0/cpu:0"](weights/S_hat/avg)]]
