In [1]:
!killall tensorboard
!rm /tmp/generated_data/1 -rf

import tensorflow as tf
#contrib/opt/python/training/external_optimizer.py
print tf.contrib.opt.ScipyOptimizerInterface


class SummaryManager:
    def __init__(self):
        self.iter_summaries = []

    def add_iter_summary(self, s):
        self.iter_summaries.append(s)
        
    def merge_iters(self):
        return tf.summary.merge(self.iter_summaries)
    
    

class HVar:
    #this contains all alphas in the graph
    all_hvars = []
    
    def __init__(self, var, hSize = 2):
        self.name = var.name.split(":")[0].split("/")[-1]
        
        with tf.name_scope(self.name + '_history'):
            self.var = var
            self.replicas = [] #this taks 2X memory
            self.aplha = []
            self.last_snapshot = tf.Variable(var.initialized_value(), name='snapshot') #this makes it 3X + hSize
            self.next_idx = 0
            self.op_cache = {}

            #counter = tf.Variable(0, dtype=tf.int32, name='sesop_counter')
            for i in range(hSize):
                #self.replicas[tf.placeholder(shape=var.get_shape(), dtype=tf.float32)] =\
                #    np.zeros(var.get_shape())
                self.replicas.append(tf.Variable(np.zeros(var.get_shape()), dtype=var.dtype.base_dtype, name='replica'))
                self.aplha.append(tf.Variable(np.zeros(1), dtype=var.dtype.base_dtype, name='alpha'))


            for i in range(hSize):
                self.push_history_op() #make sure all ops are created

            HVar.all_hvars.append(self)
            assert(self.next_idx == 0)

        
    def out(self):
        with tf.name_scope(self.name + '_out'):
            #return an affine combination of the history vectors
            #and a dictonary to add to feed_dict.
            self.o = self.var
            for r, a in zip(self.replicas, self.aplha):
                self.o += r*a

            return self.o
        
    #returns an op that updates history and snapshot (executed after optimization on alpha)
    def push_history_op(self):
        if self.next_idx not in self.op_cache:
            print 'HVar Cache Miss, creating the op for var ' + str(self.var.name) + ', idx = ' + str(self.next_idx)
            
            with tf.name_scope(self.name + '_update'):
                update_history_op = tf.assign(self.replicas[self.next_idx], self.out() - self.last_snapshot)
                with tf.control_dependencies([update_history_op]):
                    update_snapshot_op = tf.assign(self.last_snapshot, self.out())
                    update_var_op = tf.assign(self.var, self.out())

            self.op_cache[self.next_idx] = tf.group(update_history_op, update_var_op, update_snapshot_op)
            
        old_idx = self.next_idx
        self.next_idx = (self.next_idx + 1)%len(self.replicas)

        return self.op_cache[old_idx]
    
    @classmethod
    def all_trainable_alphas(self):
        alphas = []
        for hvar in HVar.all_hvars:
            alphas.extend(hvar.aplha)
        return alphas
    
    @classmethod
    def all_history_update_ops(self):
        group_op = tf.no_op()
        for hvar in HVar.all_hvars:
            group_op = tf.group(group_op, hvar.push_history_op())
            
        return group_op

    
    
    
class SeboostOptimizer:
    #batched_input, batched_labels are tensors that prodece batches
    #is_training is a tensor that will be true while training and false while testing
    #we run CG once in sesop_freq iterations 
    def __init__(self, loss, batched_input, batched_labels, sesop_freq):
        self.train_step = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss, name='minimizer')
        self.loss = loss
        self.train_loss = []
        self.loss_before_sesop = []
        self.loss_after_sesop = []
        self.history_update_ops = HVar.all_history_update_ops()
        self.sesop_freq = sesop_freq
        self.sesop_iteration_ran = 0
        self.avg_gain_from_cg = 0.0
        
        self.cg = tf.contrib.opt.ScipyOptimizerInterface(loss, var_list=HVar.all_trainable_alphas(),\
            method='CG', options={'maxiter':5})
        
        self.batched_input, self.batched_labels = batched_input, batched_labels
        
    #_feed_dict is the feed_dict needed to run regular sgd iteration
    #sesop_feed_dict should contain feeds for the batch sesop will use!
    #return a list of train_loss. The last elment in the list contain the loss after sesop.
    def run_sesop_iteration(self, sess, _feed_dict, sesop_feed_dict):
        #run sesop_freq SGD iterations:
        train_loss = []

            
        for i in range(self.sesop_freq):
            _, loss = sess.run([self.train_step, self.loss], feed_dict=_feed_dict)
            train_loss.append(loss)
            
            s = sess.run(self.iter_summaries, feed_dict=_feed_dict)
            self.writer.add_summary(s, self.sesop_iteration_ran*(self.sesop_freq + 1) + i)
        
        self.train_loss.extend(train_loss)
        self.loss_before_sesop.append(train_loss[-1])
        #run 1 CG iteration
        self.cg.minimize(sess, feed_dict=sesop_feed_dict)
        
        self.loss_after_sesop.append(sess.run(self.loss, feed_dict=sesop_feed_dict))
        
        self.avg_gain_from_cg += self.loss_before_sesop[-1] - self.loss_after_sesop[-1] 
        print 'Gain from CG: ' + str(self.avg_gain_from_cg/self.sesop_iteration_ran)
        train_loss.append(self.loss_after_sesop[-1])
        #Now when alphas are optimized, run the update history ops:
        sess.run(self.history_update_ops)
        
        s = sess.run(self.iter_summaries, feed_dict=_feed_dict)
        self.writer.add_summary(s, self.sesop_iteration_ran*(self.sesop_freq + 1) + self.sesop_freq)
            
        self.sesop_iteration_ran += 1
        return train_loss
        
        
summaryMgr = SummaryManager()

tensorboard: no process found
<class 'tensorflow.contrib.opt.python.training.external_optimizer.ScipyOptimizerInterface'>


In [2]:

def fc_layer(input, n_in, n_out, log):
    with tf.name_scope('FC'):
        W = HVar(tf.Variable(tf.random_normal([n_in, n_out]), name='W'))
        b = HVar(tf.Variable(tf.zeros([n_out]), name='b'))

        
        a = tf.matmul(input, W.out()) + b.out()
        
        out = tf.nn.tanh(a)
        
        if log:
            summaryMgr.add_iter_summary(tf.summary.histogram('activations_before_tanh', a))
            summaryMgr.add_iter_summary(tf.summary.histogram('activations_after_tanh', out))
        
        return out



In [3]:
#define the model:


def build_model(x, y, dim, log=False):
    layers = [fc_layer(x, dim, dim, log)]
    for i in range(1):
        layers.append(fc_layer(layers[-1], dim, dim, log))
    layers.append(fc_layer(layers[-1], dim, 1, log))

    model_out = layers[-1]


    
    #when log is true we build a model for training!
    if log:

        loss_per_sample = tf.squared_difference(model_out, y, name='loss_per_sample')
        loss = tf.reduce_mean(loss_per_sample, name='loss')
        summaryMgr.add_iter_summary(tf.summary.scalar('loss', loss))

        return model_out, loss
    #tf.summary.scalar('loss', loss)
    
    return model_out #, loss, train_step


In [4]:
import numpy as np

def generate_random_data(dim, n=5000):
    cov = np.random.rand(dim, dim)
    cov = np.dot(cov, cov.transpose())

    training_data = np.random.multivariate_normal(np.zeros(dim), cov, n)
    testing_data = np.random.multivariate_normal(np.zeros(dim), cov, n)
    
    with tf.name_scope('generating_data'):
        x = tf.placeholder(tf.float32, shape=[None, dim], name='x')
        model_out = build_model(x, None, dim, False)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            training_labels = sess.run(model_out, feed_dict={x: training_data})
            testing_labels = sess.run(model_out, feed_dict={x: testing_data})

        return training_data, testing_data, training_labels, testing_labels





In [5]:
dim = 10
#None for the batches
#x = tf.placeholder(tf.float32, shape=[None, dim], name='x')
#y = tf.placeholder(tf.float32, shape=[None, 1], name='y')


training_data, testing_data, training_labels, testing_labels = generate_random_data(dim, 5000)

#print 'training_data = ' + str(training_data)
#print 'testing_data = ' + str(testing_data)
#print 'training_labels = ' + str(training_labels)
#print 'testing_labels = ' + str(testing_labels)





#numpy.random.multivariate_normal() #mean, cov[, size])

#generate random covariance matrix, must be semi-positive definite, so we multiply it by its transpose

        
print training_data.shape
print testing_data.shape

#batch_size
bs = 100
with tf.Session() as sess:
    
    
    with tf.name_scope('data'):
        is_training = tf.placeholder(tf.bool,name='is_training') #must be feed with dict_feed.
        
        def create_training_dataset():
            inputs = tf.cast(tf.constant(training_data, name='train_dataset_x'), tf.float32)
            labels = tf.cast(tf.constant(training_labels, name='train_dataset_y'), tf.float32)
            input, label = tf.train.slice_input_producer([inputs, labels], name='train_slicer')
            batched_input, batched_labels = \
                tf.train.batch([input, label], batch_size=bs, name='train_batcher')
            return batched_input, batched_labels
        
        
        
        def create_testing_dataset():
            inputs = tf.cast(tf.constant(testing_data, name='test_dataset_x'), tf.float32)
            labels = tf.cast(tf.constant(testing_labels, name='test_dataset_y'), tf.float32)
            
            input, label = tf.train.slice_input_producer([inputs, labels], name='test_slicer')
            batched_input, batched_labels = \
                tf.train.batch([input, label], batch_size=bs, name='test_batcher')
            return batched_input, batched_labels
        
        
        #It is very important to call create_training_dataset and create_testing_dataset 
        #create all queues (for train and test)
        train_batched_input, train_batched_labels = create_training_dataset()
        test_batched_input, test_batched_labels = create_testing_dataset()
        
        
        batched_input, batched_labels = tf.cond(is_training, lambda: [train_batched_input, train_batched_labels],\
            lambda: [test_batched_input, test_batched_labels])
        
    
    
    model_out, loss = build_model(batched_input, batched_labels, dim, True)
    
    #class SeboostOptimizer:
    #batched_input, batched_labels are tensors that prodece batches
    #is_training is a tensor that will be true while training and false while testing
    #def __init__(self, loss, batched_input, batched_labels):
    #run sesop once an epoch
    sesop_freq = 5000/bs
    optimizer = SeboostOptimizer(loss, batched_input, batched_labels, sesop_freq)
    
    #method=’CG’

    
    #hold acc loss
    with tf.name_scope('loss_accamulator'):
        acc_loss = tf.Variable(0, name='acc_loss', dtype=tf.float32)
        train_loss_summary = tf.summary.scalar('train_loss', acc_loss)
        test_loss_summary = tf.summary.scalar('test_loss', acc_loss)
    
    iter_summaries = summaryMgr.merge_iters()
    optimizer.iter_summaries = iter_summaries
        
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    
    #merged_summery = tf.summary.merge_all()
    writer = tf.summary.FileWriter('/tmp/generated_data/1')
    writer.add_graph(sess.graph)
    optimizer.writer = writer
    
    #we must start queue_runners
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    
    
    #run 200 epochs:
    for epoch in range(200):
        #run 20 steps (full batch optimization to start with)
        print 'epoch #' + str(epoch)
            
        print 'Computing train error'
        #train error:
        total_loss = 0
        for i in range(0):
            iter_loss = sess.run(loss, feed_dict={is_training: True})
            total_loss += iter_loss
            
        #put the accamulated loss into acc_loss node
        writer.add_summary(sess.run(train_loss_summary, feed_dict={acc_loss: total_loss/(5000/bs)}), epoch)
        
        print 'Computing test error'
        #test error:
        total_loss = 0
        for i in range(0):
            iter_loss = sess.run(loss, feed_dict={is_training: False})
            total_loss += iter_loss
            
        #put the accamulated loss into acc_loss node
        writer.add_summary(sess.run(test_loss_summary, feed_dict={acc_loss: total_loss/(5000/bs)}), epoch)
        
        print 'Training'
        total_loss = 0
        iters_per_epoch = 5000/bs
        #train epoch (This actually extract 2 epochs out of the batcher, because of the summary).
        #so in total, a full epoch loop extract 3 epochs, thus after 8 epochs we will extract 24 epochs,
        #thus since we limited number of 
        #print 'iters_per_epoch = ' + str(iters_per_epoch) #iters_per_epoch = 50
        assert(sesop_freq <= iters_per_epoch)
        for i in range(iters_per_epoch/sesop_freq):
            #_, iter_loss = sess.run([train_step, loss], feed_dict={is_training: True})
            #take a batch:
            batched_input_actual, batched_labels_actual = \
                sess.run([batched_input, batched_labels], feed_dict={is_training: True})
                
            #this runs 100 regular iterations + 1 CG iteration.
            #optimizer.run_sesop_iteration(self, sess, sesop_feed_dict)
            sesop_iter_loss = optimizer.run_sesop_iteration(sess=sess, _feed_dict={is_training: True} ,sesop_feed_dict=\
                {is_training: True, batched_input: batched_input_actual, batched_labels: batched_labels_actual})
            
            
        writer.flush()
    coord.request_stop()
    coord.join(threads)

    #res = sess.run(model_out, feed_dict={x: training_data})
    
    #print res
    
    
    #tf.summary.scalar('model_out', model_out)

HVar Cache Miss, creating the op for var generating_data/FC/W:0, idx = 0
HVar Cache Miss, creating the op for var generating_data/FC/W:0, idx = 1
HVar Cache Miss, creating the op for var generating_data/FC/b:0, idx = 0
HVar Cache Miss, creating the op for var generating_data/FC/b:0, idx = 1
HVar Cache Miss, creating the op for var generating_data/FC_1/W:0, idx = 0
HVar Cache Miss, creating the op for var generating_data/FC_1/W:0, idx = 1
HVar Cache Miss, creating the op for var generating_data/FC_1/b:0, idx = 0
HVar Cache Miss, creating the op for var generating_data/FC_1/b:0, idx = 1
HVar Cache Miss, creating the op for var generating_data/FC_2/W:0, idx = 0
HVar Cache Miss, creating the op for var generating_data/FC_2/W:0, idx = 1
HVar Cache Miss, creating the op for var generating_data/FC_2/b:0, idx = 0
HVar Cache Miss, creating the op for var generating_data/FC_2/b:0, idx = 1
(5000, 10)
(5000, 10)
HVar Cache Miss, creating the op for var FC/W:0, idx = 0
HVar Cache Miss, creating the

InvalidArgumentError: Infinity in summary histogram for: FC/activations_before_tanh
	 [[Node: FC/activations_before_tanh = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](FC/activations_before_tanh/tag, FC/add/_105)]]

Caused by op u'FC/activations_before_tanh', defined at:
  File "/home/shai/anaconda/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/home/shai/anaconda/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/shai/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home/shai/anaconda/lib/python2.7/site-packages/traitlets/config/application.py", line 596, in launch_instance
    app.start()
  File "/home/shai/anaconda/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 442, in start
    ioloop.IOLoop.instance().start()
  File "/home/shai/anaconda/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/home/shai/anaconda/lib/python2.7/site-packages/tornado/ioloop.py", line 866, in start
    handler_func(fd_obj, events)
  File "/home/shai/anaconda/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/shai/anaconda/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/shai/anaconda/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/shai/anaconda/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/shai/anaconda/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/shai/anaconda/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/shai/anaconda/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/shai/anaconda/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 391, in execute_request
    user_expressions, allow_stdin)
  File "/home/shai/anaconda/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 199, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/shai/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2705, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/shai/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2809, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/shai/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2869, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-3f6564a26bf4>", line 62, in <module>
    model_out, loss = build_model(batched_input, batched_labels, dim, True)
  File "<ipython-input-3-74caaac83eac>", line 5, in build_model
    layers = [fc_layer(x, dim, dim, log)]
  File "<ipython-input-2-cdd5782459ea>", line 13, in fc_layer
    summaryMgr.add_iter_summary(tf.summary.histogram('activations_before_tanh', a))
  File "/home/shai/anaconda/lib/python2.7/site-packages/tensorflow/python/summary/summary.py", line 203, in histogram
    tag=scope.rstrip('/'), values=values, name=scope)
  File "/home/shai/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 139, in _histogram_summary
    name=name)
  File "/home/shai/anaconda/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/home/shai/anaconda/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2327, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/shai/anaconda/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1226, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Infinity in summary histogram for: FC/activations_before_tanh
	 [[Node: FC/activations_before_tanh = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](FC/activations_before_tanh/tag, FC/add/_105)]]


In [None]:
print model_out

In [None]:
bs = 50
training_data[0:bs].shape
