In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.python.ops import variables

class HVar:
    """A TensorFlow variable paired with a fixed-size history of replica tensors.

    Each instance owns, next to the wrapped variable:
      * ``replicas`` - ``hSize`` variables shaped like the wrapped variable,
        initialised to zero;
      * ``aplha``    - ``hSize`` one-element variables (one per replica),
        initialised to zero;
      * ``last_snapshot`` - a variable initialised from the wrapped variable.

    ``out()`` builds ``var + sum(replica_i * alpha_i)``; ``push_history_op()``
    hands out ops that write ``out() - last_snapshot`` into the replicas one
    slot at a time, round-robin.
    """

    # Registry of every HVar constructed so far. It is a class attribute, so it
    # is shared by all instances; the classmethods below walk it.
    all_hvars = []

    def __init__(self, var, hSize = 2):
        """Create the history variables for ``var`` and register this instance.

        Args:
            var: the TensorFlow variable to wrap.
            hSize: number of replica/alpha pairs to allocate.
        """
        # Last path component of the variable name, without the ":<n>" suffix.
        self.name = var.name.split(":")[0].split("/")[-1]

        with tf.name_scope(self.name + '_history'):
            self.var = var
            self.replicas = []  # hSize extra variables, each the size of `var`
            # (sic) attribute name is misspelled; kept because it is read from
            # outside this method.
            self.aplha = []
            # One more full-size copy of `var`.
            self.last_snapshot = tf.Variable(var.initialized_value(), name='snapshot')
            self.next_idx = 0    # replica slot the next push_history_op() targets
            self.op_cache = {}   # slot index -> grouped update op

            for _ in range(hSize):
                self.replicas.append(tf.Variable(np.zeros(var.get_shape()), dtype=var.dtype.base_dtype, name='replica'))
                self.aplha.append(tf.Variable(np.zeros(1), dtype=var.dtype.base_dtype, name='alpha'))

            # Build the update op of every slot up front; after hSize calls the
            # round-robin index is back at its starting value.
            for _ in range(hSize):
                self.push_history_op()

            HVar.all_hvars.append(self)
            assert(self.next_idx == 0)

    def out(self):
        """Build and return ``var + sum(replica_i * alpha_i)``.

        Every call adds new ops to the graph. The result is also stored on
        ``self.o``.
        """
        with tf.name_scope(self.name + '_out'):
            # Accumulate with explicit `a + b`: the original `+=` started as an
            # in-place add on a tf.Variable, which TF 1.x flags as deprecated.
            combined = self.var
            for replica, alpha in zip(self.replicas, self.aplha):
                combined = combined + replica*alpha

            self.o = combined
            return combined

    def push_history_op(self):
        """Return the update op for the current slot and advance to the next slot.

        The op for a slot is built on first request and cached in ``op_cache``.
        It groups three assignments:
          * ``replicas[slot]`` gets ``out() - last_snapshot``;
          * under a control dependency on that assignment, ``last_snapshot``
            and ``var`` are both assigned ``out()``.
        The source comments say it is meant to be executed after the alphas
        have been optimised.
        """
        if self.next_idx not in self.op_cache:
            # Single-argument call form prints the same text on Python 2 and 3.
            print('HVar Cache Miss, creating the op for var ' + str(self.var.name) + ', idx = ' + str(self.next_idx))

            with tf.name_scope(self.name + '_update'):
                update_history_op = tf.assign(self.replicas[self.next_idx], self.out() - self.last_snapshot)
                # NOTE(review): out() is rebuilt for each assignment below and the
                # alphas are not reset here. Whether these reads observe the
                # replica value from before or after update_history_op is not
                # established by this code - confirm the intended ordering.
                with tf.control_dependencies([update_history_op]):
                    update_snapshot_op = tf.assign(self.last_snapshot, self.out())
                    update_var_op = tf.assign(self.var, self.out())

            self.op_cache[self.next_idx] = tf.group(update_history_op, update_var_op, update_snapshot_op)

        old_idx = self.next_idx
        self.next_idx = (self.next_idx + 1)%len(self.replicas)

        return self.op_cache[old_idx]

    @classmethod
    def all_trainable_alphas(cls):
        """Return one flat list with the alpha variables of every registered HVar."""
        alphas = []
        for hvar in cls.all_hvars:
            alphas.extend(hvar.aplha)
        return alphas

    @classmethod
    def all_history_update_ops(cls):
        """Return a single op grouping ``push_history_op()`` of every registered HVar.

        Each call advances the slot index of every registered HVar by one.
        """
        # One flat group instead of a chain of nested two-element groups.
        return tf.group(*[hvar.push_history_op() for hvar in cls.all_hvars])

    
    
    
class SeboostOptimizer:
    """Alternates gradient-descent steps with a SciPy CG step over the HVar alphas.

    One call to ``run_sesop_iteration`` runs ``sesop_freq`` gradient-descent
    steps on ``loss``, then one ``ScipyOptimizerInterface`` CG minimisation
    restricted to ``HVar.all_trainable_alphas()``, then the HVar history update.

    ``batched_input`` and ``batched_labels`` are stored on the instance but are
    not otherwise used by this class.
    """

    # Optional summary hooks. Nothing in this class creates them; assign both
    # (a summary tensor/op and a writer exposing ``add_summary``) to have
    # summaries written. While either is None, summary writing is skipped.
    iter_summaries = None
    writer = None

    def __init__(self, loss, batched_input, batched_labels, sesop_freq, learning_rate=0.1, cg_maxiter=5):
        """Build the training ops.

        Args:
            loss: scalar loss tensor minimised by both optimisers.
            batched_input: stored as ``self.batched_input``.
            batched_labels: stored as ``self.batched_labels``.
            sesop_freq: number of gradient-descent steps run before each CG step.
            learning_rate: gradient-descent learning rate (default 0.1).
            cg_maxiter: ``maxiter`` option passed to the CG optimiser (default 5).

        Raises:
            ValueError: if ``sesop_freq`` is smaller than 1.
        """
        if sesop_freq < 1:
            # With zero SGD steps there is no "loss before the CG step" to record.
            raise ValueError('sesop_freq must be at least 1, got ' + str(sesop_freq))

        self.train_step = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss, name='minimizer')
        self.loss = loss
        self.train_loss = []
        self.loss_before_sesop = []
        self.loss_after_sesop = []
        # Grouped history-update ops, keyed by the per-HVar ops they contain, so
        # rotating through the replica slots adds a bounded number of graph nodes.
        self._history_group_cache = {}
        # Op to run after the next CG step; replaced after every iteration.
        self.history_update_ops = self._next_history_update_op()
        self.sesop_freq = sesop_freq
        self.sesop_iteration_ran = 0
        # Despite the name this holds the running SUM of gains; the average is
        # derived from it when it is printed.
        self.avg_gain_from_cg = 0.0

        self.cg = tf.contrib.opt.ScipyOptimizerInterface(loss, var_list=HVar.all_trainable_alphas(),
            method='CG', options={'maxiter': cg_maxiter})

        self.batched_input, self.batched_labels = batched_input, batched_labels

    def _next_history_update_op(self):
        """Advance every registered HVar one slot and return the grouped update op."""
        ops = tuple(hvar.push_history_op() for hvar in HVar.all_hvars)
        if ops not in self._history_group_cache:
            self._history_group_cache[ops] = tf.group(*ops)
        return self._history_group_cache[ops]

    def _write_summary(self, sess, feed_dict, step):
        """Evaluate ``iter_summaries`` and hand it to ``writer``, if both are set."""
        if self.iter_summaries is None or self.writer is None:
            return
        s = sess.run(self.iter_summaries, feed_dict=feed_dict)
        self.writer.add_summary(s, step)

    def run_sesop_iteration(self, sess, _feed_dict, sesop_feed_dict):
        """Run ``sesop_freq`` SGD steps, one CG step, then the history update.

        Args:
            sess: session used for every run.
            _feed_dict: feed for the gradient-descent steps and the summaries.
            sesop_feed_dict: feed for the CG step and the post-CG loss.

        Returns:
            List with the loss of each SGD step followed by the loss measured
            after the CG step (``sesop_freq + 1`` entries).
        """
        # Summary steps are numbered sesop_freq + 1 per call: one per SGD step
        # plus one after the history update.
        step_base = self.sesop_iteration_ran*(self.sesop_freq + 1)

        train_loss = []
        for i in range(self.sesop_freq):
            _, loss = sess.run([self.train_step, self.loss], feed_dict=_feed_dict)
            train_loss.append(loss)
            self._write_summary(sess, _feed_dict, step_base + i)

        self.train_loss.extend(train_loss)
        self.loss_before_sesop.append(train_loss[-1])

        self.cg.minimize(sess, feed_dict=sesop_feed_dict)
        self.loss_after_sesop.append(sess.run(self.loss, feed_dict=sesop_feed_dict))

        # NOTE(review): the "before" loss comes from _feed_dict and the "after"
        # loss from sesop_feed_dict, so the gain mixes two batches whenever the
        # feeds differ - confirm that this is intended.
        self.avg_gain_from_cg += self.loss_before_sesop[-1] - self.loss_after_sesop[-1]
        # Divide by the number of CG runs so far, this one included; the
        # counter itself is still 0 during the first call.
        cg_runs = self.sesop_iteration_ran + 1
        print('Gain from CG: ' + str(self.avg_gain_from_cg/cg_runs))
        train_loss.append(self.loss_after_sesop[-1])

        # The alphas are optimised now: store the step in the current replica
        # slot, then fetch the op for the next slot so that the following
        # iteration writes a different replica instead of overwriting this one.
        sess.run(self.history_update_ops)
        self.history_update_ops = self._next_history_update_op()

        self._write_summary(sess, _feed_dict, step_base + self.sesop_freq)

        self.sesop_iteration_ran += 1
        return train_loss

In [15]:

# Scratch check of the name parsing used by HVar.__init__: strip the ":<n>"
# suffix and any name-scope prefix from a variable name.
dim = 10

# Build the sample variable in a throwaway graph so this cell runs on a fresh
# kernel (it used to rely on an `x` created by a cell further down) without
# adding a variable to the default graph used by the other cells.
with tf.Graph().as_default():
    with tf.name_scope('scope'):
        # dtype is passed by keyword: the second positional argument of
        # tf.Variable is `trainable`, not the dtype.
        x = tf.Variable(tf.random_normal([dim, 1]), dtype=tf.float32, name='x')

# Prints "x" for a variable named "scope/x:0".
print(x.name.split(":")[0].split("/")[-1])

x


In [6]:
dim = 10

# Quadratic test problem: loss(x) = x^T A x + b^T x + c with A = M M^T.
# The coefficients are sampled once with NumPy and frozen as constants. As
# tf.random_normal tensors they would be re-sampled on every sess.run, so each
# loss/gradient evaluation would see a different function.
rng = np.random.RandomState(0)
M = rng.randn(dim, dim).astype(np.float32)

# dtype is passed by keyword: the second positional argument of tf.Variable is
# `trainable`, not the dtype.
x = tf.Variable(tf.random_normal([dim, 1]), dtype=tf.float32, name='x')
# Constructed for its side effects: creates the history variables for x and
# registers the wrapper in HVar.all_hvars.
HVar(x)

A = tf.constant(M.dot(M.T), name='A')
b = tf.constant(rng.randn(dim, 1).astype(np.float32), name='b')
c = tf.constant(rng.randn(1, 1).astype(np.float32), name='c')

model_out = tf.matmul(tf.transpose(x), tf.matmul(A, x)) + tf.matmul(tf.transpose(b), x) + c
loss = model_out

train_step = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss, name='minimizer')
# No var_list is given, so the optimizer packs whatever variables it picks up
# by default; the printed shape shows how many scalars that is.
op = tf.contrib.opt.ScipyOptimizerInterface(loss, method='CG', options={'maxiter': 300, 'gtol': 0.00001})

print(op._packed_var.shape)
# The block below is a disabled experiment kept as a string literal (SGD vs.
# CG on the same loss); being the last expression, it is echoed as cell output.
"""
self._packed_var

with tf.Session() as sess:
    
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    
    for i in range(20):
        iter_loss = sess.run(loss)
        print 'loss = '  + str(iter_loss)
        sess.run(train_step)
        
        #op.minimize(sess)
        
print '----------------'

with tf.Session() as sess:
    
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    
    for i in range(20):
        iter_loss = sess.run(loss)
        print 'loss = '  + str(iter_loss)
        op.minimize(sess)
        
"""

(50,)


"\nself._packed_var\n\nwith tf.Session() as sess:\n    \n    sess.run(tf.global_variables_initializer())\n    sess.run(tf.local_variables_initializer())\n    \n    for i in range(20):\n        iter_loss = sess.run(loss)\n        print 'loss = '  + str(iter_loss)\n        sess.run(train_step)\n        \n        #op.minimize(sess)\n        \nprint '----------------'\n\nwith tf.Session() as sess:\n    \n    sess.run(tf.global_variables_initializer())\n    sess.run(tf.local_variables_initializer())\n    \n    for i in range(20):\n        iter_loss = sess.run(loss)\n        print 'loss = '  + str(iter_loss)\n        op.minimize(sess)\n        \n"

In [21]:
# Scratch check: `!=` on two different integers evaluates to True.
1 != 0

True