In [1]:
!killall tensorboard
!rm /tmp/generated_data/1 -rf
!rm /tmp/generated_data/2 -rf
!rm /tmp/generated_data/3 -rf
!rm /tmp/generated_data/4 -rf
!rm /tmp/generated_data/5 -rf
!rm /tmp/generated_data/6 -rf
!rm /tmp/generated_data/7 -rf
!rm /tmp/generated_data/8 -rf
!rm /tmp/generated_data/9 -rf
!rm /tmp/generated_data/10 -rf

import tensorflow as tf
#contrib/opt/python/training/external_optimizer.py
print tf.contrib.opt.ScipyOptimizerInterface


class SummaryManager:
    def __init__(self):
        self.iter_summaries = []

    def add_iter_summary(self, s):
        self.iter_summaries.append(s)
        
    def merge_iters(self):
        return tf.summary.merge(self.iter_summaries)
    
summaryMgr = SummaryManager()


class HVar:
    #this contains all alphas in the graph
    all_hvars = []
    
    def __init__(self, var, hSize = 2):
        self.name = var.name.split(":")[0].split("/")[-1]
        
        with tf.name_scope(self.name + '_history'):
            self.var = var
            self.replicas = [] #this taks 2X memory
            self.aplha = []
            self.last_snapshot = tf.Variable(var.initialized_value(), name='snapshot') #this makes it 3X + hSize
            self.next_idx = 0
            self.op_cache = {}
            self.o = None
            
            with tf.name_scope('replicas'):
                for i in range(hSize):
                    self.replicas.append(tf.Variable(np.zeros(var.get_shape()),\
                        dtype=var.dtype.base_dtype, name='replica'))
                    
            with tf.name_scope('alphas'):
                for i in range(hSize):
                    self.aplha.append(tf.Variable(np.zeros(1), dtype=var.dtype.base_dtype, name='alpha'))
                    summaryMgr.add_iter_summary(tf.summary.histogram('alphas', self.aplha[-1]))
                


            for i in range(hSize):
                self.push_history_op() #make sure all ops are created

            HVar.all_hvars.append(self)
            assert(self.next_idx == 0)

        
    def out(self):
        if self.o is not None:
            return self.o
        
        with tf.name_scope(self.name + '_out'):
            #return an affine combination of the history vectors
            #and a dictonary to add to feed_dict.
            self.o = self.var
            for r, a in zip(self.replicas, self.aplha):
                self.o += r*a

            return self.o
        
    #returns an op that updates history and snapshot (executed after optimization on alpha)
    #This must be called when alpahs are non zeros!!!
    def push_history_op(self):
        if self.next_idx not in self.op_cache:
            print 'HVar Cache Miss, creating the op for var ' + str(self.var.name) + ', idx = ' + str(self.next_idx)
            
            with tf.name_scope(self.name + '_update'):
                
                #first we update the original variable to the sesop result
                update_var_op = tf.assign(self.var, self.out())
                with tf.control_dependencies([update_var_op]):
                    #now we update the history (self.var contain the sesop result):
                    update_history_op = tf.assign(self.replicas[self.next_idx], self.var - self.last_snapshot)
                    with tf.control_dependencies([update_history_op]):
                        #now we update the last_snapshot to be the sesop result
                        update_snapshot_op = tf.assign(self.last_snapshot, self.var)
                        with tf.control_dependencies([update_snapshot_op]):
                            #finally we reset all the alphas (infact we can take this out of the dependecy)
                            #as it only affect self.out()
                            reset_alpha_op = self.zero_alpha_op()            
                            self.op_cache[self.next_idx] =\
                                tf.group(update_history_op, update_var_op, update_snapshot_op, reset_alpha_op)
            
        old_idx = self.next_idx
        self.next_idx = (self.next_idx + 1)%len(self.replicas)

        return self.op_cache[old_idx]
    
    def zero_alpha_op(self):
        group_op = tf.no_op()
        for a in self.aplha:
            group_op = tf.group(group_op, tf.assign(a, np.zeros(1)))
        return group_op
        
    #the alphas from sesop (the coefitients that choose the history vector)
    @classmethod
    def all_trainable_alphas(self):
        alphas = []
        for hvar in HVar.all_hvars:
            alphas.extend(hvar.aplha)
        return alphas
    
    #all the regular weights to be trained
    @classmethod
    def all_trainable_weights(self):
        weights = []
        for hvar in HVar.all_hvars:
            weights.append(hvar.var)
        return weights
    
    @classmethod
    def all_history_update_ops(self):
        group_op = tf.no_op()
        for hvar in HVar.all_hvars:
            group_op = tf.group(group_op, hvar.push_history_op())
            
        return group_op

    
    
    
class SeboostOptimizer:
    #batched_input, batched_labels are tensors that prodece batches
    #is_training is a tensor that will be true while training and false while testing
    #we run CG once in sesop_freq iterations 
    def __init__(self, loss, batched_input, batched_labels, sgd_steps):
        
        self.loss = loss
        self.train_loss = []
        self.loss_before_sesop = []
        self.loss_after_sesop = []
        self.sgd_steps = sgd_steps
        self.iteration_ran = 0 
        self.sesop_iteration_ran = 0
        self.avg_gain_from_cg = 0.0
        self.iter_summaries = summaryMgr.merge_iters()
        
        self.train_step = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss, name='minimizer',\
            var_list=HVar.all_trainable_weights())
        self.cg = tf.contrib.opt.ScipyOptimizerInterface(loss, var_list=HVar.all_trainable_alphas(),\
            method='CG', options={'maxiter':5})
        
        #all_trainable_weights
        self.batched_input, self.batched_labels = batched_input, batched_labels
        
    #_feed_dict is the feed_dict needed to run regular sgd iteration
    #sesop_feed_dict should contain feeds for the batch sesop will use!
    #return a list of train_loss. The last elment in the list contain the loss after sesop.
    def run_sesop_iteration(self, sess, _feed_dict, sesop_feed_dict):
        #run sesop_freq SGD iterations:
        if self.iteration_ran%self.sgd_steps != 0:
            _, loss = sess.run([self.train_step, self.loss], feed_dict=_feed_dict)
            self.iteration_ran += 1
            
            self.train_loss.append(loss)
            self.writer.add_summary(sess.run(self.iter_summaries, feed_dict=_feed_dict), self.iteration_ran)
            return loss

        
        self.loss_before_sesop.append(sess.run(self.loss, feed_dict=sesop_feed_dict))
        #run 1 CG iteration
        self.cg.minimize(sess, feed_dict=sesop_feed_dict)
        self.iteration_ran += 1
        self.sesop_iteration_ran += 1
        
        self.loss_after_sesop.append(sess.run(self.loss, feed_dict=sesop_feed_dict))
        
        self.avg_gain_from_cg += self.loss_before_sesop[-1] - self.loss_after_sesop[-1] 
        print 'Gain from CG: ' + str(self.avg_gain_from_cg/(self.sesop_iteration_ran))
        self.train_loss.append(self.loss_after_sesop[-1])
        
        #We want to capture the values of alpha before we zero them, so we need to call
        #the summary before we zero them in self.history_update_ops
        self.writer.add_summary(sess.run(self.iter_summaries, feed_dict=_feed_dict), self.iteration_ran)
        
        #Now when alphas are optimized, run the update history ops:
        sess.run(HVar.all_history_update_ops())
        
        
            

        return self.loss_after_sesop[-1] 
        
        


IndentationError: expected an indented block (<ipython-input-1-3e23a4b0d774>, line 108)

In [None]:

def fc_layer(input, n_in, n_out, log, hSize):
    with tf.name_scope('FC'):
        if log:
            W = HVar(tf.Variable(tf.random_normal([n_in, n_out]), name='W'), hSize)
            b = HVar(tf.Variable(tf.zeros([n_out]), name='b'), hSize)
            a = tf.matmul(input, W.out()) + b.out()
        else:
            W = tf.Variable(tf.random_normal([n_in, n_out]), name='W')
            b = tf.Variable(tf.zeros([n_out]), name='b')
            a = tf.matmul(input, W) + b
        
        out = tf.nn.tanh(a)
        
        if log:
            summaryMgr.add_iter_summary(tf.summary.histogram('activations_before_tanh', a))
            summaryMgr.add_iter_summary(tf.summary.histogram('activations_after_tanh', out))
        
        return out



In [None]:
#define the model:


def build_model(x, y, dim, log=False, hSize=0):
    layers = [fc_layer(x, dim, dim, log, hSize)]
    for i in range(1):
        layers.append(fc_layer(layers[-1], dim, dim, log, hSize))
    layers.append(fc_layer(layers[-1], dim, 1, log, hSize))

    model_out = layers[-1]


    
    #when log is true we build a model for training!
    if log:
        loss_per_sample = tf.squared_difference(model_out, y, name='loss_per_sample')
        loss = tf.reduce_mean(loss_per_sample, name='loss')
        summaryMgr.add_iter_summary(tf.summary.scalar('loss', loss))

        return model_out, loss
    #tf.summary.scalar('loss', loss)
    
    return model_out #, loss, train_step


In [None]:
import numpy as np

def generate_random_data(dim, n=5000):
    cov = np.random.rand(dim, dim)
    cov = np.dot(cov, cov.transpose())

    training_data = np.random.multivariate_normal(np.zeros(dim), cov, n)
    testing_data = np.random.multivariate_normal(np.zeros(dim), cov, n)
    
    with tf.name_scope('generating_data'):
        x = tf.placeholder(tf.float32, shape=[None, dim], name='x')
        model_out = build_model(x, None, dim, False)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            training_labels = sess.run(model_out, feed_dict={x: training_data})
            testing_labels = sess.run(model_out, feed_dict={x: testing_data})

        return training_data, testing_data, training_labels, testing_labels





In [None]:
dim = 10

training_data, testing_data, training_labels, testing_labels = generate_random_data(dim, 5000)
print training_data.shape
print testing_data.shape



In [None]:

#bs is batch size
#sesop_freq is in (0,1) and is the fraction of sesop iterations.
#i.e., if sesop_freq = 0.1 then we do 1 sesop iteration for each one sgd iteration
#epochs is the number of epochs
def run_experiment(bs, sesop_freq, hSize, epochs, file_writer_suffix):
    #batch_size
    with tf.Session() as sess:
        with tf.name_scope('data'):
            is_training = tf.placeholder(tf.bool,name='is_training') #must be feed with dict_feed.

            def create_training_dataset():
                inputs = tf.cast(tf.constant(training_data, name='train_dataset_x'), tf.float32)
                labels = tf.cast(tf.constant(training_labels, name='train_dataset_y'), tf.float32)
                input, label = tf.train.slice_input_producer([inputs, labels], name='train_slicer')
                batched_input, batched_labels = \
                    tf.train.batch([input, label], batch_size=bs, name='train_batcher')
                return batched_input, batched_labels


            def create_testing_dataset():
                inputs = tf.cast(tf.constant(testing_data, name='test_dataset_x'), tf.float32)
                labels = tf.cast(tf.constant(testing_labels, name='test_dataset_y'), tf.float32)

                input, label = tf.train.slice_input_producer([inputs, labels], name='test_slicer')
                batched_input, batched_labels = \
                    tf.train.batch([input, label], batch_size=bs, name='test_batcher')
                return batched_input, batched_labels


            #It is very important to call create_training_dataset and create_testing_dataset 
            #create all queues (for train and test)
            train_batched_input, train_batched_labels = create_training_dataset()
            test_batched_input, test_batched_labels = create_testing_dataset()


            batched_input, batched_labels = tf.cond(is_training, lambda: [train_batched_input, train_batched_labels],\
                lambda: [test_batched_input, test_batched_labels])



        model_out, loss = build_model(batched_input, batched_labels, dim, True, hSize)


        iters_per_epoch = 5000/bs
        sgd_steps = int(1/sesop_freq)
        optimizer = SeboostOptimizer(loss, batched_input, batched_labels, sgd_steps)

        #hold acc loss
        with tf.name_scope('loss_accamulator'):
            acc_loss = tf.Variable(0, name='acc_loss', dtype=tf.float32)
            train_loss_summary = tf.summary.scalar('train_loss', acc_loss)
            test_loss_summary = tf.summary.scalar('test_loss', acc_loss)

        iter_summaries = summaryMgr.merge_iters()
        optimizer.iter_summaries = iter_summaries

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        #merged_summery = tf.summary.merge_all()
        writer = tf.summary.FileWriter('/tmp/generated_data/' + file_writer_suffix)
        writer.add_graph(sess.graph)
        optimizer.writer = writer

        #we must start queue_runners
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)


        for epoch in range(epochs):
            #run 20 steps (full batch optimization to start with)
            print 'epoch #' + str(epoch)

            print 'Computing train error'
            total_loss = 0
            for i in range(0):
                iter_loss = sess.run(loss, feed_dict={is_training: True})
                total_loss += iter_loss
            #put the accamulated loss into acc_loss node
            writer.add_summary(sess.run(train_loss_summary, feed_dict={acc_loss: total_loss/(5000/bs)}), epoch)

            
            
            print 'Computing test error'
            total_loss = 0
            for i in range(0):
                iter_loss = sess.run(loss, feed_dict={is_training: False})
                total_loss += iter_loss
            #put the accamulated loss into acc_loss node
            writer.add_summary(sess.run(test_loss_summary, feed_dict={acc_loss: total_loss/(5000/bs)}), epoch)

            
            
            print 'Training'
            total_loss = 0
            for i in range(iters_per_epoch):
                #take a batch:
                batched_input_actual, batched_labels_actual = \
                    sess.run([batched_input, batched_labels], feed_dict={is_training: True})


                #this runs 1 iteration and keeps track of when should it do sesop.
                iter_loss = optimizer.run_sesop_iteration(sess=sess, _feed_dict={is_training: True} ,\
                    sesop_feed_dict=\
                    {is_training: True, batched_input: batched_input_actual, batched_labels: batched_labels_actual})


            writer.flush()
        coord.request_stop()
        coord.join(threads)



In [None]:

#def run_experiment(bs, sesop_freq, hSize, epochs, file_writer_suffix)
run_experiment(bs=100, sesop_freq=0.01, hSize=2, epochs=10, file_writer_suffix='2')
