In [1]:
# start a tensorflow session
from pylab import *
import tensorflow as tf
import numpy as np
import matplotlib
matplotlib.style.use('seaborn-whitegrid')
DTYPE = tf.float32
EPS = 1e-9
tf.reset_default_graph()
sess = tf.InteractiveSession()

g = tf.Graph().as_default()

In [2]:
# the EG algorithsm
def eg( loss, w, shift_rate=tf.constant(0, dtype=DTYPE), eta_0=None, max_its=100, tol=tf.constant(1e-6), step_rule='sqrt' ):
    '''
    do batch eg on the given loss tensor. w are the simplex weights
    
    Parameters
    ----------
    loss: A function with returns losses
    w: the parameter tensor
        
    Returns
    -----------
    k: iteration counts    
    weights: tf.TensorArray of weights
    losses: TensorArray of losss
    relchg: TensorArray of relative changes    
    etas: tf.TensorArray of learning raters
    max grads: tf.TensorArray of max gradients
    '''    
    
    def _eg_step( k, weights, changes, losses, etas, max_grads ):
        '''
        eg step with no sleeping expert correction
        # for sleeping expert do this 
        # awake_sum = tf.reduce_sum( tf.where( nan_mask, tf.zeros_like(w), w) )
        # w_s = tf.where( nan_mask, tf.zeros_like(w), tf.divide( w, awake_sum ) )
        
        # theoretical optimal eta = sqrt( 2 log N / K ) / G_inf
        '''
        w = weights.read( k )
        grad = tf.gradients( loss(w), w )[0]
        nan_mask = tf.is_nan( grad )        
        grad = tf.where( nan_mask, tf.zeros_like(grad, dtype=DTYPE), grad )
        # find the new max gradient
        G_inf = tf.maximum( max_grads.read(k), tf.reduce_max( tf.abs( grad ) ) )        
        # pre-condition
        grad = grad - tf.reduce_min( grad )             
        if step_rule == 'constant':
            eta = eta_0 / G_inf
        elif step_rule == 'sqrt':
            eta = eta_0 / G_inf / tf.sqrt( 1 +  tf.cast( k, dtype=DTYPE) )
        elif step_rule == 'inv':
            eta = eta_0 / G_inf / ( 1 + tf.cast( k, dtype=DTYPE) )
        w_n = w * tf.exp( -eta  * grad )
        w_n = w_n / tf.reduce_sum( w_n ) * (1 - shift_rate) + shift_rate     
        # apply the updates
        k = k + 1
        etas = etas.write( k, eta )
        weights = weights.write( k, w_n )
        max_grads = max_grads.write( k, G_inf )
        losses = losses.write( k, loss( weights.read(k ) ) )
        changes = changes.write( k, tf.reduce_sum( tf.abs( weights.read(k) - weights.read(k-1) / tf.reduce_sum( tf.abs( weights.read(k-1) ) ) ) ) )
        print_op = tf.Print( k, [k, eta, losses.read(k), changes.read(k)], 'k, eta, loss, change = \t' )
        with tf.control_dependencies( [print_op] ):            
            return k, weights, changes, losses, etas, max_grads
        
    def _continue_cond( k, weights, changes, *args ):
        chg_w = changes.read(k)
        return tf.logical_and( k < max_its - 1,  chg_w > tol  ) 
    
    with tf.name_scope('EG'):
        if eta_0 is None:
            eta_0 = tf.constant( np.sqrt( 2 * np.log( w.get_shape().as_list()[0] ) / max_its), dtype=DTYPE  )         
        k = tf.constant( 0, dtype=tf.int32 )
        weights = tf.TensorArray( 
            dtype=DTYPE, size=max_its, dynamic_size=True, clear_after_read=False, tensor_array_name='weights' 
        ).write(k, w)
        changes = tf.TensorArray( 
            dtype=DTYPE, size=max_its, dynamic_size=True, clear_after_read=False, tensor_array_name='changes' 
        ).write( k, np.inf )
        losses = tf.TensorArray( 
            dtype=DTYPE, size=max_its, dynamic_size=True, clear_after_read=False, tensor_array_name='losses'
        ).write( k, loss(w) )
        etas = tf.TensorArray( 
            dtype=DTYPE, size=max_its, dynamic_size=True, clear_after_read=False, tensor_array_name='etas'
            
        ).write( k, eta_0 )
        max_grads = tf.TensorArray( 
            dtype=DTYPE, size=max_its, dynamic_size=True, clear_after_read=False, tensor_array_name='max_grads'
        ).write( k, EPS )
        k, weights, changes, losses, etas, max_grads = tf.while_loop( 
            cond=_continue_cond, 
            body=_eg_step, 
            loop_vars=[k, weights, changes, losses, etas, max_grads ],            
            parallel_iterations=1,
            name='while_loop',
        )     
        return  k, weights, changes, losses, etas, max_grads
    

# test EG on a trivial problem
#################################################################
N = 100
q = tf.Variable(name='weights', initial_value=np.ones(N),  dtype=DTYPE)
c = tf.constant( np.arange(N), dtype=DTYPE)
eta_0 = tf.Variable(name='eta0', initial_value=0.05,  dtype=DTYPE)

def loss( q ):
    '''return the loss function for a given set of parameters'''
    return tf.reduce_sum( q * c )

k, weights, changes, losses, etas, max_grads = eg(loss, q, eta_0=eta_0)


tb_writer = tf.summary.FileWriter( '/home/ubuntu/deep-learning/data/otb', graph=sess.graph )
merged = tf.summary.merge_all()

sess.run(tf.global_variables_initializer())
print(k.eval(), changes.read(k).eval(), '\n', weights.read(k).eval())

subplot(221)
plot( etas.stack().eval() )




99 0.00123191 
 [ 0.01531384  0.01517148  0.01503047  0.01489076  0.01475236  0.01461524
  0.0144794   0.01434482  0.01421149  0.0140794   0.01394853  0.01381888
  0.01369044  0.0135632   0.01343713  0.01331223  0.0131885   0.01306591
  0.01294446  0.01282416  0.01270496  0.01258687  0.01246988  0.01235397
  0.01223915  0.01212538  0.01201269  0.01190102  0.01179041  0.01168082
  0.01157225  0.01146469  0.01135813  0.01125255  0.01114797  0.01104435
  0.01094169  0.01083999  0.01073924  0.01063943  0.01054053  0.01044256
  0.0103455   0.01024934  0.01015408  0.01005968  0.00996619  0.00987356
  0.00978179  0.00969087  0.00960079  0.00951155  0.00942315  0.00933557
  0.00924878  0.00916282  0.00907765  0.00899329  0.0089097   0.00882688
  0.00874484  0.00866356  0.00858303  0.00850325  0.00842422  0.00834592
  0.00826835  0.0081915   0.00811535  0.00803993  0.00796521  0.00789116
  0.00781782  0.00774514  0.00767316  0.00760184  0.00753118  0.00746118
  0.00739184  0.00732313  0.0072550

AttributeError: 'TensorArray' object has no attribute 'stack'

In [3]:
tf.__version__



'0.12.1'

In [None]:
# solve the discrepancy problem
####################################################
%matplotlib nbagg
from numba import jit
from numpy.random import multinomial
from pandas import *

# generate the data from a markov process
T = 10000
N = 100
R = 100

@jit(nopython=True)
def markov_sample( M, T=T, betas=betas, seed=1 ):
    beta_seq = np.zeros( (T, N) )    
    # initial state
    r = multinomial( 1, np.ones(R)/R ).nonzero()[0][0]  
    rs = np.empty(T)
    # sample from the markov probability
    for t in range(T):      
        r = multinomial(1, pvals=M[r, :] ).nonzero()[0][0] 
        beta_seq[t,:] = betas[r, :]
        rs[t] = r
    return beta_seq, rs

X = np.array( [ np.sin( np.arange(T) * 2 *  f / T * pi ) for f in arange(N) ]).T
betas = randn( R, N )
a = 0.01
M = (1-a) *np.eye(R) * (R-1)/R + a * np.ones( (R,R) ) / R
M = M / M.sum( axis=1)
beta_seq, rs =  markov_sample(M )
yorg = (X * beta_seq).sum(axis=1)
y = yorg + 0.1 * randn(T)

if False:
    figure()
    subplot(121)
    imshow(beta_seq, aspect='auto')
    subplot(122)
    plot(rs, alpha=0.1)   

X_trg, y_trg = X[0:T//2], y[0:T//2]
X_tst, y_tst = X[T//2:], y[T//2:]

# solve using OLS
from statsmodels.api import OLS
ols = OLS( exog=X_trg, endog=y_trg).fit()
print('trg corr', Series( ols.predict( X_trg )).corr(Series(y_trg)) , 'tst corr', Series( ols.predict( X_tst)).corr(Series(y_tst) ) )


# solve using discrpenacy
# \min_beta_T |sum_t q_t ||y_t - X_t \beta_T||^2  + lambda ||beta_T||^2
# where q_t = \argmin_q | \sum_t q_t L_t(\beta_T) - \sum_s L_s(\beta_s) )





    

In [None]:
beta_seq.shape

In [None]:
multinomial( 1, np.ones(R)/R).nonzero()[0][0]

In [None]:
np.squeeze(multinomial( 1, np.ones(R)/R, size=1)).nonzero

In [None]:
M.sum(axis=1)

In [None]:
%matplotlib nbagg
imshow(X, aspect='auto')

In [None]:

Out[15][0].eval()