In [1]:
# start a tensorflow session
from pylab import *
import tensorflow as tf
import numpy as np
# dtype used for the tensors and variables built in the cells below
DTYPE = tf.float32
# discard any graph left over from a previous run of this cell
tf.reset_default_graph()
# the session used by the later `sess.run(...)` and `.eval()` calls
sess = tf.InteractiveSession()
# NOTE(review): `tf.Graph().as_default()` returns a context manager; it is never
# entered with `with` here, so `g` is that context manager (not a Graph) and the
# default graph is presumably unchanged. `g` is not used in the visible cells -- confirm
# whether `tf.get_default_graph()` was intended.
g = tf.Graph().as_default()

In [33]:
# the EG (exponentiated gradient) algorithm
def eg( loss, w, shift_rate=tf.constant(0, dtype=DTYPE), eta_0=None, max_its=100, tol=tf.constant(1e-6), step_rule='sqrt' ):
    '''
    Run batch exponentiated-gradient (EG) updates on `loss` inside a tf.while_loop.

    Each iteration multiplies the weights by exp(-eta * grad), renormalizes them
    by their sum and mixes in `shift_rate`. The loop stops once `max_its`
    iterations have run or the relative L1 change of the weights drops below `tol`.

    Parameters
    -----------
    loss: callable taking a weight tensor and returning a loss tensor
    w: tf.Tensor, the initial (simplex) weights
    shift_rate: tf.Tensor, amount mixed into every weight after normalization
    eta_0: base step size; when None it defaults to sqrt( 2 log N / max_its ),
        where N is the static size of the first dimension of `w`
    max_its: int, maximum number of iterations
    tol: tf.Tensor, threshold on the relative change in weights
    step_rule: one of 'constant' (eta_0 / G_inf), 'sqrt' (eta_0 / sqrt(1 + k))
        or 'inv' (eta_0 / (1 + k))

    Returns
    -----------
    weights: tf.Tensor
    relative change in weights: tf.Tensor
    iteration count: tf.Tensor
    max grad: tf.Tensor
    losses: tf.TensorArray of losses to date

    Raises
    -----------
    ValueError: if `step_rule` is not one of the supported rules, or if `eta_0`
        is None and the first dimension of `w` is not statically known
    '''
    # fail early with a clear message; otherwise `eta` would be unbound inside
    # the loop body while tf.while_loop is building the graph
    valid_step_rules = ( 'constant', 'sqrt', 'inv' )
    if step_rule not in valid_step_rules:
        raise ValueError( 'step_rule must be one of {}, got {!r}'.format( valid_step_rules, step_rule ) )

    def _eg_step( w, chg_w, k, G_inf, losses ):
        '''
        One EG step with no sleeping expert correction.

        Parameters
        --------------
        w: current weights
        chg_w: relative change from the previous step (recomputed here)
        k: iteration counter
        G_inf: largest absolute gradient entry seen so far
        losses: TensorArray of losses to date

        # for sleeping expert do this
        # awake_sum = tf.reduce_sum( tf.where( nan_mask, tf.zeros_like(w), w) )
        # w_s = tf.where( nan_mask, tf.zeros_like(w), tf.divide( w, awake_sum ) )

        # theoretical optimal eta = sqrt( 2 log N / K ) / G_inf
        '''
        grad = tf.gradients( loss(w), w )[0]
        # NaN gradient entries are treated as zero gradient
        nan_mask = tf.is_nan( grad )
        grad = tf.where( nan_mask, tf.zeros_like(grad, dtype=DTYPE), grad )
        # find the new max gradient
        G_inf = tf.maximum( G_inf, tf.reduce_max( tf.abs( grad ) ) )
        # pre-condition: shift so the smallest gradient entry is zero
        grad = grad - tf.reduce_min( grad )
        if step_rule == 'constant':
            eta = eta_0 / G_inf
        elif step_rule == 'sqrt':
            eta = eta_0 / tf.sqrt( 1 +  tf.cast( k, dtype=DTYPE) )
        else:  # 'inv' -- step_rule was validated above
            eta = eta_0 / ( 1 + tf.cast( k, dtype=DTYPE) )
        w_n = w * tf.exp( -eta  * grad )
        # NOTE(review): this adds `shift_rate` to every weight, so for a non-zero
        # shift_rate the result no longer sums to 1; confirm whether
        # `shift_rate / N` was intended. Behavior is left unchanged here.
        w_n = w_n / tf.reduce_sum( w_n ) * (1 - shift_rate) + shift_rate
        chg_w = tf.reduce_sum( tf.abs(w_n - w) ) / tf.reduce_sum( tf.abs( w ))
        # build the loss at the new weights once and reuse it for logging and recording
        loss_n = loss( w_n )
        with tf.control_dependencies( [tf.Print( eta, [k, eta, loss_n, chg_w], '\tk, eta, loss(w), chg_w = ' )] ):
            return w_n, chg_w, k + 1, G_inf, losses.write( k, loss_n )

    def _continue_cond( w, chg_w, k, G_inf, losses):
        '''Keep iterating until max_its is reached or the weights stop changing.'''
        return tf.logical_not( tf.logical_or( k >= max_its, chg_w < tol  ) )

    if eta_0 is None:
        n_weights = w.get_shape().as_list()[0]
        if n_weights is None:
            raise ValueError( 'cannot derive a default eta_0: the first dimension of w is not statically known; pass eta_0 explicitly' )
        eta_0 = tf.constant( np.sqrt( 2 * np.log( n_weights ) / max_its), dtype=DTYPE  )
    # initial loop state: infinite change so the first iteration always runs
    chg_w = tf.constant( np.inf, dtype=DTYPE )
    k = tf.constant( 0, dtype=tf.int32 )
    G_inf = tf.constant(-np.inf, dtype=DTYPE)
    losses = tf.TensorArray( dtype=DTYPE, size=max_its, dynamic_size=True, clear_after_read=False, tensor_array_name='EG_losses' )
    w, chg_w, k, G_inf, losses = tf.while_loop( cond=_continue_cond, body=_eg_step, loop_vars=[w, chg_w, k, G_inf, losses] )
    return w, chg_w, k, G_inf, losses

# test EG on a trivial problem
#################################################################
# problem size: N weights, and a cost vector of length T (q * c needs T == N)
T = 100
N = 100
# unnormalized starting weights; the first EG step divides by their sum
q = tf.Variable(name='weights', initial_value=np.ones(N),  dtype=DTYPE)
# linearly increasing costs, so the minimizer puts its mass on the first entry
c = tf.constant( np.arange(T), dtype=DTYPE)
eta_0 = tf.Variable(name='eta0', initial_value=0.05,  dtype=DTYPE)

def loss( q ):
    '''Return the linear loss sum(q * c) for the given weights q.'''
    return tf.reduce_sum( q * c )

# tf.initialize_all_variables() is deprecated; the replacement named by the
# deprecation warning is tf.global_variables_initializer()
sess.run(tf.global_variables_initializer())
qf, chg_w, k, G_inf, losses = eg(loss, q, eta_0=eta_0)
print( loss(qf).eval(), '\n', qf.eval())



Instructions for updating:
Use `tf.global_variables_initializer` instead.


Instructions for updating:
Use `tf.global_variables_initializer` instead.


0.652233 
 [  6.05241776e-01   2.38923907e-01   9.43173319e-02   3.72325554e-02
   1.46979075e-02   5.80213312e-03   2.29044212e-03   9.04172543e-04
   3.56930279e-04   1.40901248e-04   5.56220439e-05   2.19572739e-05
   8.66783193e-06   3.42170324e-06   1.35074799e-06   5.33219747e-07
   2.10493369e-07   8.30940863e-08   3.28021130e-08   1.29489308e-08
   5.11170439e-09   2.01789097e-09   7.96578692e-10   3.14456877e-10
   1.24134772e-10   4.90032702e-11   1.93444739e-11   7.63639846e-12
   3.01453784e-12   1.19001597e-12   4.69768755e-13   1.85445463e-13
   7.32061980e-14   2.88988194e-14   1.14080642e-14   4.50342812e-15
   1.77776906e-15   7.01790620e-16   2.77037901e-16   1.09363249e-16
   4.31720749e-17   1.70425726e-17   6.72769815e-18   2.65581949e-18
   1.04840831e-18   4.13868394e-19   1.63378407e-19   6.44950333e-20
   2.54599956e-20   1.00505546e-20   3.96754434e-21   1.56622337e-21
   6.18280602e-22   2.44071772e-22   9.63494767e-23   3.80348339e-23
   1.50145679e-23   5.9

In [None]:
# scratch value; note that `a` is assigned again (0.01) in the data-generation cell
a = 0.66

In [None]:
# solve the discrepancy problem
####################################################
%matplotlib nbagg
from numba import jit
from numpy.random import multinomial
from pandas import *

# generate the data from a markov process
# T: number of time steps sampled (rows of X and beta_seq)
T = 10000
# N: number of features (columns of X and betas)
N = 100
# R: number of Markov states (rows of betas, size of the R x R matrix M)
R = 100


# `betas` must exist before markov_sample is defined: its `betas=betas` default
# is evaluated at definition time, so on a fresh kernel the original ordering
# (betas assigned after the def) raises NameError.
betas = randn( R, N )
a = 0.01
# sticky transition matrix: mostly stay in the current state, with a small
# uniform chance of moving to any state
M = (1-a) *np.eye(R) * (R-1)/R + a * np.ones( (R,R) ) / R
# normalize every row to sum to 1; keepdims makes the division row-wise
# (without it the row sums broadcast across columns, which is only
# equivalent when M is symmetric)
M = M / M.sum( axis=1, keepdims=True )


@jit(nopython=True)
def markov_sample( M, T=T, betas=betas, seed=1 ):
    '''
    Sample a state path of length T and the coefficient row of each visited state.

    Parameters
    -----------
    M: 2-d array; row M[r, :] is passed as the multinomial pvals for the next state
    T: number of steps to sample
    betas: 2-d array indexed by state; betas[r, :] is copied into the output at each step
    seed: NOTE(review): accepted but never used in the body -- sampling is not seeded here

    Returns
    -----------
    beta_seq: (T, N) array, row t holds betas[r_t, :]
    rs: length-T array of the visited state indices (stored as floats)

    N and R are read from the enclosing module scope.
    '''
    beta_seq = np.zeros( (T, N) )    
    # initial state
    r = multinomial( 1, np.ones(R)/R ).nonzero()[0][0]  
    rs = np.empty(T)
    # sample from the markov probability
    for t in range(T):      
        r = multinomial(1, pvals=M[r, :] ).nonzero()[0][0] 
        beta_seq[t,:] = betas[r, :]
        rs[t] = r
    return beta_seq, rs

# design matrix: column f is a sine wave completing f/2... cycles over the T steps
X = np.array( [ np.sin( np.arange(T) * 2 *  f / T * pi ) for f in arange(N) ]).T
beta_seq, rs =  markov_sample(M )
# noiseless response with time-varying coefficients, then add Gaussian noise
yorg = (X * beta_seq).sum(axis=1)
y = yorg + 0.1 * randn(T)

# optional diagnostic figure (disabled): sampled coefficients next to the state path
if False:
    figure()
    subplot(1, 2, 1)
    imshow(beta_seq, aspect='auto')
    subplot(1, 2, 2)
    plot(rs, alpha=0.1)

# first half of the series is the training set, second half the test set
X_trg, X_tst = X[:T//2], X[T//2:]
y_trg, y_tst = y[:T//2], y[T//2:]

# solve using OLS
from statsmodels.api import OLS
# fit on the training half, then report prediction/target correlation on both halves
ols = OLS( endog=y_trg, exog=X_trg ).fit()
print(
    'trg corr', Series( ols.predict( X_trg ) ).corr( Series( y_trg ) ),
    'tst corr', Series( ols.predict( X_tst ) ).corr( Series( y_tst ) )
)


# solve using discrepancy
# \min_{\beta_T} \sum_t q_t ||y_t - X_t \beta_T||^2 + \lambda ||\beta_T||^2
# where q = \argmin_q | \sum_t q_t L_t(\beta_T) - \sum_s L_s(\beta_s) |





    

In [None]:
# sanity check: expected (T, N), since markov_sample allocates beta_seq as np.zeros((T, N))
beta_seq.shape

In [None]:
# scratch check: index of the nonzero entry of a one-trial uniform multinomial draw over R outcomes
multinomial( 1, np.ones(R)/R).nonzero()[0][0]

In [None]:
# NOTE(review): `.nonzero` is referenced but not called here, so this cell shows
# the bound method rather than the index arrays -- add `()` if indices were intended
np.squeeze(multinomial( 1, np.ones(R)/R, size=1)).nonzero

In [None]:
# row sums of the transition matrix M (presumably expected to be 1 after normalization)
M.sum(axis=1)

In [None]:
%matplotlib nbagg
# heat map of the design matrix X: rows are time steps, columns are the sine features
imshow(X, aspect='auto')

In [None]:

# NOTE(review): relies on the IPython output cache entry Out[15], which only exists
# in the original interactive session; this cell will not survive Restart & Run All
Out[15][0].eval()