In [None]:
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
import sys

# Directory of the LRBench checkout that provides the LRBench.lr.LRfunctions
# module imported further down. The default is the path this notebook was
# originally written against; set the LRBENCH_HOME environment variable to
# point at a different checkout instead of editing the notebook.
sys.path.insert(0, os.environ.get("LRBENCH_HOME", "/home/yanzhaowu/research/LRBench")) # LRFunctions module

# Output directory for the per-iteration PNG frames written by the
# optimization loop.
if not os.path.exists('figures'):
    os.makedirs('figures')

In [None]:
# cost function
def cost_func(x=None, y=None):
    '''Build the cost-function graph z = f(x, y).

    For visualizing the surface, call cost_func() with no arguments and feed
    the returned placeholder nodes so the whole grid is evaluated in one run.
    To optimize over variables, pass them in so the cost is attached to them.

    Args:
        x: None to create a float32 placeholder of shape [None, 1] as the
            x input. Otherwise the tensor/variable to use as x.
        y: None to create a float32 placeholder of shape [None, 1] as the
            y input. Otherwise the tensor/variable to use as y.

    Returns:
        Tuple (x, y, z) where x and y are the input tensors actually used and
        z is the output tensor.
    '''
    # Test against None explicitly. Truth-testing a tf.Tensor is not allowed
    # in graph mode, and a legitimate falsy input (e.g. 0.0) must not be
    # silently swapped for a placeholder.
    if x is None:
        x = tf.placeholder(tf.float32, shape=[None, 1])
    if y is None:
        y = tf.placeholder(tf.float32, shape=[None, 1])

    # two local minima near (0, 0)
    z = __f1(x, y)

    # two steep gaussian trenches (subtracted, so they dig into the surface)
    z -= __f2(x, y, x_mean=1.0, y_mean=-0.5, x_sig=0.25, y_sig=0.25)
    z -= __f2(x, y, x_mean=-1.0, y_mean=0.5, x_sig=0.2, y_sig=0.2)

    return x, y, z


# noisy hills of the cost function
def __f1(x, y):
    """Base term of the cost surface.

    Evaluates
        -sin(x^2) * cos(3 * y^2) * exp(-(x*y)^2) - 2.5 * exp(-(x + y)^2)
    element-wise on the inputs.

    Args:
        x: x input (tensor or variable).
        y: y input (tensor or variable).

    Returns:
        Tensor holding the value of the expression above.
    """
    xy = x * y
    xy_sum = x + y

    # oscillating part, damped by exp(-(x*y)^2)
    wave = -1.0 * tf.sin(x * x) * tf.cos(3.0 * y * y) * tf.exp(-xy * xy)
    # exp(-(x + y)^2) term, largest where x + y == 0
    diagonal = 2.5 * tf.exp(-xy_sum * xy_sum)

    return wave - diagonal


# bivar gaussian hills of the cost function
def __f2(x, y, x_mean, y_mean, x_sig, y_sig):
    """Axis-aligned bivariate Gaussian density evaluated at (x, y).

    Computes
        1 / (2*pi*x_sig*y_sig)
            * exp(-(x - x_mean)^2 / (2*x_sig^2) - (y - y_mean)^2 / (2*y_sig^2))

    Args:
        x: x input (tensor or variable).
        y: y input (tensor or variable).
        x_mean: centre of the Gaussian along x.
        y_mean: centre of the Gaussian along y.
        x_sig: standard deviation along x.
        y_sig: standard deviation along y.

    Returns:
        Tensor holding the density value.
    """
    def _axis_exponent(value, mean, sigma):
        # -(value - mean)^2 / (2 * sigma^2) for a single axis
        return (-1 * tf.square(value - mean)) / (2 * tf.square(sigma))

    scale = 1 / (2 * np.pi * x_sig * y_sig)
    exponent = _axis_exponent(x, x_mean, x_sig) + _axis_exponent(y, y_mean, y_sig)
    return scale * tf.exp(exponent)

# pyplot settings
# Interactive mode on; a single small, high-dpi 3D axes is created and the
# subplot margins are set to zero so the axes fill the whole figure.
plt.ion()
fig = plt.figure(figsize=(3, 2), dpi=300)
ax = fig.add_subplot(111, projection='3d')
plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
# Legend styling. NOTE(review): no legend is drawn in the visible code, so
# these settings currently have no visible effect -- confirm.
params = {'legend.fontsize': 3,
          'legend.handlelength': 3}
plt.rcParams.update(params)
# Hide axis lines, ticks and labels.
plt.axis('off')

# input (x, y) and output (z) nodes of cost-function graph
# Called without arguments, so x and y are the placeholders created inside
# cost_func.
x, y, z = cost_func()

# visualize cost function as a 3D surface
# Sample both axes on [-1.5, 1.5) with a 0.005 step and build the 2-D grid.
x_val = y_val = np.arange(-1.5, 1.5, 0.005, dtype=np.float32)
x_val_mesh, y_val_mesh = np.meshgrid(x_val, y_val)
# Flatten the grid into column vectors to match the [None, 1] placeholders.
x_val_mesh_flat = x_val_mesh.reshape([-1, 1])
y_val_mesh_flat = y_val_mesh.reshape([-1, 1])
# Evaluate the cost at every grid point in a single session run.
with tf.Session() as sess:
    z_val_mesh_flat = sess.run(z, feed_dict={x: x_val_mesh_flat, y: y_val_mesh_flat})
# Back to the 2-D grid layout expected by plot_surface. z_val_mesh is also
# read by outlier() to bound the valid z range.
z_val_mesh = z_val_mesh_flat.reshape(x_val_mesh.shape)
# NOTE(review): `levels` is not used anywhere in the visible code; it looks
# like a leftover from a contour-plot version -- confirm before removing.
levels = np.arange(-10, 1, 0.05)
# Colour normalisation: the lower limit is the surface minimum, the upper
# limit is twice the surface maximum.
norm = plt.Normalize(z_val_mesh.min(), 2.0*z_val_mesh.max())
# Draw the surface with per-face hsv colours, no shading, and a low zorder
# (the optimizer markers and paths are drawn later with higher zorder).
surf = ax.plot_surface(x_val_mesh, y_val_mesh, z_val_mesh, linewidths=0.1, facecolors=cm.hsv(norm(z_val_mesh)), shade=False, zorder=0)
# RGBA with alpha 0, i.e. a fully transparent face colour.
# NOTE(review): this presumably overrides the facecolors passed above --
# confirm the intended look.
surf.set_facecolor((0,0,0,0))

def outlier(x_val, y_val, z_val):
    """Report whether a point lies outside the plotted region.

    The region is x and y within [-1.5, 1.5] and z within the range of the
    rendered surface (module-level z_val_mesh), bounds included.

    A NaN coordinate counts as an outlier. The checks are written as
    "not inside the range" rather than "below or above the range" because
    every comparison with NaN is False, which would otherwise let a diverged
    optimizer step pass as in range.

    Args:
        x_val: x coordinate of the point.
        y_val: y coordinate of the point.
        z_val: cost value at the point.

    Returns:
        True if any coordinate is out of range or NaN, False otherwise.
    """
    if not (-1.5 <= x_val <= 1.5):
        return True
    if not (-1.5 <= y_val <= 1.5):
        return True
    if not (z_val_mesh.min() <= z_val <= z_val_mesh.max()):
        return True
    return False

from LRBench.lr.LRfunctions import getLRFunction

# starting location for variables
x_i = 0.75
y_i = 1.0

# learning rate is fed at every step so each policy can supply its own value
lr = tf.placeholder(tf.float32)
totalIterations=50

# one entry per learning-rate policy; each dict is expanded into
# getLRFunction(**entry)
LRparameters = np.array([{'lrPolicy': 'FIX', 'k0': 0.025},
                {'lrPolicy': 'NSTEP', 'k0': 0.05, 'gamma': 0.1, 'l': [35, 45, 50]},
                {'lrPolicy': 'TRIEXP', 'k0': 0.05, 'k1': 0.3, 'l': 50, 'gamma': 0.94},
                ])

# plot colour for each policy, in the same order as LRparameters
colors = ['k', # FIX
          'r', # NSTEP
          'y', # TRIEXP
         ]
assert len(colors) >= len(LRparameters), 'need one colour per LR policy'

# Human-readable parameter labels (raw strings so that \gamma is not read as
# an escape sequence; the string values are unchanged).
# NOTE(review): these labels are not used in the visible code, and their `l`
# values (l=[50, 100, 150,] and l=100) do not match LRparameters above
# ([35, 45, 50] and 50) -- confirm which is intended.
lr_param = [r'k0=0.025',
            r'k0=0.05,$\gamma$=0.1,l=[50, 100, 150,]',
            r'k0=0.05,k1=0.3,l=100,$\gamma$=0.94',
           ]

# create variable pair (x, y) for each optimizer
# One pair per configured policy. The variables are scalars initialised from
# the Python floats; the second positional argument of tf.Variable is
# `trainable`, not a shape, so no shape-like list is passed here.
x_var, y_var = [], []
for i in range(len(LRparameters)):
    x_var.append(tf.Variable(x_i, dtype=tf.float32))
    y_var.append(tf.Variable(y_i, dtype=tf.float32))

# create separate graph for each variable pair
cost = []
for i in range(len(LRparameters)):
    cost.append(cost_func(x_var[i], y_var[i])[2])

# define method of gradient descent for each graph
# A single Nesterov-momentum optimizer is shared; each minimize() call adds an
# independent update op for one variable pair.
optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9, use_nesterov=True) #Nesterov

# ops_param rows: [policy name, learning-rate function, plot colour]
ops_param = []
ops = []

for i, lrp in enumerate(LRparameters):
    ops_param.append([lrp['lrPolicy'], getLRFunction(**lrp), colors[i]])
    ops.append(optimizer.minimize(cost[i]))

# kept as an array so rows/columns can be addressed as ops_param[i, j]
ops_param = np.array(ops_param)

print('finish init!')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Last plotted location of every optimizer, used to draw a line to the
    # current location. The lists are sized up front (one slot per optimizer)
    # so that an optimizer whose first step is rejected as an outlier neither
    # shifts the indices of the others nor causes an IndexError later.
    # All paths start at (x_i, y_i); the starting z is taken from the first
    # accepted step, hence the None sentinel.
    last_x = [x_i] * len(ops)
    last_y = [y_i] * len(ops)
    last_z = [None] * len(ops)
    plot_cache = [None for _ in range(len(ops))]

    # loop each step of the optimization algorithm
    steps = totalIterations
    for step in range(steps):
        for i in range(len(ops_param)):
            # run a step of optimization and collect new x and y variable values
            # NOTE(review): the variables and cost are fetched in the same
            # run() call as the update op, so whether they reflect the state
            # before or after the update is presumably unspecified -- confirm.
            learningRate = ops_param[i][1](step)
            _, x_val, y_val, z_val = sess.run([ops[i], x_var[i], y_var[i], cost[i]], feed_dict={lr: learningRate})
            if outlier(x_val, y_val, z_val):
                # outside the rendered region: draw nothing for this step
                continue
            print(' '.join(map(str, (x_val, y_val, z_val))))

            # move dot to the current value
            if plot_cache[i]:
                plot_cache[i].pop(0).remove()
            plot_cache[i] = ax.plot([x_val], [y_val], [z_val], marker='o', markersize=2.5, label=ops_param[i, 0], color=ops_param[i, 2], zorder=11)

            # draw a line from the previous value
            if last_z[i] is None:
                last_z[i] = z_val
            ax.plot([last_x[i], x_val], [last_y[i], y_val], [last_z[i], z_val], linewidth=0.7, color=ops_param[i, 2], zorder=10)
            last_x[i] = x_val
            last_y[i] = y_val
            last_z[i] = z_val

        # one frame per iteration: figures/<step>.png
        plt.savefig(os.path.join('figures', str(step) + '.png'))
        print('iteration: {}'.format(step))

        # for live animation in an interactive backend, uncomment:
        #plt.pause(0.0001)

print("done")